In [24]:
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn as nn
import torch

In [25]:
# Load the training table from 'train.csv' (path is relative to the working directory).
train_csv = pd.read_csv('train.csv')

In [26]:
#train_csv.head()
#train_csv.info()
#train_csv.describe()

In [27]:
# Split the frame into labels and pixel columns WITHOUT mutating `train_csv`.
# An in-place drop makes the cell non-idempotent: running it a second time
# raises KeyError because the 'label' column is already gone.
train_labels = train_csv['label'].values
# Everything except 'label' is reshaped to (N, channels=1, 28, 28).
train_features = train_csv.drop('label', axis=1).values.reshape(-1, 1, 28, 28)

In [28]:
#train_features.shape
#train_labels.shape
#train_labels[0:4]

In [29]:
# Read the test table and give it the (N, 1, 28, 28) image layout.
test_csv = pd.read_csv('test.csv')
test_features = test_csv.values.reshape(
    -1, 1, 28, 28
)
# Last expression of the cell: display the resulting shape.
test_features.shape

(28000, 1, 28, 28)

In [30]:
def show_fashion_mnist(images, labels, figsize=(12,12),size = 10):
    """Show the first ``size`` images in one row, each titled with its label.

    Args:
        images: Indexable collection; ``images[i][0, :, :]`` is passed to
            ``imshow`` (i.e. channel 0 of image ``i``).
        labels: Indexable collection; ``labels[i]`` becomes the title of image ``i``.
        figsize: Figure size forwarded to ``plt.subplots``.
        size: Number of leading images to draw.
    """
    # squeeze=False always returns a 2-D array of Axes. With the default
    # squeezing, size=1 yields a single Axes object that cannot be iterated.
    # The underscore marks the figure handle as intentionally unused.
    _, axes = plt.subplots(1, size, figsize=figsize, squeeze=False)
    for idx, ax in enumerate(axes[0]):
        ax.imshow(images[idx][0, :, :])
        ax.set_title(labels[idx])
        # Hide tick marks/labels; they carry no information for image tiles.
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()

In [31]:
#plt.figure(figsize=(0.9,0.9))
#plt.imshow(train_features[1][0,:,:],cmap='gray')
#plt.show()
#show_fashion_mnist(train_features, train_labels)

In [32]:
# Convert the arrays to tensors. `torch.as_tensor` accepts NumPy arrays as well
# as existing tensors, so re-running this cell does not trigger the
# copy-construct warning that `torch.tensor(<tensor>)` emits.
train_features = torch.as_tensor(train_features, dtype=torch.float)
# Labels are class indices: keep them as int64, the dtype nn.CrossEntropyLoss
# expects for index targets. Downstream `.long()` calls become no-ops.
train_labels = torch.as_tensor(train_labels, dtype=torch.long)
test_features = torch.as_tensor(test_features, dtype=torch.float)

In [33]:
# Size of the first dimension of train_features, i.e. the number of training images.
len(train_features)

42000

# 网络层

In [60]:
#net
class Flatten(torch.nn.Module):
    """Collapse every dimension after the first into a single one."""

    def forward(self, x):
        # Keep dimension 0 and let view() infer the flattened length.
        leading = x.shape[0]
        return x.view(leading, -1)

class Reshape(torch.nn.Module):
    """View the input as a batch of single-channel 28x28 images."""

    def forward(self, x):
        # Target layout is (B, C, H, W); B is inferred from the element count.
        target_shape = (-1, 1, 28, 28)
        return x.view(*target_shape)

class digit_net(nn.Module):
    """Convolutional classifier: two conv/sigmoid/avg-pool stages, then three linear layers.

    Shape comments below assume a (B, 1, 28, 28) input; the output is (B, 10).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        feature_layers = [
            nn.Conv2d(1, 6, kernel_size=5, padding=2),   # (B, 1, 28, 28) -> (B, 6, 28, 28)
            nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),       # (B, 6, 28, 28) -> (B, 6, 14, 14)
            nn.Conv2d(6, 16, kernel_size=5),             # (B, 6, 14, 14) -> (B, 16, 10, 10)
            nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),       # (B, 16, 10, 10) -> (B, 16, 5, 5)
        ]
        # Fully connected head operating on the flattened 16*5*5 = 400 features.
        head_layers = [
            nn.Linear(16 * 5 * 5, 120),
            nn.Sigmoid(),
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, 10),
        ]
        self.conv = nn.Sequential(*feature_layers)
        self.dense = nn.Sequential(*head_layers)

    def forward(self, x):
        feature_maps = self.conv(x)
        # Flatten everything except the batch dimension before the linear head.
        flattened = feature_maps.view(x.shape[0], -1)
        return self.dense(flattened)


In [35]:
def semilogy(x_vals, y_vals, x_label, y_label, x2_vals=None, y2_vals=None,
             legend=None, figsize=(3.5, 2.5)):
    """Plot one or two series on a logarithmic y-axis in a new figure.

    Args:
        x_vals, y_vals: First series (solid line).
        x_label, y_label: Axis labels.
        x2_vals, y2_vals: Optional second series (dotted line); drawn only when
            both are provided.
        legend: Optional legend entries; applied only when the second series is
            drawn and a legend was given.
        figsize: Size of the new figure.
    """
    plt.figure(figsize=figsize)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.semilogy(x_vals, y_vals)
    # Compare against None explicitly: truth-testing a NumPy array or tensor
    # with more than one element raises instead of answering "was it passed?".
    if x2_vals is not None and y2_vals is not None:
        plt.semilogy(x2_vals, y2_vals, linestyle=':')
        if legend is not None:
            plt.legend(legend)

In [36]:
def evaluate_accuracy(data_iter, net,device=torch.device('cpu')):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        data_iter: Iterable of ``(X, y)`` batches. ``net(X)`` must return per-class
            scores with classes along dim 1; ``y`` holds one class index per
            sample, shaped (B,) or (B, 1).
        net: Model to evaluate. It is switched to eval mode and left in eval mode.
        device: Device every batch is moved to before the forward pass. The
            model itself is not moved; the caller is responsible for placing it.

    Returns:
        Accuracy as a Python float; 0.0 when ``data_iter`` yields no samples
        (instead of raising ZeroDivisionError).
    """
    correct, total = 0, 0
    net.eval()
    with torch.no_grad():
        for X, y in data_iter:
            X = X.to(device)
            # Flatten labels so a (B, 1) column compares element-wise with the
            # (B,) predictions instead of broadcasting to (B, B).
            y = y.to(device).long().reshape(-1)
            # argmax over dim 1 picks the highest-scoring class per sample,
            # e.g. [[0.2, 0.4, 0.8], [0.1, 0.6, 0.3]] -> [2, 1].
            predicted = torch.argmax(net(X), dim=1)
            correct += int((predicted == y).sum())
            total += y.shape[0]
    if total == 0:
        return 0.0
    return correct / total

# 训练模型

In [68]:
def train(net, train_features,train_labels, test_features, test_labels, 
                  num_epochs,batch_size,lr,weight_decay,criterion ):
    """Train ``net`` with Adam and report accuracy averaged over all epochs.

    Args:
        net: Model to optimise (updated in place).
        train_features, train_labels: Training tensors; labels are cast to long
            before being handed to ``criterion``.
        test_features, test_labels: Optional validation tensors; pass
            ``test_features=None`` to skip validation.
        num_epochs: Number of passes over the training data.
        batch_size: Mini-batch size for both data loaders.
        lr, weight_decay: Adam hyper-parameters.
        criterion: Loss called as ``criterion(scores, long_labels)``.

    Returns:
        ``(train_acc, valid_acc)``: two float32 tensors of shape (1,), each the
        mean of the per-epoch accuracies. ``valid_acc`` stays 0 when no
        validation data is given.
    """
    optimizer = torch.optim.Adam(params=net.parameters(), lr=lr, weight_decay=weight_decay)
    train_iter = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(train_features, train_labels),
        batch_size=batch_size, shuffle=True)
    has_validation = test_features is not None
    if has_validation:
        test_iter = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(test_features, test_labels),
            batch_size=batch_size, shuffle=True)

    # Running sums of the per-epoch accuracies.
    train_acc_total = torch.tensor([0.0], dtype=torch.float32)
    valid_acc_total = torch.tensor([0.0], dtype=torch.float32)
    for _ in range(num_epochs):
        epoch_hits = torch.tensor([0.0], dtype=torch.float32)
        samples_seen = 0
        for inputs, targets in train_iter:
            # Re-enter training mode every batch: evaluate_accuracy() leaves the
            # model in eval mode at the end of the previous epoch.
            net.train()
            optimizer.zero_grad()
            targets = targets.long()
            scores = net(inputs)
            batch_loss = criterion(scores, targets)

            batch_loss.backward()
            optimizer.step()
            with torch.no_grad():
                # Accuracy is measured on the scores computed before this step.
                matches = torch.argmax(scores, dim=1) == targets
                epoch_hits += torch.sum(matches).float()
                samples_seen += targets.shape[0]
        if has_validation:
            valid_acc_total += evaluate_accuracy(test_iter, net)
        train_acc_total += epoch_hits / samples_seen
    return train_acc_total / num_epochs, valid_acc_total / num_epochs

# K折

In [43]:
def get_k_fold(i,k,train_features, train_labels):
    """Split the data into ``k`` contiguous folds and hold out fold ``i``.

    Each fold has ``len(train_features) // k`` samples; trailing samples that do
    not fill a whole fold are left out of both splits.

    Args:
        i: Index of the validation fold, ``0 <= i < k``.
        k: Number of folds, at least 2.
        train_features: Tensor indexed along dim 0 (any number of trailing dims).
        train_labels: Tensor with one entry per sample along dim 0.

    Returns:
        ``(features_train, labels_train, features_valid, labels_valid)``.

    Raises:
        ValueError: If ``k < 2``, ``i`` is outside ``[0, k)``, or there are fewer
            samples than folds.
    """
    if k < 2:
        raise ValueError('k must be at least 2, got %r' % (k,))
    if not 0 <= i < k:
        raise ValueError('fold index i must satisfy 0 <= i < k, got i=%r, k=%r' % (i, k))
    fold_size = len(train_features) // k
    if fold_size == 0:
        raise ValueError('need at least k=%r samples, got %d' % (k, len(train_features)))

    start, stop = i * fold_size, (i + 1) * fold_size
    features_valid, labels_valid = train_features[start:stop], train_labels[start:stop]
    # Training data is everything before the held-out fold plus everything after
    # it (up to the last complete fold), joined with a single concatenation.
    tail_stop = k * fold_size
    features_train = torch.cat(
        (train_features[:start], train_features[stop:tail_stop]), dim=0)
    labels_train = torch.cat(
        (train_labels[:start], train_labels[stop:tail_stop]), dim=0)
    return features_train,labels_train, features_valid , labels_valid

In [41]:
def k_fold(k, train_features, train_labels, num_epochs, lr , weight_decay,batch_size,criterion,
           net_factory=None):
    """Run k-fold cross-validation and return the mean train / validation accuracy.

    Args:
        k: Number of folds (at least 2).
        train_features, train_labels: Full training tensors, split by ``get_k_fold``.
        num_epochs, lr, weight_decay, batch_size, criterion: Forwarded to ``train``.
        net_factory: Optional zero-argument callable returning a fresh model; when
            given, every fold trains its own model. When omitted, the
            module-level ``net`` is used for every fold.
            NOTE(review): in that fallback the weights carry over between folds,
            so later folds validate on samples the model has already been trained
            on; pass ``net_factory`` for an unbiased estimate.

    Returns:
        ``(mean_train_acc, mean_valid_acc)`` as Python floats, averaged over folds.

    Raises:
        ValueError: If ``k < 2``.
    """
    if k < 2:
        raise ValueError('k must be at least 2, got %r' % (k,))
    # The accumulators must exist before the first `+=`.
    train_acc_sum, valid_acc_sum = 0.0, 0.0
    for i in range(k):
        data = get_k_fold(i,k,train_features, train_labels)
        model = net_factory() if net_factory is not None else net
        train_l, test_l = train(model, *data, num_epochs, batch_size, lr, weight_decay, criterion)
        # `train` hands back tensors; normalise to flat lists of floats so the
        # code below works for a single averaged value as well as a per-epoch curve.
        train_curve = torch.as_tensor(train_l, dtype=torch.float32).reshape(-1).tolist()
        valid_curve = torch.as_tensor(test_l, dtype=torch.float32).reshape(-1).tolist()
        train_acc_sum += train_curve[-1]
        valid_acc_sum += valid_curve[-1]
        # Plot the first fold only when there is exactly one value per epoch;
        # otherwise the x and y series would have mismatched lengths.
        if i == 0 and len(train_curve) == num_epochs == len(valid_curve):
            semilogy(range(1, num_epochs + 1), train_curve, 'epochs', 'accuracy',
                     range(1, num_epochs + 1), valid_curve,
                     ['train', 'valid'])
        # The reported metric is classification accuracy, not RMSE.
        print('fold %d, train acc %f, valid acc %f' % (i, train_curve[-1], valid_curve[-1]))
    return train_acc_sum / k, valid_acc_sum / k

In [69]:
# Hyper-parameters for the cross-validation run.
# NOTE(review): drop_prob is not read by any function in the cells shown here.
k, num_epochs, lr, weight_decay, batch_size ,drop_prob = 10, 100, 0.01, 0, 64,0.6
net = digit_net()
criterion = nn.CrossEntropyLoss()
train_l, valid_l = k_fold(k, train_features, train_labels, num_epochs, lr , weight_decay,batch_size,criterion)
# k_fold reports mean classification accuracies, so label them as such (not RMSE).
print('%d-fold validation: avg train accuracy %f, avg valid accuracy %f' % (k, train_l, valid_l))

KeyboardInterrupt: 