In [12]:
import torch
import torch.utils.data
import numpy as np
from torch import nn, optim
from torch.autograd import Variable
import torch.nn.functional as F
from torchvision import datasets, transforms


import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [13]:
train = pd.read_csv("C:/Users/dell/Desktop/mnist/train.csv").values
test  = pd.read_csv("C:/Users/dell/Desktop/mnist/test.csv").values

In [14]:
train.shape

(42000, 785)

In [15]:
X_data  = train[:, 1:].reshape(train.shape[0], 1, 28, 28) # 从第一列开始读取, 并reshape成(4200,1,28,28)
X_data  = X_data.astype(float)
X_data /= 255.0                                           # 归一化
X_data  = torch.from_numpy(X_data);                       # 转换成tensor类型

In [25]:
X_label = train[:,0];                                     # label在第零列, 所以读取第零列
X_label = X_label.astype(int);
X_label = torch.from_numpy(X_label);
X_label = X_label.view(train.shape[0],-1);               # 从[42000]变为 [42000,1] 

In [26]:
print (X_data.size(), X_label.size())

torch.Size([42000, 1, 28, 28]) torch.Size([42000, 1])


In [27]:
class MnistNet(nn.Module):
    def __init__(self):
        super(MnistNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=(5,5))
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=(5,5))
        self.bn2 = nn.BatchNorm2d(32)
        
        self.conv2_drop = nn.Dropout2d(p = 0.2)
        
        self.fc1 = nn.Linear(128, 100)
        self.fc2 = nn.Linear(100, 10)
        
        self.conv3 = nn.Conv2d(32, 64, kernel_size=(3,3))
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=(3,3))
        self.bn4 = nn.BatchNorm2d(64)
        self.conv5 = nn.Conv2d(64, 128, kernel_size=(3,3))
        self.bn5 = nn.BatchNorm2d(128)
        self.conv6 = nn.Conv2d(128, 128, kernel_size=(1,1))
        self.bn6 = nn.BatchNorm2d(128)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.bn1(x)
        x = F.relu(self.conv2(x))
        ###
        x = self.bn2(x)
        x = F.max_pool2d(x,2)
        x = self.conv2_drop(x)
        ###
        # x = self.conv2_drop(F.max_pool2d(self.bn2(x),2))
        
        x = F.relu(self.conv3(x))
        x = self.bn3(x)
        x = F.relu(self.conv4(x))
        x = self.bn4(x)
        x = F.max_pool2d(x,2)
        x = self.conv2_drop(x)
        x = F.relu(self.conv5(x))
        x = self.bn5(x)
        x = F.relu(self.conv6(x))
        x = self.bn6(x)
        size =  x.size()[1]*x.size()[2]*x.size()[3]
        #print(size)
        x = x.view(-1, size)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

pass

net = MnistNet()

print(net)

use_gpu = torch.cuda.is_available()
if use_gpu:
	net = net.cuda()
	print ('USE GPU')
else:
	print ('USE CPU')

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = 0.3, momentum = 0.1)

MnistNet(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2_drop): Dropout2d(p=0.2)
  (fc1): Linear(in_features=128, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (bn5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv6): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
  (bn6): BatchNorm2d

In [64]:
print ("3. Training phase")
nb_train = train.shape[0]
nb_epoch = 5
nb_index = 0
nb_batch = 250

3. Training phase


In [67]:
nb_train

42000

In [132]:
for epoch in range(nb_epoch):
    if nb_index + nb_batch >= nb_train:          # 创造读取的mini_batch的index, 当大于整个数据集的size时(42000), index归零
        nb_index = 0
    else:
        nb_index += nb_batch                     # 这里不应该时epoch, 而是iteration, 因为一次循环只训练一个mini_batch

    mini_data  = Variable(X_data[nb_index:(nb_index+nb_batch)].clone())     # 读取一个minibatch size的data
    mini_label = Variable(X_label[nb_index:(nb_index+nb_batch)].clone(), requires_grad = False)
    mini_data  = mini_data.type(torch.FloatTensor)                          # 需要进行类型的转换, 不然计算loss时候会出错
    mini_label = mini_label.type(torch.LongTensor)
    if use_gpu:
        mini_data  = mini_data.cuda()
        mini_label = mini_label.cuda()
    optimizer.zero_grad()
    mini_out   = net(mini_data)
    mini_label = mini_label.view(nb_batch)
    mini_loss  = criterion(mini_out, mini_label)
    mini_loss.backward()
    optimizer.step() 

    if (epoch + 1) % 5 == 0:
        print("Epoch = %d, Loss = %f" %(epoch+1, mini_loss.item()))

Epoch = 5, Loss = 0.065264


In [160]:
test  = pd.read_csv("C:/Users/dell/Desktop/mnist/test.csv").values

In [166]:
test_chamge = test[0:100][:]

In [167]:
test_chamge.shape

(100, 784)

In [168]:
print ("4. Testing phase")

Y_data  = test_chamge.reshape(test_chamge.shape[0], 1, 28, 28)
Y_data  = Y_data.astype(float)
Y_data /= 255.0
Y_data  = torch.from_numpy(Y_data)
print (Y_data.size())
nb_test = test_chamge.shape[0]

net.eval()

final_prediction = np.ndarray(shape = (nb_test, 2), dtype=int)

4. Testing phase
torch.Size([100, 1, 28, 28])


In [171]:
for each_sample in range(nb_test):
	sample_data = Variable(Y_data[each_sample:each_sample+1].clone())
	sample_data = sample_data.type(torch.FloatTensor)
	if use_gpu:
		sample_data = sample_data.cuda()
	sample_out = net(sample_data)
	pred = torch.max(sample_out, 1)                         # 找出out里面的最大值, 以及其index 
	final_prediction[each_sample][0] = 1 + each_sample      # 最终结果的第0列是index
	final_prediction[each_sample][1] = pred[1][0]           # 第1列是预测结果, out里面的最大值的index 
	if (each_sample + 1) % 10 == 0:
		print("Total tested = %d" %(each_sample + 1))

Total tested = 10
Total tested = 20
Total tested = 30
Total tested = 40
Total tested = 50
Total tested = 60
Total tested = 70
Total tested = 80
Total tested = 90
Total tested = 100


In [172]:
sample_out

tensor([[-2.0503, -2.0340, -0.9220, -1.7652,  7.7914,  0.3642,  0.9878, -1.6955,
         -0.6362,  0.3033]], device='cuda:0', grad_fn=<ThAddmmBackward>)

In [183]:
pred

(tensor([7.7914], device='cuda:0', grad_fn=<MaxBackward0>),
 tensor([4], device='cuda:0'))

In [None]:
print ('5. Generating submission file')

submission = pd.DataFrame(final_prediction, dtype=int, columns=['ImageId', 'Label'])
submission.to_csv('C:/Users/dell/Desktop/mnist/submission.csv', index=False, header=True)

In [184]:
final_prediction

array([[  1,   2],
       [  2,   0],
       [  3,   9],
       [  4,   9],
       [  5,   3],
       [  6,   7],
       [  7,   0],
       [  8,   3],
       [  9,   0],
       [ 10,   3],
       [ 11,   5],
       [ 12,   7],
       [ 13,   4],
       [ 14,   0],
       [ 15,   4],
       [ 16,   3],
       [ 17,   3],
       [ 18,   1],
       [ 19,   9],
       [ 20,   0],
       [ 21,   9],
       [ 22,   1],
       [ 23,   1],
       [ 24,   5],
       [ 25,   7],
       [ 26,   4],
       [ 27,   2],
       [ 28,   7],
       [ 29,   4],
       [ 30,   7],
       [ 31,   7],
       [ 32,   5],
       [ 33,   4],
       [ 34,   2],
       [ 35,   6],
       [ 36,   2],
       [ 37,   5],
       [ 38,   5],
       [ 39,   1],
       [ 40,   6],
       [ 41,   7],
       [ 42,   7],
       [ 43,   4],
       [ 44,   9],
       [ 45,   8],
       [ 46,   7],
       [ 47,   8],
       [ 48,   2],
       [ 49,   6],
       [ 50,   7],
       [ 51,   6],
       [ 52,   8],
       [ 53,

In [185]:
final_prediction.shape

(100, 2)