In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import numpy as np

In [None]:
# ---- Run configuration -------------------------------------------------
modelPath = './model.pkl'   # presumably the checkpoint destination; not referenced by the visible cells
batchSize = 64              # mini-batch size shared by both data loaders
nEpochs = 2                 # number of passes over the training set
numPrint = 1000             # not referenced by the visible cells

# Prefer the first CUDA device when one is available, otherwise run on CPU.
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

# ---- MNIST datasets and loaders ----------------------------------------
# Both splits share the same storage root, download flag and tensor conversion.
_mnist_common = dict(
    root='./MNIST',
    download=True,
    transform=transforms.ToTensor(),
)

# Training split: reshuffled on every pass.
trainset = torchvision.datasets.MNIST(train=True, **_mnist_common)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batchSize,
    shuffle=True,
)

# Test split: iterated in a fixed order.
testset = torchvision.datasets.MNIST(train=False, **_mnist_common)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batchSize,
    shuffle=False,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting ./MNIST/MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting ./MNIST/MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting ./MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting ./MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [None]:
class Block(nn.Module):
    """Residual block: a two-convolution main path added to a skip path.

    Main path (``self.plain``):
        conv3x3(stride) -> BatchNorm -> ReLU -> conv3x3(stride 1) -> BatchNorm

    Skip path:
        * identity, when ``stride == 1`` and ``in_channels == out_channels``;
        * otherwise a projection (``self.shortcut``) made of
          conv3x3(stride) -> BatchNorm, so that the skip tensor matches the
          main path's channel count and spatial size. Note that the
          projection uses a 3x3 kernel with padding 1, not a 1x1 kernel.

    No activation is applied after the two paths are summed.

    Args:
        in_channels: number of channels of the input tensor.
        out_channels: number of channels produced by the block.
        stride: stride of the first main-path convolution and of the
            projection convolution (default 1).

    Attributes:
        short_cut: ``True`` when the projection is used, ``False`` when the
            input is added unchanged. ``self.shortcut`` only exists in the
            former case.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        main_layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
        ]
        self.plain = nn.Sequential(*main_layers)

        # The input can only be added as-is when neither its resolution nor
        # its channel count changes; otherwise it must be projected.
        needs_projection = not (stride == 1 and in_channels == out_channels)
        if needs_projection:
            projection_layers = [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
                nn.BatchNorm2d(out_channels),
            ]
            self.shortcut = nn.Sequential(*projection_layers)
        self.short_cut = needs_projection

    def forward(self, inputs):
        """Return ``plain(inputs)`` plus the (possibly projected) input."""
        main_out = self.plain(inputs)
        skip_out = self.shortcut(inputs) if self.short_cut else inputs
        return main_out + skip_out

class ResNet(nn.Module):
    """Residual CNN classifier assembled from ``Block`` units.

    The feature extractor consists of four stages. Each stage is three
    blocks: two stride-1 blocks at a working width, followed by a stride-2
    block that widens the channels to the stage's output width. The stage
    output widths are 256, 512, 1024 and 2048.

    The head applies global average pooling to 1x1, flattens, and runs a
    three-layer MLP (2048 -> 2048 -> 2048 -> ``num_classes``) with ReLU and
    Dropout(0.2) after each of the first two linear layers. The output is
    raw, un-normalised class scores.

    Args:
        in_channels: number of channels of the input images (default 1,
            matching the original hard-coded value).
        num_classes: number of output scores (default 10, matching the
            original hard-coded value).
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()

        # (stage input channels, working width, stage output channels)
        stage_widths = [
            (in_channels, 64, 256),
            (256, 128, 512),
            (512, 256, 1024),
            (1024, 512, 2048),
        ]

        layers = []
        for c_in, c_mid, c_out in stage_widths:
            layers += [
                Block(c_in, c_mid),
                Block(c_mid, c_mid),
                Block(c_mid, c_out, 2),  # stride 2: halves the spatial size
            ]

        feature_dim = stage_widths[-1][2]
        layers += [
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(feature_dim, 2048),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(2048, 2048),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(2048, num_classes),
        ]

        # Layers are registered in the same order (and therefore under the
        # same ``model.<index>`` names) as when they were listed one by one.
        self.model = nn.Sequential(*layers)

    def forward(self, inputs):
        """Return class scores of shape ``(batch, num_classes)``."""
        return self.model(inputs)

In [None]:
# Build the network, then move its parameters and buffers to the selected device.
net = ResNet()
net = net.to(device)

def train():
    """Train the notebook-level ``net`` on ``trainloader`` for ``nEpochs`` epochs.

    Uses cross-entropy loss and SGD (lr=0.001, momentum=0.9). After every
    epoch the mean per-sample training loss is printed and ``Accuracy()`` is
    called to report accuracy on the test set.

    Reads the notebook-level names ``net``, ``trainloader``, ``nEpochs`` and
    ``device``; updates the parameters of ``net`` in place. Returns ``None``.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    for epoch in range(nEpochs):
        # Evaluation between epochs may leave the model in eval mode, so make
        # sure Dropout/BatchNorm are in training mode at the start of each epoch.
        net.train()

        running_loss = 0.0  # sum of per-sample losses seen this epoch
        num = 0             # number of samples seen this epoch

        for inputs, labels in trainloader:
            # Batches are fed to the network in the shape the loader yields,
            # exactly as Accuracy() does.
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()   # compute gradients
            optimizer.step()  # update parameters

            # CrossEntropyLoss averages over the batch by default, so weight
            # the batch mean by the batch size before accumulating; dividing
            # by the sample count below then yields a true per-sample mean.
            batch_count = labels.size(0)
            running_loss += loss.item() * batch_count
            num += batch_count

        mean_loss = running_loss / num if num else 0.0  # guard against an empty loader
        print('epoch: %d\t loss: %.6f' % (epoch + 1, mean_loss))
        Accuracy()

        # NOTE: no checkpoint is written. To persist the model, save it here,
        # e.g. torch.save(net.state_dict(), modelPath).
def Accuracy(model=None, loader=None):
    """Compute, print and return top-1 classification accuracy.

    The model is switched to evaluation mode for the duration of the call
    (so Dropout is disabled and BatchNorm uses its running statistics) and
    is put back into whatever mode it was in afterwards.

    Args:
        model: network to evaluate. Defaults to the notebook-level ``net``.
        loader: iterable yielding ``(images, labels)`` batches. Defaults to
            the notebook-level ``testloader``.

    Returns:
        Fraction of correctly classified samples as a float in ``[0, 1]``;
        ``0.0`` when the loader yields no samples.
    """
    model = net if model is None else model
    loader = testloader if loader is None else loader

    # Run on whichever device the model's weights live on.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    correct = 0
    total = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():  # no gradients are needed for evaluation
            for images, labels in loader:
                images, labels = images.to(run_device), labels.to(run_device)
                predicted = model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Restore the caller's mode even if evaluation raised.
        model.train(was_training)

    accuracy = correct / total if total else 0.0  # guard against an empty loader
    print('Test Accuracy: %f' % accuracy)
    return accuracy
# Entry point: run training, then print a completion message. The guard is
# true when this code runs as the main module (as it does when the notebook
# cell is executed) and false when the code is imported.
if __name__ == '__main__':
    train()
    print('Training Finished')
    


epoch: 1	 loss: 0.003346
Test Accuracy: 0.988300
epoch: 2	 loss: 0.000444
Test Accuracy: 0.992200
Training Finished
