In [0]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.transforms as trans
import torchvision.datasets as dsets
import matplotlib.pyplot as plt
import torch.nn.functional as F
import time

%matplotlib inline

In [4]:
conv = nn.Conv2d(3,10,3,padding=1)
x = torch.rand(10,3,28,28)
for name,param in conv.named_parameters():
  print(name,param.shape)

weight torch.Size([10, 3, 3, 3])
bias torch.Size([10])


In [0]:
class LeNet5(nn.Module):
  def __init__(self):
    super(LeNet5,self).__init__()
    self.conv1 = nn.Conv2d(1,10,5,padding=2)
    self.pooling1 = nn.AvgPool2d(2)
    self.conv2 = nn.Conv2d(10,20,kernel_size=5,padding=2)
    self.pooling2 = nn.AvgPool2d(2)
    self.fc1 = nn.Linear(980,500)
    self.fc2 = nn.Linear(500,10)

  def forward(self,x):
    o = self.conv1(x)
    o = self.pooling1(o)
    o = F.relu(o)
    o = self.conv2(o)
    o = self.pooling2(o)
    o = F.relu(o)
    o = o.view(x.size(0),-1)
    o = F.relu(self.fc1(o))
    o = self.fc2(o)
    return o


In [28]:
model = LeNet5()
print(model)

LeNet5(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pooling1): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pooling2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (fc1): Linear(in_features=980, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)


In [18]:
x = torch.rand(100,1,28,28)
y = model(x)
print(y.shape)

torch.Size([100, 10])


In [0]:
trainset = dsets.MNIST(root='../data/mnist',train=True,transform=trans.ToTensor(),download=True)
testset = dsets.MNIST(root='../data/mnist',train=False,transform=trans.ToTensor(),download=True)
train_loader = DataLoader(trainset,batch_size=128,shuffle=True,num_workers=4)
test_loader = DataLoader(testset,batch_size=128,shuffle=False,num_workers=4)


In [0]:
model = LeNet5()
lr = 1e-2
optimizer = torch.optim.Adam(model.parameters(),lr=lr)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,milestones=[10,20],gamma=0.5)


In [0]:
def eval(model,criterion,dataloader):
  loss,accuracy = 0,0
  for bx,by in dataloader:
    y = model(bx)
    error = criterion(y,by)
    error.backward()
    optimizer.step()
    loss += error.item()
    _,pred_y = y.max(dim=1)
    acc = (pred_y.data==by).float().sum()/bx.size(0)
    accuracy += acc

  accuracy /= len(dataloader)
  loss /= len(dataloader)
  return loss,accuracy

In [38]:
nepoch = 25
criterion = nn.CrossEntropyLoss()
for epoch in range(nepoch):
  loss,acc = 0,0
  for bx,by in train_loader:
    y = model(bx)
    err = criterion(y,by)
    err.backward()
    optimizer.step()
    optimizer.zero_grad()
    loss += err.item()
    _,pred_y = y.max(dim=1)
    acc += (pred_y.data==by).float().sum()/bx.size(0)

  loss /= len(train_loader)
  acc /= len(train_loader)
  scheduler.step()
  print("epoch:%d,loss=%.5f,acc=%.3f"%(epoch+1,loss,acc*100))

print("********model eval**********")
loss,acc = eval(model,criterion,test_loader)
print("loss=%.5f,acc=%.3f"%(loss,acc*100))

epoch:1,loss=0.18376,acc=94.158
epoch:2,loss=0.06696,acc=97.959
epoch:3,loss=0.05323,acc=98.407
epoch:4,loss=0.04870,acc=98.511
epoch:5,loss=0.04620,acc=98.599
epoch:6,loss=0.04642,acc=98.672
epoch:7,loss=0.04533,acc=98.735
epoch:8,loss=0.03679,acc=98.929
epoch:9,loss=0.03914,acc=98.923
epoch:10,loss=0.03687,acc=98.974
epoch:11,loss=0.01244,acc=99.611
epoch:12,loss=0.00919,acc=99.730
epoch:13,loss=0.00595,acc=99.826
epoch:14,loss=0.01088,acc=99.701
epoch:15,loss=0.00935,acc=99.751
epoch:16,loss=0.01408,acc=99.663
epoch:17,loss=0.01025,acc=99.723
epoch:18,loss=0.00655,acc=99.817
epoch:19,loss=0.01217,acc=99.723
epoch:20,loss=0.01270,acc=99.710
epoch:21,loss=0.00298,acc=99.910
epoch:22,loss=0.00071,acc=99.975
epoch:23,loss=0.00006,acc=100.000
epoch:24,loss=0.00002,acc=100.000
epoch:25,loss=0.00002,acc=100.000
********model eval**********
loss=3.76279,acc=82.605
