In [1]:
from __future__ import print_function
import cv2
import torch
import time
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch import optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision.transforms import ToPILImage
show=ToPILImage()
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Record the PyTorch and Python interpreter versions this notebook ran with.
import sys

print(torch.__version__)
print(sys.version)

1.0.1.post2
3.7.2 (default, Dec 29 2018, 00:00:04) 
[Clang 4.0.1 (tags/RELEASE_401/final)]


In [3]:
# Mini-batch size shared by the training and test loaders.
batchSize=16

# Preprocessing applied to every image:
#   1. Resize to 224 (the AlexNet below flattens to 256*5*5, which corresponds
#      to a 224x224 input).
#   2. Convert the PIL image to a tensor.
#   3. Normalise with mean 0.5 / std 0.5.
# Fashion-MNIST images are single-channel (the network's first conv layer
# takes in_channels=1), so Normalize is given exactly one mean and one std
# rather than a three-value RGB tuple.
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Training split: downloaded on first use and reshuffled every epoch.
trainset = torchvision.datasets.FashionMNIST(root='../input/mnist/data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batchSize, shuffle=True, num_workers=0)

# Test split: kept in a fixed order for evaluation.
testset = torchvision.datasets.FashionMNIST(root='../input/mnist/data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batchSize, shuffle=False, num_workers=0)

# Human-readable names for the ten Fashion-MNIST labels, indexed by label id.
# (The previous tuple held the CIFAR-10 class names, which do not describe
# this dataset.)
classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
           'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot')

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [9]:

class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier for 224x224 images.

    Five convolutional layers (with max-pooling after the first, second and
    fifth) feed three fully connected layers, with dropout after each of the
    first two. ``forward`` returns the raw output of the last linear layer;
    no softmax is applied.

    Spatial size for a 224x224 input:
    224 -> conv1 54 -> pool1 26 -> conv2 26 -> pool2 12 -> conv3..5 12 -> pool3 5,
    which is why the first dense layer expects 256*5*5 features.

    Args:
        num_classes: number of output scores per image (default 10).
        in_channels: number of channels in the input images (default 1).
    """

    def __init__(self, num_classes=10, in_channels=1):
        super(AlexNet,self).__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=in_channels,out_channels=96,kernel_size=11,stride=4)
        self.pool1 = nn.MaxPool2d(kernel_size=3,stride=2)
        self.conv2 = nn.Conv2d(in_channels=96,out_channels=256,kernel_size=5,padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(in_channels=256,out_channels=384,kernel_size=3,padding=1)
        self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2)
        # Fully connected classifier head.
        self.dense1 = nn.Linear(256*5*5,4096)
        self.drop1 = nn.Dropout(0.5)
        self.dense2 = nn.Linear(4096,4096)
        self.drop2 = nn.Dropout(0.5)
        self.dense3 = nn.Linear(4096,num_classes)

    def forward(self,x):
        """Compute class scores for a batch of images.

        Args:
            x: tensor of shape (batch, in_channels, 224, 224).

        Returns:
            Tensor of shape (batch, num_classes).

        Raises:
            RuntimeError: if the spatial size of ``x`` does not produce
                256*5*5 features per image (raised by the first dense layer).
        """
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool3(F.relu(self.conv5(x)))
        # Flatten per sample. Keeping the batch dimension fixed (instead of
        # view(-1, 256*5*5)) means a wrongly sized input fails at dense1
        # rather than silently spilling features into extra batch rows.
        x = x.view(x.size(0), -1)
        x = self.drop1(F.relu(self.dense1(x)))
        x = self.drop2(F.relu(self.dense2(x)))
        return self.dense3(x)

# Classification loss used during training.
criterion = nn.CrossEntropyLoss()

# Build the network and print its layer summary.
net = AlexNet()
print(net)

# Plain SGD with momentum over all of the network's parameters.
optimizer = optim.SGD(
    net.parameters(),
    lr=0.01,
    momentum=0.9,
)


AlexNet(
  (conv1): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
  (pool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dense1): Linear(in_features=6400, out_features=4096, bias=True)
  (drop1): Dropout(p=0.5)
  (dense2): Linear(in_features=4096, out_features=4096, bias=True)
  (drop2): Dropout(p=0.5)
  (dense3): Linear(in_features=4096, out_features=10, bias=True)
)


In [10]:

#train
print ("training")

# Switch to training mode explicitly so dropout is active even if this cell
# is re-run after the evaluation cell has called net.eval().
net.train()

# Number of mini-batches between progress reports.
log_every = 100

#we train 3 rounds
for epoch in range(3):
    # Timer and loss accumulator for the current reporting window.
    start = time.time()
    running_loss = 0.0

    for i, (image, label) in enumerate(trainloader, 0):
        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss.
        outputs = net(image)
        loss = criterion(outputs, label)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        # .item() extracts a plain Python float, so the running total is a
        # number rather than a tensor.
        running_loss += loss.item()

        if (i + 1) % log_every == 0:
            end = time.time()
            # Images seen so far this epoch, derived from the loader's batch
            # size instead of a hard-coded 16.
            print ('[epoch %d,imgs %5d] loss: %.7f  time: %0.3f s'%(epoch+1,(i+1)*trainloader.batch_size,running_loss/log_every,(end-start)))
            start = time.time()
            running_loss = 0.0
print ("finish training")



training begin
[epoch 1,imgs  1600] loss: 2.2847359  time: 141.918 s
[epoch 1,imgs  3200] loss: 1.6495736  time: 146.801 s
[epoch 1,imgs  4800] loss: 1.0913321  time: 144.900 s
[epoch 1,imgs  6400] loss: 0.9178799  time: 143.618 s
[epoch 1,imgs  8000] loss: 0.7255520  time: 141.535 s
[epoch 1,imgs  9600] loss: 0.6970863  time: 141.132 s
[epoch 1,imgs 11200] loss: 0.6619957  time: 141.106 s
[epoch 1,imgs 12800] loss: 0.6470241  time: 141.109 s
[epoch 1,imgs 14400] loss: 0.6290392  time: 142.699 s
[epoch 1,imgs 16000] loss: 0.5692280  time: 155.037 s
[epoch 1,imgs 17600] loss: 0.5250652  time: 166.834 s
[epoch 1,imgs 19200] loss: 0.5419562  time: 153.761 s
[epoch 1,imgs 20800] loss: 0.4948173  time: 167.480 s
[epoch 1,imgs 22400] loss: 0.4833591  time: 161.161 s
[epoch 1,imgs 24000] loss: 0.5471354  time: 144.843 s
[epoch 1,imgs 25600] loss: 0.4870166  time: 144.650 s
[epoch 1,imgs 27200] loss: 0.4130421  time: 144.474 s
[epoch 1,imgs 28800] loss: 0.4299891  time: 144.697 s
[epoch 1,imgs

In [11]:

#test
# Evaluation mode disables dropout for inference.
net.eval()
correct=0
total=0
# No gradients are needed for evaluation; no_grad avoids recording autograd
# state for every test batch.
with torch.no_grad():
    for images, labels in testloader:
        outputs = net(images)
        # Index of the highest score along dim 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # .item() keeps the running count a Python int, so the percentage
        # below is ordinary Python arithmetic rather than tensor division.
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the %d test images: %d %%' % (total , 100 * correct / total))

Accuracy of the network on the 10000 test images: 87 %
