In [4]:
import numpy as np
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader,Dataset
from torchvision import datasets, transforms,utils, models
import matplotlib.pyplot as plt
import torch.nn.functional as F
from collections import OrderedDict

# transform = transforms.Compose(
#     [transforms.ToTensor(),transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])   #for pretrained need normalization to imagenet input

data_transforms_train = transforms.Compose([transforms.RandomRotation([-30,30]),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])


data_transforms_test_eval = transforms.Compose([transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

trainset=datasets.CIFAR10('CIFAR10/',train=True,transform=data_transforms_train,download=True)
train_loader=DataLoader(trainset,batch_size=64,shuffle=True) #use all three channels

testset=datasets.CIFAR10('CIFAR10/',train=False,transform=data_transforms_test_eval,download=True)
test_loader=DataLoader(testset,batch_size=64,shuffle=True)

arch='alexnet'
node_hidden1=4096
node_hidden2=1024
node_hidden3=512

Files already downloaded and verified
Files already downloaded and verified


In [6]:
if(arch=='alexnet'):
    model = models.alexnet(pretrained=True)
    input_size = 9216
    hidden_sizes = [node_hidden1,node_hidden2,node_hidden3]
    output_size = 10

    for param in model.parameters():
        param.requires_grad = False


    classifier = nn.Sequential(OrderedDict([
                              ('fc1', nn.Linear(input_size, hidden_sizes[0])),
                              ('relu1', nn.ReLU()),
                              ('dropout1',nn.Dropout()),
                              ('fc2', nn.Linear(hidden_sizes[0], hidden_sizes[1])),
                              ('relu2', nn.ReLU()),
                              ('dropout2',nn.Dropout()),
                              ('fc3', nn.Linear(hidden_sizes[1], hidden_sizes[2])),
                              ('relu3', nn.ReLU()),
                              ('dropout3',nn.Dropout()),        
                              ('fc4', nn.Linear(hidden_sizes[2], output_size))
#                              ('output', nn.LogSoftmax(dim=1))  will do cross entropy
                              ]))

    model.classifier = classifier

In [2]:
# class AlexNet_mod(nn.Module):

#     def __init__(self, num_classes=10):
#         super(AlexNet_mod, self).__init__()
#         self.features = nn.Sequential(
#             nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
#             nn.ReLU(inplace=True),
#             nn.MaxPool2d(kernel_size=3, stride=2),
#             nn.Conv2d(64, 192, kernel_size=5, padding=2),
#             nn.ReLU(inplace=True),
#             nn.MaxPool2d(kernel_size=3, stride=2),
#             nn.Conv2d(192, 384, kernel_size=3, padding=1),
#             nn.ReLU(inplace=True),
#             nn.Conv2d(384, 256, kernel_size=3, padding=1),
#             nn.ReLU(inplace=True),
#             nn.Conv2d(256, 256, kernel_size=3, padding=1),
#             nn.ReLU(inplace=True),
#             nn.MaxPool2d(kernel_size=3, stride=2),
#         )
#         self.classifier = nn.Sequential(
#             nn.Dropout(),
#             nn.Linear(256 * 6 * 6, 4096),
#             nn.ReLU(inplace=True),
#             nn.Dropout(),
#             nn.Linear(4096, 4096),
#             nn.ReLU(inplace=True),
#             nn.Dropout(),
#             nn.Linear(4096, 1000),   # 1000 removed
#             nn.ReLU(inplace=True),
#             nn.Dropout(),
#             nn.Linear(1000, 10),
#         )

#     def forward(self, x):
#         x = self.features(x)
#         x = x.view(x.size(0), 256 * 6 * 6)
#         x = self.classifier(x)
#         return x

In [8]:
epochs=10
criterion=torch.nn.CrossEntropyLoss()  #if increase the lr to 0.1, it kicks to local min and stays
optimizer=torch.optim.SGD(model.classifier.parameters(),lr=0.01,momentum=0.9)  #had to change from SGD to Adam but later noy much difference
lr_scheduler_=torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
#lr_scheduler_=torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max=epochs)

In [9]:
def train_function(train_loader):
    loss_running=0
    count=0
    count_batch=0
    sum_acc=0

    for epoch in range(epochs):

        for image,label in iter(train_loader):

            #input.resize_(input.size()[0], 784)   take to forward
            y_pred=model(image)   #this is 64 (bacth_size)*10

            if(count==0): print(y_pred.size(),label.size())
            loss=criterion(y_pred,label)    #criterion(y_pred,label), crossentropy criterion need long (output of forward) and normal tensor (target)
            loss_running=loss_running+loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            count=count+1
            #y_pred_round=torch.round(y_pred)
            count_batch=count_batch+(label.size()[0])
            _,y_pred_=torch.max(y_pred,dim=1)    #argmax is the second value returned by torch.max()  ,this collapse dimension to batch size with argmax of probabililty/value (second) item, first one is the value itself 

            acc=(label==y_pred_).sum().item()    #/label.size()[0]
            sum_acc=sum_acc+acc
            if(count%200==0): 
                print('epoch',epoch+1,'/',epochs,'train running_loss:',(loss_running/count_batch),'Accuracy',(sum_acc*100/count_batch))
                print('-----')
                test_function(test_loader)
                print('-----')
                model.train()

        check_loss=(loss_running/count_batch)
        lr_scheduler_.step(check_loss)



In [10]:
def test_function(test_loader):
    loss_running=0
    count=0
    count_batch=0
    sum_acc=0

    for input,label in iter(test_loader):
        model.eval()

        #input.resize_(input.size()[0], 784)   take to forward
        y_pred=model(input)   #this is 64 (bacth_size)*10

        #if(count==0): print(y_pred.size(),label.size())
        loss=criterion(y_pred,label)    #criterion(y_pred,label), crossentropy criterion need long (output of forward) and normal tensor (target)
        loss_running=loss_running+loss.item()
        count=count+1

        count_batch=count_batch+(label.size()[0])
        _,y_pred_=torch.max(y_pred,dim=1)    #argmax is the second value returned by torch.max()  ,this collapse dimension to batch size with argmax of probabililty/value (second) item, first one is the value itself 

        acc=(label==y_pred_).sum().item()    #/label.size()[0]
        sum_acc=sum_acc+acc
        if(count%100==0): print('test running_loss:',(loss_running/count_batch),'Accuracy',(sum_acc*100/count_batch))




#test_function(test_loader)   # w/o convnet :result show over fitting on train 64%, but overall 50% on test is not too bad wihtout convnet
#w/ convnet just after 5 epoch training the test got to 63% (50% train)
#after 10 epoch test get to 66% pretty good without big networks

#CiFAR10 with inception better in training improve to 64% but not much better for test at 64% (may be need derop out and more fc)

#after more fc on top of inception got to 70% test, 

In [None]:
train_function(train_loader)

torch.Size([64, 10]) torch.Size([64])
epoch 1 / 10 train running_loss: 0.030190545208752154 Accuracy 26.3203125
-----
test running_loss: 0.021555963940918445 Accuracy 49.59375
-----
epoch 1 / 10 train running_loss: 0.02767259328626096 Accuracy 32.546875
-----
test running_loss: 0.01775102874264121 Accuracy 56.796875
-----
epoch 1 / 10 train running_loss: 0.02631572445233663 Accuracy 36.265625
-----
test running_loss: 0.01614627324976027 Accuracy 63.109375
-----
epoch 2 / 10 train running_loss: 0.025613909860514164 Accuracy 38.48334375977479
-----
test running_loss: 0.01534952225163579 Accuracy 65.671875
-----
epoch 2 / 10 train running_loss: 0.02507131890278922 Accuracy 40.10976982737053
-----
test running_loss: 0.014511408153921365 Accuracy 67.59375
-----
epoch 2 / 10 train running_loss: 0.024632929191567985 Accuracy 41.514227642276424
-----
test running_loss: 0.014002577662467956 Accuracy 68.6875
-----
epoch 2 / 10 train running_loss: 0.024262389510587772 Accuracy 42.56521350723602
-