In [1]:
import torch
import torch.nn as nn
from torchvision import datasets, models, transforms
from torchvision.transforms import transforms
from torch.autograd import Variable
from torchvision.models import inception_v3
from torch.utils.data import DataLoader
import numpy as np
import os

In [2]:
class ConvUnit(nn.Module):
    def __init__(self, input_channels, output_channels):
        super(ConvUnit, self).__init__()
        
        self.conv = nn.Conv2d(in_channels=input_channels, out_channels=output_channels, kernel_size=3, stride=1, padding=1)
        self.bn = nn.BatchNorm2d(num_features=output_channels)
        self.relu = nn.ReLU()

    def forward(self, input):
        output = self.conv(input)
        output = self.bn(output)
        output = self.relu(output)
        
        return output

class SimpleNet(nn.Module):
    
    def __init__(self, num_classes=25):
        super(SimpleNet,self).__init__()
        
        self.layer1 = ConvUnit(input_channels=3,output_channels=32)
        self.layer2 = ConvUnit(input_channels=32,output_channels=32)
        self.layer3 = ConvUnit(input_channels=32,output_channels=32)
        
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        
        self.layer4 = ConvUnit(input_channels=32,output_channels=64)
        self.layer5 = ConvUnit(input_channels=64,output_channels=64)
        self.layer6 = ConvUnit(input_channels=64,output_channels=64)
        
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        
        self.layer7 = ConvUnit(input_channels=64,output_channels=128)
        self.layer8 = ConvUnit(input_channels=128,output_channels=128)
        self.layer9 = ConvUnit(input_channels=128,output_channels=128)
        
        self.pool3 = nn.MaxPool2d(kernel_size=2)
        
        self.layer10 = ConvUnit(input_channels=128,output_channels=128)
        self.layer11 = ConvUnit(input_channels=128,output_channels=128)
        self.layer12 = ConvUnit(input_channels=128,output_channels=128)
        
        self.pool4 = nn.AvgPool2d(kernel_size=4)
        
        self.net = nn.Sequential(self.layer1, self.layer2, self.layer3, self.pool1, self.layer4, self.layer5, self.layer6, self.pool2, \
                                self.layer7, self.layer8, self.layer9, self.pool3, self.layer10, self.layer11, self.layer12, self.pool4)
        
        self.fc = nn.Linear(in_features=6272,out_features=4096)
        self.fc2 = nn.Linear(in_features=4096,out_features=25)        
        
    def forward(self, input):
        output = self.net(input)
        output = output.view(-1, 7 * 7 * 128)
        output = self.fc(output)
        output = self.fc2(output)
        
        return output

In [2]:
data_dir = '/home/jeet/WEBEmo/'

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [3]:
BATCH_SIZE = 64

dsets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
         for x in ['train', 'test']}
# print (dsets)

# dset_loaders = {x: torch.utils.data.DataLoader(dsets[x], batch_size=BATCH_SIZE, shuffle=True, num_workers=16)
#                 for x in ['train', 'test']}

# dset_sizes = {x: len(dsets[x]) for x in ['train', 'test']}

# dset_classes = dsets['train'].classes

In [5]:
dsets

{'train': Dataset ImageFolder
     Number of datapoints: 213952
     Root Location: /home/jeet/WEBEmo/train
     Transforms (if any): Compose(
                              RandomResizedCrop(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR)
                              RandomHorizontalFlip(p=0.5)
                              ToTensor()
                              Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                          )
     Target Transforms (if any): None, 'test': Dataset ImageFolder
     Number of datapoints: 53489
     Root Location: /home/jeet/WEBEmo/test
     Transforms (if any): Compose(
                              Resize(size=224, interpolation=PIL.Image.BILINEAR)
                              CenterCrop(size=(224, 224))
                              ToTensor()
                              Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                          )
     Target Tr

In [6]:
CNN = SimpleNet(num_classes=25)

In [7]:
CNN.cuda()

SimpleNet(
  (layer1): ConvUnit(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (layer2): ConvUnit(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (layer3): ConvUnit(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (layer4): ConvUnit(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (layer5): ConvUnit(
    (conv): Conv2d(64, 64, ker

In [8]:
from torch.optim import Adam

optimizer = Adam(CNN.parameters(), lr=0.001, weight_decay=0.0001)
loss_fn = nn.CrossEntropyLoss()

In [9]:
def test():
    CNN.eval()
    test_acc = 0.0
    for i, (images, labels) in enumerate(dset_loaders['test']):

        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        # Predict classes using images from the test set
        outputs = CNN(images)
        _, prediction = torch.max(outputs.data, 1)
        
        test_acc += torch.sum(prediction == labels.data)

    # Compute the average acc and loss over all test images
    test_acc = test_acc / (len(dset_loaders['test'])* BATCH_SIZE)

    return test_acc

In [10]:
def train(num_epochs):
    
    for epoch in range(num_epochs):
        CNN.train()
        train_acc = 0.0
        train_loss = 0.0
        best_acc = 0.0
        
        for i, (images, label) in enumerate(dset_loaders['train']):
            
            images = Variable(images.cuda())
            label = Variable(label.cuda())
            
            optimizer.zero_grad()
            
            output = CNN(images)
            
#             print (output)
#             print (label)
            loss = loss_fn(output, label)
            loss.backward()
            
            optimizer.step()
            
            train_loss += loss.item() * images.size(0)
            _, prediction = torch.max(output.data, 1)
            
#             print (label.data, prediction)
            train_acc += torch.sum(prediction == label.data)
#             print ("Accurately classified samples: ", torch.sum(prediction == label.data))

        # Compute the average acc and loss over all training images
        print("Total Accurately classified images at the end of epoch is {}".format(train_acc))
        train_acc = train_acc * 1.0 / (len(dset_loaders['train']) * BATCH_SIZE)
        train_loss = train_loss * 1.0 / (len(dset_loaders['train']) * BATCH_SIZE)

        print("Epoch {}, Train Accuracy: {:.5f} , TrainLoss: {}".format(epoch, train_acc, train_loss))
        
        # Evaluate on the test set
#         test_acc = test()

#         # Save the model if the test acc is greater than our current best
#         if test_acc > best_acc:
#             torch.save(CNN.state_dict(), "CNNmodel_{}.model".format(epoch))
#             print("Chekcpoint saved")
#             best_acc = test_acc

#         # Print the metrics
#         print("Test Accuracy: {}".format(test_acc))
                                                                                        

In [11]:
train(5)  

Total Accurately classified images at the end of epoch is 26343
Epoch 0, Train Accuracy: 0.00000 , TrainLoss: 2.999215295562265
Total Accurately classified images at the end of epoch is 27575
Epoch 1, Train Accuracy: 0.00000 , TrainLoss: 2.958603054258305
Total Accurately classified images at the end of epoch is 28077
Epoch 2, Train Accuracy: 0.00000 , TrainLoss: 2.938056596481547
Total Accurately classified images at the end of epoch is 28825
Epoch 3, Train Accuracy: 0.00000 , TrainLoss: 2.9223748377401457
Total Accurately classified images at the end of epoch is 29375
Epoch 4, Train Accuracy: 0.00000 , TrainLoss: 2.9124045687951337
