In [1]:
import numpy as np
import torch
import torchvision
import os
import math
import matplotlib.pyplot as plt
from torchvision import datasets, models, transforms
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader, Dataset
from math import ceil
import torch.nn as nn
import torch.optim as optim
import time
import torch.nn.functional as F
import csv
from torch.optim import lr_scheduler

In [2]:
ROOT_DIR = './Assignments/Assignment 1/ift6135h19'

BATCH_SIZE = 32

In [3]:
class ImageFolderWithPaths(datasets.ImageFolder):
    # Extends torchvision.datasets.ImageFolder

    # override __getitem__ 
    def __getitem__(self, index):
        original_tuple = super(ImageFolderWithPaths, self).__getitem__(index)
        path = self.imgs[index][0]
        tuple_and_path = (original_tuple + (path,))
        return tuple_and_path

In [4]:
# data_loader

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomRotation(5),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# print('Device: {}'.format(device))


def data_loader(train_to_valid_ratio=0.8, 
                root_dir=ROOT_DIR, 
                batch_size=BATCH_SIZE):

    train_valid_data = ImageFolderWithPaths(
        os.path.join(root_dir, 'trainset'), data_transforms['train'])
    
    test_data = ImageFolderWithPaths(os.path.join(
        root_dir, 'testset'), data_transforms['test'])
    
    print('class_to_idx: {}'.format(train_valid_data.class_to_idx))
    class_to_idx = train_valid_data.class_to_idx
    idx_to_class = {v: k for k, v in class_to_idx.items()}
    
    print('Size of train_valid_data[0]: {}\n'.format(len(train_valid_data[0][0].shape)))
    
    train_valid_data_size = len(train_valid_data)
    train_valid_indices = list(range(train_valid_data_size))
    split = int(np.ceil(train_to_valid_ratio * train_valid_data_size))
#     print('split = {}'.format(split))
    
#     print('Image size = {}, Label = {}\n'.format(train_valid_data[0][0].shape, train_valid_data[0][1]))

    # shuffle the indices
    np.random.shuffle(train_valid_indices)
    train_indices, valid_indices = train_valid_indices[:split], train_valid_indices[split:]
    
    print('len(train_indices): {}'.format(len(train_indices)))
    print('len(valid_indices): {}'.format(len(valid_indices)))
    
    print('Size of test data: {}'.format(len(test_data)))

    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(valid_indices)

    train_loader = DataLoader(train_valid_data, batch_size=batch_size, sampler=train_sampler)
    valid_loader = DataLoader(train_valid_data, batch_size=batch_size, sampler=valid_sampler)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
    
    return (train_loader, valid_loader, test_loader, 
            class_to_idx, idx_to_class)


Device: cuda


In [5]:
train_loader, valid_loader, test_loader, class_to_idx, idx_to_class = data_loader()

class_to_idx: {'Cat': 0, 'Dog': 1}
Size of train_valid_data[0]: 3

split = 15999
Image size = torch.Size([3, 64, 64]), Label = 0

len(train_indices): 15999
len(valid_indices): 3999
Size of test data: 4999


## VGGNet inspired model

In [7]:
class ConvNet(nn.Module):

    def __init__(self):
        super(ConvNet, self).__init__()
        self.convolutional_layers = nn.Sequential(
            # 1
            nn.Conv2d(in_channels=3, out_channels=18, kernel_size=(3, 3), padding=1),
            nn.ReLU(), nn.MaxPool2d(kernel_size=(2, 2), stride=2),

            # 2
            nn.Conv2d(in_channels=18, out_channels=36, kernel_size=(3, 3), padding=1),
            nn.ReLU(), nn.MaxPool2d(kernel_size=(2, 2), stride=2),

            # 3
            nn.Conv2d(in_channels=36, out_channels=72, kernel_size=(3, 3), padding=1),
            nn.ReLU(),

            # 4
            nn.Conv2d(in_channels=72, out_channels=72, kernel_size=(3, 3), padding=1),
            nn.ReLU(), nn.MaxPool2d(kernel_size=(2, 2), stride=2),

            # 5
            nn.Conv2d(in_channels=72, out_channels=144, kernel_size=(3, 3), padding=1),
            nn.ReLU(),

            # 6
            nn.Conv2d(in_channels=144, out_channels=144, kernel_size=(3, 3), padding=1),
            nn.ReLU(), nn.MaxPool2d(kernel_size=(2, 2), stride=2),
        )

        # FC layers
        self.fc_layers = nn.Sequential(
            nn.Linear(144 * 4 * 4, 144 * 4 * 4), nn.ReLU(),
            nn.Linear(144 * 4 * 4, 500), nn.ReLU(),
            nn.Linear(500, 2)
        )

    # forward pass
    def forward(self, x):
        x = self.convolutional_layers(x)
        x = x.view(-1, 144*4*4)
        x = self.fc_layers(x)
        return x


## Attempt 1

In [72]:
epoch_from = 1
decoder_attempt_1 = ConvNet()
decoder_attempt_1.cuda()

optimizer = torch.optim.SGD(decoder_attempt_1.parameters(), 
                            lr=0.01,
                            momentum=0.9, 
                            weight_decay=5e-4)


# optimizer = optim.Adam(decoder.parameters(), lr=0.01)

scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

criterion = nn.CrossEntropyLoss()

start_time = time.time()

best_loss = math.inf
best_accuracy = 0.0

n_epochs = 10

for epoch in range(n_epochs):
    print('\n', '-'*10, ' Epoch: {} '.format(epoch), '-'*10)
#     scheduler.step()
    decoder_attempt_1.train() # set training mode
    
    train_dataset_size = 0
    correct_train_preds = 0
    running_train_loss = 0.0
    
    # training
    for inputs, labels, ids in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        # forward prop
        outputs = decoder_attempt_1(inputs)
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)
        
        # backprop
        loss.backward()
        optimizer.step()
        
        # metrics
        running_train_loss += loss.item() * inputs.size(0)
        correct_train_preds += torch.sum(preds == labels.data).item()
        train_dataset_size += len(labels)
        
        
    epoch_train_loss = running_train_loss / train_dataset_size
    epoch_train_accuracy = (correct_train_preds*1.0) / train_dataset_size
    
    print('Training Loss: {}, Accuracy: {}'.format(epoch_train_loss, epoch_train_accuracy))
    
    
    # validation
    
    decoder_attempt_1.eval() # set eval mode
    
    valid_dataset_size = 0
    correct_valid_preds = 0
    running_valid_loss = 0.0
    
    for inputs, labels, ids in valid_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outputs = decoder_attempt_1(inputs)
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)
        
        running_valid_loss += loss.item() * inputs.size(0)
        correct_valid_preds += torch.sum(preds == labels.data).item()
        valid_dataset_size += len(labels)

    epoch_valid_loss = running_valid_loss / valid_dataset_size
    epoch_valid_accuracy = (correct_valid_preds*1.0) / valid_dataset_size
    
    print('Validation Loss: {}, Accuracy: {}'.format(epoch_valid_loss, epoch_valid_accuracy))
        
        
time_elapsed = time.time() - start_time

print('Training completed in {}minutes {}secs'.format(time_elapsed // 60, time_elapsed % 60))

## 2nd attempt

In [75]:
epoch_from = 1
decoder_attempt_2 = ConvNet()
decoder_attempt_2.cuda()

optimizer = torch.optim.SGD(decoder_attempt_2.parameters(), 
                            lr=0.01,
                            momentum=0.9, 
                            weight_decay=5e-4)


# optimizer = optim.Adam(decoder.parameters(), lr=0.01)

# scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

criterion = nn.CrossEntropyLoss()

start_time = time.time()

best_loss = math.inf
best_accuracy = 0.0

n_epochs = 50

for epoch in range(n_epochs):
    print('\n', '-'*10, ' Epoch: {} '.format(epoch), '-'*10)
#     scheduler.step()
    decoder_attempt_2.train() # set training mode
    
    train_dataset_size = 0
    correct_train_preds = 0
    running_train_loss = 0.0
    
    # training
    for inputs, labels, ids in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        # forward prop
        outputs = decoder_attempt_2(inputs)
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)
        
        # backprop
        loss.backward()
        optimizer.step()
        
        # metrics
        running_train_loss += loss.item() * inputs.size(0)
        correct_train_preds += torch.sum(preds == labels.data).item()
        train_dataset_size += len(labels)
        
        
    epoch_train_loss = running_train_loss / train_dataset_size
    epoch_train_accuracy = (correct_train_preds*1.0) / train_dataset_size
    
    print('Training Loss: {}, Accuracy: {}'.format(epoch_train_loss, epoch_train_accuracy))
    
    
    # validation
    
    decoder_attempt_2.eval() # set eval mode
    
    valid_dataset_size = 0
    correct_valid_preds = 0
    running_valid_loss = 0.0
    
    for inputs, labels, ids in valid_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outputs = decoder_attempt_2(inputs)
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)
        
        running_valid_loss += loss.item() * inputs.size(0)
        correct_valid_preds += torch.sum(preds == labels.data).item()
        valid_dataset_size += len(labels)
        
    epoch_valid_loss = running_valid_loss / valid_dataset_size
    epoch_valid_accuracy = (correct_valid_preds*1.0) / valid_dataset_size
    
    print('Validation Loss: {}, Accuracy: {}'.format(epoch_valid_loss, epoch_valid_accuracy))
        
        
time_elapsed = time.time() - start_time

print('Training completed in {}minutes {}secs'.format(time_elapsed // 60, time_elapsed % 60))
  


 ----------  Epoch: 0  ----------
Training Loss: 0.6934774831031455, Accuracy: 0.49534345896618537
Validation Loss: 0.693892868839821, Accuracy: 0.490622655663916

 ----------  Epoch: 1  ----------
Training Loss: 0.6914611503559169, Accuracy: 0.5238452403275204
Validation Loss: 0.6852969962318232, Accuracy: 0.5593898474618655

 ----------  Epoch: 2  ----------
Training Loss: 0.6539089888123782, Accuracy: 0.6129758109881868
Validation Loss: 0.6301940546151429, Accuracy: 0.6589147286821705

 ----------  Epoch: 3  ----------
Training Loss: 0.6230093944489237, Accuracy: 0.6578536158509907
Validation Loss: 0.6281935878770594, Accuracy: 0.646911727931983

 ----------  Epoch: 4  ----------
Training Loss: 0.591485464391846, Accuracy: 0.6866679167447965
Validation Loss: 0.5711549117613447, Accuracy: 0.7054263565891473

 ----------  Epoch: 5  ----------
Training Loss: 0.5571032935876058, Accuracy: 0.7181073817113569
Validation Loss: 0.5511538272247997, Accuracy: 0.7209302325581395

 ---------- 

Training Loss: 0.09743307498561062, Accuracy: 0.9604350271891994
Validation Loss: 0.2694332707327704, Accuracy: 0.8844711177794449
Training completed in 21.0minutes 12.225642681121826secs


## 3rd attempt 

In [8]:
epoch_from = 1
decoder_attempt_3 = ConvNet()
decoder_attempt_3.cuda()

optimizer = torch.optim.SGD(decoder_attempt_3.parameters(), 
                            lr=0.01,
                            momentum=0.9, 
                            weight_decay=5e-4)  # TODO: DIY implementation


# optimizer = optim.Adam(decoder.parameters(), lr=0.01)

# scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

criterion = nn.CrossEntropyLoss()

start_time = time.time()

best_loss = math.inf
best_accuracy = 0.0

n_epochs = 100

for epoch in range(n_epochs):
    print('\n', '-'*10, ' Epoch: {} '.format(epoch), '-'*10)
#     scheduler.step()
    decoder_attempt_3.train() # set training mode
    
    train_dataset_size = 0
    correct_train_preds = 0
    running_train_loss = 0.0
    
    # training
    for inputs, labels, ids in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        # forward prop
        outputs = decoder_attempt_3(inputs)
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)
        
        # backprop
        loss.backward()
        optimizer.step()
        
        # metrics
        running_train_loss += loss.item() * inputs.size(0)
        correct_train_preds += torch.sum(preds == labels.data).item()
        train_dataset_size += len(labels)
        
        
    epoch_train_loss = running_train_loss / train_dataset_size
    epoch_train_accuracy = (correct_train_preds*1.0) / train_dataset_size
    
    print('Training Loss: {}, Accuracy: {}'.format(epoch_train_loss, epoch_train_accuracy))
    
    
    # validation
    
    decoder_attempt_3.eval() # set eval mode
    
    valid_dataset_size = 0
    correct_valid_preds = 0
    running_valid_loss = 0.0
    
    for inputs, labels, ids in valid_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outputs = decoder_attempt_3(inputs)
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)
        
        running_valid_loss += loss.item() * inputs.size(0)
        correct_valid_preds += torch.sum(preds == labels.data).item()
        valid_dataset_size += len(labels)

    epoch_valid_loss = running_valid_loss / valid_dataset_size
    epoch_valid_accuracy = (correct_valid_preds*1.0) / valid_dataset_size
    
    print('Validation Loss: {}, Accuracy: {}'.format(epoch_valid_loss, epoch_valid_accuracy))
        
        
time_elapsed = time.time() - start_time

print('Training completed in {}minutes {}secs'.format(time_elapsed // 60, time_elapsed % 60))


 ----------  Epoch: 0  ----------
Training Loss: 0.6935378930979844, Accuracy: 0.5007187949246827
Validation Loss: 0.6940448439309048, Accuracy: 0.4983745936484121

 ----------  Epoch: 1  ----------
Training Loss: 0.6934254142118772, Accuracy: 0.5034689668104256
Validation Loss: 0.6919998350248363, Accuracy: 0.5016254063515879

 ----------  Epoch: 2  ----------
Training Loss: 0.6910999782667048, Accuracy: 0.5161572598287393
Validation Loss: 0.6932700010918772, Accuracy: 0.4983745936484121

 ----------  Epoch: 3  ----------
Training Loss: 0.6935032305437905, Accuracy: 0.5029064316519782
Validation Loss: 0.6936783038934906, Accuracy: 0.5016254063515879

 ----------  Epoch: 4  ----------
Training Loss: 0.6823910699575289, Accuracy: 0.5452215763485218
Validation Loss: 0.6371076867323453, Accuracy: 0.6384096024006002

 ----------  Epoch: 5  ----------
Training Loss: 0.6355473335642123, Accuracy: 0.6447902993937121
Validation Loss: 0.6095011945693724, Accuracy: 0.6721680420105026

 --------

Validation Loss: 0.31451180611559376, Accuracy: 0.8879719929982496

 ----------  Epoch: 50  ----------
Training Loss: 0.11164885281927847, Accuracy: 0.9559972498281143
Validation Loss: 0.28991744387266066, Accuracy: 0.8924731182795699

 ----------  Epoch: 51  ----------
Training Loss: 0.10528427198262869, Accuracy: 0.9593099568723045
Validation Loss: 0.3017193765476603, Accuracy: 0.8837209302325582

 ----------  Epoch: 52  ----------
Training Loss: 0.11310673920300664, Accuracy: 0.9556847302956435
Validation Loss: 0.3282301284411932, Accuracy: 0.8744686171542886

 ----------  Epoch: 53  ----------
Training Loss: 0.1054898504507982, Accuracy: 0.9589974373398338
Validation Loss: 0.3096133410692155, Accuracy: 0.8957239309827457

 ----------  Epoch: 54  ----------
Training Loss: 0.09991392493154848, Accuracy: 0.9613100818801175
Validation Loss: 0.2846294977301864, Accuracy: 0.8994748687171793

 ----------  Epoch: 55  ----------
Training Loss: 0.09675430568730625, Accuracy: 0.96324770298143

Training Loss: 0.06238237089586393, Accuracy: 0.9763735233452091
Validation Loss: 0.3327739164661723, Accuracy: 0.8952238059514879
Training completed in 42.0minutes 4.715376138687134secs


In [None]:
def save_predictions(model, outfile_name):
    
    model.eval()
    
    ids_and_predictions = dict()
    
    for inputs, labels, ids in test_loader:
        ids_list = [x.split('/')[-1].split('.')[0] for x in ids]
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)
        
        preds_list = list(preds.cpu().numpy())
        
        count = 0

        for (idx, pred) in zip(ids_list, preds_list):
            idx = int(idx)
            ids_and_predictions[idx] = idx_to_class[pred]
        
    print(len(ids_and_predictions))
        
    with open(outfile_name + '.csv', 'w') as out_file:
        csv_writer = csv.writer(out_file)
        csv_writer.writerow(['id', 'label'])
        for idx in range(1, len(ids_and_predictions) + 1):
            csv_writer.writerow([idx, ids_and_predictions[idx]])

In [None]:
save_predictions(decoder, 'decoder')