# Hyperparameters

#### batch_size = 128
#### num_training_epochs = 400
#### lr = 0.01
#### valid_loss_stable_count = 10

#### Reduce the learning rate lr to one tenth whenever the number of consecutive epochs without a new lowest validation loss reaches the parameter valid_loss_stable_count.

#### Test Loss: 0.678148
#### Test Accuracy: 70% (140/200)



In [1]:
import torch
import torchvision.models as models
import torch.nn as nn
import torchvision.datasets
import torchvision.transforms as transforms
from PIL import ImageFile

import numpy as np

import torch.optim as optim
import time

In [2]:
# Source dataset directory: label CSV files and the original image folders.
_SOURCE_DATASET_DIR = '../C1-P1_Train Dev_fixed'

TRAIN_TEST_CSV_PATH = _SOURCE_DATASET_DIR + '/train.csv'
TRAIN_CSV_PATH = _SOURCE_DATASET_DIR + '/train_split.csv'
VALID_CSV_PATH = _SOURCE_DATASET_DIR + '/dev.csv'

ORIGINAL_TRAIN_TEST_DATA_PATH = _SOURCE_DATASET_DIR + '/C1-P1_Train/'
ORIGINAL_VALID_DATA_PATH = _SOURCE_DATASET_DIR + '/C1-P1_Dev/'

# Per-split image folders that the datasets are built from.
_SPLIT_DATA_DIR = '../data'

TRAIN_DATA_PATH = _SPLIT_DATA_DIR + '/train'
VALID_DATA_PATH = _SPLIT_DATA_DIR + '/valid'
TEST_DATA_PATH = _SPLIT_DATA_DIR + '/test'

# ToChange!!! -- file the model weights are written to / read from;
# presumably bumped for every run so earlier weights are not overwritten.
MODEL_WEIGHTS_FILE = 'model_weights_v3_2_run_1.pt'

# Edge length, in pixels, of the square images used by the notebook.
image_size = 224

In [3]:
# Hyperparameters

batch_size = 128  # samples per mini-batch for the training and validation loaders
lr = 0.01  # initial learning rate handed to the optimizer
valid_loss_stable_count = 10  # epochs without a new best validation loss before lr is decayed
lr_decay_factor = 0.1  # lr is multiplied by this factor at every decay
lr_lower_bound = 1e-4  # training returns early once the decayed lr would be <= this value
num_training_epochs = 400  # upper bound on the number of epochs passed to train()

num_worker = 6  # worker processes used by each DataLoader
sgd_momentum = 0.9  # momentum value intended for the SGD optimizer

In [4]:
# Ask PIL to load image files that are truncated instead of raising on them.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Per-channel normalisation shared by both pipelines, so the augmented and the
# plain pipeline can never drift apart.
_normalize = transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                  std=(0.229, 0.224, 0.225))

# Training pipeline: resize/crop to image_size, then random augmentation.
# image_size (defined with the other constants) replaces the previously
# hard-coded 224 so the input resolution is controlled from one place.
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=(-15, 15)),
    transforms.RandomAffine(0, shear=10, scale=(0.8, 1.2)),
    transforms.ToTensor(),
    _normalize,
])

# Evaluation pipeline: same resize/crop/normalisation, no augmentation.
transform_plain = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    _normalize,
])

loaders_transfer = {}
data_transfer = {}

# Only the training split is augmented and shuffled.
data_transfer['train'] = torchvision.datasets.ImageFolder(
    TRAIN_DATA_PATH, transform=transform)
loaders_transfer['train'] = torch.utils.data.DataLoader(
    data_transfer['train'],
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_worker)

data_transfer['valid'] = torchvision.datasets.ImageFolder(
    VALID_DATA_PATH, transform=transform_plain)
loaders_transfer['valid'] = torch.utils.data.DataLoader(
    data_transfer['valid'],
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_worker)

# The test split is served one image per batch.
data_transfer['test'] = torchvision.datasets.ImageFolder(
    TEST_DATA_PATH, transform=transform_plain)
loaders_transfer['test'] = torch.utils.data.DataLoader(
    data_transfer['test'],
    batch_size=1,
    shuffle=False,
    num_workers=num_worker)

In [5]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# Bare expression so the notebook echoes the selected device.
device

device(type='cuda', index=0)

In [6]:
import torchvision.models as models
import torch.nn as nn

# Load ResNet-152 with its pretrained weights and place it on the target device.
model_transfer = models.resnet152(pretrained=True)
model_transfer = model_transfer.to(device)

# Freeze every pretrained weight; only the head attached below stays trainable.
for weight in model_transfer.parameters():
    weight.requires_grad = False

# Replace the stock classifier with a two-layer head: 2048 -> 128 -> 3 outputs.
head_layers = [
    nn.Linear(2048, 128),
    nn.ReLU(inplace=True),
    nn.Linear(128, 3),
]
model_transfer.fc = nn.Sequential(*head_layers).to(device)
## uncomment the following line only when continuing training from saved weights
# model_transfer.load_state_dict(torch.load(MODEL_WEIGHTS_FILE, map_location=device))

In [7]:
import torch.optim as optim

# Cross-entropy over the raw outputs of the classification head.
criterion_transfer = nn.CrossEntropyLoss()
# Only the parameters of the replaced head (model_transfer.fc) are optimised.
# The momentum comes from the sgd_momentum hyperparameter instead of a
# duplicated literal, so editing the hyperparameter actually takes effect.
optimizer_transfer = optim.SGD(model_transfer.fc.parameters(),
                               lr=lr,
                               momentum=sgd_momentum)

In [8]:
def get_lr_from_optim(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Args:
        optimizer: object exposing ``param_groups``, an iterable of dicts
            that each carry an ``'lr'`` entry.

    Returns:
        The ``'lr'`` value of the first parameter group, or ``None`` when
        the optimizer has no parameter groups.
    """
    # Read from the optimizer that was passed in, not from a module-level one.
    for param_group in optimizer.param_groups:
        return param_group['lr']
    return None

In [9]:
def set_lr_to_optim(optimizer, lr):
    """Set the learning rate of every parameter group of ``optimizer``.

    Args:
        optimizer: object exposing ``param_groups``, an iterable of dicts
            whose ``'lr'`` entry is overwritten in place.
        lr: the new learning rate to store in each group.
    """
    # Update the optimizer that was passed in, not a module-level one.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [10]:
import numpy as np
import time
import copy

# train the model
def train(n_epochs, loaders, model, optimizer, criterion, save_path):
    """Train ``model`` and save its weights whenever validation loss improves.

    Every epoch makes one optimisation pass over ``loaders['train']`` and one
    evaluation pass over ``loaders['valid']``, then prints the per-sample
    average losses, the validation accuracy, the epoch duration and the
    learning rate that was in effect.

    When the validation loss reaches a new minimum the model's state dict is
    written to ``save_path``. After ``valid_loss_stable_count`` epochs in a row
    without a new minimum, the learning rate is multiplied by
    ``lr_decay_factor``; if the decayed value would be ``<= lr_lower_bound``
    training stops instead.

    Uses the module-level names ``device``, ``valid_loss_stable_count``,
    ``lr_decay_factor``, ``lr_lower_bound``, ``get_lr_from_optim`` and
    ``set_lr_to_optim``.

    Args:
        n_epochs: maximum number of epochs to run.
        loaders: mapping with ``'train'`` and ``'valid'`` data loaders.
        model: the network to train.
        optimizer: optimizer that updates the trainable parameters.
        criterion: loss function called as ``criterion(output, target)``.
        save_path: file the best state dict is written to.

    Returns:
        ``model`` carrying the weights of the last epoch that ran. The
        weights with the lowest validation loss are the ones in ``save_path``.
    """
    # float('inf') instead of np.Inf: that alias no longer exists in NumPy 2.0.
    valid_loss_min = float('inf')
    # Consecutive epochs without a new minimum of the validation loss.
    valid_loss_stable_counter = 0

    # Use the loaders that were passed in rather than a module-level dict.
    train_loader = loaders['train']
    valid_loader = loaders['valid']
    train_size = len(train_loader.dataset)
    valid_size = len(valid_loader.dataset)

    for epoch in range(1, n_epochs + 1):
        # Sums of per-sample losses; turned into averages after both passes.
        train_loss = 0.0
        valid_loss = 0.0

        start = time.time()

        # Learning rate in effect for this epoch (reported in the log line).
        lr = get_lr_from_optim(optimizer)

        # ---- training pass ----
        model.train()
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            # criterion yields a batch mean; weight it by the batch size.
            train_loss += loss.item() * data.size(0)

        # ---- validation pass ----
        model.eval()
        valid_corrects = 0
        # No gradients are needed here; skipping them saves memory and time.
        with torch.no_grad():
            for data, target in valid_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                _, preds = torch.max(output, 1)

                loss = criterion(output, target)
                valid_loss += loss.item() * data.size(0)

                valid_corrects += torch.sum(preds == target).item()

        train_loss = train_loss / train_size
        valid_loss = valid_loss / valid_size
        epoch_acc = valid_corrects / valid_size

        # print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.4f} \tValidation Loss: {:.4f} \tValidation Accuracy: {:.3f} \ttime: {:.1f} \tlr={}'.format(
            epoch,
            train_loss,
            valid_loss,
            epoch_acc,
            time.time() - start,
            lr
            ))

        if valid_loss < valid_loss_min:
            print('Validation loss decreased from {:.6f} to {:.6f}. Model was saved'.format(
                valid_loss_min,
                valid_loss
            ))

            # torch.save serialises the current values, so no copy is needed.
            torch.save(model.state_dict(), save_path)

            valid_loss_min = valid_loss
            valid_loss_stable_counter = 0
        else:
            valid_loss_stable_counter += 1
            if valid_loss_stable_counter >= valid_loss_stable_count:
                valid_loss_stable_counter = 0
                lr = get_lr_from_optim(optimizer) * lr_decay_factor
                # Stop rather than continue with a learning rate at/below the floor.
                if lr <= lr_lower_bound:
                    return model
                set_lr_to_optim(optimizer, lr)

    # return trained model
    return model

# Run the full training schedule and report how long it took (wall clock).
train_start = time.time()
model_transfer = train(
    n_epochs=num_training_epochs,
    loaders=loaders_transfer,
    model=model_transfer,
    optimizer=optimizer_transfer,
    criterion=criterion_transfer,
    save_path=MODEL_WEIGHTS_FILE,
)
print("Total training time: {:.2f} seconds".format(time.time() - train_start))

Epoch: 1 	Training Loss: 0.9085 	Validation Loss: 0.7200 	Validation Accuracy: 0.672 	time: 72.2 	lr=0.01
Validation loss decreased from inf to 0.720048. Model was saved
Epoch: 2 	Training Loss: 0.7089 	Validation Loss: 0.8144 	Validation Accuracy: 0.629 	time: 74.3 	lr=0.01
Epoch: 3 	Training Loss: 0.6856 	Validation Loss: 0.6759 	Validation Accuracy: 0.686 	time: 74.1 	lr=0.01
Validation loss decreased from 0.720048 to 0.675897. Model was saved
Epoch: 4 	Training Loss: 0.6617 	Validation Loss: 0.6612 	Validation Accuracy: 0.706 	time: 74.8 	lr=0.01
Validation loss decreased from 0.675897 to 0.661168. Model was saved
Epoch: 5 	Training Loss: 0.6383 	Validation Loss: 0.6228 	Validation Accuracy: 0.734 	time: 73.9 	lr=0.01
Validation loss decreased from 0.661168 to 0.622833. Model was saved
Epoch: 6 	Training Loss: 0.6465 	Validation Loss: 0.6665 	Validation Accuracy: 0.691 	time: 73.1 	lr=0.01
Epoch: 7 	Training Loss: 0.6412 	Validation Loss: 0.6420 	Validation Accuracy: 0.720 	time: 7

Process Process-1014:
Process Process-1011:
Process Process-1012:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/huohsien/.anaconda3/envs/dl/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/huohsien/.anaconda3/envs/dl/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Process Process-1009:
  File "/home/huohsien/.anaconda3/envs/dl/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/huohsien/.anaconda3/envs/dl/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/huohsien/.anaconda3/envs/dl/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/home/huohsien/.anaconda3/envs/dl/lib/python3.6/multiprocessing/process.py", line 93, in run
    s

Traceback (most recent call last):
  File "/home/huohsien/.anaconda3/envs/dl/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3325, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-10-c21dfb8596a2>", line 95, in <module>
    MODEL_WEIGHTS_FILE)
  File "<ipython-input-10-c21dfb8596a2>", line 26, in train
    for batch_idx, (data, target) in enumerate(loaders_transfer['train']):
  File "/home/huohsien/.anaconda3/envs/dl/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 280, in __next__
    idx, batch = self._get_batch()
  File "/home/huohsien/.anaconda3/envs/dl/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 259, in _get_batch
    return self.data_queue.get()
  File "/home/huohsien/.anaconda3/envs/dl/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res = self._reader.recv_bytes()
  File "/home/huohsien/.anaconda3/envs/dl/lib/python3.6/multiprocessing/connection.py", line 216, in rec

KeyboardInterrupt: 

# The following module can be run separately if trained weights are available

In [None]:
# # ToChange!!!
# MODEL_WEIGHTS_FILE = 'model_weights_v3_1__run_1.pt'

In [None]:
# import torch
# import torchvision.models as models
# import torch.nn as nn
# import torchvision.datasets
# import torchvision.transforms as transforms
# from PIL import ImageFile

# ImageFile.LOAD_TRUNCATED_IMAGES = True
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# model_transfer = models.resnet152(pretrained=True).to(device)
    
# model_transfer.fc = nn.Sequential(
#                nn.Linear(2048, 128),
#                nn.ReLU(inplace=True),
#                nn.Linear(128, 3)).to(device)
# model_transfer.load_state_dict(torch.load(MODEL_WEIGHTS_FILE, map_location=device))

In [None]:
# TEST_DATA_PATH = '../data/test'
# transform_plain = transforms.Compose([
#                             transforms.Resize(224),
#                             transforms.CenterCrop(224),
#                             transforms.ToTensor(),
#                             transforms.Normalize(
#                                     mean=(0.485, 0.456, 0.406),
#                                     std =(0.229, 0.224, 0.225))
#                            ]) 

# if not 'data_transfer' in locals():
#     print("create empty data_transfer")
#     data_transfer = {}
# if not 'loaders_transfer' in locals():
#     print("create empty loaders_transfer")
#     loaders_transfer = {}
# data_transfer['test'] = torchvision.datasets.ImageFolder(TEST_DATA_PATH, transform=transform_plain)
# loaders_transfer['test'] = torch.utils.data.DataLoader(data_transfer['test'],
#                                           batch_size=1,
#                                           shuffle=False,
#                                           num_workers=4)

# import torch.optim as optim

# criterion_transfer = nn.CrossEntropyLoss()

In [11]:
import numpy as np
import time

def test(loaders, model, criterion):
    """Evaluate ``model`` on ``loaders['test']`` and print loss and accuracy.

    Prints the per-sample average loss over the test dataset and the
    accuracy as a percentage together with the correct/total counts.
    Uses the module-level ``device`` to place each batch.

    Args:
        loaders: mapping that holds the test data loader under ``'test'``.
        model: the network to evaluate; it is switched to eval mode.
        criterion: loss function called as ``criterion(output, target)``.
    """
    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0
    total = 0

    model.eval()

    # Pure inference: no gradients are needed.
    with torch.no_grad():
        for data, target in loaders['test']:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)
            # criterion yields a batch mean; weight it by the batch size.
            test_loss += loss.item() * data.size(0)

            # Index of the highest score per sample, same shape as target.
            # Comparing like shapes avoids the (N, 1) vs (N,) broadcast that
            # miscounts matches whenever a batch holds more than one sample.
            _, preds = torch.max(output, 1)

            # One code path for CPU and GPU; .item() keeps the counter a plain int.
            correct += torch.sum(preds == target).item()
            total += data.size(0)

    # Average over the loader that was passed in, not a module-level one.
    test_loss = test_loss / len(loaders['test'].dataset)
    print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        100. * correct / total, correct, total))

# Evaluate on the test split and report how long it took (wall clock).
test_start = time.time()
test(
    loaders=loaders_transfer,
    model=model_transfer,
    criterion=criterion_transfer,
)
print("Total testing time: {:.2f} seconds".format(time.time() - test_start))

Test Loss: 0.678148


Test Accuracy: 70% (140/200)
Total testing time: 4.64 seconds
