# Hyperparameters

#### batch_size = 128
#### start_epoch = 0
#### lr = 0.001
#### valid_loss_stable_count = 10

#### Make learning rate lr one tenth if the number of epochs in which validation loss doesn't decrease exceeds the paramter of valid_loss_stable_count.

### Result Accuracy on Sample Dataset is 75% , Test loss is 0.648536

In [1]:
TRAIN_CSV_PATH = '../C1-P1_Train Dev_fixed/train.csv'
VALID_CSV_PATH = '../C1-P1_Train Dev_fixed/dev.csv'
TEST_CSV_PATH = '../AIMango_sample/label.csv'

ORIGINAL_TRAIN_DATA_PATH = '../C1-P1_Train Dev_fixed/C1-P1_Train/' 
ORIGINAL_VALID_DATA_PATH = '../C1-P1_Train Dev_fixed/C1-P1_Dev/' 
ORIGINAL_TEST_DATA_PATH = '../AIMango_sample/sample_image/' 

TRAIN_DATA_PATH = '../data/train'
VALID_DATA_PATH = '../data/valid'
TEST_DATA_PATH = '../data/test'

In [2]:
import csv
import os
new_data = []
with open(TEST_CSV_PATH) as f:
    reader = csv.reader(f, delimiter=',')
    for row in reader:
        new_label = row[1][len(row[1])-1]
        new_data.append([row[0],new_label])


folder, filename = os.path.split(TEST_CSV_PATH)
NEW_TEST_CSV_PATH = os.path.join(folder, 'label_new.csv')
                                 
with open(NEW_TEST_CSV_PATH, 'w') as f:

    writer = csv.writer(f)
    
    for row in new_data:
        writer.writerow(row)

In [3]:
import csv
import os
from shutil import copyfile


# make file structure for training dataset
#
with open(TRAIN_CSV_PATH) as csv_file:

    csv_reader = csv.reader(csv_file, delimiter=',')

    line_count = 0
    for row in csv_reader:

        if line_count == 0:
            line_count += 1
            continue  #header

        src_path = os.path.join(ORIGINAL_TRAIN_DATA_PATH, row[0])
        dest_path = os.path.join(TRAIN_DATA_PATH, row[1], row[0])
        if not os.path.isfile(dest_path):
            copyfile(src_path, dest_path)
        
        line_count += 1
        
# make file structure for validation dataset
#
with open(VALID_CSV_PATH) as csv_file:

    csv_reader = csv.reader(csv_file, delimiter=',')

    line_count = 0
    for row in csv_reader:

        if line_count == 0:
            line_count += 1
            continue  #header

        src_path = os.path.join(ORIGINAL_VALID_DATA_PATH, row[0])
        dest_path = os.path.join(VALID_DATA_PATH, row[1], row[0])
        if not os.path.isfile(dest_path):
            copyfile(src_path, dest_path)
        
        line_count += 1

# make file structure for validation dataset
#
with open(NEW_TEST_CSV_PATH) as csv_file:

    csv_reader = csv.reader(csv_file, delimiter=',')

    line_count = 0
    for row in csv_reader:

        if line_count == 0:
            line_count += 1
            continue  #header

        src_path = os.path.join(ORIGINAL_TEST_DATA_PATH, row[0])
        dest_path = os.path.join(TEST_DATA_PATH, row[1], row[0])
        if not os.path.isfile(dest_path):
            copyfile(src_path, dest_path)
        
        line_count += 1

In [4]:
import torch

from torchvision import datasets

import torchvision.transforms as transforms

from PIL import ImageFile

ImageFile.LOAD_TRUNCATED_IMAGES = True

# Hyperparameters

batch_size = 128
start_epoch = 0
lr = 0.001
valid_loss_stable_count = 10

transform = transforms.Compose([
                                transforms.Resize(224),
                                transforms.CenterCrop(224),
                                transforms.RandomHorizontalFlip(p=0.5),
                                transforms.RandomRotation(degrees=(-15, 15)),
                                transforms.RandomAffine(0, shear=10, scale=(0.8,1.2)),
                                transforms.ToTensor(),
                                transforms.Normalize(
                                    mean=(0.485, 0.456, 0.406),
                                    std =(0.229, 0.224, 0.225))
                               ])
loaders_transfer = {}
data_transfer = {}

data_transfer['train'] = datasets.ImageFolder(TRAIN_DATA_PATH, transform=transform)
loaders_transfer['train'] = torch.utils.data.DataLoader(data_transfer['train'],
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=4)

data_transfer['valid'] = datasets.ImageFolder(VALID_DATA_PATH, transform=transform)
loaders_transfer['valid'] = torch.utils.data.DataLoader(data_transfer['valid'],
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=4)

In [5]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [6]:
import torchvision.models as models
import torch.nn as nn

model_transfer = models.resnet152(pretrained=True).to(device)
    
for param in model_transfer.parameters():
    param.requires_grad = False   
    
model_transfer.fc = nn.Sequential(
               nn.Linear(2048, 128),
               nn.ReLU(inplace=True),
               nn.Linear(128, 3)).to(device)

In [7]:
import torch.optim as optim

criterion_transfer = nn.CrossEntropyLoss()
optimizer_transfer = [optim.SGD(model_transfer.fc.parameters(), lr = lr, momentum = 0.9),
                      optim.SGD(model_transfer.fc.parameters(), lr = lr * 0.1, momentum = 0.9)]

In [None]:
import numpy as np
import time
import copy


# train the model
def train(n_epochs, loaders, model, optimizers, criterion, save_path):

    # initialize tracker for minimum validation loss
    valid_loss_min = np.Inf 
    # Valid Loss Stable counter
    valid_loss_stable_counter = 0
    #optimizer index
    optim_idx = 0
    
    for epoch in range(1, n_epochs+1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        
        optimizer = optimizers[optim_idx]
        
        start = time.time()
        
        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders_transfer['train']):
            data, target = data.to(device), target.to(device)
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # update accumulated training loss
            train_loss += loss.item()*data.size(0)
#             train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
            
        ######################    
        # validate the model #
        ######################
        model.eval()
        valid_corrects = 0
        for batch_idx, (data, target) in enumerate(loaders_transfer['valid']):

            data, target = data.to(device), target.to(device)
            
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            _, preds = torch.max(output, 1)
            
            # calculate the batch loss
            loss = criterion(output, target)
            # update accumulated validation loss 
            valid_loss += loss.item()*data.size(0)
#             valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))
    
            valid_corrects += torch.sum(preds == target.data)
        
        train_loss = train_loss/len(loaders_transfer['train'].dataset)
        valid_loss = valid_loss/len(loaders_transfer['valid'].dataset)
        
        epoch_acc = valid_corrects.double() / len(loaders_transfer['valid'].dataset)
        # print training/validation statistics 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f} \tValidation Accuracy: {:.4f} \ttime: {:.1f}'.format(
            epoch, 
            train_loss,
            valid_loss,
            epoch_acc,
            time.time() - start
            ))
        
        if valid_loss < valid_loss_min:
            print('Validation loss decreased from {:.6f} to {:.6f}. Model was saved'.format(
                valid_loss_min,
                valid_loss
            ))

            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(best_model_wts, 'model_transfer_v31.pt')


            valid_loss_min = valid_loss
            
            valid_loss_stable_counter = 0
        else:
            valid_loss_stable_counter += 1
            optim_idx = 1
    
    # return trained model
    return model

model_transfer = train(100, loaders_transfer, model_transfer, optimizer_transfer, criterion_transfer, 'model_transfer.pt')

Epoch: 1 	Training Loss: 1.064065 	Validation Loss: 1.020472 	Validation Accuracy: 0.5850 	time: 135.1
Validation loss decreased from inf to 1.020472. Model was saved
Epoch: 2 	Training Loss: 0.981229 	Validation Loss: 0.939269 	Validation Accuracy: 0.6375 	time: 133.4
Validation loss decreased from 1.020472 to 0.939269. Model was saved
Epoch: 3 	Training Loss: 0.896884 	Validation Loss: 0.876669 	Validation Accuracy: 0.6100 	time: 134.7
Validation loss decreased from 0.939269 to 0.876669. Model was saved
Epoch: 4 	Training Loss: 0.828545 	Validation Loss: 0.798988 	Validation Accuracy: 0.6700 	time: 133.3
Validation loss decreased from 0.876669 to 0.798988. Model was saved
Epoch: 5 	Training Loss: 0.774061 	Validation Loss: 0.765037 	Validation Accuracy: 0.6750 	time: 134.9
Validation loss decreased from 0.798988 to 0.765037. Model was saved
Epoch: 6 	Training Loss: 0.733329 	Validation Loss: 0.721000 	Validation Accuracy: 0.6837 	time: 130.7
Validation loss decreased from 0.765037 to

# Competition- Phase 1: generate the result csv file

In [None]:
from PIL import Image
from torch.autograd import Variable
import torchvision.transforms as transforms


# list of class names by index, i.e. a name can be accessed like class_names[0]
class_names = ['A','B','C']

def predict_class_transfer(img_path):
    # load the image and return the predicted breed
    img = Image.open(img_path)

    transform = transforms.Compose([
                                transforms.Resize(224),
                                transforms.CenterCrop(224),
                                transforms.ToTensor()
                               ]) 
    img = transform(img)
    img = img.unsqueeze(0) 

    img = Variable(img)

    img = img.to(device)
        
    prediction = model_transfer(img)  # Returns a Tensor of shape (batch, num class labels)
    prediction = prediction.data.cpu().numpy().argmax()  # Our prediction will be the index of the class label with the largest value.
    prediction = class_names[prediction]
    return prediction 


predict_class_transfer('../data/competition/02186.jpg')

In [None]:
#Get all test files

from glob import glob
import numpy as np

test_results = []

mango_files = np.array(glob("../data/competition/*"))

for idx, file in enumerate(mango_files):
    _ , filename = os.path.split(file)
    className = predict_class_transfer(file)
    test_results.append([filename, className])
    
# test_results[:3]
    

In [None]:
import csv

with open('results.csv', 'w') as f:

    writer = csv.writer(f)
    
    for row in test_results:
        writer.writerow(row)

# The following module can be run separately if trained weights are available

In [None]:
import torch
import torchvision.models as models
import torch.nn as nn
from torchvision import datasets
import torchvision.transforms as transforms
from PIL import ImageFile

ImageFile.LOAD_TRUNCATED_IMAGES = True
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_transfer = models.resnet152(pretrained=True).to(device)
    
model_transfer.fc = nn.Sequential(
               nn.Linear(2048, 128),
               nn.ReLU(inplace=True),
               nn.Linear(128, 3)).to(device)
model_transfer.load_state_dict(torch.load('model_transfer_v31.pt', map_location=device))

In [None]:
TEST_DATA_PATH = '../data/test'
transform = transforms.Compose([
                                transforms.Resize(224),
                                transforms.CenterCrop(224),
                                transforms.ToTensor()
                               ]) 

if not 'data_transfer' in locals():
    print("create empty data_transfer")
    data_transfer = {}
if not 'loaders_transfer' in locals():
    print("create empty loaders_transfer")
    loaders_transfer = {}
data_transfer['test'] = datasets.ImageFolder(TEST_DATA_PATH, transform=transform)
loaders_transfer['test'] = torch.utils.data.DataLoader(data_transfer['test'],
                                          batch_size=1,
                                          shuffle=True,
                                          num_workers=4)

import torch.optim as optim

criterion_transfer = nn.CrossEntropyLoss()

In [None]:
import numpy as np

def test(loaders, model, criterion):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    model.eval()
    for batch_idx, (data, target) in enumerate(loaders['test']):
        data, target = data.to(device), target.to(device)
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        # convert output probabilities to predicted class
        pred = output.data.max(1, keepdim=True)[1]
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
            
    print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        100. * correct / total, correct, total))

# call test function    
test(loaders_transfer, model_transfer, criterion_transfer)