In [1]:
TRAIN_CSV_PATH = '../C1-P1_Train Dev_fixed/train.csv'
VALID_CSV_PATH = '../C1-P1_Train Dev_fixed/dev.csv'
TEST_CSV_PATH = '../AIMango_sample/label.csv'

ORIGINAL_TRAIN_DATA_PATH = '../C1-P1_Train Dev_fixed/C1-P1_Train/' 
ORIGINAL_VALID_DATA_PATH = '../C1-P1_Train Dev_fixed/C1-P1_Dev/' 
ORIGINAL_TEST_DATA_PATH = '../AIMango_sample/sample_image/' 

TRAIN_DATA_PATH = '../data/train'
VALID_DATA_PATH = '../data/valid'
TEST_DATA_PATH = '../data/test'

In [2]:
import csv
import os
new_data = []
with open(TEST_CSV_PATH) as f:
    reader = csv.reader(f, delimiter=',')
    for row in reader:
        new_label = row[1][len(row[1])-1]
        new_data.append([row[0],new_label])


folder, filename = os.path.split(TEST_CSV_PATH)
NEW_TEST_CSV_PATH = os.path.join(folder, 'label_new.csv')
                                 
with open(NEW_TEST_CSV_PATH, 'w') as f:

    writer = csv.writer(f)
    
    for row in new_data:
        writer.writerow(row)

In [3]:
import csv
import os
from shutil import copyfile


# make file structure for training dataset
#
with open(TRAIN_CSV_PATH) as csv_file:

    csv_reader = csv.reader(csv_file, delimiter=',')

    line_count = 0
    for row in csv_reader:

        if line_count == 0:
            line_count += 1
            continue  #header

        src_path = os.path.join(ORIGINAL_TRAIN_DATA_PATH, row[0])
        dest_path = os.path.join(TRAIN_DATA_PATH, row[1], row[0])
        if not os.path.isfile(dest_path):
            copyfile(src_path, dest_path)
        
        line_count += 1
        
# make file structure for validation dataset
#
with open(VALID_CSV_PATH) as csv_file:

    csv_reader = csv.reader(csv_file, delimiter=',')

    line_count = 0
    for row in csv_reader:

        if line_count == 0:
            line_count += 1
            continue  #header

        src_path = os.path.join(ORIGINAL_VALID_DATA_PATH, row[0])
        dest_path = os.path.join(VALID_DATA_PATH, row[1], row[0])
        if not os.path.isfile(dest_path):
            copyfile(src_path, dest_path)
        
        line_count += 1

# make file structure for validation dataset
#
with open(NEW_TEST_CSV_PATH) as csv_file:

    csv_reader = csv.reader(csv_file, delimiter=',')

    line_count = 0
    for row in csv_reader:

        if line_count == 0:
            line_count += 1
            continue  #header

        src_path = os.path.join(ORIGINAL_TEST_DATA_PATH, row[0])
        dest_path = os.path.join(TEST_DATA_PATH, row[1], row[0])
        if not os.path.isfile(dest_path):
            copyfile(src_path, dest_path)
        
        line_count += 1

In [4]:
import torch

from torchvision import datasets

import torchvision.transforms as transforms

from PIL import ImageFile

ImageFile.LOAD_TRUNCATED_IMAGES = True

# how many data per batch to load
batch_size = 32
start_epoch = 0

transform = transforms.Compose([
                                transforms.Resize(224),
                                transforms.CenterCrop(224),
                                transforms.RandomHorizontalFlip(p=0.5),
                                transforms.RandomRotation(degrees=(-15, 15)),
                                transforms.ToTensor(),
                                transforms.Normalize(
                                    mean=(0.485, 0.456, 0.406),
                                    std =(0.229, 0.224, 0.225))
                               ])
loaders_transfer = {}
data_transfer = {}

data_transfer['train'] = datasets.ImageFolder(TRAIN_DATA_PATH, transform=transform)
loaders_transfer['train'] = torch.utils.data.DataLoader(data_transfer['train'],
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=1)

data_transfer['valid'] = datasets.ImageFolder(VALID_DATA_PATH, transform=transform)
loaders_transfer['valid'] = torch.utils.data.DataLoader(data_transfer['valid'],
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=1)
data_transfer['test'] = datasets.ImageFolder(TEST_DATA_PATH, transform=transform)
loaders_transfer['test'] = torch.utils.data.DataLoader(data_transfer['test'],
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=1)

In [5]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [6]:
import torchvision.models as models
import torch.nn as nn

model_transfer = models.resnet50(pretrained=True).to(device)
    
for param in model_transfer.parameters():
    param.requires_grad = False   
    
model_transfer.fc = nn.Sequential(
               nn.Linear(2048, 128),
               nn.ReLU(inplace=True),
               nn.Linear(128, 3)).to(device)

In [7]:
# import torchvision.models as models
# import torch.nn as nn

# # check if CUDA is available
# use_cuda = torch.cuda.is_available()

# ## TODO: Specify model architecture 
# model_transfer = models.vgg16(pretrained=True)

# for param in model_transfer.parameters():
#     param.requires_grad = False

# from collections import OrderedDict
# classifier = nn.Sequential(OrderedDict([
#                           ('fc1', nn.Linear(25088, 1024)),
#                           ('relu', nn.ReLU()),
#                           ('dropout', nn.Dropout(0.25)),
#                           ('fc2', nn.Linear(1024, 3)),
#                           ('logsoftmax', nn.LogSoftmax(dim=1))
#                           ]))
    
# model_transfer.classifier = classifier



# if use_cuda:
#     model_transfer = model_transfer.cuda()

In [8]:
# import torch.optim as optim

# criterion_transfer = nn.NLLLoss()
# optimizer_transfer = optim.SGD(model_transfer.classifier.parameters(), lr = 0.001)

In [9]:
import torch.optim as optim

criterion_transfer = nn.CrossEntropyLoss()
optimizer_transfer = optim.Adam(model_transfer.fc.parameters())

In [10]:
# train the model

import numpy as np
import time

def train(n_epochs, loaders, model, optimizer, criterion, save_path):
    """returns trained model"""
    # initialize tracker for minimum validation loss
    valid_loss_min = np.Inf 
    
    for epoch in range(1, n_epochs+1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        
        start = time.time()
        
        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders_transfer['train']):
            data, target = data.to(device), target.to(device)
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # update accumulated training loss
#             train_loss += loss.item()*data.size(0)
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
            
        ######################    
        # validate the model #
        ######################
        model.eval()
        for batch_idx, (data, target) in enumerate(loaders_transfer['valid']):

            data, target = data.to(device), target.to(device)
            
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # update accumulated validation loss 
#             valid_loss += loss.item()*data.size(0)
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))

#         train_loss = train_loss/len(loaders_transfer['train'].dataset)
#         valid_loss = valid_loss/len(loaders_transfer['valid'].dataset)
        
        # print training/validation statistics 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f} \t time: {:.1f}'.format(
            epoch, 
            train_loss,
            valid_loss,
            time.time() - start
            ))
        
        ## TODO: save the model if validation loss has decreased
        if valid_loss < valid_loss_min:
            print('Validation loss decreased from {:.6f} to {:.6f}. Model was saved'.format(
                valid_loss_min,
                valid_loss
            ))
            
            torch.save(model.state_dict(), 'model_transfer.pt')
            valid_loss_min = valid_loss
    
    # return trained model
    return model

model_transfer = train(20, loaders_transfer, model_transfer, optimizer_transfer, criterion_transfer, 'model_transfer.pt')

# load the model that got the best validation accuracy (uncomment the line below)
model_transfer.load_state_dict(torch.load('model_transfer.pt'))

Epoch: 1 	Training Loss: 0.840045 	Validation Loss: 0.708563 	 time: 169.3
Validation loss decreased from inf to 0.708563. Model was saved
Epoch: 2 	Training Loss: 0.687470 	Validation Loss: 0.663397 	 time: 162.2
Validation loss decreased from 0.708563 to 0.663397. Model was saved
Epoch: 3 	Training Loss: 0.646825 	Validation Loss: 0.618083 	 time: 161.2
Validation loss decreased from 0.663397 to 0.618083. Model was saved
Epoch: 4 	Training Loss: 0.672033 	Validation Loss: 0.669437 	 time: 161.2
Epoch: 5 	Training Loss: 0.647924 	Validation Loss: 0.612102 	 time: 161.5
Validation loss decreased from 0.618083 to 0.612102. Model was saved
Epoch: 6 	Training Loss: 0.651291 	Validation Loss: 0.773446 	 time: 162.1
Epoch: 7 	Training Loss: 0.659110 	Validation Loss: 0.676000 	 time: 162.3
Epoch: 8 	Training Loss: 0.634901 	Validation Loss: 0.625091 	 time: 161.2
Epoch: 9 	Training Loss: 0.633858 	Validation Loss: 0.598476 	 time: 160.9
Validation loss decreased from 0.612102 to 0.598476. M

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [11]:
# # train the model

# import numpy as np
# import time

# def train(n_epochs, loaders, model, optimizer, criterion, save_path):

#     valid_loss_min = np.Inf 

#     for epoch in range(n_epochs):
#         print('Epoch {}/{}'.format(epoch+1, n_epochs))
#         print('-' * 10)
#         start = time.time()

#         for phase in ['train', 'valid']:
#             if phase == 'train':
#                 model.train()
#             else:
#                 model.eval()

#             running_loss = 0.0
#             running_corrects = 0

#             for inputs, labels in loaders[phase]:
#                 inputs = inputs.to(device)
#                 labels = labels.to(device)

#                 outputs = model(inputs)
#                 loss = criterion(outputs, labels)

#                 if phase == 'train':
#                     optimizer.zero_grad()
#                     loss.backward()
#                     optimizer.step()

#                 _, preds = torch.max(outputs, 1)
#                 running_loss += loss.item() * inputs.size(0)
#                 running_corrects += torch.sum(preds == labels.data)

#             epoch_loss = running_loss / len(data_transfer[phase])
#             epoch_acc = running_corrects.double() / len(data_transfer[phase])

#             print('{} loss: {:.4f}, acc: {:.4f}, time: {:.1f}'.format(phase, epoch_loss, epoch_acc, time.time() - start))
    

#     return model

# model_transfer = train(20, loaders_transfer, model_transfer, optimizer_transfer, criterion_transfer, 'model_transfer.pt')

# # load the model that got the best validation accuracy (uncomment the line below)
# model_transfer.load_state_dict(torch.load('model_transfer.pt'))

In [12]:
# load the model that got the best validation accuracy (uncomment the line below)
model_transfer.load_state_dict(torch.load('model_transfer.pt'))

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [13]:
# def test(loaders, model, criterion, use_cuda):

#     # monitor test loss and accuracy
#     test_loss = 0.
#     correct = 0.
#     total = 0.

#     model.eval()
#     for batch_idx, (data, target) in enumerate(loaders['test']):
#         # move to GPU
#         if use_cuda:
#             data, target = data.cuda(), target.cuda()
#         # forward pass: compute predicted outputs by passing inputs to the model
#         output = model(data)
#         # calculate the loss
#         loss = criterion(output, target)
#         # update average test loss 
#         test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
#         # convert output probabilities to predicted class
#         pred = output.data.max(1, keepdim=True)[1]
#         # compare predictions to true label
#         correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
#         total += data.size(0)
            
#     print('Test Loss: {:.6f}\n'.format(test_loss))

#     print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
#         100. * correct / total, correct, total))

# # call test function    
# test(loaders_transfer, model_transfer, criterion_transfer, use_cuda)

In [15]:
from PIL import Image
from torch.autograd import Variable

# list of class names by index, i.e. a name can be accessed like class_names[0]
class_names = data_transfer['train'].classes

def predict_class_transfer(img_path):
    # load the image and return the predicted breed
    img = Image.open(img_path)

    transform = transforms.Compose([
                                transforms.Resize(224),
                                transforms.CenterCrop(224),
                                transforms.ToTensor()
                               ]) 
    img = transform(img)
    img = img.unsqueeze(0) 

    img = Variable(img)

    img = img.to(device)
        
    prediction = model_transfer(img)  # Returns a Tensor of shape (batch, num class labels)
#     print(prediction)
#     print(class_names)
#     prediction = prediction.data.max(1, keepdim=True)[1]
    prediction = prediction.data.cpu().numpy().argmax()  # Our prediction will be the index of the class label with the largest value.
    prediction = class_names[prediction]
    return prediction 


predict_class_transfer('../data/competition/02186.jpg')

'A'

In [16]:
#Get all test files

from glob import glob
import numpy as np

test_results = []

mango_files = np.array(glob("../data/competition/*"))

for idx, file in enumerate(mango_files):
    _ , filename = os.path.split(file)
    className = predict_class_transfer(file)
    test_results.append([filename, className])
    
test_results[:3]
    

[['04108.jpg', 'A'], ['03703.jpg', 'A'], ['06214.jpg', 'A']]

In [17]:
import csv

with open('results.csv', 'w') as f:

    writer = csv.writer(f)
    
    for row in test_results:
        writer.writerow(row)

In [19]:
def test(loaders, model, criterion):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    model.eval()
    for batch_idx, (data, target) in enumerate(loaders['test']):
        data, target = data.to(device), target.to(device)
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        # convert output probabilities to predicted class
        pred = output.data.max(1, keepdim=True)[1]
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
            
    print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        100. * correct / total, correct, total))

# call test function    
test(loaders_transfer, model_transfer, criterion_transfer)

Test Loss: 0.746572


Test Accuracy: 66% (62/93)
