# Description
This notebook was used to run predictions using a trained PyTorch model.
The final output was a .csv file which was submitted to Kaggle.

In [1]:
# Mount Google Drive at /content/drive (Colab only) so that the files
# referenced through paths under that mount point can be read and written.
from google.colab import drive
drive.mount('/content/drive')

In [2]:
!pip install pyprind

In [0]:
from PIL import Image
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
from torch import nn
from torch import optim
import torch

import datetime

import zipfile
import datetime
from shutil import copyfile

import time
from torchsummary import summary
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import torch.nn.functional as F

import pyprind

In [0]:
# Root folder of the project on the mounted Google Drive.
groot = '/content/drive/My Drive/AML'

# Serialized model used for the predictions.
model_path = os.path.join(groot, "hrvoje", "pt", "food_challenge_model_5.pt")

# Archive names: training split (used for the class mapping) and Kaggle test images.
data_zip = "custom_split_gauss.zip"
test_zip = 'test_set.zip'

# Data copy procedure

In [0]:
# Copy both archives from Drive to the local runtime and unpack them.
# makedirs(..., exist_ok=True) keeps the cell re-runnable: os.mkdir would
# raise FileExistsError on a second execution.

# Kaggle test images -> ./test_set
os.makedirs("test_set", exist_ok=True)
copyfile(os.path.join(groot, "kaggle_test", test_zip), test_zip)
with zipfile.ZipFile(test_zip, "r") as zip_ref:
    zip_ref.extractall("test_set")

# Training split -> ./data
os.makedirs("data", exist_ok=True)
copyfile(os.path.join(groot, data_zip), data_zip)
with zipfile.ZipFile(data_zip, "r") as zip_ref:
    zip_ref.extractall("data")

# Applying Transforms to the Data

In [0]:
# Inference-time preprocessing: resize to 256, take the central 224x224 crop,
# convert to a tensor and normalize each channel with the statistics below.
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]

_test_steps = [
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
]

image_transforms = {'test': transforms.Compose(_test_steps)}

In [0]:
# Recover the class-index mapping from the training folder.

dataset = 'food_challenge'

# Folder of the training split; ImageFolder is pointed at it only to obtain
# its class_to_idx mapping.
train_directory = './data/custom_split/train'

data = dict(train=datasets.ImageFolder(root=train_directory))

# Invert class name -> index into index -> class name so that a predicted
# index can be translated back to the label expected in the submission.
idx_to_class = dict(
    (class_index, class_name)
    for class_name, class_index in data['train'].class_to_idx.items()
)
print(idx_to_class)

{0: '1', 1: '10', 2: '11', 3: '12', 4: '13', 5: '14', 6: '15', 7: '16', 8: '17', 9: '18', 10: '19', 11: '2', 12: '20', 13: '21', 14: '22', 15: '23', 16: '24', 17: '25', 18: '26', 19: '27', 20: '28', 21: '29', 22: '3', 23: '30', 24: '31', 25: '32', 26: '33', 27: '34', 28: '35', 29: '36', 30: '37', 31: '38', 32: '39', 33: '4', 34: '40', 35: '41', 36: '42', 37: '43', 38: '44', 39: '45', 40: '46', 41: '47', 42: '48', 43: '49', 44: '5', 45: '50', 46: '51', 47: '52', 48: '53', 49: '54', 50: '55', 51: '56', 52: '57', 53: '58', 54: '59', 55: '6', 56: '60', 57: '61', 58: '62', 59: '63', 60: '64', 61: '65', 62: '66', 63: '67', 64: '68', 65: '69', 66: '7', 67: '70', 68: '71', 69: '72', 70: '73', 71: '74', 72: '75', 73: '76', 74: '77', 75: '78', 76: '79', 77: '8', 78: '80', 79: '9'}


In [0]:
def predict(model, test_image_name):
    """Predict the class label of a single image file.

    The image is preprocessed with ``image_transforms['test']``, passed
    through ``model`` in evaluation mode without gradient tracking, and the
    highest-scoring output index is translated through ``idx_to_class``.

    Args:
        model: Trained network, called on a tensor of shape (1, 3, 224, 224).
        test_image_name: Path of the image file to classify.

    Returns:
        int: The predicted class label (the class-folder name as an integer).
    """
    transform = image_transforms['test']

    # Close the file handle deterministically and force three channels:
    # grayscale / RGBA / palette images would otherwise break the 3-channel
    # normalization and the expected (1, 3, 224, 224) input shape.
    with Image.open(test_image_name) as test_image:
        test_image_tensor = transform(test_image.convert('RGB'))

    # Add the batch dimension and place the input on the same device as the
    # model, instead of assuming the model is on the GPU whenever one exists.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    test_image_tensor = test_image_tensor.unsqueeze(0).to(device)

    model.eval()
    with torch.no_grad():
        out = model(test_image_tensor)
        # The arg-max of the raw outputs equals the arg-max of exp(outputs),
        # so no exponentiation or top-k is needed for the top-1 class.
        top_index = out.argmax(dim=1).item()

    return int(idx_to_class[top_index])

## Loading Model

In [0]:
# Load the serialized model. map_location lets a checkpoint saved on a GPU be
# loaded on a CPU-only runtime as well.
# NOTE: torch.load unpickles the file, which can execute arbitrary code --
# only load checkpoints from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects fully pickled models; pass weights_only=False there if needed.
model = torch.load(
    model_path,
    map_location='cuda' if torch.cuda.is_available() else 'cpu',
)

# Test Predictions

In [0]:
# Sample submission file: provides the list of test image names to predict.
sample_csv_path = os.path.join(groot, 'kaggle_test', 'sample.csv')
test_df = pd.read_csv(sample_csv_path)

In [0]:
# Predict a label for every image listed in the sample file, in file order.
pbar = pyprind.ProgBar(len(test_df))
predictions = []
# Iterate over the single needed column instead of iterrows(), which builds
# a full Series object for every row.
for image_name in test_df['img_name']:
    prediction = predict(model, os.path.join('test_set', 'test_set', image_name))
    predictions.append(prediction)
    pbar.update()

0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:02:41


In [0]:
# Sanity check before building the submission: one prediction per test sample.
assert len(predictions) == 7653, (  # number of test samples
    "expected 7653 predictions, got {}".format(len(predictions))
)

In [0]:
# Attach the predicted labels as the 'label' column and preview the result.
test_df = test_df.assign(label=predictions)
test_df.head()

Unnamed: 0,img_name,label
0,test_1.jpg,63
1,test_2.jpg,60
2,test_3.jpg,9
3,test_4.jpg,65
4,test_5.jpg,2


In [0]:
# Write the submission file to Drive under a timestamped name.
results_dir = os.path.join(groot, 'kaggle_test', 'results')
# Create the target folder if needed so to_csv does not fail on a fresh Drive.
os.makedirs(results_dir, exist_ok=True)

# Numeric, sortable timestamp without spaces or colons: safe on every
# filesystem and independent of the locale (unlike %b).
ts = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
test_df.to_csv(os.path.join(results_dir, 'test_res_' + ts + '.csv'), index=False)