In [23]:
import torchvision.transforms as tt
from torch.utils.data import Dataset
import torch
from PIL import Image
from image_augmentation import random_augmentation
import cv2

class GDataset(Dataset):
    def __init__(self, df, directory):

        self.df = df
        self.dir = directory
        self.transform = tt.Compose([
            tt.Resize((128, 128)),
            tt.ToTensor(),
            tt.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        
    def randBrightness(self, image):
        brightness = np.random.uniform(0.7, 1.3)
        jitter = brightness * np.array(image).astype(np.float32)
        jitter = np.clip(jitter, 0, 255).astype(np.uint8)
        jittered = Image.fromarray(jitter)
        return jittered
    
    def randCrop(self, image):
        code = np.random.randint(0, 5)
        scale = np.random.uniform(0.2, 0.4)
        width, height = image.size

        img_crop = np.array(image)

        if code == 0:
            img_crop = img_crop[int(height*scale):, :, :]

        elif code == 1:
            img_crop = img_crop[0:int(-height*scale), :, :]

        elif code == 2:
            img_crop = img_crop[:,int(width*scale):, :]

        elif code == 3:
            img_crop = img_crop[:, 0:int(-width*scale), :]

        # Upsample to original shape
        img_crop = cv2.resize(img_crop, (width, height))

        return Image.fromarray(img_crop)
    
    def __len__(self):
        return len(self.df) * 3

    
    def __getitem__(self, index):
        index, aug = index // 3, index % 3
        id_ = self.df.iloc[index]['id']
        img_name = "/".join([self.dir, id_[:3], id_+".jpg"])
        image = Image.open(img_name).convert('RGB')
        if aug == 1:
            image = self.randCrop(image)
        elif aug == 2:
            image = self.randBrightness(image)
        image = self.transform(image)
        return image, torch.tensor(int(self.df.iloc[index]['class_id'])).long()

    
class InferenceDataset(GDataset):
    def __init__(self, df, directory):
        super().__init__(df, directory)

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        id_ = self.df.iloc[index]['id']
        img_name = "/".join([self.dir, id_[:3], id_+".jpg"])
        image = Image.open(img_name).convert('RGB')
        return image, torch.tensor(int(self.df.iloc[index]['class_id'])).long()


In [24]:
import pandas as pd
from torch.utils.data import DataLoader
import torch
import numpy as np
import torchvision.transforms as tt

from vgg16 import VGG16
import sys
from torchvision import models


def train_model(index, dir_, model_file="", 
                epochs=12, 
                model_save=False, model_save_name="vgg16_iter"):
    """ Training the model
        @param index: index file of the train data
        @param dir_: directory name that stores the properly formatted data
        @param model_file: model state file for continuous training
        @param epochs: number of iterations to run
        @param model_save: whether to save as a model file
        @param model_save_name: name of the saving model file
    """
    
    df = pd.read_csv(index)
    df = df.loc[df.class_id < 100]
    dataset = GDataset(df, dir_)
    
    batch_size = 108
    dataloader = DataLoader(dataset, batch_size=batch_size)

    model = models.vgg16(pretrained=True)
    model.avgpool = torch.nn.Sequential(torch.nn.AdaptiveAvgPool2d(output_size=(3, 3)))
    model.classifier = torch.nn.Sequential(
                torch.nn.Linear(4608, 4096),
                torch.nn.ReLU(),
                torch.nn.Dropout(p=0.5),
                torch.nn.Linear(4096, 4096),
                torch.nn.ReLU(),
                torch.nn.Linear(4096, 100))
    
    for param in model.features[:34].parameters():
        param.requires_grad = False
    
    model.cuda()
    
    criterion = torch.nn.CrossEntropyLoss()
    softmax = torch.nn.Softmax(dim=1)
    optimizer = torch.optim.Adam(model.parameters())

        # train the model
    for e in range(epochs):

        train_loss, valid_loss = 0, 0
        accuracy, accuracy_v = 0, 0
        counter = 0
        for x, t in dataloader:
            counter += 1
            print(round(counter / len(dataloader) * 100, 2), "%  ", end="\r")
            x, t = x.cuda(), t.cuda()
            optimizer.zero_grad()
            z = model(x)
            loss = criterion(z, t)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

            y = softmax(z)
            top_p, top_class = y.topk(1, dim=1)
            accuracy += (top_class[:, 0] == t).sum().item()
            

        print(e, 
              train_loss / len(dataset), 
              accuracy / len(dataset))
    return model



def inference(index, dir_, model):
    """ Inferencing the model
        @param index: index file of the train data
        @param dir_: directory name that stores the properly formatted data
        @param model_file: model state file for continuous training
    """
    df = pd.read_csv(index)
    df = df.loc[df.class_id < 100]
    dataset = InferenceDataset(df, dir_)
    
    # model = VGG16()
    # model.load_state_dict(torch.load(model_file))
    model.cuda()
    model.eval()

    criterion = torch.nn.CrossEntropyLoss()
    softmax = torch.nn.Softmax(dim=1)
    transform = tt.Compose([
            tt.Resize((128, 128)),
            tt.ToTensor(),
            tt.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])

    loss_sum = 0
    accuracy = 0
    counter = 0
    for x, t_cpu in dataset:
        counter += 1
        x_cpu = np.array(x)
        x, t = transform(x).unsqueeze(0).cuda(), t_cpu.unsqueeze(0).cuda()
        z = model(x)
        y = softmax(z)
        top_p, top_class = y.topk(5, dim=1)
        accuracy += (top_class[:, 0] == t).sum().item()
            

    print("Top 1 Accuracy:", accuracy / len(dataset))

In [25]:
model = train_model("train_try.csv", "train")

0 0.028101516308267013 0.27268754552075747
1 0.02104456410821302 0.4229667394998786
2 0.017585092836515683 0.4998300558387958
3 0.015168939762018236 0.5599417334304443
4 0.0128095516796163 0.6181111920369021
5 0.011044324937738473 0.6674435542607429
6 0.009911194224472213 0.6920611798980335
7 0.00886769620328599 0.7270211216314639
8 0.007991505981678233 0.7543092983733916
9 0.0072619308847106465 0.7736829327506677
10 0.006584201231834002 0.7939791211459092
11 0.006237845715749434 0.8082544306870599


In [26]:
inference("valid_try.csv", "train", model)

Top 1 Accuracy: 0.5145631067961165
