In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pylab
import os

import torchvision
from torchvision import datasets, transforms
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Subset, DataLoader
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score
from IPython.display import clear_output
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

%matplotlib inline

In [4]:
DATA_FOLDER = './data/merged/'
IMAGE_FOLDER = os.path.join(DATA_FOLDER, 'img/')
DESCRIPTION_PATH = os.path.join(DATA_FOLDER, 'description.csv')
BATCH_SIZE = 20

description = pd.read_csv(DESCRIPTION_PATH)

In [5]:
class ImageDataset(Dataset):
    def __init__(self, data_folder, description_csv):
        
        self.data_folder = data_folder
        self.description = pd.read_csv(description_csv)

    def __len__(self):
        return self.description.shape[0]

    def __getitem__(self, index):
        if torch.is_tensor(index):
            index = index.to_list()

        images_to_load = list([self.description.iloc[index]['image_name']])
        classes = self.description.iloc[index]['class_id']

        images = []
        for image_relative_path in images_to_load:
            images.append(Image.open(os.path.join(self.data_folder, image_relative_path)))
        return images, classes

In [6]:
class ImageSubset(Dataset):
    def __init__(self, subset, transform):
        self.subset = subset
        self.transform = transform

    def __len__(self):
        return len(self.subset)

    def __getitem__(self, index):
        images, classes = self.subset[index]

        result = torch.tensor([])
        for image in images:
            result = torch.cat([result, self.transform(image)])

        return result, classes

In [7]:
train_indices, test_indices = train_test_split(range(len(description)), test_size=0.3)

In [8]:
train_transform = torchvision.transforms.Compose([
    transforms.Resize(400),
    transforms.CenterCrop(350),
    transforms.RandomHorizontalFlip(),
    transforms.RandomPerspective(distortion_scale=0.3, p=1),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

In [9]:
np.unique(description.values[:,1], return_counts=True)

(array([0, 1], dtype=object), array([1146, 2006], dtype=int64))

In [10]:
dataset = ImageDataset(IMAGE_FOLDER, DESCRIPTION_PATH)

train_dataset = ImageSubset(Subset(dataset, train_indices), train_transform)
test_dataset = ImageSubset(Subset(dataset, test_indices), train_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [11]:
false_count = np.unique(description.values[:, 1], return_counts=True)[1][0]
true_count = np.unique(description.values[:, 1], return_counts=True)[1][1]
total_count = false_count + true_count

weights = [false_count / total_count, true_count / total_count]

In [12]:
weights

[0.3635786802030457, 0.6364213197969543]

In [15]:
model = torchvision.models.resnet101(pretrained=True)
model.fc = nn.Linear(2048, 2)

layers_count = len(list(model.parameters()))
for i, parameter in enumerate(model.parameters()):
    if i < layers_count - 5:
        parameter.requires_grad = False
model = model.to(device)

criterion = nn.CrossEntropyLoss(weight=torch.tensor(weights).float())
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.5, nesterov=True)

In [18]:
def train_model(n_epoch, model, criterion, optimizer):
    iteration = 0
    loss_history = []
    for epoch in range(n_epoch):
        epoch_loss = []
        for X_batch, y_batch in train_loader:
            model.train()
            
            optimizer.zero_grad()
            logits = model(X_batch.to(device)).cpu()
            loss = criterion(logits, y_batch)
            epoch_loss.append(loss.item())
            
            loss.backward()
            optimizer.step()
            print("Iteration ", iteration + 1, ', loss: ', epoch_loss[-1], sep='')
            iteration += 1

        loss_history.append(np.mean(epoch_loss))
        print("Epoch ", epoch + 1, ', loss: ', loss_history[-1], sep='')

In [21]:
train_model(15, model, criterion, optimizer)

Iteration 1, loss: 0.6327765583992004
Iteration 2, loss: 0.512994647026062
Iteration 3, loss: 0.5218090415000916
Iteration 4, loss: 0.566226065158844
Iteration 5, loss: 0.408704549074173
Iteration 6, loss: 0.707396924495697
Iteration 7, loss: 0.6957191228866577
Iteration 8, loss: 0.6727884411811829
Iteration 9, loss: 0.6895254850387573
Iteration 10, loss: 0.6018190383911133
Iteration 11, loss: 0.5686454772949219
Iteration 12, loss: 0.5151491761207581
Iteration 13, loss: 0.5409913659095764
Iteration 14, loss: 0.5447427034378052
Iteration 15, loss: 0.6118852496147156
Iteration 16, loss: 0.4950386583805084
Iteration 17, loss: 0.3993721902370453
Iteration 18, loss: 0.5552801489830017
Iteration 19, loss: 0.46690985560417175
Iteration 20, loss: 0.4117988646030426
Iteration 21, loss: 0.6021331548690796
Iteration 22, loss: 0.5762264728546143
Iteration 23, loss: 0.4372714161872864
Iteration 24, loss: 0.5257930159568787
Iteration 25, loss: 0.4582720696926117
Iteration 26, loss: 0.504985451698303

In [23]:
torch.save(model.state_dict(), 'model_resnet101.pt')