# Multi-label classification using Kaggle Planet data

## Original script

Taken from https://www.kaggle.com/mratsim/starting-kit-for-pytorch-deep-learning and adapted to PyTorch 0.4

In [1]:
import pandas as pd
import numpy as np

import os
from PIL import Image

import torch
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms as tr
from torchvision.models import resnet34
from torch import nn
import torch.nn.functional as F
import torch.optim as optim

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

In [2]:
# Prefix for every training image file. It is concatenated directly with the
# image name when a sample is loaded, so the trailing slash is required.
IMG_PATH = 'planet/train-jpg/'
IMG_EXT = '.jpg'  # suffix appended to each image name
TRAIN_DATA = 'planet/train_v2.csv'  # CSV read into `df`; its image_name and tags columns are used
ORIG_IMG_SIZE = 256  # presumably the native image side in pixels; not referenced in the visible cells
# NOTE(review): looks like a typo for RESNET_IMG_SIZE; unused in the visible
# cells - confirm before renaming.
RESNET_ING_SIZE = 224
DEST_IMG_SIZE = 32  # size passed to tr.Resize when building the transforms

In [3]:
# Load the training CSV into a DataFrame.
df = pd.read_csv(TRAIN_DATA)

In [4]:
# Inputs are the image names; the whitespace-separated tag strings are turned
# into a float32 indicator matrix with one column per distinct tag.
mlb = MultiLabelBinarizer()
X = df['image_name']
tags = df['tags'].str.split()
Y = mlb.fit_transform(tags).astype(np.float32)

In [5]:
# Hold out 10% of the samples for validation. The fixed seed makes the split
# identical on every run, so validation metrics stay comparable between
# experiments instead of varying with a fresh random partition.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.1, random_state=42
)

In [6]:
class KaggleAmazonDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs loaded from image files.

    Args:
        X: pandas Series of image names (without extension); accessed
            positionally through ``.iloc``.
        Y: NumPy array of label rows, indexed by the same integer position
            as ``X``.
        img_path: prefix prepended to each image name (a directory path
            ending with a separator).
        img_ext: suffix appended to each image name.
        transform: optional callable applied to the RGB PIL image.
        mlb: optional label binarizer; kept on the instance for reference
            only, it is not used when loading samples.

    Raises:
        ValueError: if ``X`` and ``Y`` do not have the same length.
    """

    def __init__(self, X, Y, img_path, img_ext, transform=None, mlb=None):
        # Fail early: a mismatch would otherwise surface as an IndexError in
        # the middle of training, or silently ignore trailing labels.
        if len(X) != len(Y):
            raise ValueError(
                'X and Y must have the same length, got {} and {}'.format(
                    len(X), len(Y)))
        self.img_path = img_path
        self.img_ext = img_ext
        self.transform = transform
        self.mlb = mlb
        self.X = X
        self.Y = Y

    def __getitem__(self, index):
        """Return the transformed image and its label tensor at ``index``."""
        path = self.img_path + self.X.iloc[index] + self.img_ext
        # convert() reads the pixel data and returns a new image, so the
        # file can be closed as soon as the with-block exits.
        with Image.open(path) as raw:
            img = raw.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        label = torch.from_numpy(self.Y[index])
        return img, label

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

In [7]:
# Shared preprocessing: resize to DEST_IMG_SIZE, then convert to a tensor.
transformations = tr.Compose([
    tr.Resize(DEST_IMG_SIZE),
    tr.ToTensor(),
])
# Both splits read from the same image location and use the same preprocessing.
dset_train = KaggleAmazonDataset(
    X_train, Y_train, IMG_PATH, IMG_EXT, transform=transformations)
dset_test = KaggleAmazonDataset(
    X_test, Y_test, IMG_PATH, IMG_EXT, transform=transformations)

In [8]:
# Training batches of 256 samples, reshuffled on every pass over the dataset.
train_loader = DataLoader(
    dset_train,
    batch_size=256,
    shuffle=True,
    num_workers=1,  # a single worker subprocess loads the images
    pin_memory=True  # batches land in pinned host memory; train() then moves them to the GPU
)

In [9]:
class Net(nn.Module):
    """Small two-convolution CNN that outputs one probability per tag.

    Args:
        img_size: side length in pixels of the square input images. When
            omitted, the module-level ``DEST_IMG_SIZE`` is used.
        num_classes: number of output tags (17 by default).

    Raises:
        ValueError: if ``img_size`` is too small to pass through both
            convolution/pooling stages.
    """

    def __init__(self, img_size=None, num_classes=17):
        super(Net, self).__init__()
        if img_size is None:
            img_size = DEST_IMG_SIZE
        # Each stage is an unpadded 3x3 convolution (side - 2) followed by a
        # 2x2 max-pool (floor division by 2). Deriving the flattened size from
        # the input size keeps fc1 valid when the image size changes.
        side = ((img_size - 2) // 2 - 2) // 2
        if side < 1:
            raise ValueError(
                'img_size={} is too small for this network'.format(img_size))
        # Channel counts are architecture constants, independent of image size.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(64 * side * side, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a batch of images to per-tag probabilities in [0, 1]."""
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(x.size(0), -1)  # flatten everything except the batch axis
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)  # active only in train mode
        x = self.fc2(x)
        return torch.sigmoid(x)

# Instantiate the network and move its parameters to the GPU (requires CUDA).
model = Net().cuda()

In [10]:
# SGD with momentum over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.33)

In [11]:
def train(epoch):
    """Run one pass over ``train_loader``, updating ``model`` with ``optimizer``.

    ``epoch`` is only used to label the progress lines, which are printed on
    every 10th batch (batch 0 is skipped).
    """
    model.train()
    n_samples = len(train_loader.dataset)
    n_batches = len(train_loader)
    log_line = 'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'
    for step, batch in enumerate(train_loader):
        inputs, labels = batch
        inputs = inputs.cuda()
        labels = labels.cuda()

        # Standard optimisation step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = F.binary_cross_entropy(model(inputs), labels)
        loss.backward()
        optimizer.step()

        if step == 0 or step % 10 != 0:
            continue
        seen = step * len(inputs)
        percent = 100. * step / n_batches
        print(log_line.format(epoch, seen, n_samples, percent, loss.item()))

In [12]:
# First training pass; progress lines are labelled epoch 0.
train(0)



In [13]:
# Second training pass; progress lines are labelled epoch 1.
train(1)



## Validation

In [14]:
# Number of samples in the held-out split.
len(dset_test)

4048

In [15]:
# The batch size equals the dataset size, so this loader yields the entire
# held-out split as one batch, in its original order.
test_loader = DataLoader(
    dset_test,
    batch_size=len(dset_test),
    shuffle=False,
    num_workers=1
)

In [16]:
# Score the whole held-out split. Every batch from test_loader is consumed
# (rather than only the first one), so the evaluation stays complete whatever
# batch size the loader uses.
model.eval()
pred_batches, true_batches = [], []
with torch.no_grad():
    for x, y in test_loader:
        # Threshold the model outputs at 0.5 to get hard 0/1 tag predictions.
        pred_batches.append((model(x.cuda()).cpu() >= 0.5).long())
        true_batches.append(y)
y_true = torch.cat(true_batches)  # stays a tensor; later cells call .numpy() on it
y_pred = torch.cat(pred_batches).numpy()

In [17]:
# Micro-averaged precision: true/false positives are pooled over all tags.
precision_score(y_true.numpy(), y_pred, average='micro')

0.8136116600790514

In [18]:
# Micro-averaged recall: true positives/false negatives are pooled over all tags.
recall_score(y_true.numpy(), y_pred, average='micro')

0.5668674698795181

## TODO

1. Delete the given test set; it is useless
1. Data augmentation!
1. Resize to 64 instead of 32
1. Rehearse fast.ai "ResNet from scratch" - loading resnet does not work as advertised
1. Different loss functions for multilabel (what's the difference?)
1. Batching on validation: concatenate `y_pred` and `y_true` across batches and pass them to the sklearn metrics
1. tqdm progress bars instead of printouts