In [19]:
cd ~/Documents/dl/LeafClassification

/zhome/68/a/154632/Documents/dl/LeafClassification


In [59]:
%matplotlib inline
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import os
import time
import copy
import random
import pickle

from IPython.display import clear_output
from skimage.io import imread
from skimage.transform import resize

import data_utils

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim
from torch.nn import Linear, GRU, Conv2d, Dropout, MaxPool2d, BatchNorm1d
from torch.nn.functional import relu, elu, relu6, sigmoid, tanh, softmax
from skimage import io
from torchvision.io import read_image
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision.models import resnet50, ResNet50_Weights
# from torchvision.nets.quantization import resnet50, ResNet50_QuantizedWeights
import torch.nn.functional as F
from torch.utils.data.sampler import SubsetRandomSampler


In [21]:
# Probe CUDA once, report the result, and pick the matching torch device.
use_cuda = torch.cuda.is_available()
if use_cuda:
    print("Running GPU.")
else:
    print("No GPU available.")
device = torch.device('cuda' if use_cuda else 'cpu')

Running GPU.


# Load dataloaders and net

In [22]:
# Number of target classes: width of the classifier output layer and of the
# one-hot placeholder that LeafDataset builds in test mode.
NUM_CLASSES = 99

In [23]:
class SelectItem(nn.Module):
    """Module that returns one element of its (indexable) input.

    Lets a layer that returns a tuple be used inside ``nn.Sequential`` by
    keeping only the element at ``item_index``.
    """

    def __init__(self, item_index):
        super().__init__()
        self.item_index = item_index
        self._name = 'selectitem'

    def forward(self, inputs):
        """Return ``inputs[self.item_index]``."""
        selected = inputs[self.item_index]
        return selected

In [24]:
class LeafDataset(Dataset):
    """Leaf images plus margin/shape/texture feature vectors read from a CSV file.

    Every item is the tuple
    ``(image, margin, shape, texture, label, label_dummy, specie, id_)``.
    In test mode the CSV is parsed without a species column, so the three
    label fields are zero/empty placeholders that only keep the tuple layout
    identical to training mode.
    """

    def __init__(self, csv_file, root_dir, transform=None, train=False, test=False):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            train (bool): Parse the CSV with the training layout
                (id, species, then the feature columns).
            test (bool): Parse the CSV with the test layout
                (id, then the feature columns).
        """
        self.leafs_df = pd.read_csv(csv_file)
        self.data = self.leafs_df.values
        self.root_dir = root_dir
        self.transform = transform
        self.train = train
        self.test = test

        self.parse_data()

    def parse_data(self):
        """Split the raw CSV values into ids, labels and the three feature groups.

        NOTE: if neither ``train`` nor ``test`` is set, only ``self.ids`` is
        created and ``__getitem__`` will fail on the missing attributes.
        """
        self.ids = np.array(self.data[:, 0], dtype=int)
        if self.train:
            # Integer labels follow first-appearance order (pd.factorize), while
            # the one-hot columns follow sorted species order (pd.get_dummies);
            # the two encodings are not guaranteed to agree.
            self.labels = np.array(pd.Categorical(pd.factorize(self.leafs_df.species)[0]))
            self.label_dummies = np.array(pd.get_dummies(self.leafs_df.species))
            self.species = np.array(self.data[:, 1], dtype=str)
            self.margins = np.array(self.data[:, 2:66], dtype=float)
            self.shapes = np.array(self.data[:, 66:130], dtype=float)
            self.textures = np.array(self.data[:, 130:], dtype=float)
        if self.test:
            n_rows = len(self.data)
            # Deterministic placeholders: np.empty would expose uninitialised
            # memory (garbage labels, possibly invalid unicode code points).
            self.labels = np.zeros(n_rows, dtype=int)
            self.label_dummies = np.zeros((n_rows, NUM_CLASSES), dtype=int)
            self.species = np.zeros(n_rows, dtype=str)
            # no species column here, so every feature slice starts one column earlier
            self.margins = np.array(self.data[:, 1:65], dtype=float)
            self.shapes = np.array(self.data[:, 65:129], dtype=float)
            self.textures = np.array(self.data[:, 129:], dtype=float)

    def __len__(self):
        """Number of rows in the CSV file."""
        return len(self.leafs_df)

    def __getitem__(self, idx):
        """Load image ``<id>.jpg`` for row ``idx`` and return it with the row's features."""
        # get the image
        img_name = str(self.ids[idx]) + '.jpg'
        img_path = os.path.join(self.root_dir, img_name)
        image = io.imread(img_path)

        # Always pad to a square first so that the resize below does not
        # distort the aspect ratio.
        image = data_utils.pad2square(image)  # Make the image square
        image = resize(image, output_shape=(128, 128), mode='reflect', anti_aliasing=True)  # resizes the image

        # augment the image if chosen to
        if self.transform:
            image = self.transform(image)

        # get the other data
        id_ = self.ids[idx]
        margin = self.margins[idx]
        shape = self.shapes[idx]
        texture = self.textures[idx]
        label = self.labels[idx]
        label_dummy = self.label_dummies[idx]
        specie = self.species[idx]

        return image, margin, shape, texture, label, label_dummy, specie, id_

In [25]:
# Test-set pipeline: images are only converted to tensors (no augmentation)
# and batches are served in file order (shuffle=False).
batch_size = 32
root_dir = 'images/'
test_csv = 'test.csv'
test_transform = transforms.Compose([transforms.ToTensor()])

testset = LeafDataset(
    test_csv,
    root_dir,
    transform=test_transform,
    train=False,
    test=True,
)
test_loader = DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

In [26]:
# Image dimensions; not referenced by the cells visible in this notebook.
height, width, channels = 128,128,1

# Keep track of features to output layer
# Output width of the replaced ResNet fully-connected head (image branch).
conv_feature_size = 768
# Input width of the margin+texture branch (the two vectors are concatenated).
vector_input_size = 128
# Output width of the margin+texture branch.
vector_feature_size = 128
rnn_input_size = 64 # must be the same as the x_shape channels
# Hidden size of the GRU used for the shape branch.
rnn_feature_size = 128
# Input width of the output layer; equals 768 + 128 + 128, the sum of the
# three branch output widths above.
features_cat_size = 1024

class Net(nn.Module):
    """Three-branch leaf classifier.

    * image branch: pretrained ResNet-50 whose final layer is replaced by a
      linear layer with ``conv_feature_size`` outputs;
    * vector branch: linear layer + dropout on the concatenated margin and
      texture vectors;
    * shape branch: 2-layer GRU on the shape vector.

    The three feature vectors are concatenated and mapped to ``NUM_CLASSES``
    logits by a final linear layer.
    """

    def __init__(self):
        super(Net, self).__init__()

        # prepare pretrained net - image
        resnet_weights = ResNet50_Weights.DEFAULT
        self.resnet = resnet50(weights=resnet_weights)
        self.resnet.eval()
        # Freeze the pretrained backbone. The tensor attribute is spelled
        # `requires_grad`; any other spelling just attaches an unused Python
        # attribute and leaves the weights trainable.
        for param in self.resnet.parameters():
            param.requires_grad = False
        # The head is replaced after freezing, so the new layer stays trainable.
        num_ftrs = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(num_ftrs, conv_feature_size)
        # preprocessing pipeline shipped with the pretrained weights
        self.preprocess = resnet_weights.transforms()

        ## margin, texture
        self.fc2 = nn.Sequential(
            nn.Linear(in_features=vector_input_size,
                    out_features=vector_feature_size,
                    bias=False),
            # nn.BatchNorm1d(128),
            nn.Dropout(0.5)
        )


        # shape
        self.recurrent = nn.Sequential(
            nn.GRU(input_size=rnn_input_size,     # The number of expected features in the input x
                    hidden_size=rnn_feature_size, # The number of features in the hidden state h
                    num_layers=2),                # Number of recurrent layers
            SelectItem(0)                         # keep the GRU output, drop the final hidden state
        )

        # classification
        self.l_out = nn.Sequential(
            nn.Linear(in_features=features_cat_size,
                        out_features=NUM_CLASSES,
                        bias=True)
        )


    def forward(self, X):
        """Run all three branches and classify.

        Args:
            X: tuple ``(X_img, x_margin, x_shape, x_texture)``.

        Returns:
            tuple ``(out, probs)``: the raw logits and their softmax over dim 1.
        """
        X_img, x_margin, x_shape, x_texture = X
        X_img = self.preprocess(X_img)
        features = []

        features_img = self.resnet(X_img)
        features.append(features_img)

        x = torch.cat((x_margin, x_texture), dim=1)  # if you want to use features as feature vectors
        x = self.fc2(x)
        features_vector = x
        features.append(features_vector)

        # NOTE(review): x_shape appears to arrive as a 2-D (batch, 64) tensor;
        # nn.GRU interprets a 2-D input as one unbatched sequence, so the rows
        # of a batch would be processed as consecutive time steps. Confirm
        # that this is intended before changing it (trained weights depend on it).
        features_rnn = self.recurrent(x_shape)
        features.append(features_rnn)

        features_final = torch.cat(features, dim=1)

        out = self.l_out(features_final)
        return out, F.softmax(out, dim=1)

# Build the model in float32, move it to the GPU when one is available,
# and print its layer layout.
net = Net().float()
if use_cuda:
    net = net.to(device)
print(net)

Net(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): 

In [17]:
# Restore the trained weights into `net`. `map_location` remaps tensors that
# were saved from a GPU so the checkpoint also loads on a CPU-only machine.
# NOTE(review): this cell has to run AFTER the cell that creates `net = Net()`;
# re-creating the network afterwards discards the loaded weights and leaves a
# randomly initialised classifier head.
net.load_state_dict(torch.load("/zhome/68/a/154632/Documents/dl/LeafClassification/models/my_model.pth", map_location=device))

<All keys matched successfully>

# Submission to Kaggle

First we make predictions on the test set, then we write them to the submission file, and finally we upload that file to Kaggle to get our score. You can upload up to 5 submissions per day.

In [76]:
# show intermediate results
# Compute the val accuracy
# Run the trained network over the whole test set and collect, for every
# sample id, the predicted class probabilities.
ids_test, preds_test = [], []
net.eval()  # testing mode
with torch.no_grad():  # inference only: do not build the autograd graph
    for batch in test_loader:
        # extract subfields (the label placeholders are not needed here)
        image, margins, shapes, textures, _, _, _, ids = batch

        # Repeat the channel axis three times; presumably the images are
        # single-channel and the ResNet branch expects 3 channels.
        image = torch.repeat_interleave(image, 3, dim=1)

        # convert to float and move to the selected device
        image = image.to(device).float()
        margins = margins.to(device).float()
        shapes = shapes.to(device).float()
        textures = textures.to(device).float()

        inputs = image, margins, shapes, textures

        # second output is the softmax over classes
        _, y_out = net(inputs)
        preds_test.append(y_out.cpu().numpy())
        ids_test.extend(ids.cpu().numpy())

preds_test = np.concatenate(preds_test, axis=0)

# one prediction row per test id
assert len(ids_test) == len(preds_test)

## Make submission file

In [77]:
# Load the pickled data bundle; its `le.classes_` attribute supplies the
# column names of the submission table.
# NOTE: unpickling executes arbitrary code, so only load files you trust.
with open('pickles/data.pickle', 'rb') as pickle_file:
    data = pickle.load(pickle_file)

In [78]:
# Assemble the submission table: an `id` column followed by one probability
# column per class, then write it to disk as CSV.
# NOTE(review): the columns are named after `data.le.classes_` (presumably a
# fitted label encoder); confirm that this order matches the class-index order
# the network was trained with.
ids_test_df = pd.DataFrame(ids_test, columns=["id"])
preds_df = pd.DataFrame(preds_test, columns=data.le.classes_)
submission = pd.concat([ids_test_df, preds_df], axis=1)
submission.to_csv(
    "/zhome/68/a/154632/Documents/dl/LeafClassification/submissions/submission.csv",
    index=False,
    encoding="utf-8",
)

# preview the first rows of the submission
submission.head(5)

Unnamed: 0,id,Acer_Capillipes,Acer_Circinatum,Acer_Mono,Acer_Opalus,Acer_Palmatum,Acer_Pictum,Acer_Platanoids,Acer_Rubrum,Acer_Rufinerve,...,Salix_Fragilis,Salix_Intergra,Sorbus_Aria,Tilia_Oliveri,Tilia_Platyphyllos,Tilia_Tomentosa,Ulmus_Bergmanniana,Viburnum_Tinus,Viburnum_x_Rhytidophylloides,Zelkova_Serrata
0,4,0.009461,0.010169,0.008479,0.010233,0.008833,0.010082,0.010518,0.009891,0.010216,...,0.009742,0.010285,0.009651,0.01192,0.00974,0.008265,0.009737,0.010058,0.011304,0.009004
1,7,0.010028,0.010686,0.008188,0.010322,0.008344,0.010944,0.009602,0.011802,0.009685,...,0.010285,0.010338,0.009693,0.010183,0.011543,0.008781,0.009415,0.01101,0.011018,0.009893
2,9,0.009843,0.009203,0.008562,0.009201,0.010173,0.010908,0.010285,0.010283,0.010202,...,0.011597,0.011808,0.010987,0.00964,0.011667,0.009608,0.008621,0.008901,0.010974,0.009978
3,12,0.010037,0.010876,0.009165,0.009888,0.009773,0.009631,0.009467,0.010347,0.010207,...,0.009857,0.011638,0.01032,0.00968,0.011227,0.008964,0.009006,0.00937,0.011292,0.010055
4,13,0.009072,0.009492,0.00981,0.010342,0.009651,0.009695,0.0099,0.00919,0.010184,...,0.01013,0.011286,0.009196,0.011311,0.009381,0.008373,0.010328,0.009733,0.011071,0.009233


## Upload submission

1. Go to [`https://www.kaggle.com/c/leaf-classification/submit`](https://www.kaggle.com/c/leaf-classification/submit)
2. Click or drop your submission file in the upload area (writing a description is good practice).
3. Submit and look at where you are on the leaderboard.

Success! 