## Libraries

In [None]:
import pickle
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler 
import torchvision
from torchvision import transforms
from torchvision.utils import make_grid
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from random import randint
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
import pandas as pd
import cv2

## Set Device

In [2]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to CPU.
if torch.cuda.is_available():
  device = torch.device("cuda:0")
else:
  device = torch.device("cpu")

cuda:0


## Loading and Preparing Dataset

In [3]:
# Load the three pickled inputs from the working directory, in this order:
# images_l.pkl, labels_l.pkl, images_test.pkl.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
train_df, label_df, test_df = (
  pd.read_pickle(path)
  for path in ("images_l.pkl", "labels_l.pkl", "images_test.pkl")
)

In [4]:
# Rows before `cut` are used for training, rows from `cut` onward for
# validation. `cut` is 90% of a nominal 30000 rows.
# NOTE(review): 30000 is presumably the number of labelled rows; confirm.
cut = int(30000 * 0.9)

image_train_base, image_val_base = train_df[:cut], train_df[cut:]
label_train_base, label_val_base = label_df[:cut], label_df[cut:]

In [5]:
# Stack every split on top of itself along axis 0, so each array ends up with
# twice as many rows (images and labels are doubled in the same order).
image_train = np.concatenate((image_train_base, image_train_base), axis=0)
label_train = np.concatenate((label_train_base, label_train_base), axis=0)

image_val = np.concatenate((image_val_base, image_val_base), axis=0)
label_val = np.concatenate((label_val_base, label_val_base), axis=0)

In [7]:
def denoise(src):
  """Blur `src` and zero out its low-intensity values.

  A Gaussian blur with a (3, 1) kernel is applied first; sigma is passed as 0,
  which lets OpenCV derive it from the kernel size. The blurred data is then
  thresholded with THRESH_TOZERO at 85: values above 85 are kept as they are,
  everything else becomes 0.

  Returns the thresholded array.
  """
  smoothed = cv2.GaussianBlur(src, (3, 1), 0)
  # cv2.threshold returns (threshold_used, image); only the image is needed.
  return cv2.threshold(smoothed, 85, maxval=255, type=cv2.THRESH_TOZERO)[1]

In [8]:
# Run the same denoising filter over the training, validation and test images.
image_train, image_val, test_df = (
  denoise(images) for images in (image_train, image_val, test_df)
)

In [9]:
class Transform_Dataset(Dataset):
  """Image dataset that applies a transform to each sample on access.

  The default transform converts the sample to a PIL image, resizes it to 224,
  converts it to a tensor and normalizes with mean 0.0 / std 1.0.
  """

  def __init__(self, X,
               transform = transforms.Compose([
                   transforms.ToPILImage(),
                   transforms.Resize(224),
                   transforms.ToTensor(),
                   transforms.Normalize(mean=(0.0,), std=(1.0,)),
               ])):
    # Store the data as uint8 with an extra length-1 axis inserted after the
    # first three axes.
    as_uint8 = X.astype(np.uint8)
    self.X = as_uint8[:, :, :, np.newaxis]
    self.transform = transform

  def __len__(self):
    """Number of samples (length of the first axis of the stored array)."""
    return len(self.X)

  def __getitem__(self, idx):
    """Return sample `idx` after the transform, moved to the global `device`."""
    sample = self.X[idx]
    transformed = self.transform(sample)
    return transformed.to(device)

In [10]:
# Training-time augmentation: a random rotation of up to 25 degrees either way.
# translate=(0, 0) and scale=(1.0, 1.0) leave position and size unchanged.
RandAffine = transforms.RandomAffine(degrees=25, translate=(0, 0), scale=(1.0, 1.0))

# The transform objects are deliberately NOT moved with `.to(device)`: they
# hold no parameters to move, and the call raises AttributeError on
# torchvision versions where transforms are not nn.Module subclasses.
train_transform = transforms.Compose([
  transforms.ToPILImage(),
  transforms.Resize(224),
  RandAffine,
  transforms.ToTensor(),
  transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

In [11]:

# Deterministic preprocessing for evaluation: same resize and normalization as
# the training pipeline (mean 0.5 / std 0.5) but without the random affine
# augmentation, so validation accuracy is repeatable and validation/test
# inputs are scaled the same way as the training inputs.
eval_transform = transforms.Compose([
  transforms.ToPILImage(),
  transforms.Resize(224),
  transforms.ToTensor(),
  transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

train_dataset = Transform_Dataset(image_train, train_transform)
# shuffle must stay False here: labels come from a separate loader and are
# paired with the images purely by batch order.
train_loader = DataLoader(dataset=train_dataset, batch_size=30, shuffle=False)

val_dataset = Transform_Dataset(image_val, eval_transform)
validation_loader = DataLoader(dataset=val_dataset, batch_size=30, shuffle=False)

# Same batch size and order as train_loader so batches line up one-to-one.
train_label_loader = DataLoader(dataset=label_train, batch_size=30, shuffle=False)

test_dataset = Transform_Dataset(test_df, eval_transform)
test_loader = DataLoader(test_dataset, batch_size=30, shuffle=False)

In [12]:
# Cross-entropy loss used by the training loop, placed on the active device.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

## Optimizer and Loss Functions

In [13]:

def predict(model, data_loader, split=10):
  """Convert raw model outputs into two-hot prediction vectors.

  Each output row is treated as two groups: entries ``[:split]`` and entries
  ``[split:]``. The result row has a 1 at the position of the largest value
  of each group and 0 everywhere else.

  Args:
    model: callable mapping a batch to a 2-D tensor of scores. If it is an
      ``nn.Module`` it is switched to eval mode for the duration of the call
      and its previous mode is restored afterwards.
    data_loader: iterable yielding input batches.
    split: number of leading entries that form the first group (default 10).

  Returns:
    Integer ``np.ndarray`` with one row per sample and the same width as the
    model output (an empty array when the loader yields nothing).
  """
  # Dropout and BatchNorm must run in inference mode, otherwise predictions
  # are random / batch-dependent. Remember the mode so training can resume.
  was_training = getattr(model, "training", False)
  if hasattr(model, "eval"):
    model.eval()

  total_output = []
  try:
    # No gradients are needed for inference; skipping them saves memory.
    with torch.no_grad():
      for data in data_loader:
        output = model(data).detach().cpu().numpy()

        for row_output in output:
          # argmax within each group, so a value tied with one in the other
          # group can never select an index from the wrong group.
          first_index = int(np.argmax(row_output[:split]))
          second_index = split + int(np.argmax(row_output[split:]))

          two_hot = np.zeros(row_output.shape, dtype=int)
          two_hot[first_index] = 1
          two_hot[second_index] = 1
          total_output.append(two_hot)
  finally:
    if was_training:
      model.train()

  return np.array(total_output)

In [14]:

def accuracy(images_val_loader, labels_val, model):
  """Return the fraction of samples whose prediction matches its label exactly.

  Predictions come from ``predict(model, images_val_loader)``; sample ``i`` is
  counted as correct only when every entry of its predicted vector equals the
  corresponding entry of ``labels_val[i]`` (after casting labels to int).

  Args:
    images_val_loader: iterable of image batches passed to ``predict``.
    labels_val: array of labels, indexed in the same order as the predictions.
    model: model passed to ``predict``.

  Returns:
    ``correct / total`` as a float, or 0.0 when there are no predictions.
  """
  output = predict(model, images_val_loader)
  total = len(output)
  if total == 0:
    # Nothing to score; avoid dividing by zero.
    return 0.0

  labels = labels_val.astype(int)
  correct = sum(
    1 for i, predicted in enumerate(output) if (predicted == labels[i]).all()
  )
  return correct / total

## VGG Network

In [15]:
# VGG-19 layout, one stage per line: integers are convolution output channel
# counts and 'M' marks a max-pooling layer.
vgg19_config = [
    64, 64, 'M',
    128, 128, 'M',
    256, 256, 256, 256, 'M',
    512, 512, 512, 512, 'M',
    512, 512, 512, 512, 'M',
]

In [16]:
def get_vgg_layers(config, batch_norm, in_channels=1, target_device=None):
    """Build the convolutional part of a VGG network from a layout list.

    Args:
        config: sequence whose items are either an int (output channels of a
            3x3, padding-1 convolution followed by ReLU) or the string 'M'
            (a max-pooling layer with kernel size 2).
        batch_norm: when true, a BatchNorm2d layer is inserted between each
            convolution and its ReLU.
        in_channels: number of channels of the network input (default 1).
        target_device: device to place the layers on; when None the
            module-level ``device`` is used.

    Returns:
        An ``nn.Sequential`` holding the layers, already moved to the device.

    Raises:
        AssertionError: if an item of ``config`` is neither 'M' nor an int.
    """
    if target_device is None:
        # Fall back to the device selected at notebook start-up.
        target_device = device

    layers = []
    for c in config:
        assert c == 'M' or isinstance(c, int), f"invalid config entry: {c!r}"
        if c == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2))
            continue

        layers.append(nn.Conv2d(in_channels, c, kernel_size=3, padding=1))
        if batch_norm:
            layers.append(nn.BatchNorm2d(c))
        layers.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        in_channels = c

    # Moving the container once moves every layer inside it.
    return nn.Sequential(*layers).to(target_device)

In [17]:
class VGG(nn.Module):
    """VGG-style classifier.

    The network is a supplied feature extractor, an adaptive average pool to
    7x7, and a three-layer fully connected head (4096 -> 4096 -> output_dim)
    with ReLU and dropout (p=0.5) after the first two linear layers. The pool
    and the head are placed on the module-level ``device``.
    """

    def __init__(self, features, output_dim):
        super().__init__()

        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d(7).to(device)

        # The head expects 512 feature maps of size 7x7, flattened.
        head = [
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, output_dim),
        ]
        # Moving the container moves every layer it holds.
        self.classifier = nn.Sequential(*head).to(device)

    def forward(self, x):
        """Return the classifier scores for batch ``x``."""
        pooled = self.avgpool(self.features(x))
        # Collapse everything except the batch axis before the linear layers.
        flat = pooled.view(pooled.shape[0], -1)
        return self.classifier(flat)

In [18]:
# Build the VGG-19 convolutional stack with batch normalization enabled.
vgg19_layers_bn = get_vgg_layers(config=vgg19_config, batch_norm=True)

## Train Loop

In [19]:
def train_vgg(model, epoches, image_train_loader, label_train_loader, image_val_loader, label_val):
  """Train `model` with SGD and report validation accuracy after every epoch.

  Uses SGD (lr 0.002, momentum 0.9) with the learning rate halved every 10
  epochs, and the module-level `criterion` as the loss.

  Args:
    model: network to train; updated in place.
    epoches: number of passes over the training loaders.
    image_train_loader: iterable of image batches.
    label_train_loader: iterable of label batches, paired with the image
      batches by position. Iteration stops at the shorter of the two loaders.
    image_val_loader: iterable of validation image batches.
    label_val: validation labels, passed to `accuracy`.

  Returns:
    The same `model` object after training.
  """
  optimizer = optim.SGD(model.parameters(), lr=0.002, momentum=0.9)
  scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

  for epoch in range(epoches):
    # Validation may leave the model in eval mode; make sure Dropout and
    # BatchNorm behave as training layers for every epoch.
    model.train()
    loss = None

    for inputs, label in zip(image_train_loader, label_train_loader):
      inputs = inputs.float().to(device)
      label = label.to(device)

      optimizer.zero_grad()

      outputs = model(inputs)
      loss = criterion(outputs, label)
      loss.backward()

      optimizer.step()
    scheduler.step()

    val_accuracy = accuracy(image_val_loader, label_val, model)

    # The reported loss is that of the last batch of the epoch (NaN when the
    # loaders produced no batches).
    last_loss = loss.item() if loss is not None else float('nan')
    print(f'[{epoch + 1}] loss: {last_loss:.3f}  val_accuracy: {val_accuracy:.4f}')

  return model

In [20]:
# Full network: the batch-normalized VGG-19 feature stack plus a classifier
# head with 36 outputs.
VGG19_bn = VGG(features=vgg19_layers_bn, output_dim=36)

In [None]:
# Train for 20 epochs; train_vgg returns the same model object it was given.
model_vgg19_bn = train_vgg(
  model=VGG19_bn,
  epoches=20,
  image_train_loader=train_loader,
  label_train_loader=train_label_loader,
  image_val_loader=validation_loader,
  label_val=label_val,
)

## Prediction on Test Set

In [None]:
# Predict on the test set and write one row per sample to submission.csv.
test_predictions = predict(VGG19_bn, test_loader)
d = {
  'Id': list(range(len(test_predictions))),
  # Each prediction vector is written as the concatenation of its entries.
  'Category': [''.join(map(str, row)) for row in test_predictions.tolist()],
}
# The id column is written under the header '# Id'.
submission = pd.DataFrame(d).rename(columns={'Id': '# Id'})
print(submission.shape)
submission.to_csv(path_or_buf='submission.csv', index=False)