In [None]:
from google.colab import drive
drive.mount("/content/drive/",force_remount=True)

from zipfile import ZipFile
! cp '/content/drive/MyDrive/ML_Capstone/drive-download.zip' '/content'
from zipfile import ZipFile
zip = ZipFile('/content/drive-download.zip')
zip.extractall()

Mounted at /content/drive/


In [None]:
# importing required packages
import os
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.transforms import ToTensor
import numpy as np
import cv2
import pickle as pkl
import matplotlib.pyplot as plt
from torchvision import models
from PIL import Image
from torch.optim.lr_scheduler import StepLR
from sklearn.model_selection import train_test_split
import pandas as pd

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
device

'cuda'

# Dataset and Dataloaders

# Lazy Loading

data
-train
  - X
    - 1
      - rgb
        - 0.png
        - 1.png
        - 2.png
      - depth.npy
      - field_id.pkl
    
    - 2

    - ...
  - Y
    - 1.npy
    - 2.npy

-test

In [None]:
class LazyLoadDataset(Dataset):
  """Dataset that reads one sample from disk each time it is indexed.

  Files are looked up relative to ``path``:

    * ``<path>train/X/<sample>/rgb/0.png`` and ``<path>train/X/<sample>/field_id.pkl``
    * ``<path>train/Y/<sample>.npy`` (training split only)
    * the same ``X/`` structure under ``<path>test/`` when ``train=False``

  Args:
    path (str): root directory, expected to end with a path separator
    train (bool): read the ``train/`` split (with targets) or the ``test/`` split
    transform (callable, optional): applied to the image after it is scaled to [0, 1]
  """

  def __init__(self,path,train=True,transform=None):
    self.train = train
    self.transform = transform
    path = path + ("train/" if self.train else "test/")

    self.pathX = path + "X/"
    self.pathY = path + "Y/"

    # os.listdir returns entries in arbitrary order; sorting makes index -> sample
    # stable across runs and machines.
    self.data = sorted(os.listdir(self.pathX))

  def __getitem__(self,idx):
    """Load sample ``idx``.

    Returns:
      ``((image, field_id), target)`` for the training split, ``(image, field_id)``
      for the test split. ``image`` is a float32 tensor; when ``transform`` is None
      it keeps the array layout returned by ``cv2.imread``.

    Raises:
      FileNotFoundError: if the image file is missing or cannot be decoded.
    """
    f = self.data[idx]

    # X
    # read the first rgb image; cv2.imread signals failure by returning None
    img_path = self.pathX + f + "/rgb/0.png"
    img0 = cv2.imread(img_path)
    if img0 is None:
      raise FileNotFoundError("Could not read image: " + img_path)
    # NOTE(review): OpenCV decodes colour images in BGR channel order, so the
    # channels reach the network as B, G, R - confirm this is intended.
    img0 = img0/255

    if self.transform is not None:
      img0 = self.transform(img0)
    # Accepts both the transformed tensor and the raw numpy array (no transform).
    img0 = torch.as_tensor(img0, dtype=torch.float32)

    # read field ID (pickle executes arbitrary code on load: only use trusted data files)
    with open(self.pathX + f + "/field_id.pkl","rb") as fh:
      field_id = pkl.load(fh)

    if self.train:
      # Y: targets are multiplied by 1000 here; test() divides predictions by 1000
      Y = np.load(self.pathY + f + ".npy")*1000
      return (img0,field_id),torch.from_numpy(Y).float()

    # test split: there is no Y
    return img0,field_id

  def __len__(self):
    """Number of sample folders found under ``X/``."""
    return len(self.data)


# Normalization: Compute Mean and Std



In [None]:
# Un-normalized view of the training split (tensor conversion only), used to
# measure per-channel statistics.
train_dataset_raw = LazyLoadDataset(
    "/content/",
    train=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [None]:
# Pull one sample to check the shape of the image tensor the dataset yields.
(img_i, id_i),Y_i=train_dataset_raw[0]
img_i.shape

torch.Size([3, 224, 224])

In [None]:
# Collect the per-channel mean and standard deviation of every training image.
# The loop bound comes from the dataset itself instead of a hard-coded sample count.
means=[]
stds=[]
for i in range(len(train_dataset_raw)):
  (img_i, id_i),Y_i=train_dataset_raw[i]
  # Iterating the image tensor walks its first (channel) dimension.
  means.append([torch.mean(channel).item() for channel in img_i])
  stds.append([torch.std(channel).item() for channel in img_i])

In [None]:
img_i

In [None]:
means

In [None]:
# Collapse the per-image channel means into a single mean per channel.
norm_mean = list(np.mean(means, axis=0))
norm_mean

[0.3959879955305368, 0.41699021958139393, 0.43522507327886295]

In [None]:
# Average the per-image channel standard deviations. This is the mean of the
# per-image stds, not the std taken over all pixels of the dataset at once.
norm_std = list(np.mean(stds, axis=0))
norm_std

[0.21113784175510963, 0.1986706505394656, 0.1991702498400801]

# Train Dataset & Train loader

In [None]:
# Training split: convert each image to a tensor, then normalize every channel
# with the statistics in norm_mean / norm_std.
train_dataset = LazyLoadDataset(
    "/content/",
    transform=transforms.Compose([
        transforms.ToTensor(),
        # Augmentations that were tried and are currently disabled:
        # RandomRotation, ColorJitter, RandomPerspective.
        transforms.Normalize(norm_mean, norm_std),
    ]),
)

In [None]:
# Fetch one normalized training sample (image, field id and target vector) for inspection.
(img_0, field_id), Y = train_dataset[0]

In [None]:
img_0.shape

torch.Size([3, 224, 224])

In [None]:
# Shuffled mini-batches over the full training split.
batch_size_train = 2
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size_train,
    shuffle=True,
)

In [None]:
Y/1000

tensor([ 0.0522,  0.0528,  0.1172,  0.0645, -0.0028,  0.0988,  0.0699, -0.0515,
         0.0879,  0.0538,  0.0143, -0.0535])

# Test Dataset & Test loader

In [None]:
# Test split: same tensor conversion and normalization as the training split,
# with no augmentation.
test_dataset = LazyLoadDataset("/content/",train=False,transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize(norm_mean, norm_std),
                   ])) 

batch_size_test = 1
# Inference gains nothing from shuffling; a fixed order keeps the rows of the
# generated submission file in the same order from run to run.
test_loader=DataLoader(test_dataset,batch_size=batch_size_test,shuffle=False)

# Model

In [None]:
class Res(nn.Module):
    """Pretrained ResNet-50 followed by a linear layer.

    The ResNet's 1000-dimensional output is fed to ``nn.Linear(1000, output_size)``.

    Args:
        input_channels (int): accepted for interface compatibility; not used
            when building the layers.
        output_size (int): number of values produced per input image.
    """

    def __init__(self, input_channels, output_size):
        super().__init__()

        # Build the two stages separately, then chain them under the same
        # attribute name (``stack``) so parameter names stay unchanged.
        backbone = models.resnet50(pretrained=True)
        head = nn.Linear(1000, output_size)
        self.stack = nn.Sequential(backbone, head)

    def forward(self, x):
        """Run ``x`` through the backbone and the linear head."""
        return self.stack(x)

In [None]:
# Model input/output settings
input_channels = 3 # number of input channels (passed to Res, which does not use it when building layers)
output_size=12 # number of values predicted per image (the submission has FINGER_POS_1..FINGER_POS_12)

In [None]:
def train(epoch, model, optimizer, loader=None):
    """
    Train the model for one epoch

    Args:
        epoch (int): current epoch (only used in the progress message)
        model (nn.Module): model to train
        optimizer (torch.optim): optimizer to use
        loader (iterable, optional): yields ``((data, ids), target)`` batches.
            When omitted, the notebook-level ``train_loader`` is looked up at
            call time, so a loader re-created after this function was defined
            is picked up.

    Returns:
        torch.Tensor or None: detached RMSE loss of the last batch, or None
        when the loader produced no batches.
    """
    if loader is None:
        loader = train_loader

    model.train()

    loss = None  # stays None if the loader is empty
    for batch_idx, ((data, _ids), target) in enumerate(loader):
        # send to device
        data, target = data.to(device), target.to(device)

        # Image augmentation could be added here. Note that invert /
        # adjust_sharpness / adjust_hue / adjust_saturation live in
        # torchvision.transforms.functional, not in torch.nn.functional
        # (which is what this notebook imports as F).

        # make sure we erase all the gradients before computing new ones
        optimizer.zero_grad()

        # forward propagation; the batch is used in whatever shape the loader
        # delivers, so the image size is not fixed to 224x224 here
        output = model(data)

        # RMSE loss
        loss = torch.sqrt(nn.functional.mse_loss(output, target))

        # backward propagation
        loss.backward()
        optimizer.step()

        if batch_idx % 200 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(loader.dataset),
                100. * batch_idx / len(loader), loss.item()))

    # Detach so the returned value does not keep the autograd graph alive.
    return None if loss is None else loss.detach()

In [None]:
def test(model):
    """
    Test the model

    Args:
        model (nn.Module): model to test
      
    """
    model.eval()
    ids = []
    preds = []
    with torch.no_grad():
      for batch_idx,(data, id) in enumerate(test_loader):
        # send to device
        data = data.to(device)          
        pred = model(data)
        for i in range(len(id)):
          ids.append(id[i])
          preds.append(np.array(pred[i].cpu()/1000,dtype="float64"))

    return ids, preds  

# Cross-Validation & Grid-Search for determining hyperparameters

How should the hyperparameters of the model be chosen? We could run the grid search on the whole train dataset and pick the best-performing hyperparameters, but that risks over-fitting to the train dataset. To avoid this, we split the train dataset 9:1:
*   cross_val_train_set (90% of train dataset)
*   cross_val_test_set (10% of train dataset)

We train our model on the cross_val_train_set and test our model on cross_val_test_set. We pick the best-performing hyperparameters on the cross_val_test_set.

After all these steps, we train our model using the picked hyperparameters on the whole train dataset. 

In [None]:
# Fixed random_state: every grid-search run (and every kernel restart) is scored
# against the same 10% hold-out, so validation losses are comparable across cells.
cross_val_train_set, cross_val_test_set = train_test_split(train_dataset, test_size=0.1, random_state=0)

In [None]:
# Sanity Check
len(cross_val_train_set) == int(len(train_dataset)*0.9)

True

In [None]:
# Mini-batches of 4 for the one-epoch training pass of each grid-search candidate.
batch_size_cross_val_train = 4
# One batch holding the entire validation split.
batch_size_cross_val_test = len(cross_val_test_set) 

In [None]:
# Loaders over the 90% / 10% split used for hyperparameter search.
cross_val_train_loader = DataLoader(
    dataset=cross_val_train_set,
    batch_size=batch_size_cross_val_train,
    shuffle=True,
)
cross_val_test_loader = DataLoader(
    dataset=cross_val_test_set,
    batch_size=batch_size_cross_val_test,
    shuffle=True,
)

In [None]:
len(cross_val_test_loader)

1

In [None]:
def cross_val(model, loader=None):
    """
    Evaluate the model on the held-out validation split.

    Args:
        model (nn.Module): model to evaluate
        loader (iterable, optional): yields ``((data, ids), target)`` batches.
            Defaults to the notebook-level ``cross_val_test_loader``.

    Returns:
        float or None: RMSE over every target value of every batch, or None
        when the loader produced no batches. With a single batch this is the
        same quantity as ``sqrt(mse_loss(output, target))`` on that batch.
    """
    if loader is None:
        loader = cross_val_test_loader

    model.eval()

    squared_error = 0.0  # running sum of squared errors over all batches
    n_values = 0         # number of target values seen so far
    with torch.no_grad():
        for (data, _ids), target in loader:
            # send to device
            data, target = data.to(device), target.to(device)

            # forward propagation
            output = model(data)

            # Accumulate sums rather than per-batch means so that batches of
            # different sizes are weighted correctly.
            squared_error += nn.functional.mse_loss(output, target, reduction="sum").item()
            n_values += target.numel()

    if n_values == 0:
        return None
    return (squared_error / n_values) ** 0.5

In [None]:
def SGD_hyper_tuning(lr_list,momentum_list):
  """Grid search over SGD learning rate and momentum.

  For every (momentum, lr) pair a fresh Res model is trained for one epoch on
  cross_val_train_loader, and its cross_val() score is printed.

  Args:
    lr_list (iterable of float): learning rates to try
    momentum_list (iterable of float): momentum values to try
  """
  for momentum in momentum_list:
    for lr in lr_list:
      # start every candidate from a new model
      candidate = Res(input_channels, output_size)
      candidate.to(device)
      print("lr=",lr,"momentum=",momentum)
      sgd = torch.optim.SGD(candidate.parameters(), lr=lr, momentum=momentum)
      train(epoch=0, model=candidate, optimizer=sgd, loader=cross_val_train_loader)
      print("Validation Performance:",cross_val(candidate))

In [None]:
# Coarse search: learning rates spaced by powers of ten, momentum from 0.99 down to 0.91.
lr_list = [1e-5,1e-4,1e-3,1e-2,1e-1]
momentum_list = [0.99,0.97,0.95,0.93,0.91]
SGD_hyper_tuning(lr_list,momentum_list)

lr= 1e-05 momentum= 0.99
Validation Performance: 13.263710975646973
lr= 0.0001 momentum= 0.99
Validation Performance: 7.795881271362305
lr= 0.001 momentum= 0.99
Validation Performance: 10.428595542907715
lr= 0.01 momentum= 0.99
Validation Performance: 15.773784637451172
lr= 0.1 momentum= 0.99
Validation Performance: nan
lr= 1e-05 momentum= 0.97
Validation Performance: 23.86100196838379
lr= 0.0001 momentum= 0.97
Validation Performance: 8.416244506835938
lr= 0.001 momentum= 0.97
Validation Performance: 10.035009384155273
lr= 0.01 momentum= 0.97
Validation Performance: 16.189674377441406
lr= 0.1 momentum= 0.97
Validation Performance: 25.329208374023438
lr= 1e-05 momentum= 0.95
Validation Performance: 50.94879150390625
lr= 0.0001 momentum= 0.95
Validation Performance: 8.552640914916992
lr= 0.001 momentum= 0.95
Validation Performance: 6.945775032043457
lr= 0.01 momentum= 0.95
Validation Performance: 9.886741638183594
lr= 0.1 momentum= 0.95
Validation Performance: 16.081586837768555
lr= 1e-0



Top 3 Performance
1.  lr = 1e-3, momentum = 0.91, loss on validation set = 5.897
2.  lr = 1e-3, momentum = 0.93, loss on validation set = 6.93
3.  lr = 1e-3, momentum = 0.95, loss on validation set = 6.94





In [None]:
# Finer search: learning rates clustered around 1e-3, same momentum values as before.
lr_list = [8e-4,9e-4,1e-3,2e-3,3e-3]
momentum_list = [0.99,0.97,0.95,0.93,0.91]
SGD_hyper_tuning(lr_list,momentum_list)

lr= 0.0008 momentum= 0.99
Validation Performance: 13.585453987121582
lr= 0.0009 momentum= 0.99
Validation Performance: 13.811532020568848
lr= 0.001 momentum= 0.99
Validation Performance: 10.829007148742676
lr= 0.002 momentum= 0.99
Validation Performance: 14.18579387664795
lr= 0.003 momentum= 0.99
Validation Performance: 19.600557327270508
lr= 0.0008 momentum= 0.97
Validation Performance: 7.617082595825195
lr= 0.0009 momentum= 0.97
Validation Performance: 7.063784122467041
lr= 0.001 momentum= 0.97
Validation Performance: 7.547131538391113
lr= 0.002 momentum= 0.97
Validation Performance: 8.126799583435059
lr= 0.003 momentum= 0.97
Validation Performance: 11.71267032623291
lr= 0.0008 momentum= 0.95
Validation Performance: 5.754782676696777
lr= 0.0009 momentum= 0.95
Validation Performance: 6.255874156951904
lr= 0.001 momentum= 0.95
Validation Performance: 6.567749500274658
lr= 0.002 momentum= 0.95
Validation Performance: 6.9812517166137695
lr= 0.003 momentum= 0.95
Validation Performance: 8.

Top 5 Performance

1.   lr = 3e-3, momentum = 0.91, loss on validation set = 5.72
2.   lr = 8e-4, momentum = 0.95, loss on validation set = 5.75
3.   lr = 9e-4, momentum = 0.91, loss on validation set = 5.82
4.   lr = 9e-4, momentum = 0.95, loss on validation set = 6.25
5.   lr = 1e-3, momentum = 0.95, loss on validation set = 6.56

#lr=3e-3, momentum=0.91, 10 epoch, train_batch_size = 2, with lr decay

In [None]:
# Final SGD run with step learning-rate decay (lr is multiplied by 0.1 every 5 epochs).
model_res = Res(input_channels, output_size) # create Res model
model_res.to(device)
optimizer = torch.optim.SGD(model_res.parameters(), lr=3e-3,momentum=0.91) 
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

print("lr=",3e-3,"momentum=",0.91)
for epoch in range(10):
  # Train with the one optimizer the scheduler is attached to. Building a new
  # optimizer per epoch would leave the scheduler adjusting an optimizer that
  # never updates the model (so no decay) and would reset the momentum buffers.
  train(epoch, model=model_res, optimizer=optimizer,loader=train_loader)
  scheduler.step()

lr= 0.003 momentum= 0.91






In [None]:
# Persist the trained model, reload it from disk, and predict on the test split.
# NOTE(review): this pickles the whole model object; newer PyTorch releases may
# refuse to torch.load it by default (weights_only) - confirm for the installed version.
torch.save(model_res,'/content/drive/MyDrive/ML_Capstone/v4_res_3img0_m2.pt')
model_res = torch.load('/content/drive/MyDrive/ML_Capstone/v4_res_3img0_m2.pt')
model_res.eval()
ids,preds = test(model_res)

outfile = '/content/drive/MyDrive/ML_Capstone/submission_v4_res_3img0_m2.csv'

titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']

# One ID column followed by the 12 predicted values. to_csv opens and closes
# the file itself, so no separate open(outfile, 'w') handle is created (the
# previous one was never closed).
df = pd.concat([pd.DataFrame(ids), pd.DataFrame.from_records(preds)], axis = 1)
df.columns = titles
df.to_csv(outfile, index = False)
print("Written to csv file {}".format(outfile))

# lr=3e-3, momentum=0.91, 20 epoch, train_batch_size = 2, no lr decay

In [None]:
# Final SGD run without learning-rate decay.
model_res = Res(input_channels, output_size) # create Res model
model_res.to(device)
optimizer = torch.optim.SGD(model_res.parameters(), lr=3e-3,momentum=0.91) 

print("lr=",3e-3,"momentum=",0.91)
for epoch in range(20):
  # Reuse the same optimizer for every epoch so its momentum buffers carry
  # over; constructing a new SGD per epoch resets them.
  train(epoch, model=model_res, optimizer=optimizer,loader=train_loader)

lr= 0.003 momentum= 0.91


In [None]:
# Persist the trained model, reload it from disk, and predict on the test split.
# NOTE(review): this pickles the whole model object; newer PyTorch releases may
# refuse to torch.load it by default (weights_only) - confirm for the installed version.
torch.save(model_res,'/content/drive/MyDrive/ML_Capstone/v4_res_3img0_m3.pt')
model_res = torch.load('/content/drive/MyDrive/ML_Capstone/v4_res_3img0_m3.pt')
model_res.eval()
ids,preds = test(model_res)

import pickle
import pandas as pd

outfile = '/content/drive/MyDrive/ML_Capstone/submission_v4_res_3img0_m3.csv'

titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']

# One ID column followed by the 12 predicted values. to_csv opens and closes
# the file itself, so no separate open(outfile, 'w') handle is created (the
# previous one was never closed).
df = pd.concat([pd.DataFrame(ids), pd.DataFrame.from_records(preds)], axis = 1)
df.columns = titles
df.to_csv(outfile, index = False)
print("Written to csv file {}".format(outfile))

Conclusion for SGD hyperparameter selection:
With lr=3e-3, momentum=0.91, 20 epochs, train_batch_size = 2 and no lr decay, we get the best score on Kaggle.

In [None]:
def Adam_hyper_tuning(lr_list):
  """Search over Adam learning rates.

  For every learning rate a fresh Res model is trained for one epoch on
  cross_val_train_loader, and its cross_val() score is printed.

  Args:
    lr_list (iterable of float): learning rates to try
  """
  for lr in lr_list:
    # start every candidate from a new model
    candidate = Res(input_channels, output_size)
    candidate.to(device)
    print("lr=",lr)
    adam = torch.optim.Adam(candidate.parameters(), lr=lr)
    train(epoch=0, model=candidate, optimizer=adam, loader=cross_val_train_loader)
    print("Validation Performance:",cross_val(candidate))

In [None]:
# Coarse search: Adam learning rates spaced by powers of ten.
lr_list = [1e-5,1e-4,1e-3,1e-2,1e-1]
Adam_hyper_tuning(lr_list)

lr= 1e-05
Validation Performance: 13.924497604370117
lr= 0.0001
Validation Performance: 8.159941673278809
lr= 0.001
Validation Performance: 8.506036758422852
lr= 0.01
Validation Performance: 26.6864013671875
lr= 0.1
Validation Performance: 22.735260009765625


Top 2 Performance
1.  lr = 1e-4,  loss on validation set = 8.15
2.  lr = 1e-3,  loss on validation set = 8.5

In [None]:
# Finer search: Adam learning rates from 1e-4 to 5e-4 in steps of 1e-4.
lr_list = [1e-4,2e-4,3e-4,4e-4,5e-4]
Adam_hyper_tuning(lr_list)

lr= 0.0001
Validation Performance: 6.761453628540039
lr= 0.0002
Validation Performance: 7.139166355133057
lr= 0.0003
Validation Performance: 6.6652913093566895
lr= 0.0004
Validation Performance: 7.405411243438721
lr= 0.0005
Validation Performance: 7.915044784545898


Top 1 Performance
1.  lr = 3e-4,  loss on validation set = 6.66

In [None]:
# Final Adam run.
model_res = Res(input_channels, output_size) # create Res model
model_res.to(device)
optimizer = torch.optim.Adam(model_res.parameters(), lr=3e-4) 

print("lr=",3e-4)
for epoch in range(20):
  # Reuse the same optimizer for every epoch: Adam keeps running moment
  # estimates per parameter, and constructing a new Adam per epoch discards them.
  train(epoch, model=model_res, optimizer=optimizer,loader=train_loader)

lr= 0.0003


In [None]:
# Persist the trained model, reload it from disk, and predict on the test split.
# NOTE(review): this pickles the whole model object; newer PyTorch releases may
# refuse to torch.load it by default (weights_only) - confirm for the installed version.
torch.save(model_res,'/content/drive/MyDrive/ML_Capstone/v4_res_3img0_m4.pt')
model_res = torch.load('/content/drive/MyDrive/ML_Capstone/v4_res_3img0_m4.pt')
model_res.eval()
ids,preds = test(model_res)

import pickle
import pandas as pd

outfile = '/content/drive/MyDrive/ML_Capstone/submission_v4_res_3img0_m4.csv'

titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']

# One ID column followed by the 12 predicted values. to_csv opens and closes
# the file itself, so no separate open(outfile, 'w') handle is created (the
# previous one was never closed).
df = pd.concat([pd.DataFrame(ids), pd.DataFrame.from_records(preds)], axis = 1)
df.columns = titles
df.to_csv(outfile, index = False)
print("Written to csv file {}".format(outfile))

Conclusion for Adam hyperparameter selection: Although, from the results here, Adam with its best parameters performs better than SGD, SGD still wins over Adam on Kaggle. This may be because Kaggle only uses 60% of the testing data, or because of overfitting.