# Training a ConvNet PyTorch

In this notebook, you'll learn how to use the powerful PyTorch framework to specify a conv net architecture and train it on the CIFAR-10 dataset.

In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np

import timeit

In [3]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{u'kaggle.json': '{"username":"dainiaozhou","key":"7000702f342133c038b6be791b599e60"}'}

In [8]:
!ls

kaggle.json  sample_data


In [0]:
!mkdir -p ~/.kaggle

In [10]:
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!ls ~/.kaggle

kaggle.json


In [11]:
!ls -l ~/.kaggle
!cat ~/.kaggle/kaggle.json

total 4
-rw------- 1 root root 67 Mar 20 21:17 kaggle.json
{"username":"dainiaozhou","key":"7000702f342133c038b6be791b599e60"}

In [0]:
!pip install -q kaggle
!pip install -q kaggle-cli

In [15]:
!kaggle competitions download -c humpback-whale-identification

Downloading sample_submission.csv to /content
  0% 0.00/498k [00:00<?, ?B/s]
100% 498k/498k [00:00<00:00, 34.7MB/s]
Downloading train.csv to /content
  0% 0.00/594k [00:00<?, ?B/s]
100% 594k/594k [00:00<00:00, 80.6MB/s]
Downloading test.zip to /content
100% 1.34G/1.35G [00:20<00:00, 78.4MB/s]
100% 1.35G/1.35G [00:20<00:00, 71.9MB/s]
Downloading train.zip to /content
100% 4.15G/4.16G [01:06<00:00, 78.6MB/s]
100% 4.16G/4.16G [01:06<00:00, 67.4MB/s]


In [29]:
ls

kaggle.json   sample_submission.csv  train.csv
[0m[01;34msample_data[0m/  test.zip               train.zip


In [0]:
!unzip train.zip -d train

In [31]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

from subprocess import check_output
print(check_output(["ls", "./"]).decode("utf8"))

kaggle.json
sample_data
sample_submission.csv
test.zip
train
train.csv
train.zip



In [0]:
import os, sys
from IPython.display import display
from IPython.display import Image as _Imgdis
from PIL import Image
import numpy as np
from time import time
from time import sleep

In [0]:
import matplotlib.pyplot as plt

In [0]:
import os
import cv2
import math

import numpy as np # linear algebra
from PIL import Image
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

In [21]:
print(os.listdir("."))

['.config', 'sample_submission.csv', 'test.zip', 'train.csv', 'kaggle.json', 'train.zip', 'sample_data']


In [0]:
label_df = pd.read_csv('./train.csv')
submission_df = pd.read_csv('./sample_submission.csv')


In [0]:
label_df = label_df[label_df['Id'] != "new_whale"]

In [101]:
count = dict()
for index, row in label_df.iterrows():
  if row['Id'] not in count.keys():
    count[row['Id']] = 1
  else:
    count[row['Id']] += 1
    

good_ids = []
for id, c in count.items():
  if c >= 31:
    good_ids.append(id)
print(good_ids)
print(len(good_ids))

['w_6cda039', 'w_f765256', 'w_af367c3', 'w_0369a5c', 'w_5e8e218', 'w_3de579a', 'w_9c506f6', 'w_9b5109b', 'w_f0fe284', 'w_2b069ba', 'w_1ca9ab1', 'w_d405854', 'w_6822dbc', 'w_d72771c', 'w_fd3e556', 'w_23a388d', 'w_a9304b9', 'w_778e474', 'w_700ebb4', 'w_60ce6fc', 'w_789c969', 'w_5a2634c', 'w_8c25681', 'w_564a34b', 'w_17b0d3a', 'w_343f088', 'w_08630fd', 'w_88e4537']
28


In [102]:
dataset = []
for index, row in label_df.iterrows():
  if row['Id'] in good_ids:
    dataset.append((row['Id'],row['Image']))
print(dataset[1])
print(len(dataset))

('w_9c506f6', '00570db6b.jpg')
1235


In [103]:
label = dict()
class_label = 0
for whale_id,_ in dataset:
  if not whale_id in label.keys():
    label[whale_id] = class_label
    class_label += 1
print(label)

{'w_789c969': 23, 'w_1ca9ab1': 8, 'w_6cda039': 19, 'w_08630fd': 7, 'w_778e474': 4, 'w_a9304b9': 6, 'w_d72771c': 10, 'w_6822dbc': 2, 'w_9b5109b': 15, 'w_5a2634c': 26, 'w_17b0d3a': 24, 'w_3de579a': 0, 'w_fd3e556': 18, 'w_d405854': 12, 'w_9c506f6': 1, 'w_88e4537': 20, 'w_f0fe284': 9, 'w_8c25681': 25, 'w_564a34b': 13, 'w_700ebb4': 3, 'w_343f088': 21, 'w_f765256': 16, 'w_0369a5c': 5, 'w_af367c3': 22, 'w_5e8e218': 14, 'w_60ce6fc': 17, 'w_2b069ba': 27, 'w_23a388d': 11}


In [0]:
def get_pad_width(im, new_shape, is_rgb=True):
    pad_diff = new_shape - im.shape[0], new_shape - im.shape[1]
    t, b = int(math.floor(pad_diff[0]/2)), int(math.ceil(pad_diff[0]/2))
    l, r = int(math.floor(pad_diff[1]/2)), int(math.ceil(pad_diff[1]/2))
    if is_rgb:
        pad_width = ((t,b), (l,r), (0, 0))
    else:
        pad_width = ((t,b), (l,r))
    return pad_width

def pad_and_resize_cv(image_path, dataset, desired_size=224):
    img = cv2.imread('./' + dataset + '/' + image_path )
    
    pad_width = get_pad_width(img, max(img.shape))
    padded = np.pad(img, pad_width=pad_width, mode='constant', constant_values=0)
    
    resized = cv2.resize(padded, (desired_size,)*2).astype('uint8')
    
    return resized

def pad_and_resize_pil(image_path, dataset, desired_size=224):
    '''Experimental'''
    im = Image.open('./' + dataset + '/' + image_path)
    
    old_size = im.size
    ratio = float(desired_size)/max(old_size)
    new_size = tuple([int(x*ratio) for x in old_size])
    resized = im.resize(new_size)
    im_array = np.asarray(resized)
    
    pad_width = get_pad_width(im_array, desired_size)
    padded = np.pad(im_array, pad_width=pad_width, mode='constant', constant_values=0)
    
    return padded


def pad_and_resize(image_path, dataset, desired_size=128, mode='cv'):
    if mode =='pil':
        return pad_and_resize_pil(image_path, dataset, desired_size)
    else:
        return pad_and_resize_cv(image_path, dataset, desired_size)

In [0]:
train_resized_imgs = []
test_resized_imgs = []

for id,image_path in dataset:
    train_resized_imgs.append(pad_and_resize(image_path, 'train'))

## for image_path in submission_df['Image']:
    ## test_resized_imgs.append(pad_and_resize(image_path, 'test'))

In [0]:
for i in range(len(train_resized_imgs)):
  train_resized_imgs[i] = np.transpose(train_resized_imgs[i], (2,1,0))
  train_resized_imgs[i] = torch.from_numpy(train_resized_imgs[i])

In [107]:
print(train_resized_imgs[0].shape)
print(len(train_resized_imgs))

torch.Size([3, 128, 128])
1235


In [0]:
import random

In [0]:
val_set = random.sample(xrange(1235),235)
train_set = list(set(range(1235)) - set(val_set))

# val_set = range(1000,1235)
# train_set = range(1000)

In [0]:
partitions = {"train":train_set, "validation": val_set}


In [0]:
import torch
from torch.utils import data

class Dataset(data.Dataset):
  'Characterizes a dataset for PyTorch'
  def __init__(self, list_IDs, labels):
        'Initialization'
        self.labels = labels
        self.list_IDs = list_IDs

  def __len__(self):
        'Denotes the total number of samples'
        return len(self.list_IDs)

  def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        ID = self.list_IDs[index]

        # Load data and get label
        X = train_resized_imgs[ID]
        y = self.labels[dataset[ID][0]]

        return X, y

In [0]:
params = {'batch_size': 64,
          'shuffle': True,
          'num_workers': 6}


# Generators
training_set = Dataset(partitions['train'], label)
training_generator = data.DataLoader(training_set, **params)

validation_set = Dataset(partitions['validation'], label)
validation_generator = data.DataLoader(validation_set, **params)

In [0]:
class Flatten(nn.Module):
    def forward(self, x):
        N, C, H, W = x.size() # read in N, C, H, W
        return x.view(N, -1)  # "flatten" the C * H * W values into a single vector per image

In [113]:
# Verify that CUDA is properly configured and you have a GPU available

torch.cuda.is_available()

True

In [0]:
gpu_dtype = torch.cuda.FloatTensor
def train(model, loss_fn, optimizer, num_epochs = 1):
    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        model.train()
        for t, (x, y) in enumerate(training_generator):
            x_var = Variable(x.type(gpu_dtype))
            y_var = Variable(y.type(gpu_dtype).long())


            scores = model(x_var)
            
            loss = loss_fn(scores, y_var)
            if (t + 1) % print_every == 0:
                print('t = %d, loss = %.4f' % (t + 1, loss.data.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

def check_accuracy(model, loader):
    #if loader.dataset.train:
        #print('Checking accuracy on validation set')
    #else:
        #print('Checking accuracy on test set')   
    num_correct = 0
    num_samples = 0
    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)
    for x, y in loader:
        x_var = Variable(x.type(gpu_dtype), volatile=True)

        scores = model(x_var)
        _, preds = scores.data.cpu().max(1)
        num_correct += (preds == y).sum()
        num_samples += preds.size(0)
    acc = float(num_correct) / num_samples
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

In [0]:
print_every = 10

# This is a little utility that we'll use to reset the model
# if we want to re-initialize all our parameters
def reset(m):
    if hasattr(m, 'reset_parameters'):
        m.reset_parameters()

In [0]:
# Train your model here, and make sure the output of this cell is the accuracy of your best model on the 
# train, val, and test sets. Here's some code to get you started. The output of this cell should be the training
# and validation accuracy on your best model (measured by validation accuracy).
gpu_dtype = torch.cuda.FloatTensor

# model_base = nn.Sequential (
#     nn.Conv2d(3,32, kernel_size = 7),
#     nn.LeakyReLU(inplace = True),
#     nn.Conv2d(32,32, kernel_size = 3),
#     nn.LeakyReLU(inplace = True),
#     nn.Conv2d(32,32, kernel_size = 3),
#     nn.LeakyReLU(inplace = True),
#     nn.BatchNorm2d(32),
#     nn.MaxPool2d(kernel_size=4, stride = 4),
#     nn.Dropout(0.30),
    
#     Flatten(),
#     nn.Linear(26912,1280),
#     nn.ReLU(inplace = True),
#     nn.Dropout(),
#     nn.Linear(1280,28),
#       )

model_base_1 = nn.Sequential (
    nn.Conv2d(3,32, kernel_size = 3),
    nn.LeakyReLU(inplace = True),
    nn.BatchNorm2d(32),
    nn.MaxPool2d(kernel_size=2, stride = 2),
    
    nn.Conv2d(32,32, kernel_size = 3),
    nn.LeakyReLU(inplace = True),
    nn.BatchNorm2d(32),
    nn.MaxPool2d(kernel_size=2, stride = 2),
    
    nn.Conv2d(32,64, kernel_size = 3),
    nn.LeakyReLU(inplace = True),
    nn.BatchNorm2d(64),
    nn.MaxPool2d(kernel_size=2, stride = 2),
    
    nn.Conv2d(64,128, kernel_size = 3),
    nn.LeakyReLU(inplace = True),
    nn.BatchNorm2d(128),
    nn.MaxPool2d(kernel_size=4, stride = 4),
    
    
    nn.Dropout(0.20),
    
    
    
    Flatten(),
    nn.Linear(1152,128),
    nn.ReLU(inplace = True),
    nn.Dropout(0.3),
    nn.Linear(128,28)
)


model_1 = model_base_1.type(gpu_dtype)
loss_fn = nn.CrossEntropyLoss().type(gpu_dtype)
optimizer = optim.RMSprop(model_1.parameters(), lr = 1e-3)

train(model_1, loss_fn, optimizer, num_epochs=50)
check_accuracy(model_1, validation_generator)




In [117]:

check_accuracy(model_1, validation_generator)



Got 141 / 235 correct (60.00)


In [0]:
model_base_2 = nn.Sequential (
    nn.Conv2d(3,32, kernel_size = 3),
    nn.LeakyReLU(inplace = True),
    nn.BatchNorm2d(32),
    nn.MaxPool2d(kernel_size=4, stride = 4),
    
    nn.Conv2d(32,64, kernel_size = 3),
    nn.LeakyReLU(inplace = True),
    nn.BatchNorm2d(64),
    nn.MaxPool2d(kernel_size=2, stride = 2),
    
    nn.Conv2d(64,64, kernel_size = 3),
    nn.LeakyReLU(inplace = True),
    nn.BatchNorm2d(64),
    nn.MaxPool2d(kernel_size=2, stride = 2),
    
    nn.Conv2d(64,128, kernel_size = 3),
    nn.LeakyReLU(inplace = True),
    nn.BatchNorm2d(128),
    nn.MaxPool2d(kernel_size=2, stride = 2),
    
    
    nn.Dropout(0.50),
    
    
    
    Flatten(),
    nn.Linear(512,128),
    nn.ReLU(inplace = True),
    nn.Dropout(0.5),
    nn.Linear(128,28)
)


model_2 = model_base_2.type(gpu_dtype)
loss_fn = nn.CrossEntropyLoss().type(gpu_dtype)
optimizer = optim.RMSprop(model_2.parameters(), lr = 1e-3)

train(model_2, loss_fn, optimizer, num_epochs=50)
check_accuracy(model_2, validation_generator)

In [50]:
check_accuracy(model_2, validation_generator)

Got 145 / 235 correct (61.70)




In [0]:
model_base_3 = nn.Sequential (
    nn.Conv2d(3,32, kernel_size = 7),
    nn.LeakyReLU(inplace = True),
    nn.BatchNorm2d(32),
    nn.MaxPool2d(kernel_size=2, stride = 2),
    
    
    nn.Conv2d(32,64, kernel_size = 5),
    nn.LeakyReLU(inplace = True),
    nn.BatchNorm2d(64),
    nn.MaxPool2d(kernel_size=3, stride = 3),
    
    nn.Conv2d(64,128, kernel_size = 5),
    nn.LeakyReLU(inplace = True),
    nn.BatchNorm2d(128),
    nn.MaxPool2d(kernel_size=2, stride = 2),
    
    
    nn.Dropout(0.40),
    
    
    
    Flatten(),
    nn.Linear(6272,1024),
    nn.ReLU(inplace = True),
    nn.Dropout(0.3),
    nn.Linear(1024,128),
    nn.ReLU(inplace = True),
    nn.Dropout(0.4),
    nn.Linear(128,28)
)


model_3 = model_base_3.type(gpu_dtype)
loss_fn = nn.CrossEntropyLoss().type(gpu_dtype)
optimizer = optim.RMSprop(model_3.parameters(), lr = 1e-3)

train(model_3, loss_fn, optimizer, num_epochs=50)
check_accuracy(model_3, validation_generator)

In [53]:
check_accuracy(model_3, validation_generator)



Got 106 / 235 correct (45.11)


In [0]:
model_base_4 = nn.Sequential (
    nn.Conv2d(3,16, kernel_size = 5),
    nn.LeakyReLU(inplace = True),
    nn.BatchNorm2d(16),
    nn.MaxPool2d(kernel_size=2, stride = 2),
    
    nn.Conv2d(16,32, kernel_size = 5),
    nn.LeakyReLU(inplace = True),
    nn.BatchNorm2d(32),
    nn.Conv2d(32,64, kernel_size = 5),
    nn.LeakyReLU(inplace = True),
    nn.BatchNorm2d(64),
    nn.MaxPool2d(kernel_size=3, stride = 3),
    
    nn.Conv2d(64,128, kernel_size = 5),
    nn.LeakyReLU(inplace = True),
    nn.BatchNorm2d(128),
    nn.MaxPool2d(kernel_size=2, stride = 2),
    
    nn.Dropout(0.30),
    
    
    Flatten(),
    nn.Linear(6272,1024),
    nn.ReLU(inplace = True),
    nn.Dropout(0.4),
    nn.Linear(1024,128),
    nn.ReLU(inplace = True),
    nn.Dropout(0.5),
    nn.Linear(128,28)
)


model_4 = model_base_4.type(gpu_dtype)
loss_fn = nn.CrossEntropyLoss().type(gpu_dtype)
optimizer = optim.RMSprop(model_4.parameters(), lr = 1e-3)

train(model_4, loss_fn, optimizer, num_epochs=50)
check_accuracy(model_4, validation_generator)

In [55]:
check_accuracy(model_4, validation_generator)



Got 138 / 235 correct (58.72)


In [81]:
num_correct = 0
num_samples = 0
model_1.eval() # Put the model in test mode (the opposite of model.train(), essentially)
model_2.eval()
model_3.eval()
model_4.eval()
for x, y in validation_generator:
    x_var = Variable(x.type(gpu_dtype), volatile=True)

    scores_1 = model_1(x_var)
    scores_2 = model_2(x_var)
    scores_3 = model_3(x_var)
    scores_4 = model_4(x_var)
    
    _, preds_1 = scores_1.data.cpu().max(1)
    _, preds_2 = scores_2.data.cpu().max(1)
    _, preds_3 = scores_3.data.cpu().max(1)
    _, preds_4 = scores_4.data.cpu().max(1)
    
    
    for i in range(list(preds_1.shape)[0]):
      preds[i] = np.argmax(np.bincount([preds_1[i],preds_2[i],preds_3[i],preds_4[i]]))
    
    
    
    
    
    num_correct += (preds == y).sum()
    num_samples += preds.size(0)
acc = float(num_correct) / num_samples
print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

print(preds_1[0])

  


Got 152 / 235 correct (64.68)
tensor(18)
