## CNN Model Template (pytorch)

* Use `mnist` image dataset, build a CNN for image classification

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from abc import ABC, abstractmethod
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
print(torch.__version__)

Using TensorFlow backend.


1.0.0


In [2]:
train = pd.read_csv('../input/train.csv')
print(train.shape)

X_train, X_val, y_train, y_val = train_test_split(
    train.iloc[:, 1:].values,
    train.iloc[:, 0].values,
    test_size=0.3,
    random_state=2019
)

print(X_train.shape, X_val.shape, y_train.shape, y_val.shape)

(42000, 785)
(29400, 784) (12600, 784) (29400,) (12600,)


In [3]:
num_classes = 10
img_rows, img_cols = 28, 28

X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
X_val = X_val.reshape(X_val.shape[0], 1, img_rows, img_cols)
X_train = X_train / 255
X_val = X_val / 255

y_train = to_categorical(y_train, num_classes)
y_val = to_categorical(y_val, num_classes)
print(X_train.shape, X_val.shape, y_train.shape, y_val.shape)

(29400, 1, 28, 28) (12600, 1, 28, 28) (29400, 10) (12600, 10)


In [4]:
class Network(nn.Module):
    """
    
    Network structure definition (pytorch based)
    
    Some nn.{Layer} could be replaced with functions F.{func}:
    nn.MaxPool2d, nn.Softmax, etc.
    
    Need to specifiy input_size and output_size for each layer.
    For conv layer, out_channel is equivalent to num_filters in keras conv layers.
    
    conv2d: output_size = (inp_size - kernel_size + padding * 2) / stride + 1
    flatten conv2d - maxpool2d: kernel_size * kernel_size * num_filters
    """
    def __init__(self, model_params):
        super().__init__()
        # parameters
        self.model_params = model_params
        dropout_rate = self.model_params['dropout_rate']
        
        # layers (number of hidden units are hard-coded)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=5)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(in_features=4*4*128, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=100)
        self.fc3 = nn.Linear(in_features=100, out_features=10)
        self.softmax = nn.Softmax()
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)
        
    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.maxpool1(x)
        x = self.relu(self.conv2(x))
        x = self.maxpool2(x)
        # reshape - Flatten layer
        x = x.view(-1, 4 * 4 * 128)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        out = self.softmax(self.fc3(x))
        return out

In [5]:
class BasicNN(ABC):
    @abstractmethod
    def initialize(self):
        pass
    
    @abstractmethod
    def fit(self):
        pass
    
    @abstractmethod
    def predict(self):
        pass
    
    @abstractmethod
    def save(self):
        pass
    
class BaiscCNN(BasicNN):
    """ Basic CNN model (pytorch) """
    def __init__(self, network, model_params):
        self._model = network
        self._out_size = list(self._model.parameters())[-1].size()[0]
        self.model_params = model_params
        
    def initialize(self):
        raise NotImplementedError("Method .initialized() not implemented.")
    
    def fit(self, X_train, y_train, X_valid, y_valid):
        self.train_params = self.model_params['train_params']
        self.nb_epochs = self.train_params['nb_epochs']
        self.batch_size = self.train_params['batch_size']
        self.loss_fn = self.model_params['loss_fn']
        self.optimizer = self.model_params['optimizer']
        
        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).cuda()
        y_train_tensor = torch.tensor(y_train, dtype=torch.float32).cuda()
        X_valid_tensor = torch.tensor(X_valid, dtype=torch.float32).cuda()
        y_valid_tensor = torch.tensor(y_valid, dtype=torch.float32).cuda()
        train_tensor = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
        valid_tensor = torch.utils.data.TensorDataset(X_valid_tensor, y_valid_tensor)
        train_loader = torch.utils.data.DataLoader(train_tensor, batch_size=self.batch_size)
        valid_loader = torch.utils.data.DataLoader(valid_tensor, batch_size=self.batch_size)
        
        self._model.cuda()
        
        for epoch in range(self.nb_epochs):
            self._model.train()
            avg_train_loss = 0
            for x_batch, y_batch in train_loader:
                # back-prop: calculate loss, use optimizer to update trainable params
                y_pred = self._model(x_batch)
                loss = self.loss_fn(y_pred, y_batch)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                avg_train_loss += loss.item() / len(train_loader)
            self._model.eval()
            valid_preds_fold = np.zeros((X_valid_tensor.size(0)))
            avg_val_loss = 0
            for i, (x_batch, y_batch) in enumerate(valid_loader):
                # calculate validation loss
                y_pred = self._model(x_batch).detach()
                loss = self.loss_fn(y_pred, y_batch)
                avg_val_loss += loss.item() / len(valid_loader)
            
            print(f'Epoch {str(epoch+1):3s}/  {str(self.nb_epochs):3s}'
                  f' train_loss = {avg_train_loss:.5f} valid_loss = {avg_val_loss:.5f}')
        
    def predict(self, X):
        test_preds = np.zeros((X.shape[0], self._out_size))
        X_test_tensor = torch.tensor(X, dtype=torch.float32).cuda()
        test_tensor = torch.utils.data.TensorDataset(X_test_tensor)
        test_loader = torch.utils.data.DataLoader(test_tensor, batch_size=self.batch_size, shuffle=False)
        for i, (x_batch, ) in enumerate(test_loader):
            y_pred = self._model(x_batch).detach()
            test_preds[i*(batch_size):(i+1)*(batch_size)] = y_pred.cpu().numpy()
        return test_preds
           
    def save(self, saved_model_destination):
        self.saved_model_destination = saved_model_destination
        self._state_dict = self._model.state_dict()
        torch.save(self._state_dict, self.saved_model_destination)

In [6]:
model_params = {
    'train_params': {'batch_size': 128,
                     'patience'  : 10,
                     'nb_epochs' : 10},
    'loss_fn'     : nn.BCEWithLogitsLoss(reduction='sum'),
    'dropout_rate': 0.5,
}

network = Network(model_params)
model_params['optimizer'] = optim.Adam(network.parameters())

In [7]:
model = BaiscCNN(network, model_params)
model.fit(X_train, y_train, X_val, y_val)



Epoch 1  /  10  train_loss = 866.40916 valid_loss = 840.04294
Epoch 2  /  10  train_loss = 844.98896 valid_loss = 837.61317
Epoch 3  /  10  train_loss = 842.53257 valid_loss = 836.92374
Epoch 4  /  10  train_loss = 841.63790 valid_loss = 837.41705
Epoch 5  /  10  train_loss = 841.05909 valid_loss = 836.34534
Epoch 6  /  10  train_loss = 840.72285 valid_loss = 836.07681
Epoch 7  /  10  train_loss = 840.47727 valid_loss = 836.58970
Epoch 8  /  10  train_loss = 840.13513 valid_loss = 835.87560
Epoch 9  /  10  train_loss = 839.90453 valid_loss = 835.72339
Epoch 10 /  10  train_loss = 839.82816 valid_loss = 835.86436


In [8]:
model.save('./basic_cnn')