## Check GPU
Chose Runtime -> Reset all runtime to get GPU NVIDIA Tesla P100 if provided GPU is not P100.

In [2]:
!nvidia-smi

Mon Dec 16 15:00:32 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.44       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

## Mount Drive to VM

In [4]:
from google.colab import drive
drive.mount('/content/drive')

DATA_PATH = '/content/drive/My Drive/Thesis/Code/ML/mnist_data'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Import Libraries

In [None]:
import os
import time 
import datetime

import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline

import torch 
import torch.nn as nn
import torch.nn.functional as F 
from torch.utils.data import DataLoader

from torchvision import datasets
import torchvision.transforms as transforms

from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings("ignore")

## Data Loader

In [None]:
transform = transforms.Compose([
     transforms.ToTensor(),  
     transforms.Normalize((0.1307,), (0.3081,))                    
])

# Data 
data = datasets.MNIST(root="./data/", 
                         train=True, 
                         transform=transform, 
                         download=True)

# Get test set
test_set = datasets.MNIST('../mnist_data', 
                          download=True, 
                          train=False,
                          transform=transform)

# Split data to train_set, dev_set
train_set, val_set = torch.utils.data.random_split(data, lengths=[50000, 10000])

# Train_loader
train_loader = DataLoader(train_set, 
                          batch_size=64, 
                          shuffle=True,
                          pin_memory=True)
# Dev_loader
val_loader = DataLoader(val_set, 
                        batch_size=64, 
                        shuffle=False, 
                        pin_memory=True)

# Test Loader
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

## Model

In [None]:
class MNISTNet(nn.Module):
    def __init__(self):
        super(MNISTNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.relu1 = nn.ReLU()
        
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.relu2 = nn.ReLU()
        
        self.conv2_drop = nn.Dropout2d(p=0.5)
        
        self.fc1 = nn.Linear(320, 50)
        self.fc1_relu = nn.ReLU()
        self.fc1_drop = nn.Dropout2d(p=0.5)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.relu1(x)
        
        x = self.conv2(x)
        x = self.conv2_drop(x)
        x = self.pool2(x)
        x = self.relu2(x)
        
        x = x.view(-1, 320)
        
        x = self.fc1(x)
        x = self.fc1_relu(x)
        x = self.fc1_drop(x)
        x = self.fc2(x)
        return x

In [9]:
model = MNISTNet()
print(model)

MNISTNet(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu1): ReLU()
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu2): ReLU()
  (conv2_drop): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=320, out_features=50, bias=True)
  (fc1_relu): ReLU()
  (fc1_drop): Dropout2d(p=0.5, inplace=False)
  (fc2): Linear(in_features=50, out_features=10, bias=True)
)


In [None]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),
                            lr=3e-4,
                            betas=[0.5, 0.999])

# Using cuda
model = model.to(device)
criterion = criterion.to(device)

## Training

In [12]:
# Some setting
num_epochs = 30
best_model_path = './best_model.data'
best_val_loss = float("inf") 
step = 0
print_every = 100
num_batch_train = len(train_loader)
num_batch_val = len(val_loader)

# Training
for epoch in range(0, num_epochs):
    # Set mode train for using dropout
    model.train()
    running_loss = 0

    for batch_idx, (batch_data, batch_label) in enumerate(train_loader):
      # Move to GPU if device = "cuda"
      batch_data = batch_data.to(device)
      batch_label = batch_label.to(device)

      step += 1
      # Forward phase 
      out = model(batch_data)

      # Calculate loss 
      loss = criterion(out, batch_label)

      # Backward and update parameters
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      running_loss += loss.data
    train_loss = running_loss / num_batch_train
    
    # Set model eval 
    model.eval()
    val_loss = 0
    for batch_idx, (batch_data, batch_label) in enumerate(val_loader):
      # Move to GPU if device = "cuda"
      batch_data = batch_data.to(device)
      batch_label = batch_label.to(device)

      # Forward
      out = model(batch_data)

      # Calculate loss
      v_loss = criterion(out, batch_label)
      val_loss += v_loss.data 
    val_loss /= num_batch_val 

    # Save model if it better than current best model
    if val_loss < best_val_loss:
      best_val_loss = val_loss
      torch.save(model.state_dict(), best_model_path)
    
    # Log
    print("Epoch {}, train_loss: {:0.4f}, val_loss: {:0.4f}".format(epoch + 1, train_loss, val_loss))

Epoch 1, train_loss: 0.2748, val_loss: 0.1023
Epoch 2, train_loss: 0.2379, val_loss: 0.0887
Epoch 3, train_loss: 0.2167, val_loss: 0.0819
Epoch 4, train_loss: 0.2005, val_loss: 0.0742
Epoch 5, train_loss: 0.1866, val_loss: 0.0710
Epoch 6, train_loss: 0.1839, val_loss: 0.0679
Epoch 7, train_loss: 0.1763, val_loss: 0.0628
Epoch 8, train_loss: 0.1678, val_loss: 0.0634
Epoch 9, train_loss: 0.1612, val_loss: 0.0622
Epoch 10, train_loss: 0.1579, val_loss: 0.0622
Epoch 11, train_loss: 0.1485, val_loss: 0.0588
Epoch 12, train_loss: 0.1475, val_loss: 0.0568
Epoch 13, train_loss: 0.1451, val_loss: 0.0566
Epoch 14, train_loss: 0.1442, val_loss: 0.0569
Epoch 15, train_loss: 0.1396, val_loss: 0.0531
Epoch 16, train_loss: 0.1356, val_loss: 0.0564
Epoch 17, train_loss: 0.1318, val_loss: 0.0550
Epoch 18, train_loss: 0.1333, val_loss: 0.0529
Epoch 19, train_loss: 0.1294, val_loss: 0.0519
Epoch 20, train_loss: 0.1270, val_loss: 0.0525
Epoch 21, train_loss: 0.1243, val_loss: 0.0498
Epoch 22, train_loss: 

In [28]:
# Load pre-trained model
model.load_state_dict(torch.load(best_model_path))

<All keys matched successfully>

In [None]:
def predict(model, dataloader, device):
  predicts = []
  targets = [] 
  for batch_idx, (batch_data, batch_label) in enumerate(dataloader):
    batch_data = batch_data.to(device)
    batch_label = batch_label.to(device)

    out = model(batch_data)
    out = F.softmax(out)
    out = torch.argmax(out, dim=1).detach()
    predicts.append(out)
    targets.append(batch_label)
  predicts = torch.cat(predicts, dim=0).cpu().numpy()
  targets = torch.cat(targets, dim=0).cpu().numpy()
  return predicts, targets

In [30]:
# Predictions for test set using API in PyTorch
predicts, targets = predict(model, test_loader, device)
print(classification_report(targets, predicts))

              precision    recall  f1-score   support

           0       0.99      1.00      0.99       980
           1       0.99      1.00      0.99      1135
           2       0.99      0.99      0.99      1032
           3       0.99      0.99      0.99      1010
           4       0.99      0.99      0.99       982
           5       0.99      0.99      0.99       892
           6       0.99      0.99      0.99       958
           7       0.98      0.98      0.98      1028
           8       0.99      0.99      0.99       974
           9       0.99      0.97      0.98      1009

    accuracy                           0.99     10000
   macro avg       0.99      0.99      0.99     10000
weighted avg       0.99      0.99      0.99     10000



## Features Extraction

In [None]:
class FeaturesExtractor(nn.Module):
  def __init__(self, model):
    super(FeaturesExtractor, self).__init__()
    self.conv1 = model.conv1
    self.pool1 = model.pool1
    self.relu1 = model.relu1
    self.conv2 = model.conv2
    self.pool2 = model.pool2
    self.relu2 = model.relu2
    self.conv2_drop = model.conv2_drop 
    self.fc1 = model.fc1
    self.fc1_relu = model.fc1_relu
    
  def forward(self, x):
    x = self.conv1(x)
    x = self.pool1(x)
    x = self.relu1(x)
    x = self.conv2(x)
    x = self.pool2(x)
    x = self.relu2(x)
    x = self.conv2_drop(x)
    x = x.view(-1, 320)
    x = self.fc1(x)
    out = self.fc1_relu(x)
    return out

In [32]:
feature_extractor = FeaturesExtractor(model)

# Set mode eval to get features
feature_extractor.eval()

FeaturesExtractor(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu1): ReLU()
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu2): ReLU()
  (conv2_drop): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=320, out_features=50, bias=True)
  (fc1_relu): ReLU()
)

## Helper functions

In [None]:
def extract_features(model, dataloader):
  features_total = []
  target = []
  for batch_idx, (batch_data, batch_label) in enumerate(dataloader):
    batch_data = batch_data.to(device)
    batch_label = batch_label.to(device)

    features = feature_extractor(batch_data)
    target.append(batch_label)
    features_total.append(features)
  
  features = torch.cat(features_total, dim=0).cpu().detach().numpy()
  target = torch.cat(target, dim=0).cpu().detach().numpy()
  return features, target

##############
def get_plain_data(loader):
  features_total = []
  target = []
  for batch_idx, (batch_data, batch_label) in enumerate(loader):
    batch_data = batch_data.view(batch_data.size(0), -1)
    batch_label = batch_label.view(batch_label.size(0), -1)
    target.append(batch_label)
    features_total.append(batch_data)
  
  features = torch.cat(features_total, dim=0).cpu().detach().numpy()
  target = torch.cat(target, dim=0).cpu().detach().numpy()
  return features, target

##############
def save_feature_to_csv(features, target, filename):
  features_dim = features.shape[1]
  df = pd.DataFrame(columns = ["f_{}".format(i) for i in range(features_dim)] + ["label"])
  for i in range(features_dim):
    df["f_{}".format(i)] = features[:, i]
  df["label"] = target
  df.to_csv(filename, index=False, header=False)
  return df

## Transform data to features for using with Softmax Regression in pylearn_ml191

In [None]:
train_plain_features, train_plain_target = get_plain_data(train_loader)
val_plain_features, val_plain_target = get_plain_data(val_loader)
test_plain_features, test_plain_target = get_plain_data(test_loader)

df_train_plain = save_feature_to_csv(train_plain_features, 
                                     train_plain_target, 
                                     os.path.join(DATA_PATH, "train_set_plain.csv"))

df_val_plain = save_feature_to_csv(val_plain_features, 
                                   val_plain_target, 
                                   os.path.join(DATA_PATH, "val_set_plain.csv"))

df_test_plain = save_feature_to_csv(test_plain_features, 
                                    test_plain_target, 
                                    os.path.join(DATA_PATH, "test_set_plain.csv"))

In [None]:
train_features, train_target = extract_features(feature_extractor, train_loader)
train_features = train_features.reshape(train_features.shape[0], -1)
train_target = train_target.reshape(train_target.shape[0], -1)

val_features, val_target = extract_features(feature_extractor, val_loader)
val_features = val_features.reshape(val_features.shape[0], -1)
val_target = val_target.reshape(val_target.shape[0], -1)

test_features, test_target = extract_features(feature_extractor, test_loader)
test_features = test_features.reshape(test_features.shape[0], -1)
test_target = test_target.reshape(test_target.shape[0], -1)

In [None]:
df_train = save_feature_to_csv(train_features, 
                               train_target, 
                               os.path.join(DATA_PATH, "train_set_extracted.csv"))

df_val = save_feature_to_csv(val_features, 
                             val_target, 
                             os.path.join(DATA_PATH, "val_set_extracted.csv"))

df_test = save_feature_to_csv(test_features, 
                               test_target, 
                               os.path.join(DATA_PATH, "test_set_extracted.csv"))