# MNIST on SageMaker with PyTorch Lightning

## Download dataset to local folder

In [1]:
S3_DATA_BUCKET = 'dataset.mnist'
S3_TRAINING_DATA = S3_DATA_BUCKET+'/training'
S3_TESTING_DATA = S3_DATA_BUCKET+'/testing'

DATA_PATH = '../dataset'
BATCH_SIZE = 128

In [2]:
#!mkdir -p $DATA_PATH/training
#!mkdir -p $DATA_PATH/testing
#!aws s3api get-object --bucket $S3_DATA_BUCKET --key mnist.tar.gz $DATA_PATH/mnist.tar.gz
#!cd $DATA_PATH && tar xvf mnist.tar.gz && rm -f mnist.tar.gz

In [3]:
# Install libraries if not already installed
! pip install torch
! pip install torchvision
! pip install pytorch_lightning

You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p36/bin/python -m pip install --upgrade pip' command.[0m
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p36/bin/python -m pip install --upgrade pip' command.[0m
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p36/bin/python -m pip install --upgrade pip' command.[0m


In [4]:
# Install libraries specific for Jupyter Notebook
! pip install ipywidgets
! jupyter nbextension enable --py widgetsnbextension

You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p36/bin/python -m pip install --upgrade pip' command.[0m
Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


In [21]:
import os
import math
import random as rn

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from PIL import Image, ImageFile

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import transforms as T, datasets
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint


In [6]:
# Import libraries specific for Jupyter notebook visualization
from matplotlib import pyplot as plt, image
from PIL import Image
%matplotlib inline

In [8]:
#train_data_dir = DATA_PATH+'/training'
#dataset = datasets.ImageFolder(train_data_dir)
#train_set, val_set = torch.utils.data.random_split(dataset, [55000, 5000])

Display a few images to have an idea about the input

In [9]:
#plt.imshow(image.imread(train_set.dataset.imgs[0][0]))
#plt.figure()
#plt.imshow(image.imread(train_set.dataset.imgs[7000][0]))
#plt.figure()
#plt.imshow(image.imread(train_set.dataset.imgs[20000][0]))
#plt.figure()

In [10]:
#fix random seed
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(42)
rn.seed(12345)
torch.manual_seed(2020)
torch.cuda.manual_seed(2020)
torch.cuda.manual_seed_all(2020)
torch.backends.cudnn.deterministic = True

In [11]:
num_workers = 4
epochs = 10
validation_size = .3
batch_size = 128

In [12]:
data_dir = DATA_PATH+'/training'

## Create model

In [22]:
class MNISTClassifier(pl.LightningModule):
    def __init__(self):
        super(MNISTClassifier, self).__init__()
        self.conv_layer_1 = torch.nn.Sequential(
        torch.nn.Conv2d(3,28, kernel_size=5),
        torch.nn.ReLU(),
        torch.nn.MaxPool2d(kernel_size=2))
        self.conv_layer_2 = torch.nn.Sequential(
        torch.nn.Conv2d(28,10, kernel_size=2),
        torch.nn.ReLU(),
        torch.nn.MaxPool2d(kernel_size=2))
        self.dropout1=torch.nn.Dropout(0.25)
        self.fully_connected_1=torch.nn.Linear(250,18)
        self.dropout2=torch.nn.Dropout(0.08)
        self.fully_connected_2=torch.nn.Linear(18,10)

    def load_split_train_test(self, datadir, valid_size = .2):
        train_transforms = T.Compose([T.RandomHorizontalFlip(),                                       
                                           T.ToTensor(),
                                           T.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])])

        test_transforms = T.Compose([T.ToTensor(),T.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])])

        train_data = datasets.ImageFolder(datadir, transform=train_transforms)
        test_data = datasets.ImageFolder(datadir, transform=test_transforms)

        num_train = len(train_data)
        indices = list(range(num_train))
        split = int(np.floor(valid_size * num_train))
        np.random.shuffle(indices)
        from torch.utils.data.sampler import SubsetRandomSampler
        train_idx, test_idx = indices[split:], indices[:split]
        train_sampler = SubsetRandomSampler(train_idx)
        test_sampler = SubsetRandomSampler(test_idx)
        trainloader = torch.utils.data.DataLoader(train_data, sampler=train_sampler, batch_size=batch_size, num_workers=num_workers)
        testloader = torch.utils.data.DataLoader(test_data, sampler=test_sampler, batch_size=batch_size, num_workers=num_workers)
        return trainloader, testloader
    
    def prepare_data(self):
        self.train_loader, self.val_loader = self.load_split_train_test(data_dir, validation_size)
        
    def train_dataloader(self):
        return self.train_loader
    
    def val_dataloader(self):
        return self.val_loader
    
#    def test_dataloader(self):
#        return DataLoader(MNIST(os.getcwd(), train=False, download=False, transform=transform.ToTensor()), batch_size=128)
    
    def forward(self,x):
        x=self.conv_layer_1(x)
        x=self.conv_layer_2(x)
        x=self.dropout1(x)
        x=torch.relu(self.fully_connected_1(x.view(x.size(0),-1)))
        x=F.leaky_relu(self.dropout2(x))
        return F.softmax(self.fully_connected_2(x), dim=1)
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters())
    
    def training_step(self, batch, batch_idx):
        
        # Get input and output from batch
        x, labels = batch
        
        # Compute prediction through the network
        prediction = self.forward(x)
        
        loss = F.nll_loss(prediction, labels)
        
        # Logs training loss
        logs={'train_loss':loss}
        
        output = {
            # This is required in training to be used by backpropagation
            'loss':loss,
            # This is optional for logging pourposes
            'log':logs
        }
        
        return output
    
    def validation_step(self, batch, batch_idx):
        x, labels = batch
        prediction = self.forward(x)
        return {
            'val_loss': F.cross_entropy(prediction, labels)
        }
    
    def validation_epoch_end(self, outputs):
        val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean()
        return {'val_loss': val_loss_mean}

    
    def validation_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        print('Average training loss: '+str(avg_loss.item()))
        logs = {'val_loss':avg_loss}
        return {
            'avg_val_loss':avg_loss,
            'log':logs
        }

In [23]:
# The trainer abstracts training, validation and test loops

mnistTrainer=pl.Trainer(gpus=1, max_epochs=epochs)

model = MNISTClassifier()
mnistTrainer.fit(model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type       | Params
-------------------------------------------------
0 | conv_layer_1      | Sequential | 2 K   
1 | conv_layer_2      | Sequential | 1 K   
2 | dropout1          | Dropout    | 0     
3 | fully_connected_1 | Linear     | 4 K   
4 | dropout2          | Dropout    | 0     
5 | fully_connected_2 | Linear     | 190   


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

Average training loss: 2.302227735519409


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Average training loss: 1.8066515922546387


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Average training loss: 1.7187001705169678


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Average training loss: 1.705643892288208


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Average training loss: 1.702525019645691


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Average training loss: 1.701408863067627


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Average training loss: 1.69669771194458


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Average training loss: 1.697007179260254


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Average training loss: 1.6952210664749146


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Average training loss: 1.693630337715149


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Average training loss: 1.696158528327942



1

In [26]:
with open(os.path.join('./', 'model.pth'), 'wb') as f:
    torch.save(model.state_dict(), f)