<a href="https://colab.research.google.com/github/hazelhkim/Pytorch/blob/master/Basic_Model_(Image_Classifier)_Training_Practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# What We Need
1. Model.py
2. Dataloader.py
3. Trainer.py
4. Train.py
5. Predict.py

# 1. Model

In [1]:
import torch
import torch.nn as nn

class ImageClassifier(nn.Module):
    def __init__(self, input_size, output_size):
        self.input_size = input_size
        self.output_size = output_size

        super().__init__()

        self.layers = nn.Sequential(
            nn.Linear(input_size, 500),
            nn.LeakyReLU(),
            nn.BatchNorm1d(500),
            nn.Linear(500, 400),
            nn.LeakyReLU(),
            nn.BatchNorm1d(400),
            nn.Linear(400, 300),
            nn.LeakyReLU(),
            nn.BatchNorm1d(300),
            nn.Linear(300, 200),
            nn.LeakyReLU(),
            nn.BatchNorm1d(200),
            nn.Linear(200, 100),
            nn.LeakyReLU(),
            nn.BatchNorm1d(100),
            nn.Linear(100, 50),
            nn.LeakyReLU(),
            nn.BatchNorm1d(50),
            nn.Linear(50, output_size),
            nn.Softmax(dim=-1),
        )

    def forward(self, x):
      y = self.layers(x)
      return y

# 2. DataLoader
We'll do MNIST dataset here for practice. DataLoader will be used in Train.py part later.

In [2]:
from torchvision import datasets, transforms
from argparse import Namespace
config = {
    "n_epochs": 20,
    'batch_size': 80,
    'train_ratio': .8,
    'verbose': 1,
    'gpu_id': 0 if torch.cuda.is_available() else -1,
}
config = Namespace(**config)
device = torch.device('cpu') if config.gpu_id <0 else torch.device('cuda:%d' % config.gpu_id)

dataset = datasets.MNIST(
    '../data',
    train=True,
    transform = transforms.Compose([transforms.ToTensor()]),
    download = True
)

x = dataset.data.float()/255.
x = x.view(x.size(0), -1).to(device) # Flatten all the data for x.
y = dataset.targets.to(device)

# 3. Trainer.py

In [5]:
import numpy as np
from copy import deepcopy

def _train(model, optimizer, crit, x, y, config):
  model.train()

  # Shuffle before training
  indices = torch.randperm(x.size(0), device = x.device)
  x = torch.index_select(x, dim=0, index=indices).split(config.batch_size, dim=0)
  y = torch.index_select(y, dim=0, index=indices).split(config.batch_size, dim=0)

  total_loss = 0

  for i, (x_i, y_i) in enumerate(zip(x, y)):
    y_hat_i = model(x_i)
    loss_i = crit(y_hat_i, y_i.squeeze()) # y_i might have unncessary extra space other than dim=0.

    # Initialize the gradients of the model.
    optimizer.zero_grad()
    loss_i.backward()
    optimizer.step()

    if config.verbose >= 2:
      print("Train Iteration(%d/%d): loss=%.4e" % (i+1, len(x, float(loss_i))))
    
    # Don't forget to detach to prevent memory leak.
    total_loss += float(loss_i)

  return total_loss/len(x)

def _validate(model, optimizer, crit, x, y, config):
  # Turn evaluation(validation) mode on.
  model.eval()
  with torch.no_grad():
    # Shuffle before validating. (Shuffling is not necessary for validation but splittin is.)
    indices = torch.randperm(x.size(0), device = x.device)
    x = torch.index_select(x, dim=0, index=indices).split(config.batch_size, dim=0)
    y = torch.index_select(y, dim=0, index=indices).split(config.batch_size, dim=0)

    total_loss = 0

    for i, (x_i, y_i) in enumerate(zip(x, y)): # for each batch
    # Turn on the no_grad mode to make more efficiently 
      y_hat_i = model(x_i)
      loss_i = crit(y_hat_i, y_i.squeeze())

    if config.verbose >= 2:
      print("Validate Iteration (%d/%d): loss=%.4e" %(i+1, len(x), float(loss_i)))

    # Don't forget to detach to prevent memory leak.
    total_loss += float(loss_i)

  return  total_loss / len(x)

def train(model, optimizer, crit, x, y, config):

  lowest_loss = np.inf
  best_model = None

  # Shuffle 
  indices = torch.randperm(x.size(0), device = x.device)
  x = torch.index_select(x, dim=0, index = indices).split([int(x.size(0)*config.train_ratio), int(x.size(0) - x.size(0)*config.train_ratio)], dim=0)
  y = torch.index_select(y, dim=0, index = indices).split([int(y.size(0)*config.train_ratio), int(y.size(0) - y.size(0)*config.train_ratio)], dim=0)


  # Split the data for training and validating
  ## The method below is not working since the train_ratio is not an exact integer but float even though it was initialized as a solid percentage such as 0.8(80%)
  ## x = torch.split(x, (x.size(0)*config.train_ratio, x.size(0)*(1-config.train_ratio)), dim=0)
  ## y = torch.split(y, (y.size(0)*config.train_ratio, y.size(0)*(1-config.train_ratio)), dim=0)
  ###x = torch.split(x, [int(x.size(0)*config.train_ratio), int(x.size(0) - x.size(0)*config.train_ratio)], dim=0)
  ###y = torch.split(y, [int(y.size(0)*config.train_ratio), int(y.size(0) - y.size(0)*config.train_ratio)], dim=0)

  for i in range(config.n_epochs):

    train_loss = _train(model, optimizer, crit, x[0], y[0], config)
    valid_loss = _validate(model, optimizer, crit, x[1], y[1], config)

    # You must use deepcopy to take a snapshot of current best weights.
    if valid_loss <= lowest_loss:
      lowest_loss = valid_loss
      best_model = deepcopy(model.state_dict())

    print("Epoch(%d/%d): train_loss=%.4e  valid_loss=%.4e lowest_loss=%.4e" %(
        i+1, 
        config.n_epochs,
        train_loss,
        valid_loss,
        lowest_loss,
        ))
    
    # Restore to best model.
    model.load_state_dict(best_model)



# 4. Train.py

In [6]:
import torch.optim as optim

model = ImageClassifier(28*28, 10).to(device)
optimizer = optim.Adam(model.parameters())
crit = nn.CrossEntropyLoss()

train(model, optimizer, crit, x, y, config)

Epoch(1/20): train_loss=1.5744e+00  valid_loss=1.0143e-02 lowest_loss=1.0143e-02
Epoch(2/20): train_loss=1.5248e+00  valid_loss=1.0240e-02 lowest_loss=1.0143e-02
Epoch(3/20): train_loss=1.5267e+00  valid_loss=9.9128e-03 lowest_loss=9.9128e-03
Epoch(4/20): train_loss=1.5177e+00  valid_loss=9.9917e-03 lowest_loss=9.9128e-03
Epoch(5/20): train_loss=1.5201e+00  valid_loss=1.0390e-02 lowest_loss=9.9128e-03
Epoch(6/20): train_loss=1.5175e+00  valid_loss=1.0457e-02 lowest_loss=9.9128e-03
Epoch(7/20): train_loss=1.5212e+00  valid_loss=1.0165e-02 lowest_loss=9.9128e-03
Epoch(8/20): train_loss=1.5212e+00  valid_loss=9.9934e-03 lowest_loss=9.9128e-03
Epoch(9/20): train_loss=1.5219e+00  valid_loss=9.9306e-03 lowest_loss=9.9128e-03
Epoch(10/20): train_loss=1.5236e+00  valid_loss=9.9086e-03 lowest_loss=9.9086e-03
Epoch(11/20): train_loss=1.5125e+00  valid_loss=1.0285e-02 lowest_loss=9.9086e-03
Epoch(12/20): train_loss=1.5143e+00  valid_loss=9.9294e-03 lowest_loss=9.9086e-03
Epoch(13/20): train_loss=