In [26]:
'''Import packages'''

import numpy as np
import time
import argparse
import os
import os.path
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim
import torchvision
import torch.nn as nn
import wandb
import torchvision.transforms as transforms
from torch.utils.data import DataLoader


In [27]:
def _load_data(DATA_PATH, batch_size):
    ## for training
    rotation = 15
    train_trans = transforms.Compose([transforms.RandomRotation(rotation),\
                                      transforms.RandomHorizontalFlip(),\
                                      transforms.ToTensor(),\
                                      transforms.Normalize((0.5), (0.5))])
    train_dataset = torchvision.datasets.MNIST(root=DATA_PATH, download=True,\
                                               train=True, transform=train_trans)
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size,\
                              shuffle=True, num_workers=0)
    ## for testing
    test_trans = transforms.Compose([transforms.ToTensor(),\
                                     transforms.Normalize((0.5), (0.5))])
    test_dataset = torchvision.datasets.MNIST(root=DATA_PATH,\
                                              download=True, train=False, transform=test_trans)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size,\
                             shuffle=False, num_workers=0)
    
    return train_loader, test_loader

In [28]:
'''Fun: write the MLP model'''
class MLPModel(nn.Module):
    """docstring for ClassName"""
    def __init__(self,):
        super(MLPModel, self).__init__()
        ##-----------------------------------------------------------
        ## define the model architecture here
        ## MNIST image input size batch * 28 * 28 (one input channel)
        ##-----------------------------------------------------------
        
        ## Write code about three MLP layers below
        self.mlp = nn.Sequential(nn.Linear(28*28,100),
                                nn.ReLU(),
                                nn.Dropout(0.2),
                                nn.Linear(100,50),
                                nn.ReLU(),
                                nn.Linear(50,10)
                                )
    '''feed features to the model'''
    def forward(self, x):
        ## write flatten tensor code below [I have done it]
        x = torch.flatten(x,1)
        ## ---------------------------------------------------
        ## write code about MLP predict results
        ## ---------------------------------------------------
        result = self.mlp(x)
        
        return result

In [29]:
def _compute_counts(y_pred, y_batch, mode='train'):
    return (y_pred==y_batch).sum().item()

In [30]:
def adjust_learning_rate(learning_rate, optimizer, epoch, decay):
    """initial LR decayed by 1/10 every args.lr epochs"""
    lr = learning_rate
    if (epoch > 5):
        lr = 0.001
    if (epoch >= 10):
        lr = 0.0001
    if (epoch > 20):
        lr = 0.00001
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [31]:
def _save_checkpoint(ckp_path, model, epoch, optimizer, global_step):
    ## save checkpoint to ckp_path: 'checkpoint/step_100.pt'
    ckp_path = ckp_path + 'ckp_{}.pt'.format(epoch+1) 
    checkpoint = {'epoch': epoch,
                  'global_step': global_step,
                  'model_state_dict': model.state_dict(),
                  'optimizer_state_dict': optimizer.state_dict()}
    torch.save(checkpoint, ckp_path)

In [32]:
def main():
    os.makedirs('checkpoint', exist_ok=True)
    
    ## choose cpu or gpu
    use_cuda = torch.cuda.is_available() ## if have gpu or cpu 
    device = torch.device("cuda" if use_cuda else "cpu")
    print("device: ", device)
    if use_cuda:
        torch.cuda.manual_seed(72)
    
    ## initialize hyper-parameters
    num_epoches = 10
    decay = 0.01
    learning_rate = 0.0001
    batch_size = 50
    ckp_path = 'checkpoint/'
    
    ## step 1: Data loader to load MNIST data
    DATA_PATH = "./data/"
    train_loader, test_loader=_load_data(DATA_PATH, batch_size)
    ##-------------------------------------------------------
    ## Step 2: load the MLP model in model.py file
    ##-------------------------------------------------------
    model =  MLPModel()
    ## load model to gpu or cpu
    model.to(device)
    
    ## --------------------------------------------------
    ## Step 3: define the LOSS FUNCTION: cross-entropy
    ## --------------------------------------------------
    optimizer = optim.Adam(model.parameters(),lr=learning_rate)  ## optimizer
    loss_fun = nn.CrossEntropyLoss()    ## cross entropy loss
    
    ## ---------------------------------------
    ## load checkpoint below
    ## ---------------------------------------
    
    
    ##  model training
    iteration = 0
    if True:
        model = model.train() ## model training
        for epoch in range(num_epoches): #10-50
            ## learning rate
            adjust_learning_rate(learning_rate, optimizer, epoch, decay)
            for batch_id, (x_batch,y_labels) in enumerate(train_loader):
                iteration += 1
                x_batch,y_labels = Variable(x_batch).to(device), Variable(y_labels).to(device)
                
                ## feed input data x into model
                output_y = model(x_batch)
                ##--------------------------------------------------------------
                ## Step 4: compute loss between ground truth and predicted result
                ##---------------------------------------------------------------
                loss = loss_fun(output_y, y_labels)
                
                ##----------------------------------------------
                ## Step 5: write back propagation steps below
                ##----------------------------------------------
                optimizer.zero_grad()
                loss.backward()
                optimizer.step() # update params
                
                ##---------------------------------------------------------
                ## Step 6: get the predict result and then compute accuracy
                ##---------------------------------------------------------
                y_pred = torch.argmax(output_y.data, 1)
                accy = _compute_counts(y_pred, y_labels)/batch_size
                ##----------------------------------------------------------
                ## Step 7: print loss values [I have done it]
                ##----------------------------------------------------------
                if iteration%10==0:
                    print('iter: {} loss: {}, accy: {}'.format(iteration, loss.item(), accy))
                    wandb.log({'iter': iteration, 'loss': loss.item()})
                    wandb.log({'iter': iteration, 'accy': accy})
                    
            ##---------------------------------------------------
            ##    save checkpoint below
            ##---------------------------------------------------
            _save_checkpoint(ckp_path, model, epoch, optimizer, iteration)
    
    ##------------------------------------
    ##    model testing code below
    ##------------------------------------
    total = 0
    accy_count = 0
    model.eval()
    with torch.no_grad():
        for batch_id, (x_batch,y_labels) in enumerate(test_loader):
            x_batch, y_labels = Variable(x_batch).to(device), Variable(y_labels).to(device)
            ##---------------------------------------
            ## Step 8: write the predict result below
            ##---------------------------------------
            output_y = model(x_batch)
            y_pred = torch.argmax(output_y.data, 1)
            
            ##--------------------------------------------------
            ## Step 9: computing the test accuracy
            ##---------------------------------------------------
            total += len(y_labels)
            accy_count += _compute_counts(y_pred, y_labels)
    accy = accy_count/total
    print("testing accy: ", accy)
            

In [33]:
with wandb.init(project='MLP', name='MLP_demo'):
    main()

device:  cpu
iter: 10 loss: 2.277869701385498, accy: 0.2
iter: 20 loss: 2.3038549423217773, accy: 0.1
iter: 30 loss: 2.233750581741333, accy: 0.38
iter: 40 loss: 2.262511968612671, accy: 0.28
iter: 50 loss: 2.2082018852233887, accy: 0.32
iter: 60 loss: 2.1862735748291016, accy: 0.34
iter: 70 loss: 2.1569480895996094, accy: 0.34
iter: 80 loss: 2.1378846168518066, accy: 0.28
iter: 90 loss: 2.0879573822021484, accy: 0.38
iter: 100 loss: 2.021610975265503, accy: 0.42
iter: 110 loss: 1.9413557052612305, accy: 0.52
iter: 120 loss: 1.915293574333191, accy: 0.44
iter: 130 loss: 1.915187954902649, accy: 0.44
iter: 140 loss: 1.903316617012024, accy: 0.46
iter: 150 loss: 1.8937950134277344, accy: 0.44
iter: 160 loss: 1.8163059949874878, accy: 0.48
iter: 170 loss: 1.8250336647033691, accy: 0.46
iter: 180 loss: 1.8099641799926758, accy: 0.46
iter: 190 loss: 1.753089189529419, accy: 0.44
iter: 200 loss: 1.6482194662094116, accy: 0.54
iter: 210 loss: 1.681785225868225, accy: 0.48
iter: 220 loss: 1.66

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accy,▁▃▄▅▆▇▆▅▅▆▄▅▅▇▆▆▆▇▆▆▇▇▆█▇▆▇▅█▇▇▇██▆█▇▆▇▇
iter,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
loss,█▆▄▄▂▃▂▃▃▃▄▅▄▂▃▂▂▁▃▂▃▂▃▁▂▂▁▃▁▁▁▂▁▁▃▁▁▂▁▂

0,1
accy,0.88
iter,12000.0
loss,0.43085
