<a href="https://colab.research.google.com/github/WenheLI/-/blob/main/a0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##Git and Drive Integration

In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
import time

### Mount google drive

In [2]:
# Mount google drive to use a persistent directory structure
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


### Clone the assignment repository

In [None]:
# Clone repository in google drive
%mkdir /content/gdrive/MyDrive/ece5545
%cd /content/gdrive/MyDrive/ece5545
!git clone https://YOUR_TOKEN@github.com/ML-HW-SYS/a0.git #paste your github token here

### Add `src` directory to the Python import path

In [10]:
# import functions from the src directory in repo
import sys
sys.path.insert(0, '/content/gdrive/MyDrive/ece5545/a0-WenheLI/src')

### Import functions from `src/train_utils.py`

In [11]:
import train_utils
from model import *

### Make changes to `src` files and push to repo

In [12]:
# You will likely need to set your git user identification settings
!git config --global user.email "wl692@cornell.edu"  # update with your email
!git config --global user.name "Wenhe LI"   # update with your name 

In [13]:
# To run the same tests that are run on Github Classroom
%cd /content/gdrive/MyDrive/ece5545/a0-WenheLI/
!python3 -m pytest

/content/gdrive/MyDrive/ece5545/a0-WenheLI
platform linux -- Python 3.7.12, pytest-3.6.4, py-1.11.0, pluggy-0.7.1
rootdir: /content/gdrive/MyDrive/ece5545/a0-WenheLI, inifile:
plugins: typeguard-2.7.1
collected 1 item                                                               [0m

tests/test_model.py .[36m                                                    [100%][0m



In [None]:
%cd /content/gdrive/MyDrive/ece5545/a0/src
# Navigate to a0/src/model.py and fix the bug (denoted by TODO)
# in the file then commit the changes below
!git commit -am "fixed bug in model.py"
!git push

## Training Configuration

In [23]:
batch_size = 128
epochs = 5
lr = 1.0
gamma = 0.7
no_cuda = False
seed = 42
log_interval = 50
save_model = False
dry_run = False

# Derived parameters
use_cuda = not no_cuda and torch.cuda.is_available()
torch.manual_seed(seed)
device = torch.device("cuda" if use_cuda else "cpu")

train_kwargs = {'batch_size': batch_size}
test_kwargs = {'batch_size': batch_size}
if use_cuda:
    cuda_kwargs = {'num_workers': 1,
                   'pin_memory': True,
                   'shuffle': True}
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)

## Data Loaders and Optimizer Setup

In [24]:
transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
    ])
dataset1 = datasets.MNIST('/content/gdrive/MyDrive/ece5545/data', train=True, download=True,
                    transform=transform)
dataset2 = datasets.MNIST('/content/gdrive/MyDrive/ece5545/data', train=False,
                       transform=transform)
train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

model = Net().to(device)
optimizer = optim.Adadelta(model.parameters(), lr=lr)

scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

## Training Loop

In [26]:
import importlib
importlib.reload(train_utils)
# we use importlib to make sure that we are loading
# the latest version of train_utils after any changes
# that you may have done

# for epoch in range(1, epochs + 1):
#     train_utils.train(model, device, train_loader, optimizer, epoch, log_interval, dry_run)
#     train_utils.test(model, device, test_loader)
#     scheduler.step()

# if save_model:
#     torch.save(model.state_dict(), "mnist_cnn.pt")

<module 'train_utils' from '/content/gdrive/MyDrive/ece5545/a0-WenheLI/src/train_utils.py'>

In [42]:
# Diagnostic information about the used GPU device
!nvidia-smi

Mon Jan 24 18:52:58 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.46       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   48C    P0    65W / 149W |    876MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [28]:
import time
import statistics 


In [27]:
# GPU Benchmark
training_time_gpu = []
for epoch in range(1, epochs + 1):
    start = time.time()
    train_utils.train(model, device, train_loader, optimizer, epoch, log_interval, dry_run)
    end = time.time()
    training_time_gpu.append(end - start)
    train_utils.test(model, device, test_loader)
    scheduler.step()
   



Epoch 1 time = 15.21s on device cuda

Test set: Average loss: 0.0431, Accuracy: 9863/10000 (99%)


Epoch 2 time = 15.28s on device cuda

Test set: Average loss: 0.0380, Accuracy: 9867/10000 (99%)


Epoch 3 time = 15.18s on device cuda

Test set: Average loss: 0.0298, Accuracy: 9904/10000 (99%)


Epoch 4 time = 15.19s on device cuda

Test set: Average loss: 0.0273, Accuracy: 9910/10000 (99%)


Epoch 5 time = 14.94s on device cuda

Test set: Average loss: 0.0275, Accuracy: 9913/10000 (99%)



In [36]:
mean_time = statistics.mean(training_time_gpu)
std_time = statistics.stdev(training_time_gpu)
print(f"Mean Training Time is {mean_time:.2f}\n std Training Time is {std_time:.2f}")

Mean Training Time is 15.16
 std Training Time is 0.13


In [40]:
# Init CPU model
train_kwargs = {'batch_size': batch_size}
test_kwargs = {'batch_size': batch_size}
train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
device = 'cpu'
model = Net().to('cpu')
optimizer = optim.Adadelta(model.parameters(), lr=lr)

scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

In [41]:
# CPU Benchmark
training_time_cpu = []
for epoch in range(1, epochs + 1):
    start = time.time()
    train_utils.train(model, device, train_loader, optimizer, epoch, log_interval, dry_run)
    end = time.time()
    training_time_cpu.append(end - start)
    train_utils.test(model, device, test_loader)
    scheduler.step()


Epoch 1 time = 130.31s on device cpu

Test set: Average loss: 0.0525, Accuracy: 9832/10000 (98%)


Epoch 2 time = 133.31s on device cpu

Test set: Average loss: 0.0392, Accuracy: 9866/10000 (99%)


Epoch 3 time = 135.06s on device cpu

Test set: Average loss: 0.0320, Accuracy: 9890/10000 (99%)


Epoch 4 time = 135.24s on device cpu

Test set: Average loss: 0.0310, Accuracy: 9891/10000 (99%)


Epoch 5 time = 135.15s on device cpu

Test set: Average loss: 0.0284, Accuracy: 9908/10000 (99%)



In [44]:
mean_time = statistics.mean(training_time_cpu)
std_time = statistics.stdev(training_time_cpu)
print(f"Mean Training Time is {mean_time:.2f}\n std Training Time is {std_time:.2f}")

Mean Training Time is 133.82
 std Training Time is 2.12


## Batch Size Experiment

In [None]:
batch_sizes = [1, 4, 8, 16, 32, 64, 128, 256, 512]
epochs = 5
lr = 1.0
gamma = 0.7
no_cuda = True
seed = 42
log_interval = 50
save_model = False
dry_run = False
optimizer = optim.Adadelta(model.parameters(), lr=lr)

scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

# Derived parameters
use_cuda = not no_cuda and torch.cuda.is_available()
torch.manual_seed(seed)
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)

for batch_size in batch_sizes:
  train_kwargs = {'batch_size': batch_size}
  test_kwargs = {'batch_size': batch_size}
  if use_cuda:
      cuda_kwargs = {'num_workers': 1,
                    'pin_memory': True,
                    'shuffle': True}
      train_kwargs.update(cuda_kwargs)
      test_kwargs.update(cuda_kwargs)
  train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
  test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
  training_time_batch = []
  for epoch in range(1, epochs + 1):
      start = time.time()
      train_utils.train(model, device, train_loader, optimizer, epoch, log_interval, dry_run)
      end = time.time()
      training_time_batch.append(end - start)
      train_utils.test(model, device, test_loader)
      scheduler.step()

