In [1]:
# !pip install -U "git+https://github.com/ab7289-tandon-nyu/csgy6953_DeepLearning_Midterm.git"

In [2]:
# # connect to our wandb project
# !pip install wandb
# !wandb login "API_KEY"

**Imports**

In [3]:
import torch
import torch.nn as nn
import random
import sys 
sys.path.append("..")
import time
import gc

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# from google.colab import drive
# drive.mount('/content/drive')

**Setup**

In [5]:
# Fix every random number generator the notebook relies on so that repeated
# runs start from the same state.
SEED = 1234

torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
random.seed(SEED)
# Restrict cuDNN to deterministic algorithms.
torch.backends.cudnn.deterministic = True

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

**Data**

In [6]:
from src.data import get_transformed_data, make_data_loaders
from src.transforms import make_transforms

# Hyper-parameters reused by the data loaders, the wandb config and the
# training loop further down.
BATCH_SIZE = 256
EPOCHS = 200

# Forwarded to get_transformed_data as `valid_ratio`
# (presumably the fraction of training data held out for validation — confirm in src.data).
valid_ratio = 0.1

# Build the train / validation / test datasets.
train_data, valid_data, test_data = get_transformed_data(
    make_transforms=make_transforms, valid_ratio=valid_ratio
)

# Wrap each dataset in a loader that yields batches of BATCH_SIZE.
train_iterator, valid_iterator, test_iterator = make_data_loaders(
    train_data, valid_data, test_data, batch_size=BATCH_SIZE
)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


**Define our Model**



In [7]:
from src.model import ResNet, StemConfig, ResidualBlockType
from src.utils import initialize_parameters, epoch_time

# Same dropout value is attached to every stage of the network.
DROPOUT = 0.1

# One 4-tuple per residual stage: (block type, int, int, dropout).
# NOTE(review): the two ints look like (number of blocks, channels) — confirm
# against src.model.ResNet.
model_architecture = tuple(
    (block_type, second, third, DROPOUT)
    for block_type, second, third in (
        (ResidualBlockType.BASIC, 3, 64),
        (ResidualBlockType.BASIC, 5, 128),
        (ResidualBlockType.BOTTLENECK, 26, 256),
        (ResidualBlockType.BOTTLENECK, 5, 512),
    )
)

# Stem settings: 64 channels, 5x5 kernel, stride 1, padding 2.
stem_config = StemConfig(
    num_channels=64,
    kernel_size=5,
    stride=1,
    padding=2,
)

# Ten output units.
model = ResNet(
    model_architecture,
    stem_config=stem_config,
    output_size=10,
)



In [8]:
from pathlib import Path

# Checkpoint file name; the same string is reused as the wandb run name.
run = "nish_49m_deep6_cyclic_lr_bottleneck_v2.pt"
# Directory, relative to the notebook, where checkpoints are stored.
path = "../model/"

# Keep both a plain-string path (used for saving/loading) and a Path object
# (used for existence checks) that point at the same file.
file_path = f"{path}{run}"
model_file = Path(file_path)

We need to pass a dummy batch through the model to initialize its lazy modules before we can use torchsummary.

In [9]:
# Either resume from a saved checkpoint or initialize a fresh model.
if model_file.is_file():
    print("Loading model")
    # Load our previously trained model. map_location remaps the stored
    # tensors onto the active device, so a checkpoint written on a GPU machine
    # also loads on a CPU-only one.
    model.load_state_dict(torch.load(model_file, map_location=device))
    model = model.to(device)
else:
    # Initialize a new model.
    print("Init new model parameters")
    inputs = torch.empty((BATCH_SIZE, 3, 32, 32))
    inputs.normal_()
    model = model.to(device)
    # One forward pass on random data before the custom initialization is
    # applied. no_grad keeps this throw-away pass from building an autograd
    # graph that the global `y` would otherwise keep alive for the rest of
    # the session.
    with torch.no_grad():
        y = model(inputs.to(device))
    print(y.size())

    model.apply(initialize_parameters)

Init new model parameters
torch.Size([256, 10])


In [10]:
from src.utils import count_parameters

# count_parameters returns a pair; the second entry is the one reported below
# as the number of trainable parameters.
num_params, grad_params = count_parameters(model)
summary_line = f"There are {grad_params:,} trainable parameters."
print(summary_line)

There are 4,997,194 trainable parameters.


**Logging**

In [11]:
# Set up wandb logging: start a run named after the checkpoint file and record
# the hyper-parameters used by this notebook.
import wandb

wandb.init(
    project='Submission_Reproduces_Results',
    name=run,
    entity="dlf22_mini_project",
    config={
        # Describes the scheduler this notebook actually trains with:
        # CyclicLR(base_lr=0.0001, max_lr=0.01) in its default triangular mode.
        # The previous text was garbled and claimed a peak of 0.5.
        "learning_rate_policy": "CyclicLR base=0.0001 max=0.01 triangular",
        "epochs": EPOCHS,
        "dropout": DROPOUT,
        "batch_size": BATCH_SIZE,
        "architecture": model_architecture,
        # NOTE(review): 4 is not referenced anywhere else in this notebook;
        # presumably the model's average-pool size — confirm against src.model.
        "avg_pool": 4,
        "num_params": grad_params,
    },
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnishantaswani[0m ([33mdlf22_mini_project[0m). Use [1m`wandb login --relogin`[0m to force relogin


**Training configurations**

In [12]:
from src.engine import train_one_epoch, evaluate
from torch.optim.lr_scheduler import CyclicLR

# Lowest validation loss seen so far; a checkpoint is written whenever an
# epoch beats it.
best_loss = float('inf')
# EPOCHS is defined once, next to BATCH_SIZE in the data cell, so the value
# logged to wandb and the value used by the training loop cannot drift apart.

# NOTE(review): max_lr is not used below — the scheduler's peak is
# `learning_rate` (0.01). Kept as-is so the training run is unchanged.
max_lr = 0.5
base_lr = 0.0001
learning_rate = 0.01

criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
# cycle_momentum=False because AdamW has no `momentum` option to cycle.
# NOTE(review): CyclicLR is designed to be stepped after every batch and its
# default step_size_up is 2000 steps; the training loop steps it once per
# epoch, so over EPOCHS=200 the rate only climbs part of the way to the peak.
# Confirm this is intended before changing it — the reported results were
# produced with this behavior.
scheduler = CyclicLR(optimizer, base_lr=base_lr, max_lr=learning_rate, cycle_momentum=False)

if model_file.is_file():
    # We resumed from a saved checkpoint: start best_loss from that model's
    # loss so a worse epoch cannot overwrite the save. The training loop
    # compares best_loss against the *validation* loss, so the baseline is
    # measured on the validation split too (not the test split).
    loss, acc = evaluate(model.to(device), valid_iterator, criterion, device)
    best_loss = loss
    print(f"Prevous best loss: {loss:.4f}, acc: {acc * 100:.2f}%")

**Training**

In [13]:
# Make sure the checkpoint directory exists before training starts, so the
# first improving epoch does not fail inside torch.save.
model_file.parent.mkdir(parents=True, exist_ok=True)

# Train for EPOCHS epochs, validating and logging after each one and keeping
# the checkpoint with the lowest validation loss.
for epoch in range(1, EPOCHS+1):
    start = time.time()

    print(f"Epoch {epoch}")
    train_loss, train_acc = train_one_epoch(model, train_iterator, criterion, optimizer, device)
    train_mins, train_secs = epoch_time(start, time.time())

    print(f"\tTrain elapsed: {train_mins}:{train_secs}, loss: {train_loss:.4f}, acc: {train_acc * 100:.2f}%")

    start = time.time()
    val_loss, val_acc = evaluate(model, valid_iterator, criterion, device)
    val_mins, val_secs = epoch_time(start, time.time())
    # The scheduler is advanced once per epoch here.
    scheduler.step()

    wandb.log({
        "train_loss": train_loss,
        "train_acc": train_acc,
        "val_loss": val_loss,
        "val_acc": val_acc,
        "epoch": epoch,
        # Read after scheduler.step(), so this is the rate the scheduler has
        # just computed for the next epoch.
        "current_lr": scheduler.get_last_lr()[0],
    })

    print(f"\tValidation elapsed: {val_mins}:{val_secs}, loss: {val_loss:.4f}, acc: {val_acc * 100:.2f}%")

    # Only overwrite the checkpoint when validation loss improves.
    if val_loss < best_loss:
        best_loss = val_loss
        torch.save(model.state_dict(), file_path)

Epoch 1
	Train elapsed: 0:33, loss: 3.4149, acc: 19.00%
	Validation elapsed: 0:1, loss: 2.3849, acc: 23.68%
Epoch 2
	Train elapsed: 0:33, loss: 2.1200, acc: 28.06%
	Validation elapsed: 0:1, loss: 2.1305, acc: 30.54%
Epoch 3
	Train elapsed: 0:33, loss: 1.9064, acc: 33.33%
	Validation elapsed: 0:1, loss: 1.9975, acc: 35.91%
Epoch 4
	Train elapsed: 0:33, loss: 1.7723, acc: 37.49%
	Validation elapsed: 0:1, loss: 1.7380, acc: 42.25%
Epoch 5
	Train elapsed: 0:33, loss: 1.6757, acc: 40.38%
	Validation elapsed: 0:1, loss: 1.6902, acc: 44.77%
Epoch 6
	Train elapsed: 0:33, loss: 1.5960, acc: 42.89%
	Validation elapsed: 0:1, loss: 1.6527, acc: 45.57%
Epoch 7
	Train elapsed: 0:34, loss: 1.5372, acc: 44.75%
	Validation elapsed: 0:1, loss: 1.5254, acc: 48.59%
Epoch 8
	Train elapsed: 0:34, loss: 1.4835, acc: 46.64%
	Validation elapsed: 0:1, loss: 1.4648, acc: 50.87%
Epoch 9
	Train elapsed: 0:34, loss: 1.4298, acc: 48.96%
	Validation elapsed: 0:1, loss: 1.5470, acc: 51.29%
Epoch 10
	Train elapsed: 0:3

**Evaluate the model**

In [14]:
from src.engine import evaluate

# Restore the checkpoint written by the training loop (the weights with the
# lowest validation loss) and score it on the held-out test split.
# torch.load takes a path, not a list; map_location keeps the load working
# when the checkpoint was saved on a different device.
model.load_state_dict(torch.load(file_path, map_location=device))
test_loss, test_acc = evaluate(model.to(device), test_iterator, criterion, device)
print(f"Test Loss: {test_loss:.4f}\nTest Accuracy: {test_acc * 100:.2f}%")

# Record the final test metrics on the wandb run.
wandb.log({
    "test_loss": test_loss,
    "test_acc": test_acc,
})

Test Loss: 0.3867
Test Accuracy: 91.46%
