In [None]:
# !pip install wandb

In [None]:
import os
import datetime
from google.colab import drive
# Mount Google Drive into the Colab VM, then switch the working directory to
# the course folder so the relative dataset paths used later resolve.
DRIVE_MOUNT_POINT = '/content/gdrive'
COURSE_DIR = '/content/gdrive/My Drive/Deep Learning CS6910'

drive.mount(DRIVE_MOUNT_POINT)
os.chdir(COURSE_DIR)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms,models
from torchvision.datasets import ImageFolder
from torch.utils.data import random_split, DataLoader
import torch.optim as optim
import numpy as np
import wandb
import time
from tqdm import tqdm

In [None]:
# Choose the compute device once: the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda:0")
else:
  device = torch.device("cpu")
print('Using ', device)

Using  cuda:0


In [None]:
def freeze_grads(model):
  """Turn off gradient tracking for every parameter of ``model``.

  Each tensor yielded by ``model.parameters()`` gets ``requires_grad`` set to
  ``False``. The model is modified in place and nothing is returned.
  """
  for weight in model.parameters():
    weight.requires_grad_(False)

In [None]:
def transfer_resnet(device,num_classes=10):
  """Build a ResNet-18 with frozen pretrained weights and a fresh classifier head.

  Args:
    device: device the returned model is moved to.
    num_classes: number of outputs of the replacement ``fc`` layer.

  Returns:
    The ResNet-18 module on ``device``. Every pretrained parameter has
    ``requires_grad=False``; only the new ``fc`` layer is trainable.
  """
  # torchvision >= 0.13 replaced the boolean ``pretrained`` flag with a
  # ``weights`` enum and warns on the old spelling. ``pretrained=True`` maps to
  # IMAGENET1K_V1, so both branches load the same checkpoint.
  if hasattr(models, "ResNet18_Weights"):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
  else:
    model = models.resnet18(pretrained=True)

  freeze_grads(model)

  # The head is created after freezing, so its parameters keep the default
  # requires_grad=True and are the only ones an optimizer will update.
  model.fc = nn.Linear(model.fc.in_features, num_classes)
  return model.to(device)

In [None]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
  """Make one pass over ``loader`` and return ``(mean_batch_loss, accuracy_percent)``.

  With an ``optimizer`` the model is put in training mode and updated after
  every batch. Without one the model is put in eval mode and gradient
  tracking is disabled for the pass.
  """
  training = optimizer is not None
  model.train(training)

  running_loss = 0.0
  correct = 0
  total = 0
  with torch.set_grad_enabled(training):
    for inputs, labels in loader:
      inputs, labels = inputs.to(device), labels.to(device)

      if training:
        optimizer.zero_grad()

      outputs = model(inputs)
      loss = criterion(outputs, labels)

      if training:
        loss.backward()
        optimizer.step()

      running_loss += loss.item()
      predicted = outputs.argmax(dim=1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()

  return running_loss / len(loader), 100 * correct / total


def learn(model,optimizer,criterion,train_loader,val_loader,verbose=False,epochs=40):
  """Train ``model`` and validate it after every epoch.

  Args:
    model: network to optimise; batches are moved to the device its
      parameters live on.
    optimizer: optimizer stepped once per training batch.
    criterion: loss called as ``criterion(outputs, labels)``.
    train_loader: iterable of ``(inputs, labels)`` training batches.
    val_loader: iterable of ``(inputs, labels)`` validation batches.
    verbose: if truthy, print per-epoch statistics; otherwise send them to
      ``wandb.log`` (a wandb run must already be active).
    epochs: number of passes over ``train_loader`` (defaults to 40).

  Returns:
    None. The model is left in eval mode after the final validation pass.
  """
  # Use the model's own device instead of relying on a notebook-level global.
  model_device = next(model.parameters()).device

  for epoch in tqdm(range(epochs),ncols=120):
    # _run_epoch switches train/eval mode itself on every call, so the eval()
    # used for validation can no longer leak into the following epochs.
    train_loss, train_acc = _run_epoch(model, train_loader, criterion, model_device, optimizer)
    if verbose:
      print(f"Epoch {epoch+1}: Loss = {train_loss:.4f}, Accuracy = {train_acc:.2f}%")

    # The validation loss is averaged over the validation batches.
    val_loss, val_acc = _run_epoch(model, val_loader, criterion, model_device)
    if verbose:
      print(f"Validation: Loss = {val_loss:.4f}, Accuracy = {val_acc:.2f}%")

    if not verbose:
      wandb.log({
        'train_loss': train_loss,
        'train_acc': train_acc,
        'val_loss': val_loss,
        'val_acc': val_acc,
      })

  print('Finished Training')

In [None]:
# Hyperparameters and inputs for this run.
train_data_path = './inaturalist_12K/train'
data_aug = 'yes'        # 'yes' enables random rotation / horizontal flip on the training split
batch_size = 64
learning_rate = 1e-4
split_seed = 42         # fixes the train/validation partition across re-runs

# Start a new wandb run and record the hyperparameters alongside the metrics.
wandb.init(entity='viswa_ee', project="CS6910_cnn_inat", name='resnet_fine_tune_inat',
           config={'data_aug': data_aug, 'batch_size': batch_size,
                   'learning_rate': learning_rate, 'split_seed': split_seed})

# Deterministic preprocessing: resize to 224 x 224, convert to a tensor, then
# normalize each channel with the mean/std below.
eval_transforms = transforms.Compose([
  transforms.Resize((224, 224)),
  transforms.ToTensor(),
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

if data_aug == 'yes':
  # Same preprocessing followed by random augmentation (training split only).
  train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomRotation(30),
    transforms.RandomHorizontalFlip(),
  ])
else:
  train_transforms = eval_transforms

# Two views of the same folder: ImageFolder indexes files in sorted order, so
# index i is the same image in both. Splitting by index lets the training
# subset be augmented while the validation subset is evaluated un-augmented.
train_view = ImageFolder(root=train_data_path, transform=train_transforms)
val_view = ImageFolder(root=train_data_path, transform=eval_transforms)

num_train = int(0.8*len(train_view))    # 80% train / 20% validation
shuffled = torch.randperm(len(train_view),
                          generator=torch.Generator().manual_seed(split_seed)).tolist()
train_data = torch.utils.data.Subset(train_view, shuffled[:num_train])
val_data = torch.utils.data.Subset(val_view, shuffled[num_train:])

# Create data loaders for the train and validation data
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size)

model = transfer_resnet(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=learning_rate)
print('.... Model Created .....')
print(model)
learn(model,optimizer,criterion,train_loader,val_loader)

# Close the run so wandb flushes and finalises the logged metrics.
wandb.finish()

[34m[1mwandb[0m: Currently logged in as: [33mviswa_ee[0m. Use [1m`wandb login --relogin`[0m to force relogin




.... Model Created .....
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (rel

100%|████████████████████████████████████████████████████████████████████████████████| 40/40 [2:49:45<00:00, 254.64s/it]

Finished Training



