In [1]:
# for downloading the data and unziping it
!wget https://storage.googleapis.com/wandb_datasets/nature_12K.zip -O nature_12K.zip
!unzip -q nature_12K.zip

--2024-04-09 11:16:59--  https://storage.googleapis.com/wandb_datasets/nature_12K.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 173.194.206.207, 173.194.74.207, 209.85.145.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|173.194.206.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3816687935 (3.6G) [application/zip]
Saving to: ‘nature_12K.zip’


2024-04-09 11:17:26 (135 MB/s) - ‘nature_12K.zip’ saved [3816687935/3816687935]



In [None]:
!pip install wandb argparse

In [9]:
%%writefile train.py
import torch
import wandb
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import argparse
import torchvision.models as models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_classes = 10
num_epochs = 10
img_size = 256

wandb.login()

def cmd_parser():
  args = argparse.ArgumentParser()
  args.add_argument("--wandb_project", "-wp", default="Assignment2")
  args.add_argument("--wandb_entity", "-we", default="Assignment2")
  args.add_argument("--batch_norm", "-bn", default="true", choices=["true", "false"])
  args.add_argument("--batch_size","-b", type=int, default=32)
  args.add_argument("--data_aug", "-da", default="true", choices=["true", "false"])
  args.add_argument("--dropout", "-dp", default=0, type=float)
  args.add_argument("--filt_org", "-fo", default="double", choices=["equal", "double", "half"])
  args.add_argument("--kernel_size", "-ks", default=[3,3,3,3,3])
  args.add_argument("--num_dense", "-nd", default=64, type=int)
  args.add_argument("--num_filters","-nf", default=128, type=int)
  args.add_argument("--optimizer","-o", default= "adam", choices=["adam","nadam"])
  args.add_argument("--learning_rate","-lr", default=0.003, type=float)
  args.add_argument("--activation", "-a", default="mish", choices=["relu","gelu","silu","mish"])
  args.add_argument("--strategy", "-s", default=1, type=int)
  return args.parse_args()

args = cmd_parser()

# Load and transform the data
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

transform_aug = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomRotation(degrees=30),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset=None
if(args.data_aug == 'true'):
    train_dataset = torchvision.datasets.ImageFolder(root='/content/inaturalist_12K/train', transform=transform_aug)
else:
    train_dataset = torchvision.datasets.ImageFolder(root='/content/inaturalist_12K/train', transform=transform)

train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [8000, 1999])
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=args.batch_size, shuffle=True)

test_dataset = torchvision.datasets.ImageFolder(root='/content/inaturalist_12K/val', transform=transform)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=True)

def resnet_1():
    model = models.resnet50(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    for param in model.parameters():
        param.requires_grad = False
    for param in model.fc.parameters():
        param.requires_grad = True
    return model

def resnet_2(k):
    model = models.resnet50(pretrained=True)
    params = list(model.parameters())
    for param in params[:k]:
        param.requires_grad = False #freezing
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model

def resnet_3(num_dense):
    model = models.resnet50(pretrained=True)
    for params in model.parameters():
        params.requires_grad = False
    model.fc = nn.Sequential(
      nn.Linear(model.fc.in_features,num_dense),
      nn.ReLU(),
      nn.Dropout(0.2),
      nn.Linear(num_dense, num_classes)
    )
    for param in model.fc.parameters():
        param.requires_grad = True
    return model

def accuracy(model, criterion, loader):
    correct = 0
    total = 0
    loss = 0
    with torch.no_grad():
        for data in loader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            loss += criterion(outputs, labels).item() * labels.size(0)
    accuracy = correct / total
    loss /= total
    return accuracy, loss

def train(model, criterion, optimizer):
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        train_loss = 0
        correct = 0
        for i, (images, labels) in enumerate(train_loader):
            # Forward pass
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            if (i+1)%10 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Avg Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, train_loss/(i+1)))

        train_loss /= total_step
        train_acc = correct / (total_step * args.batch_size)

        val_acc, val_loss = accuracy(model, criterion, val_loader)

        print("Train:\nAccuracy:", train_acc, "Loss:", train_loss)
        print("Validation:\nAccuracy:", val_acc, "Loss:", val_loss, "\n")

optimizers = {
        'adam': optim.Adam,
        'nadam': optim.NAdam
}

wandb.init()
bn = 0
aug = 0
org = 1
ks = ""

if(args.batch_norm == 'true'):
    bn = 1

if(args.data_aug == 'true'):
    aug = 1

if(args.filt_org == 'double'):
    org = 2
elif(args.filt_org == 'half'):
    org = 0.5

for i in range(0,5,2):
    ks += str(args.kernel_size[i])

wandb.run.name =  (args.activation + "-bn_"+str(bn) + "-aug_"+str(aug) + "-drop_"+str(args.dropout) +
                    "-bs_"+str(args.batch_size) +"-lr_"+str(args.learning_rate) + "-filt_"+str(args.num_filters) +
                    "-org_"+str(org) + "-ks_"+ks + "-fc_"+str(args.num_dense) + "-"+args.optimizer + "-strategy_"+str(args.strategy))
model = None
if(args.strategy == 1):
    model = resnet_1()
elif(args.strategy == 2):
    model = resnet_2(10)
else:
    model = resnet_3(256)

model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optimizers[args.optimizer](model.parameters(), lr=args.learning_rate)
train(model, criterion, optimizer)

wandb.finish()

Overwriting train.py


In [2]:
!python3 train.py

[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice: 2
[34m[1mwandb[0m: You chose 'Use an existing W&B account'
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mcs23m017[0m ([33marun_cs23m017[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Tracking run with wandb version 0.16.6
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/content/wandb/run-20240409_090437-s27nfkl4[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to tu

In [10]:
!python3 train.py -s 2

[34m[1mwandb[0m: Currently logged in as: [33mcs23m017[0m ([33marun_cs23m017[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Tracking run with wandb version 0.16.6
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/content/wandb/run-20240409_112920-goi9gewx[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mhelpful-donkey-17[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/arun_cs23m017/uncategorized[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/arun_cs23m017/uncategorized/runs/goi9gewx[0m
Epoch [1/10], Step [10/250], Avg Loss: 2.9057
Epoch [1/10], Step [20/250], Avg Loss: 2.6330
Epoch [1/10], Step [30/250], Avg Loss: 2.5751
Epoch [1/10], Step [40/250], Avg Loss: 2.5233
Epoch [1/10], Step [50/250], Avg Loss: 2.4789
Epoch [1/10], Step [60/250], Avg Loss: 2.4486
Epoch [1/10], Step [70/250], Avg Loss: 2.4303
Epoch [1/10], Step [80/250], Avg Loss: 2.

In [11]:
!python3 train.py -s 3

[34m[1mwandb[0m: Currently logged in as: [33mcs23m017[0m ([33marun_cs23m017[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Tracking run with wandb version 0.16.6
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/content/wandb/run-20240409_120107-9ytzjcem[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mchocolate-forest-18[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/arun_cs23m017/uncategorized[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/arun_cs23m017/uncategorized/runs/9ytzjcem[0m
Epoch [1/10], Step [10/250], Avg Loss: 2.7388
Epoch [1/10], Step [20/250], Avg Loss: 2.4131
Epoch [1/10], Step [30/250], Avg Loss: 2.1973
Epoch [1/10], Step [40/250], Avg Loss: 2.0590
Epoch [1/10], Step [50/250], Avg Loss: 1.9472
Epoch [1/10], Step [60/250], Avg Loss: 1.8561
Epoch [1/10], Step [70/250], Avg Loss: 1.7825
Epoch [1/10], Step [80/250], Avg Loss: 