<a href="https://colab.research.google.com/github/VrijKun/CS6910_Assignment_2/blob/main/Assignment_2_DL_ED23D015.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In Part A and Part B of this assignment you will build and experiment with CNN based image classifiers using a subset of the iNaturalist dataset. In Part C you will take a pre-trained object detection model and use it for a novel application.

In [45]:
pip install --upgrade pytorch-lightning



In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as functionals
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import pytorch_lightning as pyligtining

In [3]:
print(pyligtining.__version__)

2.2.1


#Question 1 (5 Marks)
Build a small CNN model consisting of 5 convolution layers. Each convolution layer would be followed by a ReLU activation and a max pooling layer. Here is sample code for building one such conv-relu-maxpool block in keras.

model = Sequential()
model.add(Conv2D(16, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
After 5 such conv-relu-maxpool blocks of layers you should have one dense layer followed by the output layer containing 10 neurons (1 for each of the 10 classes). The input layer should be compatible with the images in the iNaturalist dataset.

**The code should be flexible such that the number of filters, size of filters and activation function in each layer can be changed. You should also be able to change the number of neurons in the dense layer.**

(a) What is the total number of computations done by your network? (assume mmm filters in each layer of size k×kk\times kk×k and nnn neurons in the dense layer)

(b) What is the total number of parameters in your network? (assume mmm filters in each layer of size k×kk\times kk×k and nnn neurons in the dense layer)


In [46]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import pytorch_lightning as pl

class ECNN(pl.LightningModule):
    def __init__(self, input_shape, num_classes, num_filters, filter_size, dense_neurons, batch_normalization='True',dropout=0.2, activation='relu'):
        super().__init__()
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.dense_neurons = dense_neurons
        self.activation = activation
        self.batch_normalization=batch_normalization

        # Define convolutional layers
        self.conv1 = nn.Conv2d(3, num_filters, filter_size)
        self.conv2 = nn.Conv2d(num_filters, num_filters, filter_size)
        self.conv3 = nn.Conv2d(num_filters, num_filters, filter_size)
        self.conv4 = nn.Conv2d(num_filters, num_filters, filter_size)
        self.conv5 = nn.Conv2d(num_filters, num_filters, filter_size)

        # Define batch normalization layers
        self.bn1 = nn.BatchNorm2d(num_filters)
        self.bn2 = nn.BatchNorm2d(num_filters)
        self.bn3 = nn.BatchNorm2d(num_filters)
        self.bn4 = nn.BatchNorm2d(num_filters)
        self.bn5 = nn.BatchNorm2d(num_filters)
        '''

        #########################
        self.conv_layers = nn.ModuleList([
            nn.Conv2d(input_shape[0] if i == 0 else num_filters, num_filters, filter_size)
            for i in range(5)
        ])

        # Define batch normalization layers
        self.bn_layers = nn.ModuleList([
            nn.BatchNorm2d(num_filters)
            for _ in range(5)
        ])
        ###########################
        '''

        # Define pooling layer
        self.pool = nn.MaxPool2d(2, 2)

        # Define dropout
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)
        self.dropout4 = nn.Dropout(dropout)
        self.dropout5 = nn.Dropout(dropout)

        # Calculate the size of the feature maps after max pooling
        self.flatten_size = self.calculate_flatten_size()

        # Define dense layers
        self.fc1 = nn.Linear(self.flatten_size, dense_neurons)
        self.fc2 = nn.Linear(dense_neurons, num_classes)

    '''
    ########################################
    def forward(self, x):
        for conv, bn in zip(self.conv_layers, self.bn_layers):
            x = F.relu(bn(conv(x)))
            x = self.pool(x)
        x = x.view(-1, self.flatten_size)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

    def calculate_flatten_size(self):
        # Dummy input to calculate the size of the feature maps after max pooling
        x = torch.randn(1, *self.input_shape)
        for conv, bn in zip(self.conv_layers, self.bn_layers):
            x = F.relu(bn(conv(x)))
            x = self.pool(x)
        return x.view(x.size(0), -1).size(1)

    #####################################
    '''

    def forward(self, x):
        if self.batch_normalization:
          x = F.relu(self.bn1(self.conv1(x)))
        else:
          x = F.relu(self.conv1(x))
        x=self.dropout1(x)
        x = self.pool(x)

        if self.batch_normalization:
          x = F.relu(self.bn2(self.conv2(x)))
        else:
          x = F.relu(self.conv2(x))
        x=self.dropout2(x)
        x = self.pool(x)

        if self.batch_normalization:
          x = F.relu(self.bn3(self.conv3(x)))
        else:
          x = F.relu(self.conv3(x))
        x=self.dropout3(x)
        x = self.pool(x)

        if self.batch_normalization:
          x = F.relu(self.bn4(self.conv4(x)))
        else:
          x = F.relu(self.conv4(x))
        x=self.dropout4(x)
        x = self.pool(x)

        if self.batch_normalization:
          x = F.relu(self.bn5(self.conv5(x)))
        else:
          x = F.relu(self.conv5(x))
        x=self.dropout5(x)

        x = self.pool(x)
        x = x.view(-1, self.flatten_size)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

    def calculate_flatten_size(self):
        # Dummy input to calculate the size of the feature maps after max pooling
        x = torch.randn(1, *self.input_shape)
        x = self.pool(self.conv1(x))
        x = self.pool(self.conv2(x))
        x = self.pool(self.conv3(x))
        x = self.pool(self.conv4(x))
        x = self.pool(self.conv5(x))
        return x.view(x.size(0), -1).size(1)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.001)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.nll_loss(y_hat, y)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.nll_loss(y_hat, y)
        self.log('val_loss', loss)

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.nll_loss(y_hat, y)
        self.log('test_loss', loss)

# Define transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Load dataset
train_dataset = datasets.ImageFolder(root='/content/drive/MyDrive/Colab Notebooks/nature_12K/inaturalist_12K/train', transform=transform)
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_data, val_data = random_split(train_dataset, [train_size, val_size])

test_data =  datasets.ImageFolder(root='/content/drive/MyDrive/Colab Notebooks/nature_12K/inaturalist_12K/val', transform=transform)
# Create data loaders
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32)
'''

# Initialize model
model = ECNN(input_shape=(3, 224, 224), num_classes=10, num_filters=32, filter_size=3, dense_neurons=128)

# Initialize Lightning trainer with GPU support
trainer = pyligtining.Trainer(max_epochs=1, accelerator="auto") # not GPU but accelerator

# Train the model
trainer.fit(model, train_loader, val_loader)

# Test the model
trainer.test()
'''


'\n\n# Initialize model\nmodel = ECNN(input_shape=(3, 224, 224), num_classes=10, num_filters=32, filter_size=3, dense_neurons=128)\n\n# Initialize Lightning trainer with GPU support\ntrainer = pyligtining.Trainer(max_epochs=1, accelerator="auto") # not GPU but accelerator\n\n# Train the model\ntrainer.fit(model, train_loader, val_loader)\n\n# Test the model\ntrainer.test()\n'

# Sweep Iteration 1

In [47]:
# wandb sweep
!pip install --upgrade wandb
!wandb login 3c21150eb43b007ee446a1ff6e87f640ec7528c4 #my API key for wandb login
import wandb

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [48]:
from torchsummary import summary
from pytorch_lightning.callbacks import EarlyStopping
sweep_config = {
    'method': 'bayes', #grid, random,bayes
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'
    },
    'parameters': {
        'activation': {
            'values': ['relu','gelu','elu','silu']
        },
        'num_filter': {
            'values': [[32,32,32,32,32] ,[64,64,64,64,64] ,[128,128,128,128,128],
                       [64,128,256,512,1024]]
        },
        'filter_size':{
            'values':[[3,3,3,5,5],[5,5,5,5,5],[3,3,3,3,3]]
        },
        'dropout':{
            'values':[0.2,0.3]
        },
        'batch_normal':{
            'values':[True,False]
        },
        'data_augmentation':{
            'values':[True,False]
        },

    }
}

sweep_id = wandb.sweep(sweep_config, entity='ed23d015', project="DL_Assignment_2")

def sweep_train():
  # Default hyper-parameters
  config_defaults = {
      'activation':'relu',
      'num_filter':[32,32,32,32,32],
      'filter_size':[3,3,3,3,3],
      'dropout':0.3,
      'batch_normal':True,
      'data_augmentation':True,
  }

  # Initialize wandb run
  wandb.init(project='DL_Assignment_2', entity='ed23d015',config=config_defaults)
  wandb.run.name = 'act:'+ str(wandb.config.activation)+' ;filter:'+str(wandb.config.num_filter)+ ' ;ker:'+str(wandb.config.filter_size)+ ' ;drop:'+str(wandb.config.dropout)+' ;b_n:'+str(wandb.config.batch_normal)+' ;d_a:'+str(wandb.config.data_augmentation)

  config = wandb.config
  activation = config.activation
  num_filter = config.num_filter
  filter_size = config.filter_size
  dropout = config.dropout
  batch_normal = config.batch_normal
  data_augmentation = config.data_augmentation
  '''
  # It's training time

  model = ECNN(input_shape=(3, 224, 224), num_classes=10, num_filters=num_filter, filter_size=filter_size, dense_neurons=128, batch_normalization=batch_normal, dropout=dropout, activation=activation)

  # Initialize Lightning trainer with GPU support
  #trainer = pyligtining.Trainer(max_epochs=10, accelerator="auto") # not GPU but accelerator
  trainer = pl.Trainer(max_epochs=10, gpus=1 if torch.cuda.is_available() else None, progress_bar_refresh_rate=30, callbacks=[EarlyStopping(monitor='val_loss')])


  # Train the model
  trainer.fit(model, train_loader, val_loader)

  # Test the model
  #trainer.test()

  # Test the model
  trainer.test(test_dataloaders=val_loader)
  '''
  # It's training time
  model = ECNN(input_shape=(3, 224, 224), num_classes=10, num_filters=num_filter, filter_size=filter_size, dense_neurons=128, batch_normalization=batch_normal, dropout=dropout, activation=activation)

  # Define loss function and optimizer
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr=0.001)

  # Initialize Lightning trainer with GPU support
  trainer = pl.Trainer(max_epochs=10, gpus=1 if torch.cuda.is_available() else None, progress_bar_refresh_rate=30, callbacks=[EarlyStopping(monitor='val_loss')])

  # Training loop
  for epoch in range(trainer.max_epochs):
      # Train the model
      model.train()
      train_loss = 0.0
      correct_train = 0
      total_train = 0

      for images, labels in train_loader:
          optimizer.zero_grad()
          outputs = model(images)
          loss = criterion(outputs, labels)
          loss.backward()
          optimizer.step()

          train_loss += loss.item()
          _, predicted = torch.max(outputs, 1)
          total_train += labels.size(0)
          correct_train += (predicted == labels).sum().item()

      train_accuracy = 100 * correct_train / total_train
      train_loss /= len(train_loader)

      # Validate the model
      model.eval()
      val_loss = 0.0
      correct_val = 0
      total_val = 0

      with torch.no_grad():
          for images, labels in val_loader:
              outputs = model(images)
              loss = criterion(outputs, labels)
              val_loss += loss.item()
              _, predicted = torch.max(outputs, 1)
              total_val += labels.size(0)
              correct_val += (predicted == labels).sum().item()

      val_accuracy = 100 * correct_val / total_val
      val_loss /= len(val_loader)

      # Log metrics to wandb
      wandb.log({"epoch": epoch + 1, "train_loss": train_loss, "train_accuracy": train_accuracy, "val_loss": val_loss, "val_accuracy": val_accuracy})

      # Print training progress
      print(f"Epoch {epoch + 1}/{trainer.max_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%")

  # Test the model
  trainer.test(test_dataloaders=val_loader)

#RUNNING THE SWEEP
wandb.agent(sweep_id, function=sweep_train, count=120)

Create sweep with ID: 3iatfv08
Sweep URL: https://wandb.ai/ed23d015/DL_Assignment_2/sweeps/3iatfv08


[34m[1mwandb[0m: Agent Starting Run: e25r36fz with config:
[34m[1mwandb[0m: 	activation: silu
[34m[1mwandb[0m: 	batch_normal: False
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	filter_size: [3, 3, 3, 5, 5]
[34m[1mwandb[0m: 	num_filter: [64, 128, 256, 512, 1024]


VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.09366846408070968, max=1.…

Run e25r36fz errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "<ipython-input-48-0d0d6c1b55d6>", line 77, in sweep_train
    model = ECNN(input_shape=(3, 224, 224), num_classes=10, num_filters=num_filter, filter_size=filter_size, dense_neurons=128, batch_normalization=batch_normal, dropout=dropout, activation=activation)
  File "<ipython-input-46-cd8f3cde1627>", line 22, in __init__
    self.conv1 = nn.Conv2d(3, num_filters, filter_size)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py", line 447, in __init__
    super().__init__(
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py", line 90, in __init__
    if out_channels % groups != 0:
TypeError: unsupported operand type(s) for %: 'list' and 'int'

[34m[1mwandb[0m: [32m[41mERROR[0m Run e25r36fz errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent 

VBox(children=(Label(value='0.001 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.09397611367797054, max=1.…

Run yjqf5rgm errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "<ipython-input-48-0d0d6c1b55d6>", line 77, in sweep_train
    model = ECNN(input_shape=(3, 224, 224), num_classes=10, num_filters=num_filter, filter_size=filter_size, dense_neurons=128, batch_normalization=batch_normal, dropout=dropout, activation=activation)
  File "<ipython-input-46-cd8f3cde1627>", line 22, in __init__
    self.conv1 = nn.Conv2d(3, num_filters, filter_size)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py", line 447, in __init__
    super().__init__(
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py", line 90, in __init__
    if out_channels % groups != 0:
TypeError: unsupported operand type(s) for %: 'list' and 'int'

[34m[1mwandb[0m: [32m[41mERROR[0m Run yjqf5rgm errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent 

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
