# Imports & Setup

In [None]:
# IPython helper used at the end of this cell to wipe the noisy download/unzip logs.
from IPython.display import clear_output

# Download the dataset archive (wget saves it into the current working directory).
!wget https://storage.googleapis.com/wandb_datasets/nature_12K.zip
# Extract the archive.
# NOTE(review): the absolute /content path presumably targets a Google Colab
# runtime - confirm before running elsewhere. Also verify that the extracted
# folder ends up where the dataloader cell further down expects it
# (that cell reads from 'Data/inaturalist_12K').
!unzip /content/nature_12K.zip

# Remove everything the two shell commands printed from the cell output.
clear_output()

In [3]:
from PIL import Image

import os
from glob import glob
import time


import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchsummary import summary

# Dataloader

- Dataset Class for Setting up the data loading process
- Sections to fill in this script: `_init_transform()`

In [4]:
class inaturalist(Dataset):
    """Dataset over a ``<root_dir>/<split>/<class folder>/*.jpg`` directory tree.

    Class labels are the positions of the class folders in the sorted listing
    of ``<root_dir>/train``; the same listing is used for every split so that
    a class keeps the same integer label in 'train' and 'val'.

    Args:
        root_dir: folder containing the ``train`` and ``val`` sub-folders.
        mode: which split to index, either ``'train'`` or ``'val'``.
        transform: when truthy, ``_init_transform`` is called and its pipeline
            is applied to every image returned by ``__getitem__``.

    Attributes:
        files: sorted image paths of the selected split, grouped by class.
        labels: integer class label for each entry of ``files``.
    """

    def __init__(self, root_dir, mode = 'train', transform = True):
        self.data_dir = root_dir
        self.mode = mode
        # ``self.transforms`` (plural) is the on/off flag; the pipeline itself
        # lives in ``self.transform`` (singular) and only exists when the flag is set.
        self.transforms = transform
        self._init_dataset()
        if transform:
            self._init_transform()

    def _init_dataset(self):
        """Populate ``self.files`` and ``self.labels`` for the selected split.

        An unknown ``mode`` prints a message and leaves both lists empty.
        """
        self.files = []
        self.labels = []
        train_root = os.path.join(self.data_dir, 'train')
        # Only real sub-directories are classes. A stray file in the train
        # folder (e.g. '.DS_Store') would otherwise take up a label index and
        # shift the label of every class sorted after it.
        class_names = sorted(
            entry for entry in os.listdir(train_root)
            if os.path.isdir(os.path.join(train_root, entry))
        )
        if self.mode not in ('train', 'val'):
            print("No Such Dataset Mode")
            return None
        for label, class_name in enumerate(class_names):
            files = sorted(glob(os.path.join(self.data_dir, self.mode, class_name, '*.jpg')))
            self.labels += [label] * len(files)
            self.files += files

    def _init_transform(self):
        """Build the image pipeline stored in ``self.transform``.

        The pipeline is still an empty placeholder to be filled in.
        NOTE(review): an empty ``Compose`` presumably returns the PIL image
        unchanged, which the default DataLoader collation is unlikely to
        accept - confirm and add at least a tensor conversion.
        """
        self.transform = transforms.Compose([
            # Useful link for this part: https://pytorch.org/vision/stable/transforms.html
            #----------------YOUR CODE HERE---------------------#
        ])

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``.

        The image is loaded as RGB and passed through ``self.transform`` when
        transforms are enabled; the label is a scalar ``torch.long`` tensor.
        """
        # Close the file handle deterministically; ``convert`` returns an
        # independent image, so it stays usable after the ``with`` block.
        with Image.open(self.files[index]) as raw:
            img = raw.convert('RGB')
        label = self.labels[index]

        if self.transforms:
            img = self.transform(img)

        label = torch.tensor(label, dtype = torch.long)

        return img, label

    def __len__(self):
        """Number of images indexed for the selected split."""
        return len(self.files)

# Model

- Class to define the model which we will use for training
- Stuff to fill in: The Architecture of your model, the `forward` function to define the forward pass

NOTE!: You are NOT allowed to use pretrained models for this task

In [5]:
class Classifier(nn.Module):
    """Skeleton classification network; the architecture is still to be filled in.

    Args:
        n_classes: number of target classes (not yet used by this skeleton).
    """

    def __init__(self, n_classes):
        super(Classifier, self).__init__()
        # Useful Link: https://pytorch.org/docs/stable/nn.html
        #------------ENTER YOUR MODEL HERE----------------#

    def forward(self, x):
        """Return per-class probabilities for ``x``.

        Assumes ``x`` is laid out as (batch, classes) by the time it reaches
        the softmax - TODO confirm once the architecture is written.
        """
        #---------Assuming x to be the input to the model, define the forward pass-----------#
        # The class dimension is named explicitly: calling softmax without
        # ``dim`` relies on a deprecated implicit choice and emits a warning.
        # dim=1 matches the ``accuracy`` helper, which takes the max over dim 1.
        # NOTE(review): if the loss ends up being nn.CrossEntropyLoss, which
        # expects raw logits, return the logits here instead of probabilities.
        return F.softmax(x, dim=1)

# Training

- Sections to Fill: Define `loss` function, `optimizer` and model, `train` and `eval` functions and the training loop


## Hyperparameters

Feel free to change these hyperparameters based on your machine's capacity

In [6]:
# Mini-batch size handed to the training DataLoader.
batch_size = 32
# Number of iterations of the outer training loop.
epochs = 10
# Step size intended for the optimizer (the optimizer cell is still a placeholder).
learning_rate = 1e-3
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Dataloader

In [None]:
# One dataset object per split, both rooted at the same data folder.
trainset = inaturalist(mode='train', root_dir='Data/inaturalist_12K')
valset = inaturalist(mode='val', root_dir='Data/inaturalist_12K')

# Training samples are shuffled and batched with the configured batch size.
trainloader = DataLoader(
    dataset=trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
# Validation samples are served one at a time, in a fixed order.
valloader = DataLoader(
    dataset=valset,
    batch_size=1,
    shuffle=False,
    num_workers=4,
)

## Loss Function and Optimizer

In [7]:
# USEFUL LINK: https://pytorch.org/docs/stable/nn.html#loss-functions
#---Define the loss function to use, model object and the optimizer for training---#

## Checkpoints

To save your model weights

In [8]:
# Folder that holds the saved model weights; created if it does not exist yet.
checkpoint_dir = 'checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

## Utility Functions

In [12]:
def get_model_summary(model, input_tensor_shape):
    """Print a summary of ``model`` by delegating to torchsummary's ``summary``.

    Args:
        model: the network to summarise.
        input_tensor_shape: forwarded unchanged as the second argument of
            ``summary`` (presumably the shape of one input sample without the
            batch dimension - confirm against the torchsummary docs).

    Returns:
        None; whatever ``summary`` returns is discarded.
    """
    summary(model, input_tensor_shape)

def accuracy(y_pred, y):
    """Fraction of samples whose highest-scoring class equals the target.

    Args:
        y_pred: score tensor; the predicted class is the index of the maximum
            along dimension 1.
        y: target class indices; its first dimension gives the sample count.

    Returns:
        Number of correct predictions divided by ``y.size(0)``, as a float.
    """
    top_class = torch.max(y_pred.data, dim=1).indices
    n_correct = top_class.eq(y).sum().item()
    return n_correct / y.size(0)

def epoch_time(start_time, end_time):
    """Split the interval between two timestamps into whole minutes and seconds.

    Args:
        start_time: timestamp in seconds taken at the start.
        end_time: timestamp in seconds taken at the end.

    Returns:
        ``(minutes, seconds)`` as integers, both truncated towards zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    # Seconds left over once the whole minutes have been removed.
    leftover_seconds = int(duration - (whole_minutes * 60))
    return whole_minutes, leftover_seconds

## Train

In [10]:
def train(model, dataset, optimizer, criterion, device):
    '''
    Write the function to train the model for one epoch
    Feel free to use the accuracy function defined above as an extra metric to track

    Status: this is still a stub. The body contains only this docstring, so
    calling the function does nothing and returns None.

    Parameters (none of them is used yet; the roles below are inferred from
    the names and should be confirmed once the body is written):
        model: the network to optimise.
        dataset: iterable yielding training batches.
        optimizer: optimizer updating the model parameters.
        criterion: loss function.
        device: device on which the computation should run.
    '''
    #------YOUR CODE HERE-----#

## Eval

In [11]:
# NOTE(review): this name shadows Python's builtin ``eval`` for the rest of the notebook.
def eval(model, dataset, criterion, device):
    '''
    Write the function to validate the model after each epoch
    Feel free to use the accuracy function defined above as an extra metric to track

    Status: this is still a stub. The body contains only this docstring, so
    calling the function does nothing and returns None.

    Parameters (none of them is used yet; the roles below are inferred from
    the names and should be confirmed once the body is written):
        model: the network to evaluate.
        dataset: iterable yielding validation batches.
        criterion: loss function.
        device: device on which the computation should run.
    '''
    #------YOUR CODE HERE-----#

## Training

In [13]:
# Lowest validation loss seen so far; starts at +inf so any real loss is lower.
# It is not updated yet because the epoch body below is still a placeholder.
best_valid_loss = float('inf')

for epoch in range(epochs):

    # Monotonic clock: unaffected by system clock adjustments while timing.
    start_time = time.monotonic()

    # Insert code to train and evaluate the model (Hint: use the functions you previously made :P)
    # Also save the weights of the model in the checkpoint directory
    #------YOUR CODE HERE-----#

    end_time = time.monotonic()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(
        "\n\n\n TIME TAKEN FOR THE EPOCH: {} mins and {} seconds".format(
            epoch_mins, epoch_secs
        )
    )


print("OVERALL TRAINING COMPLETE")




 TIME TAKEN FOR THE EPOCH: 0 mins and 0 seconds



 TIME TAKEN FOR THE EPOCH: 0 mins and 0 seconds



 TIME TAKEN FOR THE EPOCH: 0 mins and 0 seconds



 TIME TAKEN FOR THE EPOCH: 0 mins and 0 seconds



 TIME TAKEN FOR THE EPOCH: 0 mins and 0 seconds



 TIME TAKEN FOR THE EPOCH: 0 mins and 0 seconds



 TIME TAKEN FOR THE EPOCH: 0 mins and 0 seconds



 TIME TAKEN FOR THE EPOCH: 0 mins and 0 seconds



 TIME TAKEN FOR THE EPOCH: 0 mins and 0 seconds



 TIME TAKEN FOR THE EPOCH: 0 mins and 0 seconds
OVERALL TRAINING COMPLETE
