# Importing Libraries and Dependencies

In [17]:
import numpy as np
import random
import torch.nn as nn
import torch
import torch.optim as optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from torchvision.transforms import RandomHorizontalFlip, ToTensor, Compose, RandomRotation, Resize
import matplotlib.pyplot as plt
from torchvision.datasets import ImageFolder
from PIL import Image
torch.manual_seed(74)
random.seed(74)
np.random.seed(74)
import torch
import torchvision.models as models

## Check if CUDA is available, else use CPU

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


## Functions for getting pre-trained desired model, freeze layers and print which layers require gradients 

In [19]:
# Function to get desired pre-trained model
def get_desired_model(model_name):
    if model_name.lower() == "googlenet":
        model = models.googlenet(pretrained=True)
    elif model_name.lower() == "resnet18":
        model = models.resnet50(pretrained=True)
    else:
        model = models.alexnet(pretrained=True)
    return model

# Function to freeze layers in the model
def freeze_layers(model, freeze_from_layer, freeze_to_layer):
    for idx, (name, param) in enumerate(model.named_children()):
        if idx >= freeze_from_layer and idx < freeze_to_layer:
            for param in param.parameters():
                param.requires_grad = False

# Function to print which layers require gradients
def print_requires_grad(model):
    for name, param in model.named_parameters():
        print(name, param.requires_grad)


## Train_model function

In [20]:
def train_model(data_aug, model, train_dir, val_dir, num_epochs, batch_size, learning_rate, optimizer_name, img_size):
    # Define transforms for data augmentation and normalization

    if data_aug:
        transform_train = transforms.Compose([
            transforms.Resize((img_size, img_size)),
            RandomRotation(10),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
#             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    else:
        transform_train = transforms.Compose([
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
#             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])

    transform_val = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        # transforms.CenterCrop(32),
        transforms.ToTensor(),
#         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # Load the datasets using ImageFolder
    train_dataset = ImageFolder(train_dir, transform=transform_train)
    val_dataset = ImageFolder(val_dir, transform=transform_val)


    labels = train_dataset.classes
    train_set, val_set = random_split(train_dataset, [8000, 1999])

    # Create data loaders for training and validation
    train_loader = DataLoader(train_set, batch_size=batch_size, num_workers = 4,shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size,num_workers = 4, shuffle=True)
    test_loader = DataLoader(val_dataset, batch_size=batch_size,num_workers = 4, shuffle=True)

    criterion = nn.CrossEntropyLoss()
    if optimizer_name.lower() == 'sgd':
        # print("SGD")
        optimizer = optim.SGD(model.parameters(), lr = learning_rate, momentum = 0.9)
    elif optimizer_name.lower() == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr = learning_rate, alpha = 0.99, eps = 1e-8)
        # print("RMSPROP")
    elif optimizer_name.lower() == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr = learning_rate, lr_decay = 0, weight_decay = 0, initial_accumulator_value = 0, eps = 1e-10)
        # print("ADAGRAD")
    else:
        optimizer = optim.Adam(model.parameters(), lr = learning_rate, betas = (0.9, 0.999), eps = 1e-8)
        # print("ADAM")


    epoch = 0
    while epoch < num_epochs:
        model.train()  # Set model to training mode
        count = 0
        running_loss, train_correct_p, train_total_p = 0.0, 0, 0

        for i, data in train_loader:
            # print(i)
            inputs = i.to(device)
            labels = data.to(device)
#             inputs, labels = i, data

            optimizer.zero_grad()

            outputs = model(inputs)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss = running_loss + loss.item()
            _, pred = torch.max(outputs.data, 1)
            train_total_p += labels.size(0)
            train_correct_p += (pred == labels).sum().item()

            running_loss = running_loss / len(train_loader)
            train_accuracy = (train_correct_p / train_total_p) * 100
            if count%32 == 31:
                print(f'Epoch {epoch+1}, Count {count+1}, Train Loss: {running_loss:.3f}, Train Accuracy: {train_accuracy:.2f}%')
            count += 1
#         print(count)

        # Validate the model after each epoch

        model.eval()  # Set model to evaluation mode
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for i, data in val_loader:
                inputs, labels = i.to(device), data.to(device)
#                 inputs, labels = i, data
                outputs = model.forward(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Print validation statistics
        val_loss /= len(val_loader)
        val_accuracy = 100 * correct / total
        print(f'Epoch {epoch + 1}, Validation Loss: {val_loss:.3f}, Validation Accuracy: {val_accuracy:.2f}%')
        print()
        epoch += 1
    print('Training finished')
    return model, val_accuracy


# Define parameters and directories

In [21]:
model_name = "googlenet"
model = get_desired_model(model_name)
model.fc = nn.Linear(in_features =  model.fc.in_features, out_features = 10)
train_dir = '/kaggle/input/nature/inaturalist_12K/train'
val_dir = '/kaggle/input/nature/inaturalist_12K/val'

1. Technique - Freezing first k layers

In [None]:
freeze_from_layer = 0         # Start freezing from the first layer
freeze_to_layer = 8           # Freeze layers up to the kth layer
# print_requires_grad(model)
freeze_layers(model, freeze_from_layer, freeze_to_layer)
# print_requires_grad(model)
model = model.to(device)

num_epochs = 10
learning_rate = 0.001
batch_size = 250
# Instantiate the ConvNet model
img_size = 256
optimizer_name = "ADAM"




Execueting train_model function

In [22]:
# def train_model(data_aug, model, train_dir, val_dir, num_epochs, batch_size, learning_rate, optimizer_name, img_size):
model, va = train_model(False, model, train_dir, val_dir, num_epochs, batch_size, learning_rate, optimizer_name, img_size)

Epoch 1, Count 32, Train Loss: 0.028, Train Accuracy: 63.05%
Epoch 1, Validation Loss: 0.912, Validation Accuracy: 69.68%

Epoch 2, Count 32, Train Loss: 0.013, Train Accuracy: 85.11%
Epoch 2, Validation Loss: 0.876, Validation Accuracy: 70.94%

Epoch 3, Count 32, Train Loss: 0.006, Train Accuracy: 94.47%
Epoch 3, Validation Loss: 0.937, Validation Accuracy: 72.29%

Epoch 4, Count 32, Train Loss: 0.003, Train Accuracy: 96.97%
Epoch 4, Validation Loss: 1.020, Validation Accuracy: 72.64%

Epoch 5, Count 32, Train Loss: 0.001, Train Accuracy: 98.15%
Epoch 5, Validation Loss: 1.209, Validation Accuracy: 69.23%

Epoch 6, Count 32, Train Loss: 0.003, Train Accuracy: 97.92%
Epoch 6, Validation Loss: 1.133, Validation Accuracy: 71.49%

Epoch 7, Count 32, Train Loss: 0.003, Train Accuracy: 97.00%
Epoch 7, Validation Loss: 1.214, Validation Accuracy: 71.04%

Epoch 8, Count 32, Train Loss: 0.003, Train Accuracy: 97.15%
Epoch 8, Validation Loss: 1.340, Validation Accuracy: 69.18%

Epoch 9, Count 3

2. Technique - Freezing some middle layers parameter defined parameters that are required

In [None]:
freeze_from_layer = 9        # Start freezing from the k+1 layer
freeze_to_layer = 12           # Freeze layers up to the some layer
# print_requires_grad(model)
freeze_layers(model, freeze_from_layer, freeze_to_layer)
# print_requires_grad(model)
model = model.to(device)

num_epochs = 10
learning_rate = 0.001
batch_size = 250
# Instantiate the ConvNet model
img_size = 256
optimizer_name = "ADAM"

model, va = train_model(False, model, train_dir, val_dir, num_epochs, batch_size, learning_rate, optimizer_name, img_size)

3. Technique - Freezing last k layers parameter defined paramters that are required

In [None]:

freeze_from_layer = 13        # Start freezing from the k+1 layer
freeze_to_layer = 19           # Freeze layers up to the last layer
# print_requires_grad(model)
freeze_layers(model, freeze_from_layer, freeze_to_layer)
# print_requires_grad(model)
model = model.to(device)

num_epochs = 10
learning_rate = 0.001
batch_size = 250
# Instantiate the ConvNet model
img_size = 256
optimizer_name = "ADAM"

model, va = train_model(False, model, train_dir, val_dir, num_epochs, batch_size, learning_rate, optimizer_name, img_size)