<a href="https://www.kaggle.com/code/mirabirhossain/model-from-scratch?scriptVersionId=144365389" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
!pip install torchsummary

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torch.nn.functional as f
from torch.utils import data
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
import torchvision.models as models
from torchsummary import summary
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score

# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# Setting the seeds so that the outcome of the model is reproducible.
# PyTorch, NumPy and Python's built-in `random` module are each seeded separately.
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

In [None]:
# Train and test data directories (paths inside the Kaggle input dataset)
train_data_dir = '/kaggle/input/mangoleafbdomdena/MangoLeafBDomdena/Train'
test_data_dir = '/kaggle/input/mangoleafbdomdena/MangoLeafBDomdena/Test'

In [None]:
# Preprocessing / augmentation pipelines for the train and test data.
# Both pipelines resize every image to 224x224, convert it to a tensor and
# normalize each channel with mean 0.5 and std 0.5 (a common choice in the literature).
# The train pipeline additionally flips images horizontally and vertically,
# each flip being applied independently with probability 0.3.

_IMAGE_SIZE = (224, 224)
_NORM_MEAN = (0.5, 0.5, 0.5)
_NORM_STD = (0.5, 0.5, 0.5)

train_transform = transforms.Compose([
    transforms.Resize(_IMAGE_SIZE),
    transforms.RandomHorizontalFlip(p=0.3),
    transforms.RandomVerticalFlip(p=0.3),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

test_transform = transforms.Compose([
    transforms.Resize(_IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

In [None]:
# Wrapping the train and test directories in PyTorch Datasets so they can be
# indexed and batched by the DataLoaders created below.
# Each dataset applies its own transform pipeline to the images it loads.
train_dataset = ImageFolder(train_data_dir, transform=train_transform)
test_dataset = ImageFolder(test_data_dir, transform=test_transform)

In [None]:
# Checking the number of samples in the train and test datasets
# (displayed as a tuple by the notebook)
len(train_dataset), len(test_dataset)

In [None]:
# Dividing the train data into a train set and a validation set (90% / 10%)
# by splitting the sample indices.
# Passing the targets to "stratify" keeps the class proportions of the full
# dataset in both index sets; random_state fixes the split for reproducibility.

train_indices, valid_indices = train_test_split(range(len(train_dataset)), test_size=0.1,
                                               stratify=train_dataset.targets,
                                               random_state=42)

In [None]:
# Samplers that draw (in random order) only from the train / validation indices
train_sampler = data.SubsetRandomSampler(train_indices)
valid_sampler = data.SubsetRandomSampler(valid_indices)

# The validation samples must not go through the random flips of train_transform,
# otherwise validation metrics are computed on augmented images. We therefore
# open the same training directory a second time with the deterministic test
# transform. Both ImageFolder instances index the same files in the same
# (sorted) order, so valid_indices refer to the same images in either dataset.
valid_dataset = ImageFolder(train_data_dir, transform=test_transform)

# Creating PyTorch DataLoaders to divide the datasets into batches of 64 to feed the model
trainloader = DataLoader(train_dataset, batch_size=64, sampler=train_sampler)
validloader = DataLoader(valid_dataset, batch_size=64, sampler=valid_sampler)
testloader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
# Designing the model
# This is the final model after some training and parameter tuning

# Depthwise Separable Convolution block
class DSCBlock(nn.Module):
    """Depthwise separable convolution block followed by a pooling layer.

    The block applies a 3x3 depthwise convolution (one filter per input
    channel, spatial size preserved by ``padding=1``), a 1x1 pointwise
    convolution that maps to ``out_channels``, batch normalization and ReLU,
    and finally the selected pooling layer.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the pointwise convolution.
        pool: ``'max'`` for a 2x2 max pool with stride 2, or ``'avg'`` for a
            13x13 average pool with stride 13 (collapses a 13x13 map to 1x1).

    Raises:
        ValueError: If ``pool`` is neither ``'max'`` nor ``'avg'``.
    """

    def __init__(self, in_channels, out_channels, pool='max'):
        super().__init__()

        # Fail at construction time; previously an unknown value left
        # self.pooling undefined and only surfaced as an error in forward().
        if pool not in ('max', 'avg'):
            raise ValueError(f"pool must be 'max' or 'avg', got {pool!r}")

        self.features = nn.Sequential(
            # Depthwise: groups=in_channels gives each channel its own 3x3 filter
            nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1, groups=in_channels),
            # Pointwise: 1x1 convolution mixes channels and sets the output width
            nn.Conv2d(in_channels, out_channels, kernel_size=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )
        if pool == 'max':
            self.pooling = nn.MaxPool2d(2, 2)
        else:
            self.pooling = nn.AvgPool2d(13, 13)

    def forward(self, x):
        """Apply the convolutional features and then the pooling layer to ``x``."""
        x = self.features(x)
        x = self.pooling(x)
        return x
        
# Full architecture using the Depthwise Separable Convolution blocks
class CNNarch(nn.Module):
    """Small CNN classifier built from a stem convolution and four ``block`` stages.

    Args:
        block: Callable used to build each stage. It is called as
            ``block(in_channels, out_channels)`` for the first three stages and
            as ``block(256, 256, 'avg')`` for the last one.
        num_classes: Number of output logits. Defaults to 8, the value that
            was previously hard-coded.

    Note:
        The classifier head expects exactly 256 features per sample, so the
        last stage has to reduce the feature map to 1x1. The size comments
        below trace a 224x224 input, for which this holds when ``block`` is
        ``DSCBlock``.
    """

    def __init__(self, block, num_classes=8):
        super().__init__()
        self.blocks = nn.Sequential(
            nn.Conv2d(3, 32, 3, 1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2), # 111
            block(32, 64), # 55
            block(64, 128), # 27
            block(128, 256), # 13
            block(256, 256, 'avg'),
        )

        self.linear = nn.Sequential(
            nn.Linear(256, 48),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(48, num_classes)
        )

    def forward(self, x):
        """Return the raw class logits of shape ``(batch, num_classes)`` for ``x``."""
        x = self.blocks(x)
        # Flatten everything except the batch dimension; unlike view(), this
        # also works when the feature map is not contiguous in memory.
        x = torch.flatten(x, 1)
        x = self.linear(x)
        return x
        

In [None]:
# Creating an instance of the model and moving it to the selected device
model = CNNarch(DSCBlock).to(device)
# Forward pass on one random 224x224 RGB image to check that the input flows
# through the network; the result `m1` is not used afterwards
m1 = model(torch.randn(1, 3, 224, 224).to(device))
# Using torchsummary to print the layer-by-layer summary for a (3, 224, 224) input
summary(model, (3, 224, 224))

**From the summary above, the model has 129,144 parameters, which makes it very lightweight: the parameters take up only about 0.49 MB.**

In [None]:
# Creating class weights because there is class imbalance in the dataset.
# Each class gets weight = total_samples / (num_classes * class_count),
# so classes with fewer samples contribute more to the loss.
# Note: the counts are taken over the whole dataset behind trainloader
# (train and validation indices together).

total_samples = len(trainloader.dataset)
num_classes = len(trainloader.dataset.classes)

# Count the samples of every class in a single pass instead of rebuilding the
# targets tensor once per class; minlength keeps one slot per class even if a
# class has no samples.
class_counts = torch.bincount(torch.tensor(trainloader.dataset.targets), minlength=num_classes)

class_weights = (total_samples / (num_classes * class_counts.float())).to(device)

In [None]:
# Displaying the class weights in the output cell (one weight per class)
class_weights

In [None]:
# Defining the loss function and the optimization algorithm for model training:
# Cross Entropy Loss (weighted per class with class_weights) as the loss function
# and Adam with a learning rate of 0.005 as the optimizer

criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=0.005)

In [None]:
# Training the model for 40 epochs.
# After every epoch the mean batch loss and the accuracy (in percent) on the
# train and validation loaders are recorded and printed.

num_epochs = 40

train_losses, valid_losses = [], []
train_accuracies, valid_accuracies = [], []

for epoch in range(num_epochs):
    # ---- Training pass: gradients enabled, weights updated after every batch ----
    model.train()
    train_loss, correct_train, total_train = 0.0, 0, 0

    for inputs, targets in tqdm(trainloader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # The predicted class is the index of the largest logit
        predicted = outputs.argmax(dim=1)
        train_loss += loss.item()
        total_train += targets.size(0)
        correct_train += (predicted == targets).sum().item()

    train_accuracy = 100 * correct_train / total_train
    train_losses.append(train_loss / len(trainloader))
    train_accuracies.append(train_accuracy)

    # ---- Validation pass: eval mode, no gradient tracking, no weight updates ----
    model.eval()
    valid_loss, correct_valid, total_valid = 0.0, 0, 0

    with torch.no_grad():
        for inputs, targets in tqdm(validloader):
            inputs = inputs.to(device)
            targets = targets.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, targets)

            predicted = outputs.argmax(dim=1)
            valid_loss += loss.item()
            total_valid += targets.size(0)
            correct_valid += (predicted == targets).sum().item()

    valid_accuracy = 100 * correct_valid / total_valid
    valid_losses.append(valid_loss / len(validloader))
    valid_accuracies.append(valid_accuracy)

    print(f'Epoch [{epoch + 1}/{num_epochs}]')
    print(f'Training Loss: {train_losses[-1]:.4f} | Training Accuracy: {train_accuracy:.2f}%')
    print(f'Validation Loss: {valid_losses[-1]:.4f} | Validation Accuracy: {valid_accuracy:.2f}%')




In [None]:
# Plot the recorded training history side by side:
# losses on the left axis, accuracies on the right axis
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Training and validation losses per epoch
loss_ax.plot(train_losses, label='Training Loss')
loss_ax.plot(valid_losses, label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.set_title('Training and Validation Losses')

# Training and validation accuracies per epoch
acc_ax.plot(train_accuracies, label='Training Accuracy')
acc_ax.plot(valid_accuracies, label='Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.legend()
acc_ax.set_title('Training and Validation Accuracies')

fig.tight_layout()
plt.show()

In [None]:
# Evaluating the model on the test set: collect the true and the predicted
# class index of every test sample for the metrics computed afterwards
model.eval()
y_true, y_pred = [], []

with torch.no_grad():
    for inputs, targets in testloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        outputs = model(inputs)
        # The predicted class is the index of the largest logit
        predicted = outputs.argmax(dim=1)

        y_true.extend(targets.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())




In [None]:
# Confusion matrix of true vs. predicted class indices
confusion_mat = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(confusion_mat)

# Per-class precision / recall / F1, labelled with the dataset's class names
class_names = testloader.dataset.classes
report = classification_report(y_true, y_pred, target_names=class_names)
print("Classification Report:")
print(report)

# Overall accuracy plus precision, recall and F1 averaged over the classes,
# each class weighted by its number of true samples
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1 = (
    metric(y_true, y_pred, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

**As we can see, the model achieves very good accuracy and precision on the test set.**

In [None]:
# Saving the model in two forms:
# 'full_model.pth' holds the whole model object,
# 'model.pth' holds only the state dict (weights and buffers).
# NOTE(review): loading the full-model file presumably needs the CNNarch and
# DSCBlock class definitions to be importable — confirm before relying on it.
torch.save(model, 'full_model.pth')
torch.save(model.state_dict(), 'model.pth')

In [None]:
# Checking the model size

def count_parameters(model):
    """Return the total number of scalar elements over all parameters of ``model``."""
    total = 0
    for param in model.parameters():
        total += param.numel()
    return total

def model_size_mb(model, include_buffers=False):
    """Return the in-memory size of the tensors of ``model`` in megabytes.

    Args:
        model: Module whose tensors are measured.
        include_buffers: When True, buffers (for example batch-norm running
            statistics) are counted in addition to the parameters. The
            default counts parameters only, as before.

    Returns:
        The summed ``numel() * element_size()`` of the selected tensors,
        divided by 1024 * 1024.
    """
    tensors = list(model.parameters())
    if include_buffers:
        tensors.extend(model.buffers())
    size_bytes = sum(t.numel() * t.element_size() for t in tensors)
    return size_bytes / (1024 * 1024)  # Convert bytes to megabytes (MB)

# Size of the trained model in MB, printed with two decimals
size_mb = model_size_mb(model)

print(f"Model size: {size_mb:.2f} MB")

**As we can see, we have a very lightweight model of only 0.49 MB.**

In [None]:
# Testing some random images and plotting them to see their actual label and predicted label

# Switch to evaluation mode before running predictions
model.eval()

def plot_random_test_samples(model, test_loader, num_samples=12,
                             mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
    """Plot randomly chosen test images with their true and predicted labels.

    Images are drawn without replacement from the whole ``test_loader.dataset``
    rather than from the loader's first batch: with an unshuffled loader the
    first batch is always the same leading slice of the dataset, which is not
    a random sample of the test set.

    Args:
        model: Classifier mapping an image batch to per-class scores.
        test_loader: DataLoader whose ``dataset`` yields ``(image, label)``
            pairs, the image being a ``(C, H, W)`` tensor.
        num_samples: Number of images to show; capped at the dataset size.
        mean: Per-channel (or scalar) mean used when the images were
            normalized. Defaults to the 0.5 used by this notebook's transforms.
        std: Per-channel (or scalar) std used when the images were normalized.
            Defaults to the 0.5 used by this notebook's transforms.
    """
    dataset = test_loader.dataset
    num_samples = min(num_samples, len(dataset))
    if num_samples <= 0:
        return

    # Draw distinct random indices over the whole dataset
    picked = np.random.choice(len(dataset), size=num_samples, replace=False)
    samples = [dataset[int(idx)] for idx in picked]
    images = torch.stack([image for image, _ in samples]).to(device)
    labels = [int(label) for _, label in samples]

    # Predict in eval mode and restore the caller's mode afterwards
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            predicted = model(images).argmax(dim=1).tolist()
    finally:
        model.train(was_training)

    # Undo the normalization so pixel values are back in [0, 1]; imshow clips
    # float RGB data outside that range, which distorts normalized images.
    mean_t = torch.as_tensor(mean, dtype=torch.float32).view(-1, 1, 1)
    std_t = torch.as_tensor(std, dtype=torch.float32).view(-1, 1, 1)
    display = (images.cpu() * std_t + mean_t).clamp(0, 1)

    # Grid with 4 columns and as many rows as needed (3 rows for 12 samples)
    num_cols = 4
    num_rows = (num_samples + num_cols - 1) // num_cols

    plt.figure(figsize=(12, 3 * num_rows))

    for i, (image, label, prediction) in enumerate(zip(display, labels, predicted)):
        plt.subplot(num_rows, num_cols, i + 1)
        # imshow expects channels last: (C, H, W) -> (H, W, C)
        plt.imshow(image.permute(1, 2, 0).numpy())
        plt.title(f"True: {label}, Pred: {prediction}")
        plt.axis('off')

    plt.tight_layout()
    plt.show()


# Show 12 test images together with their true and predicted class indices
plot_random_test_samples(model, testloader, num_samples=12)