<a href="https://colab.research.google.com/github/gatienc/multimodal_product_data_classification/blob/main/notebooks/gatien_image_model_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hyperparameters



In [None]:
BATCH_SIZE=275  # presumably ~10.4 GB of a 15 GB GPU is used at this batch size ("GO" = gigaoctets) -- TODO confirm

In [None]:
# Number of target classes; sizes the output of the replacement classification head.
num_classes=27

In [None]:
# Fractions of the full dataset intended for the train / test / validation subsets.
train_percentage=0.80
test_percentage=0.10
valid_percentage=0.10

# Imports

In [None]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
import torchvision


from tqdm.auto import tqdm # for cool loading bar


from sklearn.metrics import f1_score

import zipfile
import os
import copy

import matplotlib.pyplot as plt




In [None]:
# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
device


# Datasets import
Download the datasets from Google Drive (if needed).

Saves the dataset in a temporary `datasets` folder.

In [None]:
# Mount the Google Drive that holds the dataset archive.
from google.colab import drive
drive.mount('/content/drive')
filepath='/content/drive/MyDrive/datasets/multimodal_product_classification/' # add your own path: the Drive folder that contains images.zip


In [None]:
# Extract the image archive into the local `datasets` folder unless that has
# already been done. The check targets the extracted training-image directory
# (the path the dataset is later built from) instead of `datasets` itself:
# `datasets` is created before extraction, so testing for it would silently
# skip the extraction forever after one failed or interrupted attempt.
if not os.path.isdir(os.path.join('datasets', 'images', 'image_train')):
  os.makedirs('datasets', exist_ok=True)
  with zipfile.ZipFile(filepath+'images.zip', 'r') as zip_ref:
      zip_ref.extractall('datasets')

In [None]:
class ImageDataLoader(torchvision.datasets.ImageFolder):
   """Image-folder dataset with a default tensor/normalisation pipeline.

   Despite its name this is a dataset (an ``ImageFolder`` subclass), not a
   ``DataLoader``; the name is kept so existing callers continue to work.

   Args:
       root: Directory forwarded to ``ImageFolder``.
       transform: Optional transform applied to every loaded image. When
           ``None`` (the default), images are converted with ``ToTensor`` and
           normalised with mean 0.5 / std 0.5.
       target_transform: Optional transform applied to every label, forwarded
           unchanged to ``ImageFolder``.
   """

   def __init__(self, root, transform=None, target_transform=None):
       # A caller-supplied ``transform`` used to be silently discarded; only
       # fall back to the default pipeline when none is given.
       if transform is None:
           transform = torchvision.transforms.Compose([
               torchvision.transforms.ToTensor(),
               torchvision.transforms.Normalize((0.5,), (0.5,))
           ])
       super().__init__(root, transform=transform, target_transform=target_transform)


In [None]:
# Build the dataset from the extracted training-image folder (no transform argument is passed).
dataset = ImageDataLoader('datasets/images/image_train')

In [None]:
# Turn the configured split fractions into absolute sample counts.
train_size = int(train_percentage * len(dataset))
# Fix: the validation size used to be derived from `test_percentage`, which
# left `valid_percentage` unused and tied the two splits together by accident.
valid_size = int(valid_percentage * len(dataset))
# The test split takes the remainder so the three sizes always add up to
# len(dataset) despite the integer truncation above.
test_size = len(dataset) - train_size - valid_size

In [None]:
# Split with a fixed seed so the train/validation/test partition is identical
# on every run. With an unseeded split, a checkpoint trained in an earlier
# session could end up being evaluated on samples it was trained on.
split_generator = torch.Generator().manual_seed(42)
train_dataset, valid_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, valid_size, test_size], generator=split_generator)

# Only the training loader needs shuffling; the evaluation metrics do not
# depend on sample order, so validation/test are iterated in a fixed order.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Preview up to 25 images from a single training batch in a 5x5 grid.
images, labels = next(iter(train_dataloader))
n_preview = min(25, len(images))  # a batch may hold fewer than 25 samples

plt.figure(figsize=(15, 15))
for i in range(n_preview):
    plt.subplot(5, 5, i + 1)
    # Undo the dataset's Normalize(mean=0.5, std=0.5) so pixel values are back
    # in [0, 1]; imshow clips float data that falls outside that range.
    preview = (images[i] * 0.5 + 0.5).clamp(0, 1)
    # Tensors are channel-first (C, H, W); imshow expects channel-last.
    plt.imshow(preview.permute(1, 2, 0).numpy())
    # Show the plain class index rather than the "tensor(k)" repr.
    plt.title(str(labels[i].item()))
plt.show()

# Model definition


In [None]:
# Lookup table from phase name to the DataLoader that serves that phase.
dataloaders = dict(
    train=train_dataloader,
    val=valid_dataloader,
    test=test_dataloader,
)

In [None]:
# Start from a ResNet-18 initialised with the "IMAGENET1K_V1" weights.
model = torchvision.models.resnet18(weights="IMAGENET1K_V1")

# Freeze every pre-trained weight so no gradients are computed for them.
model.requires_grad_(False)

# Swap the final fully-connected layer for a fresh head with `num_classes`
# outputs; its newly created parameters are trainable by default.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)

model = model.to(device)

criterion = nn.CrossEntropyLoss()

# Only the parameters of the new head are handed to the optimizer.
optimizer = optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9)

# Multiply the learning rate by 0.1 every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
def calculate_weighted_f1_score(y_true, y_pred):
  """Return the F1 score of `y_pred` against `y_true` with average='weighted'."""
  weighted_f1 = f1_score(y_true, y_pred, average='weighted')
  return weighted_f1

In [None]:
def plot_loss(train_losses,val_losses):
  """Draw the training and validation loss curves on one figure and show it.

  Args:
    train_losses: Sequence of training-loss values, one point per x position.
    val_losses: Sequence of validation-loss values, one point per x position.
  """
  _, ax = plt.subplots(figsize=(12, 6))
  for series, legend_name in ((train_losses, 'Training Loss'),
                              (val_losses, 'Validation Loss')):
    ax.plot(series, label=legend_name)
  ax.set_title('Training and Validation Loss')
  ax.set_xlabel('Epochs')
  ax.set_ylabel('Loss')
  ax.legend()
  ax.grid(True)
  plt.show()


In [None]:
def _unfreeze_all_parameters(model, optimizer):
    """Make every parameter of `model` trainable and register new ones with `optimizer`."""
    already_optimized = {
        id(p) for group in optimizer.param_groups for p in group['params']
    }
    newly_trainable = []
    for param in model.parameters():
        param.requires_grad = True
        # Re-adding a parameter the optimizer already owns raises ValueError.
        if id(param) not in already_optimized:
            newly_trainable.append(param)
    if newly_trainable:
        # Setting requires_grad alone does not train anything: the optimizer
        # only updates parameters it was given. The new group starts at the
        # current (already scheduled) learning rate of the first group; the
        # remaining hyper-parameters come from the optimizer defaults.
        optimizer.add_param_group({
            'params': newly_trainable,
            'lr': optimizer.param_groups[0]['lr'],
        })


def train_model(model, criterion, optimizer, scheduler, num_epochs=15,
                unfreeze_epoch=10,
                checkpoint_dir='/content/drive/MyDrive/Lessons/Models/multimodal_classification/'):
    """Train `model`, validate after every epoch and keep the best weights.

    Every epoch runs a training pass and a validation pass over the
    module-level `dataloaders` dict (entries 'train' and 'val'). Whenever the
    validation weighted-F1 improves, the weights are remembered and written to
    `checkpoint_dir`; the best weights are loaded back into `model` before
    returning.

    Args:
        model: Network to train; batches are moved to the module-level `device`.
        criterion: Loss callable invoked as `criterion(outputs, labels)`.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch, after the training pass.
        num_epochs: Number of epochs to run.
        unfreeze_epoch: Zero-based index of the epoch at whose end all
            parameters are unfrozen and added to `optimizer`. `None` disables
            unfreezing.
        checkpoint_dir: Directory that receives the best-model checkpoints;
            created if it does not exist.

    Returns:
        Tuple `(model, epoch_losses, epoch_accs)`. Both dicts map 'train' and
        'val' to the per-epoch values; `epoch_accs` holds weighted F1 scores.
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    epoch_losses = {'train': [], 'val': []}
    epoch_accs = {'train': [], 'val': []}
    loop_on_epoch = tqdm(range(num_epochs))
    for epoch in loop_on_epoch:
        # Show the previous epoch's metrics on the outer progress bar.
        if epoch >= 1:
            loop_on_epoch.set_description(
                "Train -> Loss: {:.4f} Acc: {:.4f} | Val -> {:.4f} Acc: {:.4f}".format(
                    epoch_losses["train"][-1], epoch_accs["train"][-1],
                    epoch_losses["val"][-1], epoch_accs["val"][-1]))
        else:
            loop_on_epoch.set_description("first epoch")

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            # Collect per-batch arrays and join them once per phase; growing
            # one array with np.append copies it on every batch.
            batch_preds = []
            batch_labels = []

            loop_on_batch = tqdm(dataloaders[phase])
            for inputs, labels in loop_on_batch:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the training phase
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                # backward + optimize only in the training phase
                if is_train:
                    loss.backward()
                    optimizer.step()

                # statistics: weight the batch loss by the batch size
                running_loss += loss.item() * inputs.size(0)
                batch_preds.append(preds.cpu().numpy())
                batch_labels.append(labels.cpu().numpy())
                loop_on_batch.set_description('{}| Loss: {:.4f}'.format(phase, loss.item()))

            # End of phase (train or validation)
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            # Fix: the helper's signature is (y_true, y_pred); the arguments
            # used to be passed the other way round, which weights the
            # per-class F1 by predicted counts instead of true support.
            epoch_acc = calculate_weighted_f1_score(
                np.concatenate(batch_labels), np.concatenate(batch_preds))

            epoch_losses[phase] = np.append(epoch_losses[phase], epoch_loss)
            epoch_accs[phase] = np.append(epoch_accs[phase], epoch_acc)

            if is_train:
                scheduler.step()
            elif epoch_acc > best_acc:
                # New best validation score: remember and checkpoint the weights.
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                os.makedirs(checkpoint_dir, exist_ok=True)
                torch.save(model.state_dict(),
                           os.path.join(checkpoint_dir, 'Resnet16-best'+str(epoch)+'.ckpt'))

        # End of epoch (train + validation): plot the loss curves once.
        plot_loss(epoch_losses["train"], epoch_losses["val"])

        if epoch == unfreeze_epoch:
            # From the next epoch on, train the whole network.
            _unfreeze_all_parameters(model, optimizer)

    print('Best val Acc: {:4f}'.format(best_acc))
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, epoch_losses, epoch_accs

In [None]:
# train_model returns (model, epoch_losses, epoch_accs). Unpack the tuple so
# that `model` remains a network: later cells call model.state_dict() and
# model.eval(), which would fail on a tuple.
model, epoch_losses, epoch_accs = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=15)

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/272 [00:00<?, ?it/s]

In [None]:
# Write the model's state dict to the models folder on the mounted Drive.
torch.save(model.state_dict(), '/content/drive/MyDrive/Lessons/Models/multimodal_classification/' + 'Resnet18-best.ckpt')


In [None]:
# model = torchvision.models.resnet18(weights="IMAGENET1K_V1")

# # Freeze parameters so we don't backprop through them
# for param in model.parameters():
#    param.requires_grad = False

# # Replace the last fully-connected layer
# num_ftrs = model.fc.in_features
# model.fc = torch.nn.Linear(num_ftrs, num_classes) # num_classes is the number of classes in your dataset

# model = model.to(device)
# model.load_state_dict(torch.load("/content/drive/MyDrive/Lessons/Models/multimodal_classification/Image-epoch0006.ckpt",map_location=device))

In [None]:
# Run the model over the test loader and collect predictions and true labels.
model.eval() # Set model to evaluation mode
predictions = []
true_labels = []

# Gradients are not needed for evaluation; disabling them avoids building an
# autograd graph for every batch, which saves memory and time.
with torch.no_grad():
    for inputs, labels in tqdm(test_dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass; the predicted class is the index of the largest output.
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)

        predictions.extend(preds.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

# Now you have the predictions and true labels, you can calculate metrics like accuracy, precision, recall etc.


In [None]:
predictions=np.array(predictions)

In [None]:
true_labels=np.array(true_labels)

In [None]:
# Test error rate: the fraction of samples whose prediction differs from the
# true label. The vectorised mean replaces Python's element-by-element sum().
np.mean(predictions != true_labels)