In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import shutil
import importlib
import sys

# Determine the environment and import preprocessing module accordingly
def is_kaggle():
    return 'KAGGLE_KERNEL_RUN_TYPE' in os.environ

if is_kaggle():
    print("Running on Kaggle")
    # Assuming 'preprocessing.py' and other scripts are in '/kaggle/input'
    kaggle_input_path = '/kaggle/usr/lib'
    #sys.path.append(kaggle_input_path)
    
    #import preprocessing_py.preprocessing_py as preprocessing
    #import models_py.models_py as models
    #import utils_py.utils_py as utils
   
    
    # Install missing libraries on kaggle
    ! pip install torchsummary
    ! pip install mlflow
else:
    print("Running locally")
    import scripts.preprocessing as preprocessing
    import scripts.models as models
    import scripts.utils as utils
    
    
    
# Reload the module (if necessary)
#importlib.reload(preprocessing)
#importlib.reload(models)
#importlib.reload(utils)

# Other imports
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
import torchsummary
import torch.optim as optim

import tqdm
import mlflow
import mlflow.pytorch

Running on Kaggle
Collecting torchsummary
  Downloading torchsummary-1.5.1-py3-none-any.whl.metadata (296 bytes)
Downloading torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1
Collecting mlflow
  Downloading mlflow-2.13.2-py3-none-any.whl.metadata (29 kB)
Collecting cachetools<6,>=5.0.0 (from mlflow)
  Downloading cachetools-5.3.3-py3-none-any.whl.metadata (5.3 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.3-py2.py3-none-any.whl.metadata (7.7 kB)
Collecting querystring-parser<2 (from mlflow)
  Downloading querystring_parser-1.2.4-py2.py3-none-any.whl.metadata (559 bytes)
Collecting gunicorn<23 (from mlflow)
  Downloading gunicorn-22.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting graphql-core<3.3,>=3.1 (from graphene<4->mlflow)
  Downloading graphql_core-3.2.3-py3-none-any.whl.metadata (10 kB)
Collecting graphql-relay<3.3,>=3.1 (from graphene<4->mlflow)
  Downloading g

In [2]:
PATH = "/kaggle/usr/lib/models_py/models_py.py"
sys.path.insert(1, PATH)
import models_py as models

In [3]:
PATH = "/kaggle/usr/lib/utils_py/utils_py.py"
sys.path.insert(1, PATH)
import utils_py as utils

In [4]:
PATH = "/kaggle/usr/lib/preprocessing_py/preprocessing_py.py"
sys.path.insert(1, PATH)
import preprocessing_py as preprocessing

In [5]:
importlib.reload(preprocessing)
importlib.reload(models)
importlib.reload(utils)

<module 'utils_py' (<_frozen_importlib_external._NamespaceLoader object at 0x7fc6737dc2b0>)>

In [6]:
device = 'cuda' #utils.use_GPU()

In [7]:
class FireStorm(nn.Module):
    # This model is based on Fire module from SqueezeNet with the addition of BatchNorm 
    # and the change of ReLU to LeakyReLU

    # autoencoder
    def __init__(
        self,
        inplanes: int,
        squeeze_planes: int,
        expand1x1_planes: int,
        expand3x3_planes: int,
        
    ) -> None:
        super().__init__()
        self.inplanes = inplanes

        self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
        self.squeeze_activation = nn.LeakyReLU(inplace=True)
        self.squeeze_bn = nn.BatchNorm2d(squeeze_planes)
       
        self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes, kernel_size=1)
        self.expand1x1_activation = nn.LeakyReLU(inplace=True)
        self.expand1x1_bn = nn.BatchNorm2d(expand1x1_planes)
        
        self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes, kernel_size=3, padding=1)
        self.expand3x3_activation = nn.LeakyReLU(inplace=True)
        self.expand3x3_bn = nn.BatchNorm2d(expand3x3_planes)

    def forward(self, x):
        x = self.squeeze_bn(self.squeeze_activation(self.squeeze(x)))
        return torch.cat([
            self.expand1x1_bn(
                self.expand1x1_activation(self.expand1x1(x))
            ),
            self.expand3x3_bn(
                self.expand3x3_activation(self.expand3x3(x)) 
            ),
            ], 1)
            
        


class StormModel2(nn.Module):
    # version with more parameters

    def __init__(self, num_classes: int = 251, dropout: float = 0.5) -> None:
        super().__init__()
        self.num_classes = num_classes
        self.features = nn.Sequential(
            
            nn.Conv2d(3, 64, kernel_size=3, stride=2),
            nn.LeakyReLU(inplace=True),

            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),

            FireStorm(64, 16, 64, 64),
            FireStorm(128, 16, 64, 64),

            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),

            FireStorm(128, 32, 128, 128),
            FireStorm(256, 32, 128, 128),
            FireStorm(256, 48, 192, 192),

            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
      
            FireStorm(384, 64, 192, 192),
            FireStorm(384, 64, 256, 256),  # added module
            
        )


        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(512, 512),
            nn.LeakyReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(512, self.num_classes),
        )
       
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.features(x)
        x = self.classifier(x)
        return torch.flatten(x, 1)



In [8]:
if is_kaggle():
    path_SSL = "/kaggle/input/ssl/pytorch/uploaded/1"
    path_Storm = "/kaggle/input/stormmodelpretrained/pytorch/dict-gruccia/1"
else:
    path_SSL = "."
    path_Storm = "."
    

#model = models.StormModel()
#StormModel = model.to(device)


#model = torch.load(os.path.join(path_Storm,"StormModel1.0.pth"))
#StormModel30 = model.to(device)


#model = models.StormModel()  # 
#model = StormModel2()
#StormModel2 = model.to(device)
#model.load_state_dict(torch.load(os.path.join(path_SSL,"StormModelPretrained.pth")))
#model.load_state_dict(torch.load("/kaggle/input/stormmodelpretrained/pytorch/dict-gruccia/1/StormModelPretrained.pth"))
#SSLmodel = model.to(device)


model = models.StormModel()
#model.to(device)
model.load_state_dict(torch.load("/kaggle/input/jigsawpretrain/pytorch/same_data/1/MAINjigsawpretrained_same_data.pth"))
#model.load_state_dict(torch.load("/kaggle/input/jigsawpretrain/pytorch/first/1/MAINjigsawpretrained.pth"))
#model=torch.load("/kaggle/input/final_pretrained/pytorch/samedataset_norm_12ep/1/SSLmodel_pretrained.pth")
SSL_StormModel = model.to(device)

In [9]:
torchsummary.summary(model, (3, 224, 224));

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 111, 111]           1,792
         LeakyReLU-2         [-1, 64, 111, 111]               0
         MaxPool2d-3           [-1, 64, 55, 55]               0
            Conv2d-4           [-1, 16, 55, 55]           1,040
         LeakyReLU-5           [-1, 16, 55, 55]               0
       BatchNorm2d-6           [-1, 16, 55, 55]              32
            Conv2d-7           [-1, 64, 55, 55]           1,088
         LeakyReLU-8           [-1, 64, 55, 55]               0
       BatchNorm2d-9           [-1, 64, 55, 55]             128
           Conv2d-10           [-1, 64, 55, 55]           9,280
        LeakyReLU-11           [-1, 64, 55, 55]               0
      BatchNorm2d-12           [-1, 64, 55, 55]             128
        FireStorm-13          [-1, 128, 55, 55]               0
           Conv2d-14           [-1, 16,

In [10]:
# Load data
if not is_kaggle():

    # check if the train folder is  already created
    if not os.path.exists('data/train'):
        folder_structure = preprocessing.create_dataset()
        # transform it in a dataframe and list the number of images per class in the folders
        a = pd.DataFrame([(k, len(v)) for k,v in folder_structure[0].items()], 
                            columns=['class', 'count'])
        b = pd.DataFrame([(k, len(v)) for k,v in folder_structure[1].items()], 
                            columns=['class', 'count'])
        image_counts = pd.merge(a, 
                                b, 
                                on='class', 
                                how='outer', 
                                suffixes=('_train', '_test'))
        
        
        image_counts.loc[np.argmin(image_counts['count_train']),:]
        # create a validation set
        preprocessing.create_validation(42);

In [11]:
if is_kaggle():
    im_dir = '/kaggle/input/food-dataset-sl/'
else:
    im_dir ='.'  

In [12]:
im_dir

'/kaggle/input/food-dataset-sl/'

In [13]:
transform = transforms.Compose([
    # resize 
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    #transforms.RandomCrop(224),
    transforms.ToTensor(),
    # Normalize pixel values
    transforms.Normalize(mean= [0.6388, 0.5445, 0.4448],  std =  [0.2713, 0.2864, 0.3131]),
])


# Load the training dataset
trainset = torchvision.datasets.ImageFolder(root=os.path.join(im_dir,'data/train'), transform=transform)

# Create data loader for training data with batch size 4 and shuffling
trainloader = torch.utils.data.DataLoader(trainset, batch_size=8, shuffle=True, num_workers=4)

valset = torchvision.datasets.ImageFolder(root=os.path.join(im_dir,'data/val'), transform=transform)

valloader = torch.utils.data.DataLoader(valset, batch_size=16, shuffle=True, num_workers=4)

testset = torchvision.datasets.ImageFolder(root=os.path.join(im_dir,'data/test'), transform=transform)

testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False, num_workers=4)


In [14]:
# Define criterion, optimizer and scheduler and other parameters for training
opt = "Adam"                        # optimizer to be used: ["Adam" or "SGD"]
momentum = 0.9                      # momentum ONLY for SGD optimizer
weight_decay = 1e-3                 # weight decay ONLY on Adam optimizer
step_size = 7                       # step size for the scheduler
gamma = 0.1                         # gamma for the scheduler

batch_size = 8                      # batch size
num_epochs=10                       # number of epochs
patience = 3                        # patience for early stopping
criterion ="CrossEntropyLoss"       # loss function to be used: ["CrossEntropyLoss", "MSELoss", "L1Loss", "NLLLoss"]
lr = 1e-4                           # learning rate

model_name = "squeezenet"           # model name
model = model       # model



#set the optimizer
if opt == "Adam":
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
elif opt == "SGD":
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
else:
    print("Invalid optimizer")

#set the criterion
if criterion == "CrossEntropyLoss":
    criterion = nn.CrossEntropyLoss()
elif criterion == "MSELoss":
    criterion = nn.MSELoss()
elif criterion == "L1Loss":
    criterion = nn.L1Loss()
elif criterion == "NLLLoss":
    criterion = nn.NLLLoss()
else:
    print("Invalid criterion")

#set the scheduler
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

# Upload model to correct device
model = model.to(device)

In [15]:
def set_training_parameters(model, model_name, opt, lr, weight_decay, momentum, criterion, step_size, gamma, num_epochs, patience, device):

    #set the optimizer
    if opt == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    elif opt == "SGD":
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    else:
        print("Invalid optimizer")

    #set the criterion
    if criterion == "CrossEntropyLoss":
        criterion = nn.CrossEntropyLoss()
    elif criterion == "MSELoss":
        criterion = nn.MSELoss()
    elif criterion == "L1Loss":
        criterion = nn.L1Loss()
    elif criterion == "NLLLoss":
        criterion = nn.NLLLoss()
    else:
        print("Invalid criterion")

    #set the scheduler
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

    # Upload model to correct device
    model = model.to(device)
    
    training_parameters = {}
    training_parameters['model'] = model
    training_parameters['model_name'] = model_name
    training_parameters['criterion'] = criterion
    training_parameters['optimizer'] = optimizer
    training_parameters['scheduler'] = scheduler
    training_parameters['num_epochs'] = num_epochs
    training_parameters['patience'] = patience
    
    return training_parameters

In [16]:
# Training function based on above parameters
def train_model(model, model_name, trainloader, valloader, criterion, optimizer, scheduler, num_epochs=10, patience=3 ):

    mlflow.start_run(run_name=model_name)

    # Log model parameters
    mlflow.log_param("optimizer", opt)
    mlflow.log_param("learning_rate", lr)
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("num_epochs", num_epochs)
    mlflow.log_param("momentum", momentum)
    mlflow.log_param("weight_decay", weight_decay)
    mlflow.log_param("step_size", step_size)
    mlflow.log_param("gamma", gamma)
    mlflow.log_param("patience", patience)

    starting_step=0

    patience_counter = 0
    best_model = None
    best_loss = np.inf
        
    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        running_loss = 0.0
        train_loader_tqdm = tqdm.tqdm(trainloader, desc=f"Epoch {epoch+1}/{num_epochs}", 
                                unit="batch")
        ind_rloss=1
        for inputs, labels in train_loader_tqdm:
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Print statistics
            running_loss += loss.item()
            train_loader_tqdm.set_postfix(loss=running_loss / ind_rloss)
            ind_rloss +=1

        epoch_loss = running_loss / len(trainloader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

        scheduler.step()
        mlflow.log_metric("train_loss", epoch_loss, step=starting_step+epoch)
        
        # Validation loop (optional)
        model.eval()  # Set model to evaluation mode
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in valloader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss /= len(valloader)
        val_accuracy = 100 * correct / total
        print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')
        
        # Log validation loss and accuracy
        mlflow.log_metric("val_loss", val_loss, step=starting_step+epoch)
        mlflow.log_metric("val_accuracy", val_accuracy, step=starting_step+epoch)
        
        # Early stopping
        if val_loss < best_loss:
            best_loss = val_loss
            best_model = model
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter > patience:
                print("Early stopping")
                break
                    
    # Log the model
    mlflow.pytorch.log_model(best_model, model_name)

    # End the MLflow run
    mlflow.end_run()

    print('Finished Training')


In [17]:
def Save_mlruns():
    print("moving to working directory...")
    ! cd /kaggle/working/
    print("zipping directory...")
    ! zip -r mlruns.zip mlruns
    print("!!REMEMBER TO DOWNLOAD IT FROM THE OUTPUT SECTION!!")
    print("back to home directory...")
    ! cd 

In [18]:
# example of training 

# Define criterion, optimizer and scheduler and other parameters for training
opt = "Adam"                        # optimizer to be used: ["Adam" or "SGD"]
momentum = 0.9                      # momentum ONLY for SGD optimizer
weight_decay = 1e-4                 # weight decay ONLY on Adam optimizer
step_size = 20                     # step size for the scheduler
gamma = 0.5                         # gamma for the scheduler

batch_size = 64                      # batch size
num_epochs= 10                     # number of epochs
patience = 5                        # patience for early stopping
criterion ="CrossEntropyLoss"       # loss function to be used: ["CrossEntropyLoss", "MSELoss", "L1Loss", "NLLLoss"]
lr = 1e-4                          # learning rate

model_name = "SSL_Storm_Model_jigsaw"           # model name
model =  SSL_StormModel               # model

tr_param = set_training_parameters(model=model,model_name = model_name, opt=opt, lr=lr, weight_decay=weight_decay, 
                                   momentum=momentum, criterion=criterion, step_size=step_size, gamma=gamma, num_epochs=num_epochs, 
                                   patience=patience, device=device)

#stop eventual mlflow runs
mlflow.end_run()
train_model(**tr_param, trainloader=trainloader, valloader=valloader)

Epoch 1/10: 100%|██████████| 11860/11860 [04:40<00:00, 42.34batch/s, loss=5.2]


Epoch [1/10], Loss: 5.1991
Validation Loss: 4.8452, Validation Accuracy: 4.78%


Epoch 2/10: 100%|██████████| 11860/11860 [04:26<00:00, 44.48batch/s, loss=4.87]

Epoch [2/10], Loss: 4.8680





Validation Loss: 4.6033, Validation Accuracy: 7.11%


Epoch 3/10: 100%|██████████| 11860/11860 [04:25<00:00, 44.59batch/s, loss=4.66]

Epoch [3/10], Loss: 4.6566





Validation Loss: 4.3450, Validation Accuracy: 10.15%


Epoch 4/10: 100%|██████████| 11860/11860 [04:25<00:00, 44.59batch/s, loss=4.48]

Epoch [4/10], Loss: 4.4818





Validation Loss: 4.1744, Validation Accuracy: 12.62%


Epoch 5/10: 100%|██████████| 11860/11860 [04:25<00:00, 44.63batch/s, loss=4.35]

Epoch [5/10], Loss: 4.3464





Validation Loss: 4.0580, Validation Accuracy: 14.36%


Epoch 6/10: 100%|██████████| 11860/11860 [04:23<00:00, 44.98batch/s, loss=4.23]

Epoch [6/10], Loss: 4.2349





Validation Loss: 3.9597, Validation Accuracy: 15.98%


Epoch 7/10: 100%|██████████| 11860/11860 [04:25<00:00, 44.75batch/s, loss=4.14]

Epoch [7/10], Loss: 4.1386





Validation Loss: 3.8763, Validation Accuracy: 17.00%


Epoch 8/10: 100%|██████████| 11860/11860 [04:30<00:00, 43.81batch/s, loss=4.05]

Epoch [8/10], Loss: 4.0500





Validation Loss: 3.7933, Validation Accuracy: 18.56%


Epoch 9/10: 100%|██████████| 11860/11860 [04:26<00:00, 44.58batch/s, loss=3.97]

Epoch [9/10], Loss: 3.9673





Validation Loss: 3.7512, Validation Accuracy: 19.31%


Epoch 10/10: 100%|██████████| 11860/11860 [04:22<00:00, 45.10batch/s, loss=3.9]

Epoch [10/10], Loss: 3.9032





Validation Loss: 3.6525, Validation Accuracy: 20.99%
Finished Training
