In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import shutil
import importlib

import scripts.preprocessing as preprocessing
importlib.reload(preprocessing)

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
import torchsummary
import torch.optim as optim
import scripts.models as models
import tqdm
import mlflow
import mlflow.pytorch


importlib.reload(models)

<module 'scripts.models' from '/home/ronin/Dev/notebooks/machinelearningformodeling/supervised/project/scripts/models.py'>

In [2]:
model = models.SqueezeNet()

In [3]:
torchsummary.summary(model, (3, 224, 224));

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 512, 13, 13]         --
|    └─Conv2d: 2-1                       [-1, 64, 111, 111]        1,792
|    └─ReLU: 2-2                         [-1, 64, 111, 111]        --
|    └─MaxPool2d: 2-3                    [-1, 64, 55, 55]          --
|    └─Fire: 2-4                         [-1, 128, 55, 55]         --
|    |    └─Conv2d: 3-1                  [-1, 16, 55, 55]          1,040
|    |    └─ReLU: 3-2                    [-1, 16, 55, 55]          --
|    |    └─Conv2d: 3-3                  [-1, 64, 55, 55]          1,088
|    |    └─ReLU: 3-4                    [-1, 64, 55, 55]          --
|    |    └─Conv2d: 3-5                  [-1, 64, 55, 55]          9,280
|    |    └─ReLU: 3-6                    [-1, 64, 55, 55]          --
|    └─Fire: 2-5                         [-1, 128, 55, 55]         --
|    |    └─Conv2d: 3-7                  [-1, 16, 55, 55]          2,064


In [46]:
# Load data
#folder_structure = preprocessing.create_dataset()
# transform it in a dataframe and list the number of images per class in the folders
a = pd.DataFrame([(k, len(v)) for k,v in folder_structure[0].items()], 
                    columns=['class', 'count'])
b = pd.DataFrame([(k, len(v)) for k,v in folder_structure[1].items()], 
                    columns=['class', 'count'])
image_counts = pd.merge(a, 
                        b, 
                        on='class', 
                        how='outer', 
                        suffixes=('_train', '_test'))

Populating data/train
Populating data/test


In [35]:
image_counts.head()

Unnamed: 0,class,count_train,count_test
0,dolmas,546,42
1,coquilles_saint_jacques,518,52
2,veal_cordon_bleu,277,23
3,shirred_egg,440,42
4,barbecued_wing,602,34


In [36]:
image_counts.loc[np.argmin(image_counts['count_train']),:]

class          marble_cake
count_train             34
count_test              49
Name: 120, dtype: object

In [37]:
#preprocessing.create_validation(42);

In [6]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

transform = transforms.Compose([
    # resize 
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(224),
    transforms.ToTensor(),
    # Normalize pixel values
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load the training dataset
trainset = torchvision.datasets.ImageFolder(root='data/train', transform=transform)

# Create data loader for training data with batch size 4 and shuffling
trainloader = torch.utils.data.DataLoader(trainset, batch_size=8, shuffle=True, num_workers=2)

valset = torchvision.datasets.ImageFolder(root='data/val', transform=transform)

valloader = torch.utils.data.DataLoader(valset, batch_size=16, shuffle=True, num_workers=2)

testset = torchvision.datasets.ImageFolder(root='data/test', transform=transform)

testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False, num_workers=2)

In [5]:
criterion = nn.CrossEntropyLoss()
#optimizer = optim.Adam(model.parameters(), lr=0.001)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

In [10]:
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=5e-5, weight_decay=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

In [7]:
# Training loop
num_epochs = 10

mlflow.start_run()

# Log model parameters
mlflow.log_param("optimizer", "Adam")
mlflow.log_param("learning_rate", 0.001)
mlflow.log_param("batch_size", 32)
mlflow.log_param("num_epochs", 10)

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    train_loader_tqdm = tqdm.tqdm(trainloader, desc=f"Epoch {epoch+1}/{num_epochs}", 
                             unit="batch")

    for inputs, labels in train_loader_tqdm:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        train_loader_tqdm.set_postfix(loss=running_loss / len(trainloader))

    epoch_loss = running_loss / len(trainloader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

    scheduler.step()
    mlflow.log_metric("train_loss", epoch_loss, step=epoch)

    # Validation loop (optional)
    model.eval()  # Set model to evaluation mode
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in valloader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(valloader)
    val_accuracy = 100 * correct / total
    print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')
    
    # Log validation loss and accuracy
    mlflow.log_metric("val_loss", val_loss, step=epoch)
    mlflow.log_metric("val_accuracy", val_accuracy, step=epoch)

# Log the model
mlflow.pytorch.log_model(model, "squeezenet_model")

# End the MLflow run
mlflow.end_run()

print('Finished Training')


Epoch 1/10: 100%|██████████| 14810/14810 [24:23<00:00, 10.12batch/s, loss=5.35]

Epoch [1/10], Loss: 5.3523





Validation Loss: 5.1526, Validation Accuracy: 3.14%


Epoch 2/10: 100%|██████████| 14810/14810 [24:28<00:00, 10.09batch/s, loss=5.05]  

Epoch [2/10], Loss: 5.0455





Validation Loss: 4.9290, Validation Accuracy: 5.44%


Epoch 3/10: 100%|██████████| 14810/14810 [21:57<00:00, 11.24batch/s, loss=4.87]  

Epoch [3/10], Loss: 4.8736





Validation Loss: 4.7826, Validation Accuracy: 7.21%


Epoch 4/10: 100%|██████████| 14810/14810 [23:40<00:00, 10.43batch/s, loss=4.74]  

Epoch [4/10], Loss: 4.7403





Validation Loss: 4.6470, Validation Accuracy: 9.03%


Epoch 5/10: 100%|██████████| 14810/14810 [22:02<00:00, 11.20batch/s, loss=4.63]

Epoch [5/10], Loss: 4.6307





Validation Loss: 4.6337, Validation Accuracy: 9.03%


Epoch 6/10: 100%|██████████| 14810/14810 [22:03<00:00, 11.19batch/s, loss=4.52]  

Epoch [6/10], Loss: 4.5229





Validation Loss: 4.4446, Validation Accuracy: 11.57%


Epoch 7/10: 100%|██████████| 14810/14810 [22:03<00:00, 11.19batch/s, loss=4.43]

Epoch [7/10], Loss: 4.4255





Validation Loss: 4.3907, Validation Accuracy: 12.06%


Epoch 8/10: 100%|██████████| 14810/14810 [21:56<00:00, 11.25batch/s, loss=4.34]

Epoch [8/10], Loss: 4.3449





Validation Loss: 4.2601, Validation Accuracy: 14.07%


Epoch 9/10: 100%|██████████| 14810/14810 [22:01<00:00, 11.21batch/s, loss=4.26]

Epoch [9/10], Loss: 4.2583





Validation Loss: 4.1684, Validation Accuracy: 15.04%


Epoch 10/10: 100%|██████████| 14810/14810 [22:04<00:00, 11.19batch/s, loss=4.17]

Epoch [10/10], Loss: 4.1726





Validation Loss: 4.0651, Validation Accuracy: 16.89%
Finished Training


In [4]:
## parametrized training loop
def train_model(model, model_name, trainloader, valloader, criterion, optimizer, scheduler, num_epochs=10, device='cpu'):
    mlflow.start_run(run_name=model_name)

    # Log model parameters
    mlflow.log_param("optimizer", optimizer.__class__.__name__)
    mlflow.log_param("learning_rate", optimizer.param_groups[0]['lr'])
    mlflow.log_param("batch_size", trainloader.batch_size)
    mlflow.log_param("num_epochs", num_epochs)

    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        running_loss = 0.0
        train_loader_tqdm = tqdm.tqdm(trainloader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")

        for inputs, labels in train_loader_tqdm:
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Print statistics
            running_loss += loss.item()
            train_loader_tqdm.set_postfix(loss=running_loss / len(trainloader))

        epoch_loss = running_loss / len(trainloader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

        scheduler.step()
        mlflow.log_metric("train_loss", epoch_loss, step=epoch)

        # Validation loop (optional)
        model.eval()  # Set model to evaluation mode
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in valloader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss /= len(valloader)
        val_accuracy = 100 * correct / total
        print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

        # Log validation loss and accuracy
        mlflow.log_metric("val_loss", val_loss, step=epoch)
        mlflow.log_metric("val_accuracy", val_accuracy, step=epoch)

    # Log the model
    mlflow.pytorch.log_model(model, f"{model_name}_model")

    # End the MLflow run
    mlflow.end_run()

    print('Finished Training')


In [None]:

# Example usage:
# model = ...  # Your model definition
# trainloader = ...  # Your training data loader
# valloader = ...  # Your validation data loader
# criterion = ...  # Your loss function
# optimizer = ...  # Your optimizer
# scheduler = ...  # Your learning rate scheduler
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# train_model(model, "squeezenet", trainloader, valloader, criterion, optimizer, scheduler, num_epochs=10, device=device)



In [11]:
model_from_scratch = models.SqueezeNet()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

mlflow.end_run()
train_model(model_from_scratch,
            "netFromScratch", 
            trainloader, 
            valloader, 
            criterion, 
            optimizer, 
            scheduler,
            num_epochs=10, 
            device=device)

Epoch 1/10: 100%|██████████| 14810/14810 [21:25<00:00, 11.52batch/s, loss=5.6] 

Epoch [1/10], Loss: 5.6000





Validation Loss: 5.6158, Validation Accuracy: 0.41%


Epoch 2/10: 100%|██████████| 14810/14810 [21:59<00:00, 11.23batch/s, loss=5.6] 

Epoch [2/10], Loss: 5.6001





Validation Loss: 5.6157, Validation Accuracy: 0.40%


Epoch 3/10:  39%|███▉      | 5845/14810 [09:11<14:05, 10.60batch/s, loss=2.21]   


KeyboardInterrupt: 

In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
#mlflow.end_run()
model = models.SqueezeNet()
model.load_state_dict(torch.load('netFromSSL.pth'))
train_model(model,
            "netFromSSL",
            trainloader,
            valloader,
            criterion,
            optimizer,
            scheduler,
            num_epochs=4,
            device=device)

Epoch 1/4: 100%|██████████| 14810/14810 [22:05<00:00, 11.18batch/s, loss=5.55]  

Epoch [1/4], Loss: 5.5479





Validation Loss: 5.5522, Validation Accuracy: 0.39%


Epoch 2/4: 100%|██████████| 14810/14810 [21:32<00:00, 11.46batch/s, loss=5.55]

Epoch [2/4], Loss: 5.5481





Validation Loss: 5.5522, Validation Accuracy: 0.39%


Epoch 3/4: 100%|██████████| 14810/14810 [21:28<00:00, 11.50batch/s, loss=5.55]

Epoch [3/4], Loss: 5.5479





Validation Loss: 5.5522, Validation Accuracy: 0.39%


Epoch 4/4: 100%|██████████| 14810/14810 [21:28<00:00, 11.49batch/s, loss=5.55]

Epoch [4/4], Loss: 5.5478





Validation Loss: 5.5522, Validation Accuracy: 0.39%
Finished Training


In [17]:
mlflow.end_run()

In [None]:
#best hyperparameters from github non provato

# transformations
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ]),
}

data_dir = 'path_to_foodx251_dataset'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=32,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Initialize the model
model = models.squeezenet1_1(pretrained=True)
model.classifier[1] = nn.Conv2d(512, len(class_names), kernel_size=(1,1), stride=(1,1))
model.num_classes = len(class_names)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# Training function
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for inputs, labels in dataloaders['train']:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        
        epoch_loss = running_loss / dataset_sizes['train']
        print(f'Epoch {epoch}/{num_epochs - 1}, Loss: {epoch_loss:.4f}')
        scheduler.step()

        # Validation loop
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in dataloaders['val']:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        val_loss /= dataset_sizes['val']
        val_accuracy = correct / total
        print(f'Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}')

    return model

# Train the model
model = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=25)