In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import shutil
import importlib
import scripts.preprocessing as preprocessing
importlib.reload(preprocessing)
import torch
import torchvision
from torchvision import transforms
import torchsummary
import torch.optim as optim
import scripts.models as models
from utils import *

In [3]:
# check if CUDA is available
device = use_GPU()

CUDA is available!  Training on GPU ...
cuda:0


In [7]:
model = models.SqueezeNet()
model.to(device);

In [8]:
torchsummary.summary(model, (3, 256, 256));

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 127, 127]           1,792
              ReLU-2         [-1, 64, 127, 127]               0
         MaxPool2d-3           [-1, 64, 63, 63]               0
            Conv2d-4           [-1, 16, 63, 63]           1,040
              ReLU-5           [-1, 16, 63, 63]               0
            Conv2d-6           [-1, 64, 63, 63]           1,088
              ReLU-7           [-1, 64, 63, 63]               0
            Conv2d-8           [-1, 64, 63, 63]           9,280
              ReLU-9           [-1, 64, 63, 63]               0
             Fire-10          [-1, 128, 63, 63]               0
           Conv2d-11           [-1, 16, 63, 63]           2,064
             ReLU-12           [-1, 16, 63, 63]               0
           Conv2d-13           [-1, 64, 63, 63]           1,088
             ReLU-14           [-1, 64,

In [9]:
# Load data
folder_structure = preprocessing.create_dataset()

Populating data/train
Populating data/test


In [10]:
# transform it in a dataframe
a = pd.DataFrame([(k, len(v)) for k,v in folder_structure[0].items()], columns=['class', 'count'])
b = pd.DataFrame([(k, len(v)) for k,v in folder_structure[1].items()], columns=['class', 'count'])
image_counts = pd.merge(a, b, on='class', how='outer', suffixes=('_train', '_test'))

In [11]:
image_counts

Unnamed: 0,class,count_train,count_test
0,adobo,498,43
1,ambrosia_food,569,35
2,apple_pie,549,41
3,apple_turnover,504,43
4,applesauce,484,39
...,...,...,...
246,vol_au_vent,447,57
247,waffle,376,44
248,welsh_rarebit,319,41
249,wonton,513,48


In [13]:
image_counts.loc[np.argmin(image_counts['count_train']),:]

class          marble_cake
count_train             34
count_test              49
Name: 143, dtype: object

In [14]:
preprocessing.create_validation(42);

In [15]:
transform = transforms.Compose([
    # Convert PIL images to PyTorch tensors
    transforms.ToTensor(),
    # Normalize pixel values
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    # Resize images to a common size
    transforms.Resize(size=(64,64)) #forse da ridurre? abbiamo troppe immagini
])

# Load the training dataset
trainset = torchvision.datasets.ImageFolder(root='data/train', transform=transform)

# Create data loader for training data with batch size 4 and shuffling
trainloader = torch.utils.data.DataLoader(trainset, batch_size=8, shuffle=True, num_workers=2)

valset = torchvision.datasets.ImageFolder(root='data/val', transform=transform)

valloader = torch.utils.data.DataLoader(valset, batch_size=16, shuffle=True, num_workers=2)

testset = torchvision.datasets.ImageFolder(root='data/test', transform=transform)

testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False, num_workers=2)

In [16]:

import torch
import torch.nn as nn
import torch.nn.functional as F

class EfficientCNN(nn.Module):
    def __init__(self, num_classes=251):
        super(EfficientCNN, self).__init__()
        
        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        
        # Batch Normalization layers
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(256)
        
        # Fully connected layers
        self.fc1 = nn.Linear(256 * 4 , 512)
        self.fc2 = nn.Linear(512, num_classes)
        
    def forward(self, x):
        # Convolutional layers with ReLU and MaxPooling
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.max_pool2d(x, 2)
        
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.max_pool2d(x, 2)
        
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.max_pool2d(x, 2)
        
        x = F.relu(self.bn4(self.conv4(x)))
        x = F.max_pool2d(x, 4)
        
        # Flatten the tensor
        x = x.view(x.size(0), -1)
        
        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        
        return x

In [18]:
model = EfficientCNN()
model.to(device)

EfficientCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=1024, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=251, bias=True)
)

In [19]:
# Print model summary
torchsummary.summary(model, (3, 64, 64))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 64, 64]             896
       BatchNorm2d-2           [-1, 32, 64, 64]              64
            Conv2d-3           [-1, 64, 32, 32]          18,496
       BatchNorm2d-4           [-1, 64, 32, 32]             128
            Conv2d-5          [-1, 128, 16, 16]          73,856
       BatchNorm2d-6          [-1, 128, 16, 16]             256
            Conv2d-7            [-1, 256, 8, 8]         295,168
       BatchNorm2d-8            [-1, 256, 8, 8]             512
            Linear-9                  [-1, 512]         524,800
           Linear-10                  [-1, 251]         128,763
Total params: 1,042,939
Trainable params: 1,042,939
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.05
Forward/backward pass size (MB): 3.76
Params size (MB): 3.98
Estimat

In [29]:
## train the model
epochs = 10
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)
patience = 5


In [30]:
train_loss=[] # store the training loss

best_val_loss = np.inf # initialize the best validation loss to infinity for patience
patience_counter = 0 # initialize the counter for patience

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    epoch_loss = 0.0

    for i, data in enumerate(trainloader):
        # Get the inputs
        inputs, labels = data
        # Move the inputs and labels to the device
        inputs, labels = inputs.to(device), labels.to(device)

        # clear the gradients
        optimizer.zero_grad()
        # Forward pass
        outputs = model(inputs)
        # Calculate the loss
        loss = criterion(outputs, labels)
        # Backward pass
        loss.backward()
        # Update the weights
        optimizer.step()
        # Add the loss to the training set's running loss
        running_loss += loss.item()
        epoch_loss += loss.item()
        # we  keep the variable separated because we want to print the running loss 
        #every 2000 mini-batches and len(trainloader)%2000 is not 0

        # Print the loss every 2000 mini-batches
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
        
    # Add the epoch loss to the training loss list
    train_loss.append(epoch_loss/len(trainloader))
    if(epoch_loss/len(trainloader) < best_val_loss):
        best_val_loss = epoch_loss/len(trainloader)
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter > patience:
            print("Early stopping at epoch ", epoch)
            break
        
    # Step the scheduler  
    scheduler.step()

[1,  2000] loss: 5.349
[1,  4000] loss: 5.341
[1,  6000] loss: 5.328
[1,  8000] loss: 5.333
[1, 10000] loss: 5.323
[2,  2000] loss: 5.293
[2,  4000] loss: 5.293
[2,  6000] loss: 5.288
[2,  8000] loss: 5.265
