In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import shutil
import importlib
import scripts.preprocessing as preprocessing
importlib.reload(preprocessing)
import torch
import torchvision
from torchvision import transforms
import torchsummary
import torch.optim as optim
import scripts.models as models

In [15]:
# Instantiate the SqueezeNet class exposed by the project's `scripts.models`
# module, using its default constructor arguments.
model = models.SqueezeNet()

In [19]:
# Print a layer-by-layer summary of `model` for a (3, 256, 256) input.
# The trailing `;` stops the notebook from also echoing the call's return value.
torchsummary.summary(model, (3, 256, 256));

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 512, 15, 15]         --
|    └─Conv2d: 2-1                       [-1, 64, 127, 127]        1,792
|    └─ReLU: 2-2                         [-1, 64, 127, 127]        --
|    └─MaxPool2d: 2-3                    [-1, 64, 63, 63]          --
|    └─Fire: 2-4                         [-1, 128, 63, 63]         --
|    |    └─Conv2d: 3-1                  [-1, 16, 63, 63]          1,040
|    |    └─ReLU: 3-2                    [-1, 16, 63, 63]          --
|    |    └─Conv2d: 3-3                  [-1, 64, 63, 63]          1,088
|    |    └─ReLU: 3-4                    [-1, 64, 63, 63]          --
|    |    └─Conv2d: 3-5                  [-1, 64, 63, 63]          9,280
|    |    └─ReLU: 3-6                    [-1, 64, 63, 63]          --
|    └─Fire: 2-5                         [-1, 128, 63, 63]         --
|    |    └─Conv2d: 3-7                  [-1, 16, 63, 63]          2,064


In [32]:
# Load data: build the dataset through the project's preprocessing helper.
# NOTE(review): the following cell indexes the result as [0] (train) and [1]
# (test) and treats each entry as a mapping of class name -> sized collection;
# confirm against scripts.preprocessing.create_dataset.
folder_structure = preprocessing.create_dataset()

Populating data/train
Populating data/test


In [34]:
# Turn each split's {class: entries} mapping into a (class, count) table,
# then join the two tables on the class name.
def _count_table(split):
    """Return a DataFrame with one row per class and the number of entries it holds."""
    rows = [(label, len(entries)) for label, entries in split.items()]
    return pd.DataFrame(rows, columns=['class', 'count'])


a = _count_table(folder_structure[0])
b = _count_table(folder_structure[1])
# Outer join keeps classes that appear in only one of the two splits.
image_counts = a.merge(b, on='class', how='outer', suffixes=('_train', '_test'))

In [35]:
# Display the merged per-class table (columns: class, count_train, count_test).
image_counts

Unnamed: 0,class,count_train,count_test
0,dolmas,546,42
1,coquilles_saint_jacques,518,52
2,veal_cordon_bleu,277,23
3,shirred_egg,440,42
4,barbecued_wing,602,34
...,...,...,...
246,chicken_wing,508,43
247,grilled_cheese_sandwich,541,69
248,beef_carpaccio,478,53
249,hot_dog,332,43


In [36]:
# Row of the class with the fewest training samples.
# `idxmin` returns the index *label*, which is what `.loc` expects. The previous
# `np.argmin` returned a *position* and only worked because the merged frame
# happens to carry the default 0..n-1 index.
image_counts.loc[image_counts['count_train'].idxmin(), :]

class          marble_cake
count_train             34
count_test              49
Name: 120, dtype: object

In [38]:
# Create the validation split through the project's preprocessing helper.
# NOTE(review): 42 is presumably a random seed, and this call presumably
# populates the `data/val` folder read later in the notebook — confirm against
# scripts.preprocessing.create_validation.
# The trailing `;` suppresses the echo of the return value.
preprocessing.create_validation(42);

In [97]:
# Common spatial size every image is resized to before batching.
# Could possibly be reduced further: the dataset contains a lot of images.
IMAGE_SIZE = (64, 64)

transform = transforms.Compose([
    # Resize first, while the sample is still a PIL image: every later step then
    # works on a small 64x64 image instead of the full-resolution one, and the
    # interpolation runs on raw pixels rather than on normalised values.
    transforms.Resize(size=IMAGE_SIZE),
    # Convert PIL images to PyTorch tensors in [0, 1]
    transforms.ToTensor(),
    # Normalize each channel (the standard ImageNet channel statistics)
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Training data: one sub-folder per class, shuffled every epoch, batch size 8.
trainset = torchvision.datasets.ImageFolder(root='data/train', transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=8, shuffle=True, num_workers=2)

# Validation data: shuffling brings nothing for evaluation, so keep a fixed
# order to make per-batch results reproducible between runs.
valset = torchvision.datasets.ImageFolder(root='data/val', transform=transform)
valloader = torch.utils.data.DataLoader(valset, batch_size=16, shuffle=False, num_workers=2)

# Test data: evaluated one image at a time, in a fixed order.
testset = torchvision.datasets.ImageFolder(root='data/test', transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False, num_workers=2)

In [79]:

import torch
import torch.nn as nn
import torch.nn.functional as F

class EfficientCNN(nn.Module):
    """Compact four-stage convolutional classifier.

    Every stage is ``Conv3x3 -> BatchNorm -> ReLU -> max-pool``. The first three
    stages halve the spatial resolution; the last stage pools adaptively down to
    a 2x2 grid, so the flattened feature vector always holds
    ``256 * 2 * 2 = 1024`` values regardless of the input resolution. For a
    64x64 input this is exactly the former fixed ``max_pool2d(x, 4)`` (8x8 -> 2x2),
    so results for 64x64 inputs are unchanged.

    Args:
        num_classes: Number of output logits (default: 251).
    """

    def __init__(self, num_classes=251):
        super().__init__()

        # Convolutional layers (3x3 kernels, padding 1 keeps the spatial size)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)

        # Batch Normalization layers, one per convolution
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(256)

        # Fully connected layers: 256 channels on a 2x2 grid -> 512 -> logits
        self.fc1 = nn.Linear(256 * 2 * 2, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to logits ``(N, num_classes)``."""
        # Stages 1-3: convolution, batch norm, ReLU, then 2x2 max-pooling
        x = F.max_pool2d(F.relu(self.bn1(self.conv1(x))), 2)
        x = F.max_pool2d(F.relu(self.bn2(self.conv2(x))), 2)
        x = F.max_pool2d(F.relu(self.bn3(self.conv3(x))), 2)

        # Stage 4: pool to a fixed 2x2 grid so fc1 always receives 1024 features
        x = F.relu(self.bn4(self.conv4(x)))
        x = F.adaptive_max_pool2d(x, 2)

        # Flatten everything except the batch dimension
        x = torch.flatten(x, 1)

        # Fully connected head; raw logits are returned (no softmax)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

In [80]:
# Rebind `model` to a fresh EfficientCNN (default: 251 output classes).
# This replaces the SqueezeNet instance assigned to `model` earlier in the notebook.
model = EfficientCNN()

In [81]:
# Print model summary for a (3, 64, 64) input.
# The trailing `;` keeps the notebook from echoing the returned summary object,
# which otherwise shows the same table a second time below the printed one.
torchsummary.summary(model, (3, 64, 64));

Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 32, 64, 64]          896
├─BatchNorm2d: 1-2                       [-1, 32, 64, 64]          64
├─Conv2d: 1-3                            [-1, 64, 32, 32]          18,496
├─BatchNorm2d: 1-4                       [-1, 64, 32, 32]          128
├─Conv2d: 1-5                            [-1, 128, 16, 16]         73,856
├─BatchNorm2d: 1-6                       [-1, 128, 16, 16]         256
├─Conv2d: 1-7                            [-1, 256, 8, 8]           295,168
├─BatchNorm2d: 1-8                       [-1, 256, 8, 8]           512
├─Linear: 1-9                            [-1, 512]                 524,800
├─Linear: 1-10                           [-1, 251]                 128,763
Total params: 1,042,939
Trainable params: 1,042,939
Non-trainable params: 0
Total mult-adds (M): 60.82
Input size (MB): 0.05
Forward/backward pass size (MB): 3.76
Params size (MB): 3.98
Estimated To

Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 32, 64, 64]          896
├─BatchNorm2d: 1-2                       [-1, 32, 64, 64]          64
├─Conv2d: 1-3                            [-1, 64, 32, 32]          18,496
├─BatchNorm2d: 1-4                       [-1, 64, 32, 32]          128
├─Conv2d: 1-5                            [-1, 128, 16, 16]         73,856
├─BatchNorm2d: 1-6                       [-1, 128, 16, 16]         256
├─Conv2d: 1-7                            [-1, 256, 8, 8]           295,168
├─BatchNorm2d: 1-8                       [-1, 256, 8, 8]           512
├─Linear: 1-9                            [-1, 512]                 524,800
├─Linear: 1-10                           [-1, 251]                 128,763
Total params: 1,042,939
Trainable params: 1,042,939
Non-trainable params: 0
Total mult-adds (M): 60.82
Input size (MB): 0.05
Forward/backward pass size (MB): 3.76
Params size (MB): 3.98
Estimated To

In [87]:
## training configuration
# Number of full passes over the training set.
epochs = 1

# Multi-class classification loss applied to the model's raw logits.
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters, starting at a learning rate of 1e-3.
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-3,
)

# Multiply the learning rate by 0.1 on every `scheduler.step()` call.
# The optimizer and scheduler keep their state between cell executions, so
# re-run this cell before re-running the training cell to start again from 1e-3.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=1,
    gamma=0.1,
)


In [104]:
# Number of batches averaged into each running-loss report.
LOG_EVERY = 2000

# NOTE(review): `optimizer` and `scheduler` are created in the configuration
# cell, so their state survives re-runs of this cell. Each completed epoch
# multiplies the learning rate by the scheduler's gamma, and a loss that stays
# near ln(251) ~= 5.53 (uniform guessing over 251 classes) may simply mean the
# learning rate has already decayed to almost zero. The rate in use is printed
# at the start of every epoch to make that visible.
for epoch in range(epochs):
    model.train()
    print('[%d] lr: %.2e' % (epoch + 1, optimizer.param_groups[0]['lr']))

    running_loss = 0.0
    pending = 0  # batches accumulated since the last report
    for i, (inputs, labels) in enumerate(trainloader):
        # Standard optimisation step: reset grads, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        pending += 1
        if pending == LOG_EVERY:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / pending))
            running_loss = 0.0
            pending = 0

    # Report the trailing partial window instead of silently dropping it.
    if pending:
        print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / pending))

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

[1,  2000] loss: 5.520
[1,  4000] loss: 5.516
[1,  6000] loss: 5.514
[1,  8000] loss: 5.511
