# Convolutional Neural Networks

In this task you are suppose to implement a convolutional neural network using a high level library, e.g. PyTorch.
The classification should be about food.
* Download the food 11 dataset https://mmspg.epfl.ch/food-image-datasets or https://www.kaggle.com/vermaavi/food11/data
* Predict the 11 classes: Bread, Dairy product, Dessert, Egg, Fried food, Meat, Noodles/Pasta, Rice, Seafood, Soup, and Vegetable/Fruit
* Try some standard networks convolutional networks before more complex ones.
* Hint: Start with a subset of the dataset
* Choose the network architecture with care.
* Train and validate all algorithms.
* Make the necessary assumptions.

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset
from PIL import Image
import os
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np


# Step 1: Load and preprocess the dataset
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])


class FoodDataset(Dataset):
   def __init__(self, root_dir, transform=None):
       self.root_dir = root_dir
       self.transform = transform
       self.file_list = os.listdir(root_dir)

   def __len__(self):
       return len(self.file_list)

   def __getitem__(self, idx):
       img_name = os.path.join(self.root_dir, self.file_list[idx])
       image = Image.open(img_name)
       if self.transform:
           image = self.transform(image)
       label = int(self.file_list[idx].split("_")[0])
       return image, label


train_dir = 'food-11/training'
val_dir = 'food-11/validation'

train_dataset = FoodDataset(train_dir, transform=transform)
val_dataset = FoodDataset(val_dir, transform=transform)

# Calculate the number of samples to use for training (10%)
n_train = len(train_dataset)
indices = list(range(n_train))
np.random.shuffle(indices)
split = int(np.floor(0.1 * n_train))

# Get the random indices for the subset
train_idx = indices[:split]

# Create a sampler
train_sampler = SubsetRandomSampler(train_idx)


#train_loader = DataLoader(train_dataset, batch_size=32, sampler=train_sampler)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

In [12]:
# Step 2: Define the CNN architecture with Residual Connections
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.relu(out)
        out = self.conv2(out)
        out += identity  # Adding the identity (skip connection)
        out = self.relu(out)
        return out


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.res1 = ResidualBlock(32, 32)  # Residual Block
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.res2 = ResidualBlock(64, 64)  # Residual Block
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.fc1 = nn.Linear(64 * 32 * 32, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, 11)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.res1(x)
        x = self.pool1(x)
        
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.res2(x)
        x = self.pool2(x)
        
        x = x.view(x.size(0), -1)
        x = self.relu3(self.fc1(x))
        x = self.fc2(x)
        return x


# Step 3: Train the CNN
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    train_correct = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(outputs.data, 1)
        train_loss += loss.item() * images.size(0)
        train_correct += (predicted == labels).sum().item()

    train_loss = train_loss / len(train_dataset)
    train_accuracy = train_correct / len(train_dataset)

    model.eval()
    test_loss = 0
    test_correct = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            _, predicted = torch.max(outputs.data, 1)
            test_loss += loss
            
    print('Epoch: {} \tTraining Loss: {:.6f} \tTraining Accuracy: {:.6f} \tTest Loss: {:.6f}'.format(
        epoch + 1, train_loss, train_accuracy, test_loss / len(val_dataset)))

Epoch: 1 	Training Loss: 2.122848 	Training Accuracy: 0.242347 	Test Loss: 0.061338
Epoch: 2 	Training Loss: 1.827335 	Training Accuracy: 0.351308 	Test Loss: 0.053971
Epoch: 3 	Training Loss: 1.532036 	Training Accuracy: 0.462599 	Test Loss: 0.052508
Epoch: 4 	Training Loss: 1.181444 	Training Accuracy: 0.595074 	Test Loss: 0.056330
Epoch: 5 	Training Loss: 0.714246 	Training Accuracy: 0.758463 	Test Loss: 0.063680
Epoch: 6 	Training Loss: 0.284513 	Training Accuracy: 0.910197 	Test Loss: 0.086689
Epoch: 7 	Training Loss: 0.108634 	Training Accuracy: 0.966045 	Test Loss: 0.120588
Epoch: 8 	Training Loss: 0.061455 	Training Accuracy: 0.978613 	Test Loss: 0.133979
Epoch: 9 	Training Loss: 0.076802 	Training Accuracy: 0.974052 	Test Loss: 0.135566
Epoch: 10 	Training Loss: 0.057276 	Training Accuracy: 0.983073 	Test Loss: 0.137426


In [13]:
from sklearn.metrics import classification_report

eval_dir = 'food-11/evaluation'
eval_dataset = FoodDataset(eval_dir, transform=transform)
eval_loader = DataLoader(eval_dataset, batch_size=32)

# Step 4: Evaluate the CNN
model.eval()

true_labels = []
predictions = []

with torch.no_grad():
    for images, labels in eval_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        predictions.extend(predicted.cpu().numpy().tolist())
        true_labels.extend(labels.cpu().numpy().tolist())

#The 11 categories are Bread, Dairy product, Dessert, Egg, Fried food, Meat, Noodles/Pasta, Rice, Seafood, Soup, and Vegetable/Fruit.
target_names = ['Bread', 'Dairy product', 'Dessert', 'Egg', 'Fried food', 'Meat', 'Noodles/Pasta', 'Rice', 'Seafood', 'Soup', 'Vegetable/Fruit']
# Calculate Recall, Precision, and F1-score
print(classification_report(true_labels, predictions, target_names=target_names))

                 precision    recall  f1-score   support

          Bread       0.33      0.32      0.33       368
  Dairy product       0.34      0.22      0.27       148
        Dessert       0.34      0.40      0.37       500
            Egg       0.30      0.33      0.31       335
     Fried food       0.45      0.31      0.37       287
           Meat       0.51      0.48      0.50       432
  Noodles/Pasta       0.36      0.54      0.43       147
           Rice       0.26      0.38      0.31        96
        Seafood       0.50      0.35      0.41       303
           Soup       0.62      0.68      0.65       500
Vegetable/Fruit       0.65      0.64      0.65       231

       accuracy                           0.44      3347
      macro avg       0.42      0.42      0.42      3347
   weighted avg       0.44      0.44      0.44      3347
