# Convolutional Neural Networks

In this task you are suppose to implement a convolutional neural network using a high level library, e.g. PyTorch.
The classification should be about food.
* Download the food 11 dataset https://mmspg.epfl.ch/food-image-datasets or https://www.kaggle.com/vermaavi/food11/data
* Predict the 11 classes: Bread, Dairy product, Dessert, Egg, Fried food, Meat, Noodles/Pasta, Rice, Seafood, Soup, and Vegetable/Fruit
* Try some standard networks convolutional networks before more complex ones.
* Hint: Start with a subset of the dataset
* Choose the network architecture with care.
* Train and validate all algorithms.
* Make the necessary assumptions.

In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset
from PIL import Image
import os
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np


# Step 1: Load and preprocess the dataset
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])


class FoodDataset(Dataset):
   def __init__(self, root_dir, transform=None):
       self.root_dir = root_dir
       self.transform = transform
       self.file_list = os.listdir(root_dir)

   def __len__(self):
       return len(self.file_list)

   def __getitem__(self, idx):
       img_name = os.path.join(self.root_dir, self.file_list[idx])
       image = Image.open(img_name)
       if self.transform:
           image = self.transform(image)
       label = int(self.file_list[idx].split("_")[0])
       return image, label


train_dir = 'food-11/training'
val_dir = 'food-11/validation'

train_dataset = FoodDataset(train_dir, transform=transform)
val_dataset = FoodDataset(val_dir, transform=transform)

# Calculate the number of samples to use for training (10%)
n_train = len(train_dataset)
indices = list(range(n_train))
np.random.shuffle(indices)
split = int(np.floor(0.1 * n_train))

# Get the random indices for the subset
train_idx = indices[:split]

# Create a sampler
train_sampler = SubsetRandomSampler(train_idx)


#train_loader = DataLoader(train_dataset, batch_size=32, sampler=train_sampler)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

In [25]:
# Step 2: Define the CNN architecture
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(64 * 30 * 30, 128)  # instead of nn.Linear(64 * 15 * 15, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, 11)

    def forward(self, x):
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu3(self.fc1(x))
        x = self.fc2(x)
        return x

# Step 3: Train the CNN
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    train_correct = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(outputs.data, 1)
        train_loss += loss.item() * images.size(0)
        train_correct += (predicted == labels).sum().item()

    train_loss = train_loss / len(train_dataset)
    train_accuracy = train_correct / len(train_dataset)

    model.eval()
    test_loss = 0
    test_correct = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            _, predicted = torch.max(outputs.data, 1)
            test_loss += loss
            
    print('Epoch: {} \tTraining Loss: {:.6f} \tTraining Accuracy: {:.6f} \tTest Loss: {:.6f}'.format(
        epoch + 1, train_loss, train_accuracy, test_loss / len(val_dataset)))

Epoch: 1 	Training Loss: 2.096089 	Training Accuracy: 0.258565 	Test Loss: 0.059631
Epoch: 2 	Training Loss: 1.760275 	Training Accuracy: 0.378877 	Test Loss: 0.054637
Epoch: 3 	Training Loss: 1.516161 	Training Accuracy: 0.469795 	Test Loss: 0.052070
Epoch: 4 	Training Loss: 1.246069 	Training Accuracy: 0.567200 	Test Loss: 0.053496
Epoch: 5 	Training Loss: 0.908189 	Training Accuracy: 0.693594 	Test Loss: 0.059498
Epoch: 6 	Training Loss: 0.554893 	Training Accuracy: 0.820495 	Test Loss: 0.075193
Epoch: 7 	Training Loss: 0.276336 	Training Accuracy: 0.916785 	Test Loss: 0.090224
Epoch: 8 	Training Loss: 0.134404 	Training Accuracy: 0.962599 	Test Loss: 0.110442
Epoch: 9 	Training Loss: 0.071679 	Training Accuracy: 0.981350 	Test Loss: 0.123245
Epoch: 10 	Training Loss: 0.034728 	Training Accuracy: 0.992398 	Test Loss: 0.142721
Epoch: 11 	Training Loss: 0.067772 	Training Accuracy: 0.980032 	Test Loss: 0.149828


KeyboardInterrupt: 

In [26]:
from sklearn.metrics import classification_report

eval_dir = 'food-11/evaluation'
eval_dataset = FoodDataset(eval_dir, transform=transform)
eval_loader = DataLoader(eval_dataset, batch_size=32)

# Step 4: Evaluate the CNN
model.eval()

true_labels = []
predictions = []

with torch.no_grad():
    for images, labels in eval_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        predictions.extend(predicted.cpu().numpy().tolist())
        true_labels.extend(labels.cpu().numpy().tolist())

#The 11 categories are Bread, Dairy product, Dessert, Egg, Fried food, Meat, Noodles/Pasta, Rice, Seafood, Soup, and Vegetable/Fruit.
target_names = ['Bread', 'Dairy product', 'Dessert', 'Egg', 'Fried food', 'Meat', 'Noodles/Pasta', 'Rice', 'Seafood', 'Soup', 'Vegetable/Fruit']
# Calculate Recall, Precision, and F1-score
print(classification_report(true_labels, predictions, target_names=target_names))

                 precision    recall  f1-score   support

          Bread       0.33      0.32      0.32       368
  Dairy product       0.32      0.21      0.25       148
        Dessert       0.35      0.38      0.36       500
            Egg       0.34      0.27      0.30       335
     Fried food       0.42      0.41      0.41       287
           Meat       0.48      0.53      0.50       432
  Noodles/Pasta       0.51      0.33      0.40       147
           Rice       0.41      0.23      0.29        96
        Seafood       0.40      0.50      0.44       303
           Soup       0.59      0.70      0.64       500
Vegetable/Fruit       0.76      0.62      0.68       231

       accuracy                           0.45      3347
      macro avg       0.44      0.41      0.42      3347
   weighted avg       0.44      0.45      0.44      3347
