In [1]:
#Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from datasets import load_dataset
from torch import optim
from torch import nn
from torch.utils.data import DataLoader

import torchvision

import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#Hyperparameters: every tunable value for the run lives in this one cell
input_size = 128 * 128   # pixels per channel of a 128x128 image
learning_rate = 1e-3     # step size handed to the optimizer
num_epochs = 10          # full passes over the training split
batch_size = 32          # samples per DataLoader batch

In [3]:
#load datasets
#Only the first 2000 rows of the hub dataset's "train" split are used.
ds = load_dataset("Scuccorese/food-ingredients-dataset", split="train[:2000]")

#Sorted, de-duplicated ingredient names; their order defines the class ids.
ingredients = sorted(set(ds["ingredient"]))

#split data into training data and validation data (80/20).
#A fixed seed keeps the shuffled split identical across kernel restarts, so
#the reported accuracy can be reproduced with Restart & Run All.
ds = ds.train_test_split(test_size=0.2, seed=42)
print(ds["train"][0])

{'category': 'vegetables', 'subcategory': 'stem', 'ingredient': 'rhubarb', 'image': <PIL.WebPImagePlugin.WebPImageFile image mode=RGB size=1067x1690 at 0x7FFE470EE900>}


In [4]:
#Lookup tables between ingredient names and class ids.
#Ids are stored as strings ("0", "1", ...). `ingredients` holds unique names,
#so id2label is simply label2id inverted.
label2id = dict(zip(ingredients, map(str, range(len(ingredients)))))
id2label = {class_id: name for name, class_id in label2id.items()}

num_classes = len(ingredients)
print("Number of Classes:", num_classes)

print(id2label["0"])

Number of Classes: 100
adzuki beans


In [5]:
#Image preprocessing: PIL image -> fixed-size tensor
transform = transforms.Compose(
    [
        # Resize images to 128x128 (this number can be changed for different resolutions)
        transforms.Resize(size=(128, 128)),
        # Convert PIL image to tensor
        transforms.ToTensor(),
    ]
)

def transform_fn(data):
    """Preprocess one dataset example in place and return it.

    Replaces ``data["image"]`` with the output of ``transform`` and adds an
    integer ``data["label"]`` looked up from ``data["ingredient"]``.
    """
    # Force RGB first so every image has the same number of channels.
    rgb_image = data["image"].convert("RGB")
    class_index = int(label2id[data["ingredient"]])

    data["image"] = transform(rgb_image)
    data["label"] = class_index
    return data

#Run the preprocessing over both splits
for split_name in ("train", "test"):
    ds[split_name] = ds[split_name].map(transform_fn)

#convert to pytorch tensor, exposing only the columns the model consumes
for split_name in ("train", "test"):
    ds[split_name] = ds[split_name].with_format("torch", columns=["image", "label"])

#compute mean and std for normalization (training split only)
#assumes each image is (C, H, W), so the stacked tensor is (N, C, H, W) and
#reducing over axes 0, 2, 3 leaves one value per channel
all_images = torch.stack(list(ds["train"]["image"]), dim=0)
mean = all_images.mean(dim=[0, 2, 3])
std = all_images.std(dim=[0, 2, 3])
print("mean: ", mean, "standard deviation: ", std)

#normalize dataset: standardise each channel with the statistics computed above
transform_norm = transforms.Compose(
    [transforms.Normalize(mean=mean, std=std)]
)

def transform_norm_fn(data):
    """Normalize ``data["image"]`` with ``transform_norm`` and return the example."""
    normalized = transform_norm(data["image"])
    data["image"] = normalized
    return data

#Apply the normalization to both splits
for split_name in ("train", "test"):
    ds[split_name] = ds[split_name].map(transform_norm_fn)

#create dataloaders: shuffle only the training batches
train_loader = DataLoader(ds["train"], batch_size=batch_size, shuffle=True)
val_loader = DataLoader(ds["test"], batch_size=batch_size, shuffle=False)

Map: 100%|██████████| 1600/1600 [01:28<00:00, 18.12 examples/s]
Map: 100%|██████████| 400/400 [00:28<00:00, 14.00 examples/s]


mean:  tensor([0.6131, 0.5764, 0.4677]) standard deviation:  tensor([0.2957, 0.2862, 0.3245])


Map: 100%|██████████| 1600/1600 [00:12<00:00, 123.79 examples/s]
Map: 100%|██████████| 400/400 [00:03<00:00, 122.59 examples/s]


In [9]:
#Simple CNN
class SimpleCNN(nn.Module):
    """Small VGG-style CNN: two conv stages followed by a two-layer classifier.

    Each stage is (Conv3x3 -> BatchNorm -> ReLU) twice, then a 2x2 max-pool,
    so the two stages together shrink each spatial dimension by a factor of 4.

    Args:
        num_classes: Number of output logits.
        image_size: Spatial size of the input images, either a single int for
            square inputs or a ``(height, width)`` pair. Defaults to 128,
            which reproduces the original fixed 64*32*32 classifier input.

    Raises:
        ValueError: If the input is too small to survive both pooling stages.
    """

    def __init__(self, num_classes, image_size=128):
        super().__init__()

        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size

        # Each MaxPool2d(2) floor-halves a spatial dimension; the 3x3 convs use
        # padding=1 and therefore leave the spatial size unchanged.
        feat_h = height // 2 // 2
        feat_w = width // 2 // 2
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"image_size {image_size!r} is too small for two 2x2 pooling stages"
            )

        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            # 64 channels times the pooled feature map (32x32 for 128x128 input)
            nn.Linear(64 * feat_h * feat_w, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch of 3-channel images ``x``."""
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [10]:
model = SimpleCNN(num_classes=num_classes)

#Send model to GPU when CUDA is available, otherwise keep it on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

#Train the model
#Loop through dataset and update the model's weights
for epoch in range(num_epochs):
    model.train() #switch to train mode
    running_loss = 0.0 #sum of per-sample losses seen this epoch
    correct = 0
    total = 0

    print(f"Epoch [{epoch + 1}/{num_epochs}]") #display current epoch

    for batch in train_loader:
        images = batch["image"].to(device)
        labels = batch["label"].to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_samples = labels.size(0)

        #measure running loss
        #criterion returns the mean over the batch, so weight it by the batch
        #size; a smaller final batch then counts in proportion to its samples
        running_loss += loss.item() * batch_samples

        #measure running accuracy
        _, predicted = torch.max(outputs, 1)
        total += batch_samples
        correct += (predicted == labels).sum().item()

    #print statistics
    epoch_loss = running_loss / total #mean loss per training sample
    epoch_acc = 100 * correct / total

    print(f'[{epoch+1}/{num_epochs}], loss: {epoch_loss:.3f} accuracy: {epoch_acc:.3f}')


print('Finished Training')




Epoch [1/10]
[1/10], loss: 7.756 accuracy: 1.438
Epoch [2/10]
[2/10], loss: 4.594 accuracy: 1.250
Epoch [3/10]
[3/10], loss: 4.591 accuracy: 1.625
Epoch [4/10]
[4/10], loss: 4.563 accuracy: 2.875
Epoch [5/10]
[5/10], loss: 4.528 accuracy: 3.188
Epoch [6/10]
[6/10], loss: 4.506 accuracy: 3.188
Epoch [7/10]
[7/10], loss: 4.477 accuracy: 3.500
Epoch [8/10]
[8/10], loss: 4.461 accuracy: 3.312
Epoch [9/10]
[9/10], loss: 4.430 accuracy: 3.625
Epoch [10/10]
[10/10], loss: 4.408 accuracy: 3.375
Finished Training


In [19]:
#Save trained model: only the parameters (state_dict) are written to disk
PATH = './food_net.pth'
torch.save(obj=model.state_dict(), f=PATH)

In [11]:
#Find the accuracy of the network on the validation dataset
model.eval() #switch to evaluation mode

correct = 0
total = 0

with torch.no_grad(): #no gradients are needed when only scoring predictions
    for data in val_loader:
        images, labels = data["image"].to(device), data["label"].to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1) #index of the largest logit per sample
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

print(f'Accuracy of the network on the test images: {100 * correct / total} %')

Accuracy of the network on the test images: 2.75 %
