In [1]:
#Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from datasets import load_dataset
from torch import optim
from torch import nn
from torch.utils.data import DataLoader

import torchvision
import torchvision.models as models

import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
#Hyperparameters
input_size = 128*128
learning_rate = 0.0001
num_epochs = 10
batch_size = 64

In [3]:
#load datasets
ds = load_dataset("Scuccorese/food-ingredients-dataset", split="train[:2000]") #2000 images pulled 
ingredients = sorted(set(ds["ingredient"]))
ds = ds.train_test_split(test_size=0.2) #split data into training data and validation data
print(ds["train"][0])



{'category': 'legumes', 'subcategory': 'beans', 'ingredient': 'lima beans', 'image': <PIL.Image.Image image mode=RGB size=800x600 at 0x19DA04C0590>}


In [4]:
#create dictionary that maps label name to integer
label2id = {name: str(i) for i, name in enumerate(ingredients)}
id2label = {str(i): name for i, name in enumerate(ingredients)}

num_classes = len(ingredients)
print("Number of Classes:", num_classes)

print(id2label["0"])

Number of Classes: 100
adzuki beans


In [5]:
#process image into tensor
transform = transforms.Compose([
    transforms.Resize((128,128)),          # Resize images to 128x128 (this number can be changed for different resolutions)
    transforms.ToTensor(),                 # Convert PIL image to tensor
])

def transform_fn(data):
    image = data["image"].convert("RGB") #make all images RGB to avoid initial channel mismatches
    data["image"] = transform(image)
    data["label"] = int(label2id[data["ingredient"]])
    return data

ds["train"] = ds["train"].map(transform_fn)
ds["test"]  = ds["test"].map(transform_fn)

#convert to pytorch tensor
ds["train"] = ds["train"].with_format("torch", columns=["image", "label"])
ds["test"] = ds["test"].with_format("torch", columns=["image", "label"])

#compute mean and std for normalization
all_images = torch.stack([img for img in ds["train"]["image"]], dim=0)
mean, std = all_images.mean(dim=[0, 2, 3]), all_images.std(dim=[0, 2, 3])
print("mean: ", mean, "standard deviation: ", std)

#normalize dataset
transform_norm = transforms.Compose([
    transforms.Normalize(mean, std)
])

def transform_norm_fn(data):
    data["image"] = transform_norm(data["image"])
    return data

ds["train"] = ds["train"].map(transform_norm_fn)
ds["test"] = ds["test"].map(transform_norm_fn)

#create dataloaders
train_loader = DataLoader(dataset=ds["train"], batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=ds["test"], batch_size=batch_size, shuffle=False)

Map: 100%|██████████| 1600/1600 [01:41<00:00, 15.73 examples/s]
Map: 100%|██████████| 400/400 [00:36<00:00, 11.00 examples/s]


mean:  tensor([0.6120, 0.5740, 0.4665]) standard deviation:  tensor([0.2957, 0.2867, 0.3240])


Map: 100%|██████████| 1600/1600 [00:15<00:00, 101.21 examples/s]
Map: 100%|██████████| 400/400 [00:04<00:00, 97.00 examples/s] 


In [6]:
model = models.resnet18(weights="IMAGENET1K_V1")
model.fc = nn.Linear(model.fc.in_features, num_classes)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\arb77/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:02<00:00, 19.4MB/s]


In [None]:

patience = 3  # Number of epochs to wait for improvement before stopping
best_loss = float('inf')  # Start with very large loss so first epoch always improves
epochs_without_improvement = 0  # Counter for consecutive non-improving epochs


for epoch in range(num_epochs):

    model.train()  # Set model to training mode (enables dropout, batchnorm updates)

    running_loss = 0.0  # Accumulate loss over the epoch
    correct = 0         # Count correct predictions
    total = 0           # Count total samples processed

    print(f"Epoch [{epoch + 1}/{num_epochs}]")

    for batch in train_loader:

        # Move data to GPU (if available)
        images = batch["image"].to(device)
        labels = batch["label"].to(device)

        # Forward pass: compute predictions
        outputs = model(images)

        # Compute loss between predictions and true labels
        loss = criterion(outputs, labels)

        # Clear previous gradients (PyTorch accumulates gradients by default)
        optimizer.zero_grad()

        # Backward pass: compute gradients
        loss.backward()

        # Update model weights
        optimizer.step()

        # Add this batch's loss to running total
        running_loss += loss.item()

        # Get predicted class (index of max logit)
        _, predicted = torch.max(outputs, 1)

        # Update total sample count
        total += labels.size(0)

        # Count correct predictions
        correct += (predicted == labels).sum().item()

    # Compute average loss for the epoch
    epoch_loss = running_loss / len(train_loader)

    # Compute accuracy percentage
    epoch_acc = 100 * correct / total

    print(f'Loss: {epoch_loss:.4f} | Accuracy: {epoch_acc:.2f}%')

    # If loss improved, reset counter
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        epochs_without_improvement = 0
    else:
        # If loss did not improve, increment counter
        epochs_without_improvement += 1

    # If model has not improved for 'patience' epochs, stop training
    if epochs_without_improvement >= patience:
        print("Early stopping triggered.")
        break


print("Finished Training")

Epoch [1/10]
Loss: 0.167 | Accuracy: 94.06%
Epoch [2/10]
Loss: 0.136 | Accuracy: 95.19%
Epoch [3/10]
Loss: 0.124 | Accuracy: 94.88%
Epoch [4/10]
Loss: 0.097 | Accuracy: 96.00%
Epoch [5/10]
Loss: 0.082 | Accuracy: 96.06%
Epoch [6/10]
Loss: 0.087 | Accuracy: 95.62%
Epoch [7/10]
Loss: 0.094 | Accuracy: 95.12%
Epoch [8/10]
Loss: 0.083 | Accuracy: 95.69%
Early stopping triggered.
Finished Training


In [12]:
#Save trained model
PATH = './food_net.pth'
torch.save(model.state_dict(), PATH)

In [13]:
#Find the accuracy of the network on the validation dataset
correct = 0
total = 0

model.eval()

with torch.no_grad():
    for data in val_loader:
        images = data["image"].to(device)
        labels = data["label"].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on the test images: {100 * correct / total} %')

Accuracy of the network on the test images: 43.25 %
