In [2]:
import os
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

Set Up Custom Dataset Class:

In [3]:
class FoodDataset(Dataset):
    """Image-classification dataset backed by a flat list of image file paths.

    The label of every image is derived from the name of the directory that
    directly contains it (``.../<class_name>/<image file>``).

    Attributes:
        file_paths: The image paths, indexed in the order they were given.
        transform: Optional callable applied to each loaded RGB image.
        classes: Sorted list of the distinct class (directory) names found in
            ``file_paths``; a label is an index into this list.
    """

    def __init__(self, file_paths, transform=None):
        """Store the paths/transform and build the sorted class list.

        Args:
            file_paths: Sequence of image file paths.
            transform: Optional callable applied to the image in ``__getitem__``.
        """
        self.file_paths = file_paths
        self.transform = transform
        self.classes = sorted({self._class_name(fp) for fp in file_paths})

    @staticmethod
    def _class_name(file_path):
        """Return the name of the directory that directly contains ``file_path``."""
        # basename(dirname(...)) honours the platform's path separators, unlike
        # splitting on a hard-coded '/'.
        return os.path.basename(os.path.dirname(file_path))

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: Position of the sample in ``file_paths``.

        Returns:
            ``(image, label)`` where ``image`` is the RGB image (after
            ``transform`` if one was given) and ``label`` is the index of the
            image's class name in ``classes``.

        Raises:
            Whatever ``PIL.Image.open`` raises when the file cannot be read,
            e.g. ``UnidentifiedImageError`` for a file PIL cannot identify.
            ValueError: If the image's class name is not in ``classes``.
        """
        file_path = self.file_paths[idx]
        # Close the underlying file as soon as the pixels are converted so that
        # iterating a large dataset does not accumulate open file handles.
        with Image.open(file_path) as img:
            image = img.convert("RGB")
        label = self.classes.index(self._class_name(file_path))

        if self.transform:
            image = self.transform(image)
        return image, label

Load Training and Testing Splits:

In [4]:
def load_split(file_path, dataset_path):
    """Turn a split listing file into a list of image paths.

    Each non-blank line of the listing is stripped of surrounding whitespace,
    suffixed with ``".jpg"`` and joined onto ``dataset_path``.

    Args:
        file_path: Path of the text file listing one entry per line.
        dataset_path: Directory the entries are relative to.

    Returns:
        List of image paths, in the order the entries appear in the file.
        Blank / whitespace-only lines are skipped so they do not turn into a
        bogus ``<dataset_path>/.jpg`` entry.

    Raises:
        OSError: If ``file_path`` cannot be opened.
    """
    with open(file_path, 'r') as f:
        # Iterate the file lazily instead of materialising every line first.
        entries = (line.strip() for line in f)
        return [os.path.join(dataset_path, entry + ".jpg") for entry in entries if entry]

# Dataset locations (relative paths, resolved against the working directory)
dataset_path = "food-101/food-101/images"
meta_path = "food-101/food-101/meta"

# Expand the train/test split listings under meta_path into image paths
train_list_path = os.path.join(meta_path, "train.txt")
test_list_path = os.path.join(meta_path, "test.txt")
train_files = load_split(train_list_path, dataset_path)
test_files = load_split(test_list_path, dataset_path)

Transformations:

In [5]:
# Settings shared by the training and test pipelines.
IMAGE_SIZE = (224, 224)
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, random horizontal flip, tensor conversion, then
# per-channel normalisation.
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=CHANNEL_MEAN, std=CHANNEL_STD),
])

# Test pipeline: the same steps minus the random flip, so it is deterministic.
test_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=CHANNEL_MEAN, std=CHANNEL_STD),
])

Data Loaders:

In [6]:
# Wrap each split's file list in a dataset with its matching transform.
train_dataset = FoodDataset(file_paths=train_files, transform=train_transform)
test_dataset = FoodDataset(file_paths=test_files, transform=test_transform)

# Both loaders use the same batch size; only the training loader shuffles.
BATCH_SIZE = 32
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Defining the Model:

In [7]:
# Pick the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')

# Load Pretrained ResNet
model = models.resnet18(pretrained=True)

# Replace the final fully connected layer with one that has an output per
# class found in the training dataset.
num_classes = len(train_dataset.classes)
head_in_features = model.fc.in_features
model.fc = nn.Linear(head_in_features, num_classes)

model = model.to(device)



Loss and Optimizer:

In [8]:
# Cross-entropy loss, applied to the model outputs and the integer labels.
criterion = nn.CrossEntropyLoss()

# Adam receives every parameter of the model.
learning_rate = 0.001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

Training Loop:

In [9]:
def train(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    The model is switched to training mode and, for every batch, gradients
    are reset, the loss is back-propagated and the optimizer takes a step.

    Args:
        model: Module to optimise.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Callable computing the loss from ``(outputs, labels)``.
        optimizer: Optimizer that updates the model's parameters.
        device: Device each batch is moved to before the forward pass.

    Returns:
        The sum of the per-batch loss values divided by ``len(dataloader)``.

    Raises:
        ZeroDivisionError: If ``len(dataloader)`` is 0.
    """
    model.train()
    running_loss = 0.0

    for batch_images, batch_labels in tqdm(dataloader, desc="Training"):
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Clear gradients left over from the previous step before the new backward pass.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    num_batches = len(dataloader)
    return running_loss / num_batches

Evaluation Loop:

In [10]:
def evaluate(model, dataloader, criterion, device):
    """Score ``model`` on ``dataloader`` without computing gradients.

    The model is switched to evaluation mode. The predicted class of each
    sample is the index of the largest output along dimension 1.

    Args:
        model: Module to evaluate.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Callable computing the loss from ``(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)``: the sum of the per-batch loss values divided
        by ``len(dataloader)``, and the fraction of samples whose prediction
        equals its label.

    Raises:
        ZeroDivisionError: If the dataloader yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    with torch.no_grad():
        for batch_images, batch_labels in tqdm(dataloader, desc="Evaluating"):
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            outputs = model(batch_images)
            loss_sum += criterion(outputs, batch_labels).item()

            # Predicted class = index of the largest output per sample.
            preds = torch.max(outputs, 1).indices
            num_correct += torch.eq(preds, batch_labels).sum().item()
            num_seen += batch_labels.shape[0]

    accuracy = num_correct / num_seen
    mean_loss = loss_sum / len(dataloader)
    return mean_loss, accuracy

Main Script:

In [11]:
num_epochs = 10
best_accuracy = 0.0

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    # One pass over the training loader, then score the model on the test loader.
    train_loss = train(model, train_loader, criterion, optimizer, device)
    val_loss, val_accuracy = evaluate(model, test_loader, criterion, device)

    print(f"Train Loss: {train_loss:.4f}")
    print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

    # Only checkpoint when this epoch strictly beats the best accuracy so far.
    if not (val_accuracy > best_accuracy):
        continue

    # Save the best model
    best_accuracy = val_accuracy
    best_model_info = dict(
        epoch=epoch + 1,  # 1-based epoch number
        model_state_dict=model.state_dict(),
        accuracy=val_accuracy,
    )
    torch.save(best_model_info, "best_model.pth")
    print(f"Best model saved at epoch {epoch + 1} with accuracy: {val_accuracy:.4f}")


print("Training Complete.")

Epoch 1/10


Training: 100%|██████████| 2368/2368 [2:41:32<00:00,  4.09s/it]  
Evaluating: 100%|██████████| 790/790 [32:01<00:00,  2.43s/it]


Train Loss: 2.6837
Validation Loss: 1.9606, Accuracy: 0.4939
Model saved!
Epoch 2/10


Training:  25%|██▌       | 595/2368 [35:17<1:45:10,  3.56s/it]


UnidentifiedImageError: cannot identify image file 'food-101/food-101/images/baby_back_ribs/3485878.jpg'

Load the best model for inference. Classify a sample image:

In [None]:
#DISREGARD THIS CODE!!!!!


# Load best model
# The training loop saves a dict with the weights stored under
# 'model_state_dict' (next to 'epoch' and 'accuracy'), so the state dict has to
# be unwrapped before it is handed to load_state_dict. map_location lets a
# checkpoint written on one device be restored on the current `device`.
checkpoint = torch.load("best_model.pth", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

# Perform inference
model.eval()
sample_image_path = "path/to/sample.jpg"
# Close the image file once its pixels have been converted.
with Image.open(sample_image_path) as sample_file:
    image = sample_file.convert("RGB")
# Apply the test-time transform and add a leading batch dimension.
image = test_transform(image).unsqueeze(0).to(device)

with torch.no_grad():
    outputs = model(image)
    # Predicted class = index of the largest output.
    _, predicted = torch.max(outputs, 1)

print(f"Predicted Class: {train_dataset.classes[predicted.item()]}")