In [60]:
import os
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt

Setup Custom Dataset Class (Normal):

In [63]:
class FoodDataset(Dataset):
    """Image-classification dataset built from a flat list of image paths.

    The label of an image is the name of the directory that directly contains
    it; label indices follow the sorted order of those directory names.

    Args:
        file_paths: Sequence of image file paths.
        transform: Optional callable applied to the loaded RGB image before it
            is returned.

    Attributes:
        classes: Sorted list of the distinct class (directory) names.
        class_to_idx: Mapping from class name to its index in ``classes``.
    """

    def __init__(self, file_paths, transform=None):
        self.file_paths = file_paths
        self.transform = transform
        self.classes = sorted({self._class_name(fp) for fp in file_paths})
        # Precomputed lookup so __getitem__ does not scan the class list per sample.
        self.class_to_idx = {name: idx for idx, name in enumerate(self.classes)}

    @staticmethod
    def _class_name(file_path):
        """Return the name of the directory that directly contains ``file_path``."""
        # dirname/basename honour the platform's path separators, unlike split('/').
        return os.path.basename(os.path.dirname(file_path))

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        file_path = self.file_paths[idx]
        # convert() yields an independent in-memory image, so the file handle
        # can be released as soon as the with-block exits.
        with Image.open(file_path) as source_image:
            image = source_image.convert("RGB")
        label = self.class_to_idx[self._class_name(file_path)]

        if self.transform:
            image = self.transform(image)
        return image, label

Load Training and Testing Splits:

In [65]:
def load_split(file_path, dataset_path):
    """Read a split listing and return the image paths it names.

    Each non-blank line of ``file_path`` is stripped of surrounding whitespace,
    suffixed with ``".jpg"`` and joined onto ``dataset_path``.

    Args:
        file_path: Path of the text file with one image identifier per line.
        dataset_path: Directory the identifiers are relative to.

    Returns:
        List of image paths, in the order they appear in the file.
    """
    with open(file_path, 'r') as f:
        entries = (line.strip() for line in f)
        # Skip blank lines so they do not turn into a bogus "<dataset_path>/.jpg" entry.
        return [os.path.join(dataset_path, entry + ".jpg") for entry in entries if entry]

# Dataset locations, relative to the notebook's working directory
dataset_path = "food-101/images"
meta_path = "food-101/meta"

# Resolve both split listings (train first, then test) into image path lists.
train_files, test_files = (
    load_split(os.path.join(meta_path, listing_name), dataset_path)
    for listing_name in ("train.txt", "test.txt")
)

Transformations:

In [66]:
# Per-channel statistics handed to Normalize by both pipelines.
# NOTE(review): these look like the ImageNet statistics torchvision documents
# for its pretrained models — confirm against the torchvision docs.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training pipeline: fixed-size resize plus a random horizontal flip.
train_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=norm_mean, std=norm_std),
    ]
)

# Evaluation pipeline: same preprocessing without the random flip.
test_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=norm_mean, std=norm_std),
    ]
)

Data Loaders:

In [67]:
# Pair each split's file list with its transform pipeline.
train_dataset = FoodDataset(file_paths=train_files, transform=train_transform)
test_dataset = FoodDataset(file_paths=test_files, transform=test_transform)

# Both loaders share the batch size; only the training batches are shuffled.
# NOTE(review): num_workers is left at the DataLoader default — consider
# raising it if image loading dominates the epoch time.
loader_options = dict(batch_size=32)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

Defining the Model:

In [68]:
# Load a pretrained ResNet-18 and replace its final layer with one sized to this dataset
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# `pretrained=True` is deprecated in recent torchvision releases in favour of
# the `weights=` argument; fall back to the old flag on versions without the enum.
if hasattr(models, "ResNet18_Weights"):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)

# One output per class discovered in the training file list.
num_classes = len(train_dataset.classes)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

Loss and Optimizer:

In [69]:
# Classification loss computed from the model outputs and integer class labels.
criterion = nn.CrossEntropyLoss()
# Adam updates every parameter of the model, not only the replaced final layer.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

Training Loop:

In [70]:
def train(model, dataloader, criterion, optimizer, device):
    """Run one training pass over ``dataloader`` and return the mean batch loss.

    Args:
        model: Module to optimize; it is switched to training mode.
        dataloader: Iterable yielding ``(images, labels)`` batches. It does not
            need to support ``len()``.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Mean of the per-batch loss values as a float, or ``0.0`` when
        ``dataloader`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0

    for images, labels in tqdm(dataloader, desc="Training"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    # Batches are counted in the loop (rather than via len(dataloader)) so that
    # unsized loaders work and an empty loader does not divide by zero.
    return total_loss / num_batches if num_batches else 0.0

Evaluation Loop:

In [71]:
def evaluate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(images, labels)`` batches. It does not
            need to support ``len()``.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_batch_loss, accuracy)``. ``accuracy`` is the fraction of
        samples whose highest-scoring output index equals the label. Each value
        is ``0.0`` when there is nothing to average over.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc="Evaluating"):
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            total_loss += criterion(outputs, labels).item()
            num_batches += 1

            # Predicted class = index of the largest output along dim 1.
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    # Guard both averages so an empty loader returns zeros instead of raising.
    mean_loss = total_loss / num_batches if num_batches else 0.0
    accuracy = correct / total if total else 0.0
    return mean_loss, accuracy

Main Script (with Checkpoints):

In [72]:
# Directory used by save_checkpoint/load_checkpoint below; created if it is missing.
checkpoint_dir = "checkpoints"
os.makedirs(name=checkpoint_dir, exist_ok=True)

def save_checkpoint(epoch, model, optimizer, loss, accuracy, filename="checkpoint.pth"):
    """Write the training state to ``<checkpoint_dir>/<filename>``.

    Args:
        epoch: Epoch index stored under ``'epoch'``.
        model: Module whose ``state_dict()`` is stored.
        optimizer: Optimizer whose ``state_dict()`` is stored.
        loss: Loss value stored under ``'loss'``.
        accuracy: Accuracy value stored under ``'accuracy'``.
        filename: File name inside the module-level ``checkpoint_dir``.
    """
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
        'accuracy': accuracy
    }
    checkpoint_path = os.path.join(checkpoint_dir, filename)
    # Recreate the target directory if it has been removed since setup.
    os.makedirs(os.path.dirname(checkpoint_path) or ".", exist_ok=True)

    # Write to a sibling temp file and rename it into place, so an interrupted
    # save cannot leave a truncated file under the final name.
    tmp_path = checkpoint_path + ".tmp"
    try:
        torch.save(checkpoint, tmp_path)
        os.replace(tmp_path, checkpoint_path)
    finally:
        # Only present here if the save or the rename failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    print(f"Checkpoint saved at {checkpoint_path}")

def load_checkpoint(filename="checkpoint.pth", map_location=None):
    """Load ``<checkpoint_dir>/<filename>`` if it exists.

    Args:
        filename: File name inside the module-level ``checkpoint_dir``.
        map_location: Forwarded to ``torch.load``. When left as ``None`` and
            CUDA is unavailable, tensors are mapped to the CPU so a checkpoint
            saved on a GPU can still be loaded.

    Returns:
        The object stored in the file, or ``None`` when the file does not exist.
    """
    checkpoint_path = os.path.join(checkpoint_dir, filename)
    if not os.path.exists(checkpoint_path):
        print("No checkpoint found. Starting from scratch.")
        return None

    if map_location is None and not torch.cuda.is_available():
        map_location = "cpu"

    # NOTE(security): torch.load is pickle-based; only load checkpoint files
    # from a trusted source.
    checkpoint = torch.load(checkpoint_path, map_location=map_location)
    print(f"Checkpoint loaded from {checkpoint_path}")
    return checkpoint

# Training run with checkpointing and resume support
num_epochs = 10
best_accuracy = 0.0
start_epoch = 0

# The loop below writes "checkpoint_epoch_<N>.pth" and "best_model.pth", never
# the default "checkpoint.pth", so resume from the highest-numbered epoch file.
epoch_prefix, epoch_suffix = "checkpoint_epoch_", ".pth"
saved_epochs = [
    int(name[len(epoch_prefix):-len(epoch_suffix)])
    for name in os.listdir(checkpoint_dir)
    if name.startswith(epoch_prefix)
    and name.endswith(epoch_suffix)
    and name[len(epoch_prefix):-len(epoch_suffix)].isdecimal()
]
if saved_epochs:
    checkpoint = load_checkpoint(f"{epoch_prefix}{max(saved_epochs)}{epoch_suffix}")
else:
    # Fall back to the default file name (also reports that nothing was found).
    checkpoint = load_checkpoint()

if checkpoint:
    # Load the model and optimizer states from checkpoint
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch'] + 1  # Resume from the next epoch
    best_accuracy = checkpoint['accuracy']

    # The last epoch is not necessarily the best one: best_model.pth holds the
    # highest accuracy saved so far, so use it to avoid overwriting a better model.
    best_checkpoint = load_checkpoint("best_model.pth")
    if best_checkpoint:
        best_accuracy = max(best_accuracy, best_checkpoint['accuracy'])
    del best_checkpoint  # only its accuracy was needed

for epoch in range(start_epoch, num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    train_loss = train(model, train_loader, criterion, optimizer, device)
    val_loss, val_accuracy = evaluate(model, test_loader, criterion, device)

    print(f"Train Loss: {train_loss:.4f}")
    print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

    # Save the best model
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        save_checkpoint(epoch, model, optimizer, val_loss, val_accuracy, filename="best_model.pth")

    # Save a checkpoint after every epoch; these files are what resume picks up
    save_checkpoint(epoch, model, optimizer, val_loss, val_accuracy, filename=f"checkpoint_epoch_{epoch+1}.pth")

print("Training Complete.")

No checkpoint found. Starting from scratch.
Epoch 1/10


Training: 100%|██████████| 2368/2368 [2:18:39<00:00,  3.51s/it]  
Evaluating: 100%|██████████| 790/790 [15:17<00:00,  1.16s/it]


Train Loss: 2.6589
Validation Loss: 2.0902, Accuracy: 0.4764
Checkpoint saved at checkpoints/best_model.pth
Checkpoint saved at checkpoints/checkpoint_epoch_1.pth
Epoch 2/10


Training: 100%|██████████| 2368/2368 [2:02:00<00:00,  3.09s/it]  
Evaluating: 100%|██████████| 790/790 [15:56<00:00,  1.21s/it]


Train Loss: 1.8540
Validation Loss: 1.5668, Accuracy: 0.5811
Checkpoint saved at checkpoints/best_model.pth
Checkpoint saved at checkpoints/checkpoint_epoch_2.pth
Epoch 3/10


Training: 100%|██████████| 2368/2368 [2:05:11<00:00,  3.17s/it]  
Evaluating: 100%|██████████| 790/790 [15:52<00:00,  1.21s/it]


Train Loss: 1.5223
Validation Loss: 1.4937, Accuracy: 0.6128
Checkpoint saved at checkpoints/best_model.pth
Checkpoint saved at checkpoints/checkpoint_epoch_3.pth
Epoch 4/10


Training: 100%|██████████| 2368/2368 [1:52:19<00:00,  2.85s/it]
Evaluating: 100%|██████████| 790/790 [13:07<00:00,  1.00it/s]


Train Loss: 1.2923
Validation Loss: 1.2698, Accuracy: 0.6604
Checkpoint saved at checkpoints/best_model.pth
Checkpoint saved at checkpoints/checkpoint_epoch_4.pth
Epoch 5/10


Training: 100%|██████████| 2368/2368 [1:37:02<00:00,  2.46s/it]
Evaluating: 100%|██████████| 790/790 [12:53<00:00,  1.02it/s]


Train Loss: 1.0916
Validation Loss: 1.3141, Accuracy: 0.6565
Checkpoint saved at checkpoints/checkpoint_epoch_5.pth
Epoch 6/10


Training: 100%|██████████| 2368/2368 [1:36:22<00:00,  2.44s/it]
Evaluating: 100%|██████████| 790/790 [13:01<00:00,  1.01it/s]


Train Loss: 0.9286
Validation Loss: 1.2713, Accuracy: 0.6710
Checkpoint saved at checkpoints/best_model.pth
Checkpoint saved at checkpoints/checkpoint_epoch_6.pth
Epoch 7/10


Training: 100%|██████████| 2368/2368 [1:35:41<00:00,  2.42s/it]
Evaluating: 100%|██████████| 790/790 [12:39<00:00,  1.04it/s]


Train Loss: 0.7856
Validation Loss: 1.2670, Accuracy: 0.6831
Checkpoint saved at checkpoints/best_model.pth
Checkpoint saved at checkpoints/checkpoint_epoch_7.pth
Epoch 8/10


Training: 100%|██████████| 2368/2368 [1:31:53<00:00,  2.33s/it]
Evaluating: 100%|██████████| 790/790 [11:22<00:00,  1.16it/s]


Train Loss: 0.6590
Validation Loss: 1.3343, Accuracy: 0.6751
Checkpoint saved at checkpoints/checkpoint_epoch_8.pth
Epoch 9/10


Training: 100%|██████████| 2368/2368 [1:22:07<00:00,  2.08s/it]
Evaluating: 100%|██████████| 790/790 [11:17<00:00,  1.17it/s]


Train Loss: 0.5619
Validation Loss: 1.3852, Accuracy: 0.6724
Checkpoint saved at checkpoints/checkpoint_epoch_9.pth
Epoch 10/10


Training: 100%|██████████| 2368/2368 [1:22:44<00:00,  2.10s/it]
Evaluating: 100%|██████████| 790/790 [11:25<00:00,  1.15it/s]


Train Loss: 0.4756
Validation Loss: 1.4353, Accuracy: 0.6794
Checkpoint saved at checkpoints/checkpoint_epoch_10.pth
Training Complete.
