In [1]:
import torch
import logging
import torchvision
import torchvision.transforms as transforms
from torchvision import models, datasets
from torch.utils.data import DataLoader
from torch import nn, optim
import os

Set Up Dataset and DataLoader
We'll set up the dataset with transformations that preprocess the images for MobileNetV3, which typically expects input images of size 224x224 pixels. We'll also normalize each image channel with mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225], the standard values for models pre-trained on ImageNet.

In [3]:
# Root folder of the image dataset, laid out with one sub-directory per class
# (the layout ImageFolder reads). Set the INGREDIENTS_DATA_DIR environment
# variable to point at your own copy instead of editing this cell; the
# original local path is kept as the fallback so existing setups still work.
data_dir = os.environ.get(
  'INGREDIENTS_DATA_DIR',
  '/Users/bestc/Code Projects/COMO AI/data/processed/Ingredients Images',
)

# Preprocessing applied to every image before it reaches the model.
transform = transforms.Compose([
  # Force a fixed 224x224 input; with a (h, w) tuple the aspect ratio is not preserved.
  transforms.Resize((224, 224)),
  transforms.ToTensor(),
  # Per-channel normalization with the ImageNet statistics described above.
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Class labels are derived from the sub-directory names under data_dir.
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

# Shuffled mini-batches of 32 images, loaded by 4 worker processes.
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)

Load and Modify MobileNetV3
We'll load a pre-trained MobileNetV3 model and modify its classifier to match the number of classes in your dataset (73 in your case).

In [4]:
from torchvision.models import mobilenet_v3_large, MobileNet_V3_Large_Weights

# Load the pre-trained model with the new syntax
weights = MobileNet_V3_Large_Weights.IMAGENET1K_V1
model = mobilenet_v3_large(weights=weights)

# Size the new head from the dataset itself rather than a hard-coded count, so
# the output width always matches the labels ImageFolder produced (one class
# per sub-directory) even if classes are added or removed later.
num_classes = len(dataset.classes)

# Modify the classifier: replace the final Linear layer (index 3) with a
# freshly initialised one that keeps the same input width but emits one logit
# per dataset class.
model.classifier[3] = nn.Linear(model.classifier[3].in_features, num_classes)

# Move the model to the device (GPU or CPU)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# nn.Module.to() moves the parameters in place and returns the module, so
# leaving it as the last expression also displays the architecture below.
model.to(device)

MobileNetV3(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bi

Define Loss Function and Optimizer
We'll use Cross-Entropy Loss and an Adam optimizer.

In [5]:
# Adam updates every parameter of the network (pre-trained backbone and the
# new classification head alike) with a single learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Multi-class objective applied to the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()

Training Loop
Now, we'll write a simple training loop. Note that for brevity, this example doesn't include validation or saving the model. You should add those according to your project needs.

In [6]:
# Timestamped INFO-level logging for the per-epoch summary line.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

epochs = 10
for epoch in range(epochs):
  # Be explicit about training mode: in a notebook another cell may have left
  # the model in eval(), which would silently disable dropout and freeze the
  # batch-norm running statistics.
  model.train()

  # Per-epoch accumulators: summed (not averaged) loss, and prediction counts.
  running_loss = 0.0
  correct_predictions = 0
  total_predictions = 0

  for inputs, labels in dataloader:
    inputs, labels = inputs.to(device), labels.to(device)
    samples_in_batch = labels.size(0)

    # Clear gradients left over from the previous step before backprop.
    optimizer.zero_grad()

    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # criterion returns the mean loss over the batch; weight it by the batch
    # size so a smaller final batch does not skew the epoch average.
    running_loss += loss.item() * samples_in_batch
    # Predicted class = index of the largest logit for each sample.
    predicted = outputs.argmax(dim=1)
    total_predictions += samples_in_batch
    correct_predictions += (predicted == labels).sum().item()

  # Both metrics are averaged over samples seen this epoch.
  epoch_loss = running_loss / total_predictions
  epoch_accuracy = (correct_predictions / total_predictions) * 100

  logging.info(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")


