In [2]:
import torch
print("CUDA Available:", torch.cuda.is_available())
print("GPU Name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU detected")


CUDA Available: True
GPU Name: NVIDIA GeForce RTX 2060


In [3]:
pip install torch torchvision scikit-learn umap-learn hdbscan numpy tqdm Pillow

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import f1_score
from PIL import Image
import umap
import hdbscan
from tqdm import tqdm
import warnings
import time
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score


# --- Configuration ---
folders = ["kaggle_train/food-101/apple_pie", "kaggle_train/food-101/beef_carpaccio", "kaggle_train/food-101/beef_tartare", "kaggle_train/food-101/caesar_salad", "kaggle_train/food-101/caprese_salad", "kaggle_train/food-101/carrot_cake", "kaggle_train/food-101/cheesecake", "kaggle_train/food-101/club_sandwich", "kaggle_train/food-101/creme_brulee", "kaggle_train/food-101/croque_madame", "kaggle_train/food-101/cup_cakes", "kaggle_train/food-101/donuts", "kaggle_train/food-101/escargots", "kaggle_train/food-101/hamburger", "kaggle_train/food-101/hot_and_sour_soup", "kaggle_train/food-101/hummus", "kaggle_train/food-101/miso_soup", "kaggle_train/food-101/oysters", "kaggle_train/food-101/paella", "kaggle_train/food-101/pho", "kaggle_train/food-101/pork_chop", "kaggle_train/food-101/ramen", "kaggle_train/food-101/samosa", "kaggle_train/food-101/sashimi", "kaggle_train/food-101/shrimp_and_grits", "kaggle_train/food-101/spaghetti_bolognese", "kaggle_train/food-101/strawberry_shortcake", "kaggle_train/food-101/tacos", "kaggle_train/food-101/takoyaki", "kaggle_train/food-101/tiramisu"]
num_classes = len(folders)
img_size = 224 # EfficientNet-B0 input size
batch_size = 16 # Adjust based on your CPU RAM. Lower if you run out of memory.
learning_rate = 0.001
num_epochs = 10   # *** CRITICAL: Fine-tuning on CPU is VERY SLOW. Start with few epochs (1-3). ***
test_split_ratio = 0.2
random_state = 782 # for reproducibility

# --- Device Setup ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if device.type == 'cpu':
    warnings.warn("GPU not available, running on CPU. Fine-tuning and prediction will be slower.")


# --- 1. Data Loading and Preparation ---

def get_image_paths_and_labels(folder_list):
    image_paths = []
    labels = []
    label_map = {folder_path: i for i, folder_path in enumerate(folder_list)}
    for i, folder_path in enumerate(folder_list):
        if not os.path.isdir(folder_path):
             warnings.warn(f"Folder not found: {folder_path}. Skipping.")
             continue
        print(f"Loading images from: {os.path.basename(folder_path)} (Class {i})")
        try:
            for filename in tqdm(os.listdir(folder_path)):
                if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
                    image_paths.append(os.path.join(folder_path, filename))
                    labels.append(i)
        except Exception as e:
            print(f"Error reading folder {folder_path}: {e}")
    print(f"Found {len(image_paths)} images across {len(set(labels))} classes.")
    return image_paths, labels, label_map

all_image_paths, all_labels, label_map = get_image_paths_and_labels(folders)

if not all_image_paths:
    raise ValueError("No images found. Please check your 'folders' paths.")

# Split data
train_paths, test_paths, train_labels, test_labels = train_test_split(
    all_image_paths,
    all_labels,
    test_size=test_split_ratio,
    random_state=random_state,
    stratify=all_labels # Ensure proportional representation of classes
)

print(f"Training samples: {len(train_paths)}, Testing samples: {len(test_paths)}")

# Define transformations
# EfficientNet expects specific normalization
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(img_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

test_transform = transforms.Compose([
    transforms.Resize(256), # Resize slightly larger first
    transforms.CenterCrop(img_size),
    transforms.ToTensor(),
    normalize,
])

# Custom Dataset
class FoodDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        try:
            # Ensure image is loaded in RGB format
            image = Image.open(img_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
        except Exception as e:
            print(f"Warning: Error loading image {img_path}: {e}. Returning dummy data.")
            # Return dummy data of the correct shape to avoid crashing the DataLoader
            image = torch.zeros((3, img_size, img_size))
            label = -1 # Indicate an error
        return image, label

train_dataset = FoodDataset(train_paths, train_labels, transform=train_transform)
test_dataset = FoodDataset(test_paths, test_labels, transform=test_transform)

# Filter out potential dummy data introduced by loading errors before creating DataLoader
train_dataset.image_paths = [p for i, p in enumerate(train_dataset.image_paths) if train_dataset.labels[i] != -1]
train_dataset.labels = [l for l in train_dataset.labels if l != -1]
test_dataset.image_paths = [p for i, p in enumerate(test_dataset.image_paths) if test_dataset.labels[i] != -1]
test_dataset.labels = [l for l in test_dataset.labels if l != -1]
# Need to update the actual test_labels list used later for evaluation
test_labels = test_dataset.labels # Update test_labels after filtering

print(f"Filtered Training samples: {len(train_dataset)}, Filtered Testing samples: {len(test_dataset)}")

num_workers = 4 if device.type == 'cuda' else 0
print(f"Using {num_workers} workers for DataLoaders.")
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

# --- 2. Model Selection and Modification ---

print("Loading pre-trained EfficientNet-B0 model...")
# Choose EfficientNet-B0: Smallest version, best for CPU
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)

# Freeze all parameters initially
for param in model.parameters():
    param.requires_grad = False

# Identify parameters to unfreeze (last layers)
# Unfreeze the final classifier layer and the last convolutional block (features[-1])
num_ftrs = model.classifier[1].in_features # Get input features of the original classifier
layers_to_unfreeze = [model.classifier]
if hasattr(model, 'features') and len(model.features) > 0:
     layers_to_unfreeze.append(model.features[-1]) # Unfreeze last feature block

print("Unfreezing layers:")
for layer in layers_to_unfreeze:
     print(f"  - {layer.__class__.__name__}")
     for param in layer.parameters():
          param.requires_grad = True

# Replace the final classifier layer for our 30 classes
model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True), # Standard dropout for EfficientNet
    nn.Linear(num_ftrs, num_classes)
)
# Ensure the *new* classifier parameters require gradients
for param in model.classifier.parameters():
     param.requires_grad = True

model = model.to(device)

# Count trainable parameters
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")






  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda
Loading images from: apple_pie (Class 0)


100%|██████████| 700/700 [00:00<00:00, 233591.60it/s]


Loading images from: beef_carpaccio (Class 1)


100%|██████████| 700/700 [00:00<00:00, 139990.12it/s]


Loading images from: beef_tartare (Class 2)


100%|██████████| 700/700 [00:00<00:00, 174908.42it/s]


Loading images from: caesar_salad (Class 3)


100%|██████████| 700/700 [00:00<00:00, 174627.54it/s]


Loading images from: caprese_salad (Class 4)


100%|██████████| 700/700 [00:00<00:00, 176136.11it/s]


Loading images from: carrot_cake (Class 5)


100%|██████████| 700/700 [00:00<00:00, 175368.10it/s]


Loading images from: cheesecake (Class 6)


100%|██████████| 700/700 [00:00<00:00, 139950.08it/s]


Loading images from: club_sandwich (Class 7)


100%|██████████| 700/700 [00:00<00:00, 175117.07it/s]


Loading images from: creme_brulee (Class 8)


100%|██████████| 700/700 [00:00<00:00, 236889.85it/s]


Loading images from: croque_madame (Class 9)


100%|██████████| 700/700 [00:00<00:00, 152758.21it/s]


Loading images from: cup_cakes (Class 10)


100%|██████████| 700/700 [00:00<00:00, 70011.75it/s]


Loading images from: donuts (Class 11)


100%|██████████| 700/700 [00:00<00:00, 77779.29it/s]


Loading images from: escargots (Class 12)


100%|██████████| 700/700 [00:00<00:00, 140230.83it/s]


Loading images from: hamburger (Class 13)


100%|██████████| 700/700 [00:00<00:00, 174440.78it/s]


Loading images from: hot_and_sour_soup (Class 14)


100%|██████████| 700/700 [00:00<00:00, 173862.31it/s]


Loading images from: hummus (Class 15)


100%|██████████| 700/700 [00:00<00:00, 350652.43it/s]


Loading images from: miso_soup (Class 16)


100%|██████████| 700/700 [00:00<00:00, 233498.71it/s]


Loading images from: oysters (Class 17)


100%|██████████| 700/700 [00:00<00:00, 231565.01it/s]


Loading images from: paella (Class 18)


100%|██████████| 700/700 [00:00<00:00, 174960.54it/s]


Loading images from: pho (Class 19)


100%|██████████| 700/700 [00:00<00:00, 175179.76it/s]


Loading images from: pork_chop (Class 20)


100%|██████████| 700/700 [00:00<00:00, 174752.26it/s]


Loading images from: ramen (Class 21)


100%|██████████| 700/700 [00:00<00:00, 139990.12it/s]


Loading images from: samosa (Class 22)


100%|██████████| 700/700 [00:00<00:00, 174970.97it/s]


Loading images from: sashimi (Class 23)


100%|██████████| 700/700 [00:00<00:00, 140010.15it/s]


Loading images from: shrimp_and_grits (Class 24)


100%|██████████| 700/700 [00:00<00:00, 233461.58it/s]


Loading images from: spaghetti_bolognese (Class 25)


100%|██████████| 700/700 [00:00<00:00, 174627.54it/s]


Loading images from: strawberry_shortcake (Class 26)


100%|██████████| 700/700 [00:00<00:00, 174991.82it/s]


Loading images from: tacos (Class 27)


100%|██████████| 700/700 [00:00<00:00, 174970.97it/s]


Loading images from: takoyaki (Class 28)


100%|██████████| 700/700 [00:00<00:00, 175263.42it/s]


Loading images from: tiramisu (Class 29)


100%|██████████| 700/700 [00:00<00:00, 175096.18it/s]


Found 21000 images across 30 classes.
Training samples: 16800, Testing samples: 4200
Filtered Training samples: 16800, Filtered Testing samples: 4200
Using 4 workers for DataLoaders.
Loading pre-trained EfficientNet-B0 model...
Unfreezing layers:
  - Sequential
  - Conv2dNormActivation
Total parameters: 4,045,978
Trainable parameters: 450,590


In [None]:

# --- 3. Fine-tuning ---

criterion = nn.CrossEntropyLoss().to(device)
print(f"Model on: {next(model.parameters()).device}")  # Should print 'cuda'

# Filter parameters for the optimizer to only include those that require gradients
optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate)

print(f"\nStarting fine-tuning for {num_epochs} epochs on {device}...")

start_time = time.time()
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)
    scaler = torch.amp.GradScaler('cuda')
    for inputs, labels in progress_bar:
        inputs, labels = inputs.to(device), labels.to(device)

        # Skip batches with errors (label == -1)
        valid_indices = labels != -1
        if not torch.any(valid_indices):
            continue
        inputs = inputs[valid_indices]
        labels = labels[valid_indices]

        optimizer.zero_grad()
        with torch.amp.autocast('cuda'):  # Enables mixed precision
            outputs = model(inputs)
            loss = criterion(outputs, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total_samples += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()
        progress_bar.set_postfix(loss=loss.item())


    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f} - Accuracy: {epoch_acc:.4f}")

end_time = time.time()
print(f"Fine-tuning finished in {(end_time - start_time)/60:.2f} minutes.")

# --- 4. Feature Extraction ---

print("\nExtracting features from the fine-tuned model using test data...")

# We want features before the final classification layer.
# Modify the model to output features from the layer before the classifier.
# This is typically the output of the AdaptiveAvgPool layer in EfficientNet.
feature_extractor = nn.Sequential(*list(model.children())[:-1]) # Select all layers except the last one (classifier)
feature_extractor.to(device)
feature_extractor.eval() # Set to evaluation mode

all_features = []
all_test_labels = [] # Store labels corresponding to extracted features

with torch.no_grad():
    for inputs, labels in tqdm(test_loader, desc="Extracting Features"):
        inputs, labels = inputs.to(device), labels.to(device)

        # Skip batches with errors (label == -1)
        valid_indices = labels != -1
        if not torch.any(valid_indices):
            continue
        inputs = inputs[valid_indices]
        labels = labels[valid_indices]

        features = feature_extractor(inputs)
        # The output of avgpool might need flattening
        features = torch.flatten(features, 1)
        all_features.append(features.cpu().numpy())
        all_test_labels.append(labels.cpu().numpy())

# Concatenate features and labels from all batches
features_np = np.concatenate(all_features, axis=0)
# Make sure the labels used here correspond exactly to the extracted features
true_labels_np = np.concatenate(all_test_labels, axis=0)

print(f"Extracted features shape: {features_np.shape}")
print(f"Corresponding true labels shape: {true_labels_np.shape}")
# Check if shapes match the filtered test dataset size
if features_np.shape[0] != len(test_dataset):
     warnings.warn(f"Mismatch in feature count ({features_np.shape[0]}) and filtered test set size ({len(test_dataset)}). Check for batch drop issues.")

Model on: cuda:0

Starting fine-tuning for 10 epochs on cuda...


Epoch 1/10:   0%|          | 0/1050 [00:00<?, ?it/s]

In [None]:
print("\nPerforming classification prediction on test data...")
model.eval() # Set model to evaluation mode

all_preds = []
all_true = []

with torch.no_grad(): # Disable gradient calculations for inference
    for inputs, labels in tqdm(test_loader, desc="Predicting"):
        # ***** Move data to GPU *****
        inputs, labels = inputs.to(device), labels.to(device)
        # ***************************

        # Skip batches with errors (label == -1)
        valid_indices = labels != -1
        if not torch.any(valid_indices):
            continue
        inputs = inputs[valid_indices]
        labels = labels[valid_indices]

        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)

        # Move predictions and labels back to CPU for collection
        all_preds.append(predicted.cpu().numpy())
        all_true.append(labels.cpu().numpy())

# Concatenate all predictions and true labels
predictions_np = np.concatenate(all_preds)
true_labels_np = np.concatenate(all_true) # Use the labels collected directly from the test loader loop

# Ensure the collected true labels match the filtered dataset labels length
if len(true_labels_np) != len(test_labels):
     warnings.warn(f"Mismatch in collected true labels ({len(true_labels_np)}) and filtered test set size ({len(test_labels)}). Check data loading/filtering.")
     # If a mismatch occurs, using actual_test_labels_list might be safer if prediction loop didn't skip batches
     if len(predictions_np) == len(test_labels):
         print("Adjusting true labels to match filtered test set size for F1 score.")
         true_labels_np = np.array(test_labels)
     else:
         raise ValueError("Cannot resolve label mismatch for F1 score calculation.")


# Calculate F1 score
# 'weighted' averages the F1 score for each class, weighted by support (number of true instances per class)
# This is generally recommended for multi-class classification, especially with potential imbalance.
f1_weighted = f1_score(true_labels_np, predictions_np, average='weighted')
f1_macro = f1_score(true_labels_np, predictions_np, average='macro') # Unweighted average

print("\n--- Classification Results ---")
print(f"Weighted F1 Score on Test Set: {f1_weighted:.4f}")
print(f"Macro F1 Score on Test Set: {f1_macro:.4f}") # Macro gives equal weight to each class
