In [1]:
# Imports & GPU Setup

import os
# Environment knobs written before `import torch` executes further down in this cell.
# NOTE(review): CUDA_LAUNCH_BLOCKING=1 and TORCH_USE_CUDA_DSA=1 are normally debugging aids
# that slow GPU execution — confirm they should stay on for the timed experiments.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["TORCH_USE_CUDA_DSA"] = "1"
# NOTE(review): presumably only consulted when CUDA extensions are compiled — confirm it is needed here.
os.environ["TORCH_CUDA_ARCH_LIST"] = "12.0;8.9;8.6"
# NOTE(review): caching-allocator setting; the 128 MB split size looks like a fragmentation workaround — confirm.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.models import mobilenet_v3_small, efficientnet_b0, resnet18
import torchvision.datasets as datasets
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import time
import shutil
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')


# Drop any cached GPU blocks left by a previous run of this kernel, then choose the
# compute device that every later cell reads through the global `device`.
torch.cuda.empty_cache()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("="*70)
print(f"üì¶ PyTorch: {torch.__version__}")
print(f"üî• CUDA: {torch.version.cuda}")
print(f"üíé GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")
# Device properties can only be queried when a CUDA device exists; without this
# guard the whole setup cell fails on CPU-only machines.
if torch.cuda.is_available():
    print(f"üìä Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
else:
    print("üìä Memory: n/a")
print(f"üéØ Device: {device}")
# This line only echoes the TORCH_CUDA_ARCH_LIST value set at the top of the cell;
# it does not probe the GPU's actual compute capability.
print(f"‚úÖ sm_120 Ready: {'12.0' in os.environ.get('TORCH_CUDA_ARCH_LIST', '')}")
print("="*70)



üì¶ PyTorch: 2.9.0+cu130
üî• CUDA: 13.0
üíé GPU: NVIDIA GeForce RTX 4070
üìä Memory: 12.9 GB
üéØ Device: cuda
‚úÖ sm_120 Ready: True


In [2]:
# Dataset Splitter

def split_dataset_once(raw_path=r"D:\FoodNet\Food Classification dataset", split_path=r"D:\FoodNet\data"):
    """Split a folder-per-class image dataset into train/val/test (70/15/15).

    Every sub-directory of ``raw_path`` is treated as one class. Its ``.png``,
    ``.jpg`` and ``.jpeg`` files are MOVED (not copied) into
    ``split_path/<split>/<class_name>/``, so the raw folders are emptied and a
    second call finds nothing left to split.

    Args:
        raw_path: Directory containing one sub-directory per class.
        split_path: Destination root; ``train``, ``val`` and ``test`` are
            created beneath it.

    Returns:
        None. Progress and totals are printed.

    Raises:
        FileNotFoundError: If ``raw_path`` does not exist (from ``os.listdir``).
    """
    print("üî™ Splitting FoodNet dataset...")
    split_names = ('train', 'val', 'test')
    for split_name in split_names:
        os.makedirs(os.path.join(split_path, split_name), exist_ok=True)

    class_count = 0
    total_images = 0

    for class_name in sorted(os.listdir(raw_path)):
        class_path = os.path.join(raw_path, class_name)
        if not os.path.isdir(class_path):
            continue

        # Sort so the seeded split does not depend on the directory listing order.
        images = sorted(
            f for f in os.listdir(class_path)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )
        if not images:
            continue

        class_count += 1
        total_images += len(images)

        # 70% train, 15% val, 15% test. train_test_split refuses to produce an
        # empty side, so tiny classes are handled by hand instead of crashing
        # half-way through a destructive move.
        if len(images) < 2:
            train_imgs, temp_imgs = images, []
        else:
            train_imgs, temp_imgs = train_test_split(images, test_size=0.3, random_state=42)
        if len(temp_imgs) < 2:
            val_imgs, test_imgs = temp_imgs, []
        else:
            val_imgs, test_imgs = train_test_split(temp_imgs, test_size=0.5, random_state=42)

        # Create the class directory in every split, even if a split ends up empty.
        for split_name in split_names:
            os.makedirs(os.path.join(split_path, split_name, class_name), exist_ok=True)

        # Move files
        for split_imgs, split_name in [(train_imgs, 'train'), (val_imgs, 'val'), (test_imgs, 'test')]:
            for img in split_imgs:
                src = os.path.join(class_path, img)
                dst = os.path.join(split_path, split_name, class_name, img)
                shutil.move(src, dst)

    if class_count == 0:
        # Typical cause: the dataset was already split and the raw folders are empty.
        print(f"WARNING: no class folders with images found under {raw_path} - nothing was moved.")
    else:
        print(f"‚úÖ FoodNet dataset split complete!")
    print(f"üìÅ {class_count} classes, {total_images} total images")
    # Report the destination that was actually used rather than a fixed path.
    print(f"üìÇ New structure: {split_path}\\train/, val/, test/")

# RUN THIS FIRST (ONLY ONCE)
# The splitter moves files out of the raw dataset folder, so running this cell a second
# time finds nothing left to split (the recorded output below shows 0 classes, 0 images).
split_dataset_once()


üî™ Splitting FoodNet dataset...
‚úÖ FoodNet dataset split complete!
üìÅ 0 classes, 0 total images
üìÇ New structure: D:\FoodNet\data\train/, val/, test/


In [3]:
# Data Loaders & Transforms

def get_data_loaders(data_path=r"D:\FoodNet\data", batch_size=16, num_workers=4, image_size=128):
    """Build train/val/test DataLoaders over single-channel (grayscale) images.

    Images are resized to ``image_size`` x ``image_size`` and converted to one
    channel. The training pipeline additionally applies a random horizontal
    flip, a small rotation and brightness/contrast jitter; validation and test
    images are only resized, converted and normalized.

    Args:
        data_path: Root containing ``train/``, ``val/`` and ``test/`` folders
            in ``ImageFolder`` layout (one sub-folder per class).
        batch_size: Batch size used by all three loaders.
        num_workers: Worker processes per loader.
        image_size: Side length, in pixels, of the square network input.

    Returns:
        ``(train_loader, val_loader, test_loader, class_names)`` where
        ``class_names`` is the class list of the training ``ImageFolder``.
    """
    # NOTE(review): 0.485 / 0.229 look like ImageNet red-channel statistics reused
    # for the single gray channel — confirm this is intended.
    normalize = transforms.Normalize(mean=[0.485], std=[0.229])

    train_transform = transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.Grayscale(num_output_channels=1),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),  # no saturation for grayscale
        transforms.ToTensor(),
        normalize,
    ])

    test_transform = transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
        normalize,
    ])

    # Load the pre-split datasets
    train_ds = datasets.ImageFolder(f"{data_path}/train", train_transform)
    val_ds = datasets.ImageFolder(f"{data_path}/val", test_transform)
    test_ds = datasets.ImageFolder(f"{data_path}/test", test_transform)

    # Pinned host memory only helps host-to-GPU copies; skip it when CUDA is absent.
    loader_kwargs = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=torch.cuda.is_available(),
    )
    train_loader = DataLoader(train_ds, shuffle=True, **loader_kwargs)
    val_loader = DataLoader(val_ds, shuffle=False, **loader_kwargs)
    test_loader = DataLoader(test_ds, shuffle=False, **loader_kwargs)

    print(f"üìä Dataset sizes: Train={len(train_ds)}, Val={len(val_ds)}, Test={len(test_ds)}")
    print(f"üè∑Ô∏è  Classes: {len(train_ds.classes)}")

    return train_loader, val_loader, test_loader, train_ds.classes


In [4]:
import torch
import torch.nn as nn
from torchvision.models import (
    mobilenet_v3_small,
    efficientnet_b0,
    resnet18
)

def _convert_first_conv_to_grayscale(conv: nn.Conv2d):
    """
    Convert a Conv2d layer from 3-channel RGB to 1-channel grayscale
    by averaging pretrained weights.
    """
    new_conv = nn.Conv2d(
        in_channels=1,
        out_channels=conv.out_channels,
        kernel_size=conv.kernel_size,
        stride=conv.stride,
        padding=conv.padding,
        dilation=conv.dilation,
        groups=conv.groups,
        bias=(conv.bias is not None),
        padding_mode=conv.padding_mode,
    )

    # Average RGB weights ‚Üí grayscale
    with torch.no_grad():
        new_conv.weight.copy_(conv.weight.mean(dim=1, keepdim=True))
        if conv.bias is not None:
            new_conv.bias.copy_(conv.bias)

    return new_conv


def get_model(model_name, num_classes=35, device="cuda"):
    """Build an ImageNet-pretrained backbone adapted to single-channel input.

    The first convolution is replaced by a 1-channel version (see
    ``_convert_first_conv_to_grayscale``) and the final linear layer is
    replaced by a fresh ``nn.Linear`` with ``num_classes`` outputs.

    Args:
        model_name: ``"mobilenet"`` (MobileNetV3-Small), ``"efficientnet"``
            (EfficientNet-B0) or ``"resnet"`` (ResNet-18).
        num_classes: Size of the new classification head.
        device: Device (string or ``torch.device``) the model is moved to. If
            a CUDA device is requested but CUDA is not available, the model is
            placed on the CPU instead of raising.

    Returns:
        The adapted model on the resolved device.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    print(f"üîÑ Loading {model_name} (ImageNet pretrained, grayscale)...")

    if model_name == "mobilenet":
        model = mobilenet_v3_small(weights="IMAGENET1K_V1")
        model.features[0][0] = _convert_first_conv_to_grayscale(
            model.features[0][0]
        )
        model.classifier[3] = nn.Linear(
            model.classifier[3].in_features, num_classes
        )

    elif model_name == "efficientnet":
        model = efficientnet_b0(weights="IMAGENET1K_V1")
        model.features[0][0] = _convert_first_conv_to_grayscale(
            model.features[0][0]
        )
        model.classifier[1] = nn.Linear(
            model.classifier[1].in_features, num_classes
        )

    elif model_name == "resnet":
        model = resnet18(weights="IMAGENET1K_V1")
        model.conv1 = _convert_first_conv_to_grayscale(model.conv1)
        model.fc = nn.Linear(model.fc.in_features, num_classes)

    else:
        raise ValueError("Model must be 'mobilenet', 'efficientnet', or 'resnet'")

    # Print params
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"üìà Params: {total_params/1e6:.2f}M (trainable: {trainable_params/1e6:.2f}M)")

    # The default is "cuda"; degrade to CPU rather than failing in .to() on
    # machines without a usable GPU.
    target_device = torch.device(device)
    if target_device.type == "cuda" and not torch.cuda.is_available():
        print("CUDA requested but not available - placing the model on CPU instead.")
        target_device = torch.device("cpu")

    return model.to(target_device)


In [5]:
# Training Loop (edited)

import os
from pathlib import Path
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

def train_model(model, train_loader, val_loader, epochs=10, lr=0.001, model_name="model"):
    """Train ``model`` with Adam and mixed precision, checkpointing the best epoch.

    Uses the notebook-level global ``device``. Mixed precision (float16
    autocast + gradient scaling) is enabled only when that device is CUDA.

    A checkpoint is written to ``runs/<model_name>/best_model.pt`` whenever the
    validation accuracy improves. If that file already exists when the function
    is called, model/optimizer/scaler/scheduler state, the best accuracy and
    the history are restored from it and training continues at the epoch after
    the one stored in the checkpoint. Because only the best epoch is saved,
    any epochs trained after it in a previous run are not part of the resume.

    Args:
        model: Network to train; moved to ``device``.
        train_loader: Loader yielding ``(inputs, labels)`` training batches.
        val_loader: Loader yielding ``(inputs, labels)`` validation batches.
        epochs: Total number of epochs (upper bound of the epoch index).
        lr: Initial Adam learning rate.
        model_name: Sub-directory name under ``runs/`` for the checkpoint.

    Returns:
        dict with lists ``train_loss``, ``val_loss``, ``train_acc``, ``val_acc``
        (accuracies in percent), one entry per recorded epoch.
    """
    model = model.to(device)
    use_amp = device.type == "cuda"

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
    scheduler = ReduceLROnPlateau(optimizer, mode="max", factor=0.1, patience=3)

    # torch.amp replaces the deprecated torch.cuda.amp entry points.
    scaler = torch.amp.GradScaler("cuda", enabled=use_amp)

    best_acc = 0.0
    start_epoch = 0
    history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []}

    # One consistent location per model: runs/<model_name>/best_model.pt
    run_dir = Path("runs") / model_name
    run_dir.mkdir(parents=True, exist_ok=True)
    checkpoint_path = run_dir / "best_model.pt"

    # -------------------- LOAD CHECKPOINT --------------------
    if checkpoint_path.exists():
        checkpoint = torch.load(checkpoint_path, map_location=device)

        model.load_state_dict(checkpoint["model_state_dict"])

        # Optional entries: older checkpoints may not contain all of them.
        if "optimizer_state_dict" in checkpoint:
            optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        if "scaler_state_dict" in checkpoint:
            scaler.load_state_dict(checkpoint["scaler_state_dict"])
        if "scheduler_state_dict" in checkpoint:
            scheduler.load_state_dict(checkpoint["scheduler_state_dict"])

        best_acc = float(checkpoint.get("best_val_acc", 0.0))
        history = checkpoint.get("history", history)
        start_epoch = int(checkpoint.get("epoch", -1)) + 1

        print(f"‚úÖ Resumed: {checkpoint_path} | start_epoch={start_epoch} | best_acc={best_acc:.2f}%")
    else:
        print(f"‚ö†Ô∏è No checkpoint found at {checkpoint_path}, starting fresh.")

    # -------------------- TRAIN --------------------
    for epoch in range(start_epoch, epochs):
        # Training phase
        model.train()
        train_loss, train_correct = 0.0, 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            optimizer.zero_grad(set_to_none=True)

            # AMP forward
            with torch.amp.autocast("cuda", dtype=torch.float16, enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            # AMP backward: scale the loss, step through the scaler, refresh the scale.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss += float(loss.item())
            train_correct += (outputs.argmax(dim=1) == labels).sum().item()

        # Validation phase
        model.eval()
        val_loss, val_correct = 0.0, 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)

                with torch.amp.autocast("cuda", dtype=torch.float16, enabled=use_amp):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                val_loss += float(loss.item())
                val_correct += (outputs.argmax(dim=1) == labels).sum().item()

        # Metrics: accuracy over samples, loss averaged over batches. The
        # max(1, ...) guards keep an empty loader from dividing by zero.
        train_acc = 100.0 * train_correct / max(1, len(train_loader.dataset))
        val_acc = 100.0 * val_correct / max(1, len(val_loader.dataset))
        train_loss /= max(1, len(train_loader))
        val_loss /= max(1, len(val_loader))

        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['train_acc'].append(train_acc)
        history['val_acc'].append(val_acc)

        print(f"Epoch {epoch+1:2d}/{epochs}: Train={train_acc:6.2f}% | Val={val_acc:6.2f}% | "
              f"Loss T/V={train_loss:.3f}/{val_loss:.3f}")

        # Step the scheduler (mode='max', so it is fed val_acc) BEFORE saving, so
        # the checkpoint holds the learning rate and plateau counters that the
        # next epoch would start with.
        scheduler.step(val_acc)

        # Save best model
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save({
                "epoch": epoch,
                "model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "scaler_state_dict": scaler.state_dict(),
                "scheduler_state_dict": scheduler.state_dict(),
                "best_val_acc": best_acc,
                "history": history
            }, checkpoint_path)
            print(f"üíæ Saved best -> {checkpoint_path} (Val={best_acc:.2f}%)")

    print(f"üèÜ Best Val Accuracy: {best_acc:.2f}%")
    return history


In [6]:
# Evaluation & Matrices

def evaluate_model(model, test_loader, class_names, model_name="model"):
    """Score ``model`` on ``test_loader`` and write a report plus confusion matrix.

    Note: a later cell defines ``evaluate_model`` again; once that cell has run,
    this version is no longer the one bound to the name.

    Uses the notebook-level global ``device``. Writes
    ``runs/<model_name>/classification_report.csv`` and
    ``runs/<model_name>/confusion_matrix.png``.

    Returns:
        ``(accuracy, inference_time)``: accuracy in percent and the average
        wall-clock milliseconds per image over the whole loop.
    """
    model.eval()
    predictions, targets = [], []
    probabilities = []

    # Wall-clock timing of the whole evaluation loop
    tic = time.time()
    seen = 0

    with torch.no_grad():
        for batch, batch_labels in test_loader:
            batch = batch.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch)

            batch_probs = torch.softmax(logits, dim=1)
            batch_preds = logits.max(1)[1]

            predictions.extend(batch_preds.cpu().numpy())
            targets.extend(batch_labels.cpu().numpy())
            probabilities.extend(batch_probs.cpu().numpy())

            seen += batch.size(0)

    elapsed = time.time() - tic
    inference_time = elapsed / seen * 1000  # ms per image

    # Fraction of matching predictions, as a percentage
    hits = np.array(predictions) == np.array(targets)
    accuracy = 100. * np.mean(hits)

    print(f"\nüìä {model_name.upper()} TEST RESULTS:")
    print(f"   Accuracy: {accuracy:.2f}%")
    print(f"   Inference: {inference_time:.2f}ms/image")

    # Per-class precision/recall/F1 table
    report = classification_report(
        targets,
        predictions,
        target_names=class_names,
        output_dict=True,
    )
    report_frame = pd.DataFrame(report).round(3)
    report_frame.to_csv(f"runs/{model_name}/classification_report.csv")

    # Confusion matrix heatmap
    plt.figure(figsize=(12, 10))
    cm = confusion_matrix(targets, predictions)
    sns.heatmap(
        cm,
        annot=False,
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.title(f'{model_name.upper()} Confusion Matrix')
    plt.savefig(f"runs/{model_name}/confusion_matrix.png", dpi=300, bbox_inches='tight')
    plt.close()

    return accuracy, inference_time


In [7]:
# Evaluation with AMP inference (redefines evaluate_model from the previous cell; no t-SNE/PCA is computed here)

def evaluate_model(model, test_loader, class_names, model_name="model"):
    """Evaluate ``model`` on ``test_loader`` and save a report and confusion matrix.

    Uses the notebook-level global ``device``; float16 autocast is enabled only
    when that device is CUDA. Output files are written to
    ``runs/<model_name>/`` (created if missing):
    ``classification_report.csv`` and ``confusion_matrix.png``.

    Args:
        model: Trained network.
        test_loader: Loader yielding ``(inputs, labels)`` batches.
        class_names: Class names; position ``i`` is the name of label ``i``.
        model_name: Sub-directory of ``runs/`` and prefix used in titles.

    Returns:
        ``(accuracy, inference_time)``: accuracy in percent and the average
        wall-clock milliseconds per image, measured over the whole loop
        (data loading and host/device copies included).

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    use_amp = device.type == "cuda"
    class_names = list(class_names)
    label_ids = list(range(len(class_names)))

    out_dir = os.path.join("runs", model_name)
    os.makedirs(out_dir, exist_ok=True)

    model.eval()
    all_preds, all_labels = [], []

    # Inference time measurement
    start_time = time.time()
    num_images = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)

            with torch.amp.autocast("cuda", dtype=torch.float16, enabled=use_amp):
                outputs = model(inputs)

            _, predicted = outputs.max(1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            num_images += inputs.size(0)

    if num_images == 0:
        raise ValueError("test_loader produced no samples; nothing to evaluate")

    inference_time = (time.time() - start_time) / num_images * 1000  # ms per image

    # Metrics
    accuracy = 100. * np.mean(np.array(all_preds) == np.array(all_labels))

    print(f"\nüìä {model_name.upper()} TEST RESULTS:")
    print(f"   Accuracy: {accuracy:.2f}%")
    print(f"   Inference: {inference_time:.2f}ms/image")

    # Passing the full label list keeps the report aligned with class_names
    # even when some class never occurs in the test set or the predictions.
    report = classification_report(all_labels, all_preds, labels=label_ids,
                                  target_names=class_names, output_dict=True,
                                  zero_division=0)
    pd.DataFrame(report).round(3).to_csv(f"runs/{model_name}/classification_report.csv")

    # Confusion matrix: one tick label per row/column. A shortened label list
    # would be attached to the first rows/columns only and mislabel the axes.
    plt.figure(figsize=(12, 10))
    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
    sns.heatmap(cm, annot=False, cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title(f'{model_name.upper()} Confusion Matrix ({len(class_names)} Food Classes)')
    plt.savefig(f"runs/{model_name}/confusion_matrix.png", dpi=300, bbox_inches='tight')
    plt.close()

    return accuracy, inference_time


In [8]:
# Main Experiment Runner

def run_experiment(model_name, data_path=r"D:\FoodNet\data", epochs=1):
    """Train, reload the best checkpoint, evaluate and summarise one model.

    Everything runs on the notebook-level global ``device`` as it is at call
    time, so reassigning that global before calling (e.g. to fall back to CPU)
    takes effect for model creation, training and evaluation alike.

    Args:
        model_name: Name understood by ``get_model``.
        data_path: Root of the train/val/test folders.
        epochs: Total epochs passed on to ``train_model``.

    Returns:
        dict with ``model``, ``best_val_acc``, ``test_acc``,
        ``inference_time_ms`` and ``num_classes``; the same row is written to
        ``runs/<model_name>/results_summary.csv``.
    """
    print(f"\n{'='*70}")
    print(f"üöÄ STARTING {model_name.upper()} EXPERIMENT ({epochs} epochs)")
    print(f"{'='*70}")

    # Data
    train_loader, val_loader, test_loader, class_names = get_data_loaders(data_path)

    # Model: pass the global device explicitly; get_model's own default is "cuda".
    model = get_model(model_name, len(class_names), device=device)

    # Train
    history = train_model(model, train_loader, val_loader, epochs, model_name=model_name)

    # Best accuracy seen in the returned history; 0 when no epoch was recorded.
    # Computed up front so an empty history cannot raise inside a default argument.
    history_best = max(history["val_acc"]) if history["val_acc"] else 0

    # Checkpoint: evaluate the best saved weights rather than the last epoch's.
    checkpoint_path = f"runs/{model_name}/best_model.pt"
    if os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint["model_state_dict"])
        best_val_acc = checkpoint.get("best_val_acc", history_best)
    else:
        print(f"‚ö†Ô∏è No checkpoint found for {model_name}, using current model")
        best_val_acc = history_best

    # Evaluate
    accuracy, inference_time = evaluate_model(model, test_loader, class_names, model_name)

    # Results
    results = {
        "model": model_name,
        "best_val_acc": best_val_acc,
        "test_acc": accuracy,
        "inference_time_ms": inference_time,
        "num_classes": len(class_names),
    }
    os.makedirs(f"runs/{model_name}", exist_ok=True)
    pd.DataFrame([results]).to_csv(f"runs/{model_name}/results_summary.csv", index=False)

    print(f"‚úÖ {model_name.upper()} COMPLETE! Test Acc: {accuracy:.2f}%")
    return results

print("üéØ FoodNet: Starting 10-epoch experiments...")
print(f"Initial device: {device}")



üéØ FoodNet: Starting 10-epoch experiments...
Initial device: cuda


In [9]:
# Run every backbone in turn and collect one summary row per successful run.
results = []

for model_name in ["mobilenet", "efficientnet", "resnet"]:
    print(f"\n‚ñ∂ Running {model_name.upper()} on {device}...")

    try:
        # Try on current device (likely cuda)
        result = run_experiment(model_name, epochs=20)
        results.append(result)

    except RuntimeError as e:
        msg = str(e)
        print(f"‚ùå {model_name.upper()} FAILED on {device}: {msg}")

        # Check for CUDA kernel image errors or generic CUDA failures
        if "no kernel image is available" in msg or "CUDA error" in msg:
            print("‚ö†Ô∏è CUDA issue detected. Switching to safe CPU mode for this model...")

            # Switch the global device to CPU. It is not switched back, so the
            # remaining models in this loop also run on CPU.
            device = torch.device("cpu")
            try:
                # Releasing the cache can itself raise once the CUDA context has
                # faulted; that must not abort the fallback.
                torch.cuda.empty_cache()
            except RuntimeError as cache_err:
                print(f"Could not release the CUDA cache: {cache_err}")
            print(f"üîÑ New device: {device}")

            # IMPORTANT: rerun with fresh model on CPU (fewer epochs than the GPU attempt)
            try:
                result = run_experiment(model_name, epochs=10)
                results.append(result)
            except Exception as e2:
                print(f"‚ùå {model_name.upper()} also failed on CPU: {e2}")
        else:
            print("‚ö†Ô∏è Non-CUDA error, not retrying on CPU for this model.")

# Final comparison
if results:
    comparison_df = pd.DataFrame(results)
    # Store the comparison next to the per-model artifacts under runs/ and report
    # the path that was really written (previously the file went to one location
    # while the message named another).
    os.makedirs("runs", exist_ok=True)
    comparison_path = os.path.join("runs", "model_comparison.csv")
    comparison_df.to_csv(comparison_path, index=False)

    print("\n" + "="*70)
    print("üèÅ FOODNET 10-EPOCH EXPERIMENTS COMPLETE (with GPU‚ÜíCPU fallback)!")
    print(comparison_df.round(2))
    print(f"\nüìÅ Results saved: {os.path.abspath(comparison_path)}")
else:
    print("\n‚ö†Ô∏è No successful runs to summarize.")


‚ñ∂ Running MOBILENET on cuda...

üöÄ STARTING MOBILENET EXPERIMENT (20 epochs)
üìä Dataset sizes: Train=16700, Val=3580, Test=3593
üè∑Ô∏è  Classes: 34
üîÑ Loading mobilenet (ImageNet pretrained, grayscale)...
üìà Params: 1.55M (trainable: 1.55M)
‚úÖ Resumed: runs\mobilenet\best_model.pt | start_epoch=20 | best_acc=69.61%
üèÜ Best Val Accuracy: 69.61%

üìä MOBILENET TEST RESULTS:
   Accuracy: 68.08%
   Inference: 5.64ms/image
‚úÖ MOBILENET COMPLETE! Test Acc: 68.08%

‚ñ∂ Running EFFICIENTNET on cuda...

üöÄ STARTING EFFICIENTNET EXPERIMENT (20 epochs)
üìä Dataset sizes: Train=16700, Val=3580, Test=3593
üè∑Ô∏è  Classes: 34
üîÑ Loading efficientnet (ImageNet pretrained, grayscale)...
üìà Params: 4.05M (trainable: 4.05M)
‚úÖ Resumed: runs\efficientnet\best_model.pt | start_epoch=20 | best_acc=80.50%
üèÜ Best Val Accuracy: 80.50%

üìä EFFICIENTNET TEST RESULTS:
   Accuracy: 77.68%
   Inference: 4.93ms/image
‚úÖ EFFICIENTNET COMPLETE! Test Acc: 77.68%

‚ñ∂ Running RESNET on c