In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import os
import torchvision


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
np.random.seed(42)

In [6]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("kmader/food41")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/food41


In [7]:
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

train_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

test_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Load dataset WITHOUT transform first
dataset_path = os.path.join(path, 'images')
full_dataset = datasets.ImageFolder(root=dataset_path, transform=None)  # NO TRANSFORM!

# Split indices
total_size = len(full_dataset)
train_size = int(0.75 * total_size)
val_size = int(0.15 * total_size)
test_size = total_size - train_size - val_size

# Create index splits
np.random.seed(42)
indices = np.random.permutation(total_size)
train_indices = indices[:train_size]
val_indices = indices[train_size:train_size+val_size]
test_indices = indices[train_size+val_size:]

# Create proper datasets with transforms
class TransformedSubset(torch.utils.data.Dataset):
    def __init__(self, dataset, indices, transform=None):
        self.dataset = dataset
        self.indices = indices
        self.transform = transform
    
    def __len__(self):
        return len(self.indices)
    
    def __getitem__(self, idx):
        image, label = self.dataset[self.indices[idx]]
        if self.transform:
            image = self.transform(image)
        return image, label

# Create datasets
train_dataset = TransformedSubset(full_dataset, train_indices, train_transforms)
val_dataset = TransformedSubset(full_dataset, val_indices, test_transforms)
test_dataset = TransformedSubset(full_dataset, test_indices, test_transforms)

# DataLoaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

print(f"Dataset Loaded Successfully!")
print(f"Train: {train_size:,} | Val: {val_size:,} | Test: {test_size:,}")


Dataset Loaded Successfully!
Train: 75,750 | Val: 15,150 | Test: 10,100


In [None]:
# Class names

class_names = full_dataset.classes #classes is a keyword
num_classes = len(class_names)
print(f"Total Classes: {num_classes}")
print(f"Example Classes: {class_names[:10]}")


# Analyze class distribution
targets = np.array(full_dataset.targets)
unique, counts = np.unique(targets, return_counts=True)
class_counts = pd.DataFrame({'Class': unique, 'Count': counts})
class_counts['Label'] = [class_names[i] for i in unique]

print(class_counts)

In [None]:
import torchvision
def show_batch(loader, n_images=25):
    dataiter = iter(loader)
    images, labels = next(dataiter)
    grid = torchvision.utils.make_grid(images[:n_images], nrow=5, normalize=True)
    npimg = grid.numpy().transpose((1, 2, 0))
    plt.figure(figsize=(10, 10))
    plt.imshow(npimg)
    plt.axis('off')
    plt.title("Sample Images from Food-101 (Train Split)")
    plt.savefig("sample_images_grid.png", dpi=300)
    plt.close()

show_batch(train_loader)
print("✓ Saved sample image grid to 'sample_images_grid.png'")


In [None]:
# Check one batch
images, labels = next(iter(train_loader))
print("Batch shape:", images.shape)
print("Labels shape:", labels.shape)

# Show few sample images
def imshow(img):
    img = img * torch.tensor(std).view(3,1,1) + torch.tensor(mean).view(3,1,1)  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.axis('off')

plt.figure(figsize=(10, 5))
imshow(torchvision.utils.make_grid(images[:8]))
plt.show()


 # **MODELLING**

In [None]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        
        # Block 1: 224 -> 112
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),  
        )
        self.maxpool1 = nn.MaxPool2d(2, 2)
        
        # Block 2: 112 -> 56
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
        )
        self.maxpool2 = nn.MaxPool2d(2, 2)
        
        # Block 3: 56 -> 28
        self.conv_block3 = nn.Sequential(
            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
        )
        self.maxpool3 = nn.MaxPool2d(2, 2)
        
        # Block 4: 28 -> 14
        self.conv_block4 = nn.Sequential(
            nn.Conv2d(256, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
        )
        self.maxpool4 = nn.MaxPool2d(2, 2)
        
        # Block 5: 14 -> 7
        self.conv_block5 = nn.Sequential(
            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
        )
        self.maxpool5 = nn.MaxPool2d(2, 2)
        
        # Classifier
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(512 * 7 * 7, 2048),
            nn.ReLU(),
            nn.BatchNorm1d(2048),
            nn.Dropout(0.5),
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.BatchNorm1d(1024),
            nn.Dropout(0.5),
            nn.Linear(1024, 101)
        )
    
    def forward(self, x):
        x = self.maxpool1(self.conv_block1(x))
        x = self.maxpool2(self.conv_block2(x))
        x = self.maxpool3(self.conv_block3(x))
        x = self.maxpool4(self.conv_block4(x))
        x = self.maxpool5(self.conv_block5(x))
        x = self.classifier(x)
        return x

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()

optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)

# Scheduler
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=3
)

# Training function
def train(model, loader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    
    for images, labels in tqdm(loader, desc='Training', leave=False):
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        
        # CRITICAL: Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        optimizer.step()
        
        total_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    
    return total_loss / len(loader), 100. * correct / total

def test(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in tqdm(loader, desc='Validating', leave=False):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    
    return total_loss / len(loader), 100. * correct / total

## Base CNN Model

In [None]:
epochs = 20
train_losses, test_losses = [], []
train_accs, test_accs = [], []
best_val_acc = 0.0

print("\nTraining started...")
for epoch in range(epochs):
    print(f"\nEpoch {epoch+1}/{epochs}")
    
    train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
    test_loss, test_acc = test(model, val_loader, criterion, device)
    
    # Update learning rate
    scheduler.step(test_acc)
    
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    test_losses.append(test_loss)
    test_accs.append(test_acc)
    
    print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"  Val Loss: {test_loss:.4f}, Val Acc: {test_acc:.2f}%")
    
    # Save best model
    if test_acc > best_val_acc:
        best_val_acc = test_acc
        torch.save(model.state_dict(), 'best_baseline_model.pth')
        print(f"  ✓ Best model saved! Val Acc: {test_acc:.2f}%")
    
    # Early stopping if loss explodes
    if test_loss > 100:
        print("  ⚠️ Loss exploding! Stopping early.")
        break

print(f"\n{'='*60}")
print(f"Training Complete!")
print(f"Best Validation Accuracy: {best_val_acc:.2f}%")
print(f"{'='*60}")

### Visualization

In [None]:
# Plot training history
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

ax1.plot(train_losses, label='Train Loss')
ax1.plot(test_losses, label='Test Loss')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.set_title('Loss over Epochs')
ax1.legend()
ax1.grid(True)

ax2.plot(train_accs, label='Train Accuracy')
ax2.plot(test_accs, label='Test Accuracy')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Accuracy (%)')
ax2.set_title('Accuracy over Epochs')
ax2.legend()
ax2.grid(True)

plt.tight_layout()
plt.savefig('results.png')
plt.show()

In [None]:
import torch
import torch.nn.functional as F

model.eval()
y_true = []
y_pred = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        preds = torch.argmax(F.softmax(outputs, dim=1), dim=1)
        
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())


Classification Report

In [None]:
from sklearn.metrics import classification_report

print(classification_report(y_true, y_pred, target_names=class_names))


Top 5 best performing Foods

In [None]:
import numpy as np

y_true = np.array(y_true)
y_pred = np.array(y_pred)

per_class_accuracy = []
for i, class_name in enumerate(class_names):
    idx = np.where(y_true == i)[0]
    acc = np.mean(y_pred[idx] == y_true[idx])
    per_class_accuracy.append(acc * 100)

# Create a sorted table of top classes
import pandas as pd

df = pd.DataFrame({
    'Class': class_names,
    'Accuracy': per_class_accuracy,
    'Sample Size': [np.sum(y_true == i) for i in range(len(class_names))]
})
df = df.sort_values(by='Accuracy', ascending=False).reset_index(drop=True)
df.index += 1
print(df.head(5))


In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(15, 6))
plt.bar(df['Class'][:10], df['Accuracy'][:10])
plt.xticks(rotation=45, ha='right')
plt.title('Top 10 Best Performing Food-101 Classes')
plt.ylabel('Accuracy (%)')
plt.show()


Bottom 5 Performing Foods

In [None]:
df = pd.DataFrame({
    'Class': class_names,
    'Accuracy': per_class_accuracy,
    'Sample Size': [np.sum(y_true == i) for i in range(len(class_names))]
})
worst_df = df.sort_values(by='Accuracy', ascending=True).reset_index(drop=True)
worst_df.index += 1
print(worst_df.head(5))


Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix
import numpy as np

cm = confusion_matrix(y_true, y_pred)
print(cm.shape)   # should be (101, 101)


import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

# Select 10 example classes (e.g., top 10 by accuracy)
top10_classes = df.sort_values(by='Accuracy', ascending=False)['Class'].head(10).tolist()
top10_idx = [class_names.index(c) for c in top10_classes]

# Subset confusion matrix
cm_subset = cm[top10_idx][:, top10_idx]

# Normalize
cm_norm = cm_subset.astype('float') / cm_subset.sum(axis=1)[:, np.newaxis]

plt.figure(figsize=(10, 8))
sns.heatmap(cm_norm, annot=True, fmt=".2f", cmap="Blues",
            xticklabels=top10_classes, yticklabels=top10_classes)
plt.title("Normalized Confusion Matrix (Top 10 Classes)")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()


In [None]:
import seaborn as sns
plt.figure(figsize=(25, 22))
sns.heatmap(cm, cmap="Blues", cbar=False)
plt.title("Food-101 Confusion Matrix (All Classes)")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
np.save("confusion_matrix_food101.npy", cm)


## Part B: Feature Extraction (Frozen) 

In [15]:
# Try this instead:
from torchvision.models import ResNet50_Weights
res50_model1 = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)

#freeze all the layers
for param in res50_model1.parameters():
    param.requires_grad=False
    
# Check if weights loaded
sample_weight = res50_model1.layer1[0].conv1.weight[0,0,0,0].item()
print(f"   Sample weight value: {sample_weight:.6f}")
if abs(sample_weight) > 0.5:
    print("   ⚠️ WARNING: Weights look random! Pre-trained might not have loaded.")
else:
    print("   ✓ Pre-trained weights loaded successfully")


num_features = res50_model1.fc.in_features  # Should be 2048

# Replace classifier (2048 → 512 → 101)
res50_model1.fc = nn.Linear(2048, 101)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

res50_model1 = res50_model1.to(device)
criterion = nn.CrossEntropyLoss()

# Verify trainable parameters
total_params = sum(p.numel() for p in res50_model1.parameters())
trainable_params = sum(p.numel() for p in res50_model1.parameters() if p.requires_grad)
print(f"\nModel Statistics:")
print(f"  Total parameters: {total_params:,}")
print(f"  Trainable parameters: {trainable_params:,} ({100*trainable_params/total_params:.2f}%)")
print(f"  Frozen parameters: {total_params - trainable_params:,}")

optimizer = optim.SGD(res50_model1.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=3
)

   Sample weight value: 0.003514
   ✓ Pre-trained weights loaded successfully
Using device: cuda

Model Statistics:
  Total parameters: 23,714,981
  Trainable parameters: 206,949 (0.87%)
  Frozen parameters: 23,508,032


In [9]:
print(train_transforms)


Compose(
    Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=True)
    RandomResizedCrop(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=bilinear, antialias=True)
    RandomHorizontalFlip(p=0.5)
    RandomRotation(degrees=[-10.0, 10.0], interpolation=nearest, expand=False, fill=0)
    ColorJitter(brightness=(0.8, 1.2), contrast=(0.8, 1.2), saturation=(0.8, 1.2), hue=None)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)


In [8]:
# Training function
def train(res50_model1, loader, criterion, optimizer, device):
    res50_model1.train()
    total_loss = 0
    correct = 0
    total = 0
    
    for images, labels in tqdm(loader, desc='Training', leave=False):
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = res50_model1(images)
        loss = criterion(outputs, labels)
        loss.backward()
        
        # CRITICAL: Gradient clipping
        #torch.nn.utils.clip_grad_norm_(res50_model1.parameters(), max_norm=1.0)
        
        optimizer.step()
        
        total_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    
    return total_loss / len(loader), 100. * correct / total

def test(res50_model1, loader, criterion, device):
    res50_model1.eval()
    total_loss = 0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in tqdm(loader, desc='Validating', leave=False):
            images, labels = images.to(device), labels.to(device)
            outputs = res50_model1(images)
            loss = criterion(outputs, labels)
            
            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    
    return total_loss / len(loader), 100. * correct / total

In [10]:
import torch
import torch.nn as nn
from torchvision import models
from torchvision.models import ResNet50_Weights

print("="*80)
print("COMPLETE DIAGNOSTIC - FINDING THE ISSUE")
print("="*80)

# ============================================================================
# TEST 1: VERIFY PRE-TRAINED WEIGHTS ARE ACTUALLY LOADING
# ============================================================================

print("\n1. Testing Pre-trained Weights Loading...")

# Load with explicit weights
model_test = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)

# Check multiple weight values (should be specific non-random values)
w1 = model_test.layer1[0].conv1.weight[0,0,0,0].item()
w2 = model_test.layer4[2].conv3.weight[0,0,0,0].item()
w3 = model_test.conv1.weight[0,0,0,0].item()

print(f"   Conv1 weight:   {w1:.6f}")
print(f"   Layer1 weight:  {w2:.6f}")  
print(f"   Layer4 weight:  {w3:.6f}")

# These should be specific values like -0.012345, not random
if abs(w1) > 0.5 or abs(w2) > 0.5 or abs(w3) > 0.5:
    print("   ❌ PROBLEM: Weights look random! Pre-trained not loading!")
    print("   Try: Download weights manually or check internet connection")
else:
    print("   ✓ Weights look correct")

# ============================================================================
# TEST 2: VERIFY DATA TRANSFORMS
# ============================================================================

print("\n2. Testing Data Pipeline...")

# Get one batch
images, labels = next(iter(train_loader))
print(f"   Batch shape: {images.shape}")
print(f"   Batch dtype: {images.dtype}")
print(f"   Labels shape: {labels.shape}")

# Check normalization
img_mean = images.mean(dim=[0,2,3])
img_std = images.std(dim=[0,2,3])

print(f"   Image channel means: [{img_mean[0]:.3f}, {img_mean[1]:.3f}, {img_mean[2]:.3f}]")
print(f"   Image channel stds:  [{img_std[0]:.3f}, {img_std[1]:.3f}, {img_std[2]:.3f}]")
print(f"   Expected means: ~[0, 0, 0]")
print(f"   Expected stds:  ~[1, 1, 1]")

if abs(img_mean[0]) > 0.5:
    print("   ❌ PROBLEM: Images not normalized!")
else:
    print("   ✓ Normalization looks correct")

# ============================================================================
# TEST 3: TEST ON IMAGENET IMAGES (SANITY CHECK)
# ============================================================================

print("\n3. Testing Model on ImageNet-like Data...")

model_test.eval()
model_test = model_test.to(device)

with torch.no_grad():
    # Create random ImageNet-normalized input
    test_input = torch.randn(8, 3, 224, 224).to(device)
    test_output = model_test(test_input)
    
    # For ImageNet (1000 classes), accuracy on random should be ~0.1%
    # For our data, pre-trained features should give ~20-30% even with random classifier
    
    print(f"   Output shape: {test_output.shape}")
    print(f"   Output range: [{test_output.min():.2f}, {test_output.max():.2f}]")

# ============================================================================
# TEST 4: VERIFY YOUR MODEL SETUP
# ============================================================================

print("\n4. Checking Your Model Configuration...")

# Recreate your exact setup
res50_model = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)

# Freeze
for param in res50_model.parameters():
    param.requires_grad = False

# Replace FC
res50_model.fc = nn.Linear(2048, 101)
res50_model = res50_model.to(device)

# Count parameters
total = sum(p.numel() for p in res50_model.parameters())
trainable = sum(p.numel() for p in res50_model.parameters() if p.requires_grad)
frozen = total - trainable

print(f"   Total params:     {total:,}")
print(f"   Trainable params: {trainable:,}")
print(f"   Frozen params:    {frozen:,}")

if trainable != 206948:  # 2048*101 + 101
    print(f"   ❌ PROBLEM: Wrong number of trainable params! Should be 206,948")
else:
    print(f"   ✓ Correct number of trainable params")

# ============================================================================
# TEST 5: COMPARE WITH BASELINE
# ============================================================================

print("\n5. Testing Forward Pass Performance...")

res50_model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in list(val_loader)[:10]:  # Test on 10 batches
        images, labels = images.to(device), labels.to(device)
        outputs = res50_model(images)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

untrained_acc = 100. * correct / total
print(f"   Random FC accuracy: {untrained_acc:.2f}%")
print(f"   Expected: 15-25% (pre-trained features + random classifier)")

if untrained_acc < 10:
    print(f"   ❌ PROBLEM: Too low! Pre-trained features not working!")
elif untrained_acc < 20:
    print(f"   ⚠️ LOW: Features working but something off")
else:
    print(f"   ✓ Pre-trained features working")

# ============================================================================
# TEST 6: CHECK LEARNING RATE
# ============================================================================

print("\n6. Testing Learning Rate...")

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(res50_model.fc.parameters(), lr=0.01, momentum=0.9)

# Test one batch
res50_model.train()
images, labels = next(iter(train_loader))
images, labels = images.to(device), labels.to(device)

# Before update
fc_weight_before = res50_model.fc.weight[0,0].item()

# One step
optimizer.zero_grad()
outputs = res50_model(images)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()

# After update  
fc_weight_after = res50_model.fc.weight[0,0].item()
weight_change = abs(fc_weight_after - fc_weight_before)

print(f"   Weight before: {fc_weight_before:.6f}")
print(f"   Weight after:  {fc_weight_after:.6f}")
print(f"   Weight change: {weight_change:.6f}")

if weight_change < 0.0001:
    print(f"   ❌ PROBLEM: Weights barely changing! LR too low or gradient issue")
elif weight_change > 0.1:
    print(f"   ⚠️ WARNING: Weights changing a lot! LR might be too high")
else:
    print(f"   ✓ Weight updates look reasonable")

# ============================================================================
# TEST 7: CHECK DATASET BALANCE
# ============================================================================

print("\n7. Checking Dataset...")

# Count labels in training set
label_counts = {}
for _, labels in train_loader:
    for label in labels:
        label_id = label.item()
        label_counts[label_id] = label_counts.get(label_id, 0) + 1

unique_classes = len(label_counts)
min_samples = min(label_counts.values())
max_samples = max(label_counts.values())

print(f"   Unique classes: {unique_classes}")
print(f"   Min samples per class: {min_samples}")
print(f"   Max samples per class: {max_samples}")

if unique_classes != 101:
    print(f"   ❌ PROBLEM: Should have 101 classes, found {unique_classes}!")
elif max_samples / min_samples > 2:
    print(f"   ⚠️ WARNING: Imbalanced dataset")
else:
    print(f"   ✓ Dataset balance looks good")

# ============================================================================
# SUMMARY
# ============================================================================

print("\n" + "="*80)
print("DIAGNOSTIC SUMMARY")
print("="*80)

problems_found = []

if abs(w1) > 0.5:
    problems_found.append("Pre-trained weights not loading correctly")
if abs(img_mean[0]) > 0.5:
    problems_found.append("Data normalization incorrect")
if trainable != 206948:
    problems_found.append("Wrong number of trainable parameters")
if untrained_acc < 15:
    problems_found.append("Pre-trained features not working")
if weight_change < 0.0001:
    problems_found.append("Learning rate too low or no gradient flow")
if unique_classes != 101:
    problems_found.append("Dataset has wrong number of classes")

if problems_found:
    print("\n❌ PROBLEMS FOUND:")
    for i, problem in enumerate(problems_found, 1):
        print(f"   {i}. {problem}")
else:
    print("\n✅ All checks passed!")
    print("   Model setup looks correct.")
    print("   Issue might be:")
    print("   - Need different learning rate")
    print("   - Need more epochs")
    print("   - Dataset difficulty higher than expected")

print("\n" + "="*80)

COMPLETE DIAGNOSTIC - FINDING THE ISSUE

1. Testing Pre-trained Weights Loading...
   Conv1 weight:   0.003514
   Layer1 weight:  0.004994
   Layer4 weight:  0.013335
   ✓ Weights look correct

2. Testing Data Pipeline...
   Batch shape: torch.Size([32, 3, 224, 224])
   Batch dtype: torch.float32
   Labels shape: torch.Size([32])
   Image channel means: [0.352, -0.030, -0.369]
   Image channel stds:  [1.250, 1.261, 1.230]
   Expected means: ~[0, 0, 0]
   Expected stds:  ~[1, 1, 1]
   ✓ Normalization looks correct

3. Testing Model on ImageNet-like Data...
   Output shape: torch.Size([8, 1000])
   Output range: [-4.66, 8.42]

4. Checking Your Model Configuration...
   Total params:     23,714,981
   Trainable params: 206,949
   Frozen params:    23,508,032
   ❌ PROBLEM: Wrong number of trainable params! Should be 206,948

5. Testing Forward Pass Performance...
   Random FC accuracy: 0.31%
   Expected: 15-25% (pre-trained features + random classifier)
   ❌ PROBLEM: Too low! Pre-trained f

In [9]:
from torchvision.models import ResNet50_Weights
import torch
import torch.nn as nn
import torch.optim as optim

print("="*80)
print("RESNET50 FEATURE EXTRACTION - CORRECTED VERSION")
print("="*80)

# Load pre-trained ResNet50
print("\n1. Loading ResNet50...")
res50_model1 = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)

# Check weights loaded
sample_weight = res50_model1.layer1[0].conv1.weight[0,0,0,0].item()
print(f"   Sample weight: {sample_weight:.6f}")
if abs(sample_weight) > 0.5:
    print("   ⚠️ WARNING: Weights look random!")
else:
    print("   ✓ Pre-trained weights loaded")

# Freeze backbone
print("\n2. Freezing backbone...")
for param in res50_model1.parameters():
    param.requires_grad = False

# Replace classifier
print("\n3. Replacing classifier...")
num_features = res50_model1.fc.in_features
res50_model1.fc = nn.Linear(num_features, 101)

# Move to device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"\n4. Moving to device: {device}")
res50_model1 = res50_model1.to(device)

# Verify parameters
total_params = sum(p.numel() for p in res50_model1.parameters())
trainable_params = sum(p.numel() for p in res50_model1.parameters() if p.requires_grad)

print(f"\n5. Model Statistics:")
print(f"   Total parameters:     {total_params:,}")
print(f"   Trainable parameters: {trainable_params:,} ({100*trainable_params/total_params:.2f}%)")
print(f"   Frozen parameters:    {total_params - trainable_params:,}")

# ============================================================================
# CRITICAL FIX: ONLY OPTIMIZE FC LAYER!
# ============================================================================

criterion = nn.CrossEntropyLoss()

# ✅ CORRECT: Only optimize trainable parameters (FC layer)
optimizer = optim.SGD(
    res50_model1.fc.parameters(),  # ← ONLY FC LAYER!
    lr=0.01, 
    momentum=0.9, 
    weight_decay=1e-4
)

print(f"\n6. Optimizer configured for FC layer only")
print(f"   Learning rate: 0.01")
print(f"   Momentum: 0.9")

scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=3, verbose=True
)

# ============================================================================
# TRAINING
# ============================================================================

epochs = 15
train_losses, test_losses = [], []
train_accs, test_accs = [], []
best_val_acc = 0.0

print("\n" + "="*80)
print("STARTING TRAINING")
print("="*80)

for epoch in range(epochs):
    print(f"\nEpoch {epoch+1}/{epochs}")
    print("-" * 60)
    
    train_loss, train_acc = train(res50_model1, train_loader, criterion, optimizer, device)
    test_loss, test_acc = test(res50_model1, val_loader, criterion, device)
    
    scheduler.step(test_acc)
    
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    test_losses.append(test_loss)
    test_accs.append(test_acc)
    
    print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"  Val Loss:   {test_loss:.4f},   Val Acc:   {test_acc:.2f}%")
    
    if test_acc > best_val_acc:
        best_val_acc = test_acc
        torch.save(res50_model1.state_dict(), 'resnet50_frozen_best.pth')
        print(f"  ✓ Best model saved! Val Acc: {test_acc:.2f}%")
    
    # Progress check
    if epoch == 0:
        if test_acc < 55:
            print(f"  ⚠️ Epoch 1 low ({test_acc:.1f}%). Expected 60-70%")
        elif test_acc >= 60:
            print(f"  ✓ Epoch 1 looking good!")

print(f"\n{'='*60}")
print(f"TRAINING COMPLETE")
print(f"{'='*60}")
print(f"Best Validation Accuracy: {best_val_acc:.2f}%")
print(f"Expected: 78-82%")
print(f"{'='*60}")

# Final test
print("\nEvaluating on test set...")
res50_model1.load_state_dict(torch.load('resnet50_frozen_best.pth'))
test_loss_final, test_acc_final = test(res50_model1, test_loader, criterion, device)

print(f"\n{'='*60}")
print(f"FINAL TEST RESULTS")
print(f"{'='*60}")
print(f"Test Accuracy: {test_acc_final:.2f}%")
print(f"Improvement over baseline: +{test_acc_final - 40.2:.2f}%")
print(f"{'='*60}")

RESNET50 FEATURE EXTRACTION - CORRECTED VERSION

1. Loading ResNet50...


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 193MB/s] 


   Sample weight: 0.003514
   ✓ Pre-trained weights loaded

2. Freezing backbone...

3. Replacing classifier...

4. Moving to device: cuda





5. Model Statistics:
   Total parameters:     23,714,981
   Trainable parameters: 206,949 (0.87%)
   Frozen parameters:    23,508,032

6. Optimizer configured for FC layer only
   Learning rate: 0.01
   Momentum: 0.9

STARTING TRAINING

Epoch 1/15
------------------------------------------------------------


                                                             

  Train Loss: 2.9148, Train Acc: 32.59%
  Val Loss:   2.1911,   Val Acc:   47.14%
  ✓ Best model saved! Val Acc: 47.14%
  ⚠️ Epoch 1 low (47.1%). Expected 60-70%

Epoch 2/15
------------------------------------------------------------


                                                            

KeyboardInterrupt: 

In [6]:
epochs = 15  # Increase to 15 for better convergence
train_losses, test_losses = [], []
train_accs, test_accs = [], []
best_val_acc = 0.0

print("\n" + "="*60)
print("TRAINING RESNET50 - FEATURE EXTRACTION")
print("="*60)

for epoch in range(epochs):
    print(f"\nEpoch {epoch+1}/{epochs}")
    print("-" * 60)
    
    # Training
    train_loss, train_acc = train(res50_model1, train_loader, criterion, optimizer, device)
    
    # Validation
    test_loss, test_acc = test(res50_model1, val_loader, criterion, device)
    
    # Update learning rate
    scheduler.step(test_acc)
    
    # Save metrics
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    test_losses.append(test_loss)
    test_accs.append(test_acc)
    
    # Print results
    print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"  Val Loss:   {test_loss:.4f},   Val Acc:   {test_acc:.2f}%")
    
    # Save best model
    if test_acc > best_val_acc:
        best_val_acc = test_acc
        torch.save(res50_model1.state_dict(), 'resnet50_frozen_best.pth')
        print(f"  ✓ Best model saved! Val Acc: {test_acc:.2f}%")
    
    # Early stopping for errors
    if test_loss > 100 or torch.isnan(torch.tensor(test_loss)):
        print("  ⚠️ Training issue detected! Stopping early.")
        break

print(f"\n{'='*60}")
print(f"FEATURE EXTRACTION TRAINING COMPLETE!")
print(f"{'='*60}")
print(f"Best Validation Accuracy: {best_val_acc:.2f}%")
print(f"Expected Range: 78-82%")
print(f"{'='*60}")

# ============================================================================
# STEP 4: FINAL EVALUATION
# ============================================================================

# Load best model
print("\nLoading best model for final evaluation...")
res50_model1.load_state_dict(torch.load('resnet50_frozen_best.pth'))

# Evaluate on test set
test_loss_final, test_acc_final = test(res50_model1, test_loader, criterion, device)

print(f"\n{'='*60}")
print(f"FINAL TEST SET RESULTS")
print(f"{'='*60}")
print(f"Test Loss:     {test_loss_final:.4f}")
print(f"Test Accuracy: {test_acc_final:.2f}%")
print(f"Improvement over baseline: +{test_acc_final - 40.2:.2f}%")
print(f"{'='*60}")


TRAINING RESNET50 - FEATURE EXTRACTION

Epoch 1/15
------------------------------------------------------------


NameError: name 'train' is not defined

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
import numpy as np

print("="*80)
print("RESNET50 FEATURE EXTRACTION - DIAGNOSTIC MODE")
print("="*80)

# ============================================================================
# DIAGNOSTIC 1: VERIFY PRE-TRAINED WEIGHTS
# ============================================================================

print("\n1. Loading ResNet50...")
res50_model1 = models.resnet50(pretrained=True)

# Check if weights loaded
sample_weight = res50_model1.layer1[0].conv1.weight[0,0,0,0].item()
print(f"   Sample weight value: {sample_weight:.6f}")
if abs(sample_weight) > 0.5:
    print("   ⚠️ WARNING: Weights look random! Pre-trained might not have loaded.")
else:
    print("   ✓ Pre-trained weights loaded successfully")

# ============================================================================
# DIAGNOSTIC 2: CHECK TRANSFORMS
# ============================================================================

print("\n2. Checking data transforms...")
print(f"   Train transforms: {train_transforms}")

# Test transform output
sample_batch, _ = next(iter(train_loader))
print(f"   Batch shape: {sample_batch.shape}")
print(f"   Batch mean: {sample_batch.mean():.4f} (should be ~0)")
print(f"   Batch std:  {sample_batch.std():.4f} (should be ~1)")
print(f"   Batch min:  {sample_batch.min():.4f}")
print(f"   Batch max:  {sample_batch.max():.4f}")

if abs(sample_batch.mean()) > 0.5:
    print("   ⚠️ WARNING: Data not normalized correctly!")
else:
    print("   ✓ Data normalization looks correct")

# ============================================================================
# DIAGNOSTIC 3: VERIFY ARCHITECTURE
# ============================================================================

print("\n3. Setting up model...")

# Freeze backbone
for param in res50_model1.parameters():
    param.requires_grad = False

# Try SIMPLE classifier first
print("   Using SIMPLE classifier for testing...")
num_features = res50_model1.fc.in_features
res50_model1.fc = nn.Linear(num_features, 101)  # Simple version!

# Move to device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
res50_model1 = res50_model1.to(device)
print(f"   Model on device: {next(res50_model1.parameters()).device}")

# Count parameters
total_params = sum(p.numel() for p in res50_model1.parameters())
trainable_params = sum(p.numel() for p in res50_model1.parameters() if p.requires_grad)
frozen_params = total_params - trainable_params

print(f"\n   Total parameters: {total_params:,}")
print(f"   Trainable: {trainable_params:,} ({100*trainable_params/total_params:.2f}%)")
print(f"   Frozen: {frozen_params:,} ({100*frozen_params/total_params:.2f}%)")

if trainable_params > 1_000_000:
    print("   ⚠️ WARNING: Too many trainable params! Backbone not frozen?")
elif trainable_params < 100_000:
    print("   ⚠️ WARNING: Too few trainable params! Classifier issue?")
else:
    print("   ✓ Parameter count looks correct")

# ============================================================================
# DIAGNOSTIC 4: TEST FORWARD PASS
# ============================================================================

print("\n4. Testing forward pass...")
res50_model1.eval()
with torch.no_grad():
    test_input = torch.randn(2, 3, 224, 224).to(device)
    test_output = res50_model1(test_input)
    print(f"   Input shape: {test_input.shape}")
    print(f"   Output shape: {test_output.shape}")
    print(f"   Output range: [{test_output.min():.2f}, {test_output.max():.2f}]")
    
    # Check if output makes sense
    if test_output.shape != (2, 101):
        print("   ⚠️ WARNING: Wrong output shape!")
    else:
        print("   ✓ Forward pass working correctly")

# ============================================================================
# DIAGNOSTIC 5: TRAINING SETUP
# ============================================================================

print("\n5. Setting up training...")
criterion = nn.CrossEntropyLoss()

# Try HIGHER learning rate
optimizer = optim.Adam(res50_model1.fc.parameters(), lr=0.003, weight_decay=1e-4)
print(f"   Learning rate: 0.003 (higher than before)")

scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=2, verbose=True
)

# ============================================================================
# QUICK 3-EPOCH TEST
# ============================================================================

print("\n" + "="*80)
print("RUNNING 3-EPOCH DIAGNOSTIC TEST")
print("="*80)

res50_model1.train()
for epoch in range(3):
    print(f"\nEpoch {epoch+1}/3")
    print("-" * 60)
    
    train_loss, train_acc = train(res50_model1, train_loader, criterion, optimizer, device)
    test_loss, test_acc = test(res50_model1, val_loader, criterion, device)
    
    print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"  Val Loss:   {test_loss:.4f},   Val Acc:   {test_acc:.2f}%")
    
    # Diagnostic checks
    if epoch == 0:
        if test_acc < 50:
            print("  ⚠️ Epoch 1 val accuracy too low! Expected 60-65%")
        elif test_acc > 55:
            print("  ✓ Epoch 1 looks good!")
        else:
            print("  ⚠️ Epoch 1 slightly low, but training...")

print("\n" + "="*80)
print("DIAGNOSTIC COMPLETE")
print("="*80)

# ============================================================================
# ANALYSIS
# ============================================================================

print("\nExpected vs Actual:")
print(f"  Epoch 1 Val Acc: Expected 60-65%, Got: {test_acc:.2f}%")

if test_acc >= 60:
    print("\n✅ GOOD! Model is working correctly.")
    print("   Continue training to 15 epochs. Should reach 78-82%.")
elif test_acc >= 50:
    print("\n⚠️ BELOW EXPECTED but training.")
    print("   Possible issues:")
    print("   - Data quality problems")
    print("   - Need more epochs")
    print("   - Learning rate too low")
elif test_acc < 50:
    print("\n❌ PROBLEM DETECTED!")
    print("   Check the diagnostics above for warnings.")
    print("   Most likely issues:")
    print("   1. Pre-trained weights didn't load")
    print("   2. Data transforms incorrect")
    print("   3. Backbone accidentally unfrozen")

# ============================================================================
# RECOMMENDATIONS
# ============================================================================

print("\n" + "="*80)
print("RECOMMENDATIONS")
print("="*80)

if test_acc < 55:
    print("""
If val accuracy is still low (<55%), try these fixes:

1. RELOAD WITH weights='IMAGENET1K_V1':
   model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

2. USE EVEN SIMPLER CLASSIFIER:
   model.fc = nn.Linear(2048, 101)
   # No dropout, no extra layers

3. INCREASE LEARNING RATE:
   optimizer = optim.SGD(model.fc.parameters(), lr=0.01, momentum=0.9)

4. CHECK YOUR DATA:
   - Are images actually 224×224?
   - Is normalization correct?
   - Any corrupted images?

5. TRY DIFFERENT OPTIMIZER:
   optimizer = optim.SGD(model.fc.parameters(), lr=0.01, momentum=0.9)
""")
else:
    print("""
Performance is acceptable. To improve:

1. Train full 15 epochs
2. Switch to complex classifier after epoch 5
3. Use learning rate scheduling
4. Consider data augmentation adjustments
""")

RESNET50 FEATURE EXTRACTION - DIAGNOSTIC MODE

1. Loading ResNet50...
   Sample weight value: 0.003514
   ✓ Pre-trained weights loaded successfully

2. Checking data transforms...
   Train transforms: Compose(
    Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=True)
    RandomResizedCrop(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=bilinear, antialias=True)
    RandomHorizontalFlip(p=0.5)
    RandomRotation(degrees=[-10.0, 10.0], interpolation=nearest, expand=False, fill=0)
    ColorJitter(brightness=(0.8, 1.2), contrast=(0.8, 1.2), saturation=(0.8, 1.2), hue=None)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)
   Batch shape: torch.Size([32, 3, 224, 224])
   Batch mean: -0.1486 (should be ~0)
   Batch std:  1.2144 (should be ~1)
   Batch min:  -2.1179
   Batch max:  2.6400
   ✓ Data normalization looks correct

3. Setting up model...
   Using SIMPLE classifier for testing...
   Model on device:

                                                             

  Train Loss: 3.6281, Train Acc: 29.48%
  Val Loss:   2.9186,   Val Acc:   42.20%
  ⚠️ Epoch 1 val accuracy too low! Expected 60-65%

Epoch 2/3
------------------------------------------------------------


                                                             

  Train Loss: 3.4734, Train Acc: 34.36%
  Val Loss:   3.1465,   Val Acc:   43.15%

Epoch 3/3
------------------------------------------------------------


                                                             

  Train Loss: 3.4141, Train Acc: 35.89%
  Val Loss:   2.9685,   Val Acc:   45.85%

DIAGNOSTIC COMPLETE

Expected vs Actual:
  Epoch 1 Val Acc: Expected 60-65%, Got: 45.85%

❌ PROBLEM DETECTED!
   Most likely issues:
   1. Pre-trained weights didn't load
   2. Data transforms incorrect
   3. Backbone accidentally unfrozen

RECOMMENDATIONS

If val accuracy is still low (<55%), try these fixes:

1. RELOAD WITH weights='IMAGENET1K_V1':
   model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

2. USE EVEN SIMPLER CLASSIFIER:
   model.fc = nn.Linear(2048, 101)
   # No dropout, no extra layers

3. INCREASE LEARNING RATE:
   optimizer = optim.SGD(model.fc.parameters(), lr=0.01, momentum=0.9)

4. CHECK YOUR DATA:
   - Are images actually 224×224?
   - Is normalization correct?
   - Any corrupted images?

5. TRY DIFFERENT OPTIMIZER:
   optimizer = optim.SGD(model.fc.parameters(), lr=0.01, momentum=0.9)



## Part C: Feature Extraction (Frozen) 

In [None]:
res50_model2=models.resnet50(pretrained=True)

# Replace classifier (2048 → 512 → 101)
res50_model2.fc= nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(2048,512),
    nn.ReLU(),
    nn.BatchNorm1d(512),
    nn.Dropout(0.5),
    nn.Linear(512,101)
)

# Unfreeze layer4 only
for param in res50_model2.layer4.parameters():
    param.requires_grad = True

# Differential learning rates
optimizer = optim.SGD([
    {'params': res50_model2.layer4.parameters(), 'lr': 1e-4},  # Backbone
    {'params': res50_model2.fc.parameters(), 'lr': 1e-3}       # Classifier
], momentum=0.9, weight_decay=1e-4)

scheduler = optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=20
)

In [None]:
epochs = 15
train_losses, test_losses = [], []
train_accs, test_accs = [], []
best_val_acc = 0.0

print("\nTraining started...")
for epoch in range(epochs):
    print(f"\nEpoch {epoch+1}/{epochs}")
    
    train_loss, train_acc = train(res50_model2, train_loader, criterion, optimizer, device)
    test_loss, test_acc = test(res50_model2, val_loader, criterion, device)
    
    # Update learning rate
    scheduler.step(test_acc)
    
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    test_losses.append(test_loss)
    test_accs.append(test_acc)
    
    print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"  Val Loss: {test_loss:.4f}, Val Acc: {test_acc:.2f}%")
    
    # Save best model
    if test_acc > best_val_acc:
        best_val_acc = test_acc
        torch.save(res50_model2.state_dict(), 'resnet_unfreeze_best.pth')
        print(f"  ✓ Best model saved! Val Acc: {test_acc:.2f}%")
    
    # Early stopping if loss explodes
    if test_loss > 100:
        print("  ⚠️ Loss exploding! Stopping early.")
        break

print(f"\n{'='*60}")
print(f"Training Complete!")
print(f"Best Validation Accuracy: {best_val_acc:.2f}%")
print(f"{'='*60}")

## Part D: Full Fine Tuning

In [None]:
res50_model3=models.resnet50(pretrained=True)

# Replace classifier (2048 → 512 → 101)
res50_model3.fc= nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(2048,512),
    nn.ReLU(),
    nn.BatchNorm1d(512),
    nn.Dropout(0.5),
    nn.Linear(512,101)
)

# Unfreeze everything
for param in model.parameters():
    param.requires_grad = True

#Very low LR for stability
optimizer = optim.SGD(
    model.parameters(), 
    lr=1e-5,  # Very low!
    momentum=0.9, 
    weight_decay=1e-4
)

# Warmup + Cosine decay
scheduler = optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=30
)

In [None]:
epochs = 20
train_losses, test_losses = [], []
train_accs, test_accs = [], []
best_val_acc = 0.0

print("\nTraining started...")
for epoch in range(epochs):
    print(f"\nEpoch {epoch+1}/{epochs}")
    
    train_loss, train_acc = train(res50_model3, train_loader, criterion, optimizer, device)
    test_loss, test_acc = test(res50_model3, val_loader, criterion, device)
    
    # Update learning rate
    scheduler.step(test_acc)
    
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    test_losses.append(test_loss)
    test_accs.append(test_acc)
    
    print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"  Val Loss: {test_loss:.4f}, Val Acc: {test_acc:.2f}%")
    
    # Save best model
    if test_acc > best_val_acc:
        best_val_acc = test_acc
        torch.save(res50_model3.state_dict(), 'resnet_fine_best.pth')
        print(f"  ✓ Best model saved! Val Acc: {test_acc:.2f}%")
    
    # Early stopping if loss explodes
    if test_loss > 100:
        print("  ⚠️ Loss exploding! Stopping early.")
        break

print(f"\n{'='*60}")
print(f"Training Complete!")
print(f"Best Validation Accuracy: {best_val_acc:.2f}%")
print(f"{'='*60}")

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models
from torchvision.models import ResNet50_Weights
from tqdm import tqdm

print("="*80)
print("RESNET50 FEATURE EXTRACTION - FIXED VERSION")
print("="*80)

# ============================================================================
# 1. DATA LOADING (USE STANDARD FOOD-101)
# ============================================================================

print("\n1. Loading Food-101 dataset...")

# ImageNet normalization (REQUIRED for pre-trained models)
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training transforms
train_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

# Test transforms
test_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

# Load Food-101 using torchvision (CLEAN LOADING)
train_dataset = datasets.Food101(
    root='./data',
    split='train',
    transform=train_transforms,
    download=True
)

test_dataset = datasets.Food101(
    root='./data',
    split='test',
    transform=test_transforms,
    download=True
)

# Split train into train + val
train_size = int(0.85 * len(train_dataset))  # 85% train
val_size = len(train_dataset) - train_size    # 15% val

train_subset, val_subset = random_split(
    train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)

print(f"   Train: {len(train_subset):,}")
print(f"   Val:   {len(val_subset):,}")
print(f"   Test:  {len(test_dataset):,}")

# Create loaders
batch_size = 32
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

print(f"   Batch size: {batch_size}")

# ============================================================================
# 2. VERIFY DATA QUALITY
# ============================================================================

print("\n2. Verifying data quality...")
sample_images, sample_labels = next(iter(train_loader))
print(f"   Batch shape: {sample_images.shape}")
print(f"   Image mean: {sample_images.mean():.3f} (expected ~0)")
print(f"   Image std:  {sample_images.std():.3f} (expected ~1)")
print(f"   Image min:  {sample_images.min():.3f}")
print(f"   Image max:  {sample_images.max():.3f}")

# ============================================================================
# 3. MODEL SETUP
# ============================================================================

print("\n3. Loading ResNet50...")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"   Device: {device}")

# Load pre-trained ResNet50
model = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)

# Freeze backbone
for param in model.parameters():
    param.requires_grad = False

# Replace classifier
model.fc = nn.Linear(2048, 101)

# Move to device
model = model.to(device)

# Verify setup
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"   Total params:     {total_params:,}")
print(f"   Trainable params: {trainable_params:,}")
print(f"   Expected:         206,948")

if trainable_params != 206948:
    print(f"   ⚠️ Mismatch! Check model setup")

# ============================================================================
# 4. TEST PRE-TRAINED FEATURES
# ============================================================================

print("\n4. Testing pre-trained features...")
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in list(val_loader)[:20]:  # Test 20 batches
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

random_acc = 100. * correct / total
print(f"   Accuracy with random FC: {random_acc:.2f}%")
print(f"   Expected: 20-30%")

if random_acc < 15:
    print("   ❌ PROBLEM: Features not working!")
    print("   This should NOT happen with proper setup")
else:
    print("   ✓ Pre-trained features working!")

# ============================================================================
# 5. TRAINING SETUP
# ============================================================================

print("\n5. Setting up training...")

criterion = nn.CrossEntropyLoss()

# HIGHER learning rate since features aren't learning
optimizer = optim.SGD(
    model.fc.parameters(),
    lr=0.1,  # Much higher!
    momentum=0.9,
    weight_decay=1e-4
)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=3, verbose=True
)

print(f"   Optimizer: SGD")
print(f"   Learning rate: 0.1 (high for frozen backbone)")
print(f"   Momentum: 0.9")

# ============================================================================
# 6. TRAINING FUNCTIONS
# ============================================================================

def train_epoch(model, loader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    
    for images, labels in tqdm(loader, desc='Training', leave=False):
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    
    return total_loss / len(loader), 100. * correct / total

def validate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in tqdm(loader, desc='Validating', leave=False):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    
    return total_loss / len(loader), 100. * correct / total

# ============================================================================
# 7. TRAINING
# ============================================================================

print("\n" + "="*80)
print("STARTING TRAINING")
print("="*80)

epochs = 15
best_val_acc = 0.0
history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

for epoch in range(epochs):
    print(f"\nEpoch {epoch+1}/{epochs}")
    print("-" * 60)
    
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)
    
    scheduler.step(val_acc)
    
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)
    
    print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"  Val Loss:   {val_loss:.4f},   Val Acc:   {val_acc:.2f}%")
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'resnet50_frozen_best.pth')
        print(f"  ✓ Best model saved! Val Acc: {val_acc:.2f}%")
    
    # Progress check
    if epoch == 0:
        if val_acc < 50:
            print(f"  ⚠️ Still low! Expected 60-70%")
        elif val_acc >= 60:
            print(f"  ✓ Epoch 1 good! On track for 78-82%")

print(f"\n{'='*60}")
print(f"TRAINING COMPLETE")
print(f"{'='*60}")
print(f"Best Val Accuracy: {best_val_acc:.2f}%")
print(f"Expected: 78-82%")
print(f"{'='*60}")

# ============================================================================
# 8. FINAL EVALUATION
# ============================================================================

print("\nFinal evaluation on test set...")
model.load_state_dict(torch.load('resnet50_frozen_best.pth'))
test_loss, test_acc = validate(model, test_loader, criterion, device)

print(f"\n{'='*60}")
print(f"TEST SET RESULTS")
print(f"{'='*60}")
print(f"Test Accuracy: {test_acc:.2f}%")
print(f"Baseline was: 40.2%")
print(f"Improvement: +{test_acc - 40.2:.2f}%")
print(f"{'='*60}")



RESNET50 FEATURE EXTRACTION - FIXED VERSION

1. Loading Food-101 dataset...


  9%|▉         | 471M/5.00G [00:17<02:29, 30.2MB/s]  