# Part A - Van Gogh Painting Classifier

## Deep Learning Project - Tel Aviv University

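This notebook implements a binary classifier that identifies Van Gogh paintings using transfer learning with ImageNet-pretrained VGG19 or AlexNet backbones (the architecture is one of the hyperparameters chosen during the search).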

### Overview:
1. **Load Data**: Scan the Post_Impressionism image directory, derive the labels from the filenames, and save the metadata (image paths and labels) to a CSV file
2. **Split Dataset**: 70% train, 15% validation, 15% test
3. **Data Augmentation**: Apply transforms for training robustness
4. **Model**: VGG19 or AlexNet pre-trained on ImageNet, with the last classifier layer replaced for binary classification (feature extractor optionally frozen)
5. **Hyperparameter Tuning**: Use Optuna to find best parameters
6. **Training**: Train final model with best hyperparameters
7. **Evaluation**: Test set metrics and visualizations

### Requirements:
- Google Colab with GPU runtime
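- Post_Impressionism image directory in one of the locations listed in Section 3; the metadata file `post_impressionism_data.csv` (originally created by Get_Post_Impressionism_Data.ipynb) is regenerated from that directory by this notebook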
- Weights & Biases account for experiment tracking

---


## 1. Environment Setup


In [None]:
# Detect Google Colab by checking whether the `google.colab` module is already loaded
import sys

IN_COLAB = 'google.colab' in sys.modules
print("‚úì Running on Google Colab" if IN_COLAB else "Running locally")


Running locally


In [2]:
# Install required packages: optuna (hyperparameter tuning), wandb (experiment tracking)
# `%pip` (rather than `!pip`) installs into the environment of the running kernel; `-q` reduces log noise.
# NOTE(review): versions are not pinned, so a fresh install may pull different releases — consider pinning.
%pip install -q optuna wandb


[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
spyder 5.3.3 requires pyqt5<5.16, which is not installed.
spyder 5.3.3 requires pyqtwebengine<5.16, which is not installed.[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


## 2. Import Libraries


In [None]:
# Import libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from tqdm import tqdm

# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

# Scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report, confusion_matrix, accuracy_score,
    precision_score, recall_score, f1_score, roc_auc_score, roc_curve
)

# Hyperparameter tuning and logging
import optuna
import wandb

# Seed NumPy and PyTorch (CPU, plus CUDA when a GPU is visible) for reproducibility
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

has_cuda = torch.cuda.is_available()
if has_cuda:
    torch.cuda.manual_seed(SEED)

# Report the runtime so the environment is recorded alongside the results
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {has_cuda}")
if has_cuda:
    print(f"GPU: {torch.cuda.get_device_name(0)}")


In [None]:
# Choose the compute device: CUDA when a GPU is visible, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Training on CPU works but is slow, so point the user at the Colab GPU setting
if device.type == 'cpu':
    print("‚ö†Ô∏è WARNING: Running on CPU. Enable GPU in Colab: Runtime -> Change runtime type -> GPU")


In [None]:
# Login to Weights & Biases for experiment tracking.
# SECURITY: credentials must never be hardcoded in a notebook. Called without a key,
# wandb.login() uses the WANDB_API_KEY environment variable or previously stored
# credentials, and otherwise prompts for the key interactively.
# NOTE(review): the API key that was previously embedded in this cell has been exposed
# to everyone with access to the notebook — revoke it and generate a new one.
wandb.login()
print("‚úì Logged in to Weights & Biases")


## 3. Data Preparation


In [None]:
# Scan Post_Impressionism directory and create metadata CSV (one row per image).
# `os`, `pd` and IN_COLAB are provided by the setup/import cells at the top of the notebook.

# Candidate dataset locations (works in both Kaggle and Colab); the first existing directory wins
possible_dirs = [
    "/kaggle/input/wikiart/Post_Impressionism",  # Kaggle
    "/content/data/Post_Impressionism",          # Colab
    "/content/Post_Impressionism",               # Colab alternative
    "/content/wikiart/Post_Impressionism",       # Colab alternative
]

base_dir = next((candidate for candidate in possible_dirs if os.path.isdir(candidate)), None)

if base_dir is None:
    raise FileNotFoundError(
        f"‚ùå Post_Impressionism directory not found!\n"
        f"   Checked: {possible_dirs}\n"
        f"   Please download images to one of these locations."
    )

print(f"‚úì Found images in: {base_dir}")

# File extensions (lower-case) that are treated as images
IMAGE_EXTENSIONS = (".jpg", ".png")

# os.listdir() returns entries in arbitrary, filesystem-dependent order. Sorting fixes the
# row order so that the seeded train/val/test split is identical on every machine.
records = []
for fname in sorted(os.listdir(base_dir)):
    if not fname.lower().endswith(IMAGE_EXTENSIONS):
        continue

    artist = fname.split("_")[0]  # Extract artist name from filename (text before the first "_")

    records.append({
        "filepath": os.path.join(base_dir, fname),
        "filename": fname,
        "artist": artist,
        "is_van_gogh": 1 if "van-gogh" in artist.lower() else 0
    })

# Explicit columns keep the schema stable even when no image matched
df = pd.DataFrame(records, columns=["filepath", "filename", "artist", "is_van_gogh"])
if df.empty:
    # Fail here with a clear message instead of a confusing error in a later cell
    raise FileNotFoundError(f"No {'/'.join(IMAGE_EXTENSIONS)} images found in: {base_dir}")

# Save metadata to CSV in the writable location of the current platform
if os.path.exists("/kaggle"):
    csv_output_path = "/kaggle/working/post_impressionism_data.csv"
elif IN_COLAB:
    csv_output_path = "/content/post_impressionism_data.csv"
else:
    csv_output_path = "post_impressionism_data.csv"
df.to_csv(csv_output_path, index=False)
print(f"‚úì Saved metadata CSV: {csv_output_path}")

print(f"\n‚úì Loaded: {len(df)} images")
print(f"  Van Gogh: {df['is_van_gogh'].sum()}")
print(f"  Other: {len(df) - df['is_van_gogh'].sum()}")


In [None]:
# Stratified 70 / 15 / 15 split: hold out 30% first, then halve the hold-out into
# validation and test. Stratifying on the label keeps the class ratio equal in every part.
train_df, temp_df = train_test_split(
    df,
    test_size=0.3,
    random_state=SEED,
    stratify=df["is_van_gogh"],
)
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    random_state=SEED,
    stratify=temp_df["is_van_gogh"],
)

# Size of each part (absolute and as a share of the full dataset)
print("Dataset splits:")
print(f"  Train: {len(train_df):5d} ({len(train_df)/len(df):.1%})")
print(f"  Val:   {len(val_df):5d} ({len(val_df)/len(df):.1%})")
print(f"  Test:  {len(test_df):5d} ({len(test_df)/len(df):.1%})")

# Share of positive (Van Gogh) samples per part — should be nearly identical
print("\nClass distribution:")
print(f"  Train - Van Gogh: {train_df['is_van_gogh'].mean():.2%}")
print(f"  Val   - Van Gogh: {val_df['is_van_gogh'].mean():.2%}")
print(f"  Test  - Van Gogh: {test_df['is_van_gogh'].mean():.2%}")


## 4. Data Transforms & Dataset Class


In [None]:
# Image pipelines: every image ends up as a 224x224 tensor normalised with ImageNet statistics
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]


def _tensor_and_normalize():
    """Return fresh instances of the shared pipeline tail: PIL image -> tensor -> normalisation."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]


# Training pipeline: random crop / flip / rotation / colour jitter as augmentation
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
    *_tensor_and_normalize(),
])

# Evaluation pipeline: plain resize only, so validation/test results are repeatable
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    *_tensor_and_normalize(),
])

print("Transforms defined ‚úì")


In [None]:
# Custom PyTorch Dataset class to load images from file paths
class VanGoghDataset(Dataset):
    """Binary-labelled image dataset backed by a metadata DataFrame.

    Args:
        df: DataFrame with a ``filepath`` column (path of the image file) and an
            ``is_van_gogh`` column (label: 1 for Van Gogh, 0 otherwise).
        transform: Optional callable applied to the loaded RGB PIL image.

    Rows whose file does not exist are dropped at construction time and the
    frame is re-indexed, so positional indices refer to ``self.df`` — not to
    the DataFrame that was passed in.
    """

    def __init__(self, df, transform=None):
        self.transform = transform
        frame = df.reset_index(drop=True)
        # astype(bool) guarantees a boolean mask even for an empty frame, so the
        # filter always selects rows and the columns are preserved
        exists_mask = frame["filepath"].map(os.path.exists).astype(bool)
        self.df = frame[exists_mask].reset_index(drop=True)
        n_missing = len(df) - len(self.df)
        if n_missing > 0:
            print(f"‚ö†Ô∏è Warning: {n_missing} files not found and removed")

    def __len__(self):
        """Number of samples whose image file existed at construction time."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        The image is the RGB picture after ``transform`` (if any); the label is a
        plain ``int``. If the file cannot be read, the error is printed and a black
        224x224 image is substituted so that a single corrupt file does not abort
        an epoch.
        """
        row = self.df.iloc[idx]
        label = int(row["is_van_gogh"])
        try:
            # The context manager closes the file handle; convert() returns an
            # independent in-memory copy that stays valid afterwards
            with Image.open(row["filepath"]) as source_image:
                image = source_image.convert("RGB")
        except Exception as e:
            # Deliberate best-effort: only *loading* is guarded, so a bug in the
            # transform pipeline is raised instead of being hidden behind black images
            print(f"Error loading {row['filepath']}: {e}")
            image = Image.new('RGB', (224, 224), color='black')
        if self.transform:
            image = self.transform(image)
        return image, label

# Build one dataset per split; only the training split uses the augmenting pipeline
train_dataset = VanGoghDataset(train_df, transform=train_transform)
val_dataset, test_dataset = (
    VanGoghDataset(split_frame, transform=eval_transform)
    for split_frame in (val_df, test_df)
)

print(f"Datasets: Train={len(train_dataset)}, Val={len(val_dataset)}, Test={len(test_dataset)}")


## 5. Training Functions


In [None]:
# Training and evaluation functions
def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over `loader` and return the mean of the per-batch losses.

    Each batch is moved to `device`, followed by forward pass, loss, backward pass and
    optimizer step. While a W&B run is active, every batch loss is logged as "batch_loss".
    """
    model.train()
    batch_losses = []
    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        batch_losses.append(loss_value)
        if wandb.run is not None:
            wandb.log({"batch_loss": loss_value})
    return sum(batch_losses) / len(loader)

def eval_one_epoch(model, loader, criterion, device):
    """Score `model` on `loader` without gradient tracking.

    Returns:
        (mean of the per-batch losses, fraction of samples whose argmax prediction equals the label)
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0, 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)
            logits = model(images)
            loss_sum += criterion(logits, labels).item()
            n_correct += (logits.argmax(dim=1) == labels).sum().item()
            n_seen += labels.size(0)
    mean_loss = loss_sum / len(loader)
    accuracy = n_correct / n_seen
    return mean_loss, accuracy

print("Training functions defined ‚úì")


## 6. Hyperparameter Tuning with Optuna


In [None]:
# Create model: supports both VGG19 and AlexNet (project requirement)
def create_model(model_name='VGG19', freeze_features=True, dropout=0.5):
    """
    Build an ImageNet-pretrained backbone whose last classifier layer is replaced by a 2-class head.

    Args:
        model_name: 'VGG19' or 'AlexNet'
        freeze_features: If True, the parameters under `model.features` get requires_grad=False
            (only the classifier is trained)
        dropout: Dropout probability placed in front of the new output layer

    Returns:
        The model, moved to the notebook-level `device`.

    Raises:
        ValueError: if `model_name` is neither 'VGG19' nor 'AlexNet'.
    """
    # Both architectures are handled identically (a `features` sub-module, final Linear
    # at classifier[6]); only the torchvision constructor differs.
    constructor_names = {'VGG19': 'vgg19', 'AlexNet': 'alexnet'}
    if model_name not in constructor_names:
        raise ValueError(f"Unknown model: {model_name}")

    build_backbone = getattr(models, constructor_names[model_name])
    model = build_backbone(weights='IMAGENET1K_V1')

    if freeze_features:
        model.features.requires_grad_(False)

    # Swap the original output layer for Dropout + a 2-unit Linear layer
    last_linear = model.classifier[6]
    model.classifier[6] = nn.Sequential(
        nn.Dropout(p=dropout),
        nn.Linear(last_linear.in_features, 2),  # Binary classification
    )

    return model.to(device)

def objective(trial):
    """Optuna objective: sample hyperparameters, train briefly, return validation accuracy.

    Args:
        trial: Optuna trial used to sample hyperparameters and to report the
            validation accuracy after every epoch.

    Returns:
        The highest validation accuracy observed during the short training run.

    Raises:
        optuna.exceptions.TrialPruned: when the study's pruner asks to stop the trial.
    """
    # Optuna suggests hyperparameters
    model_name = trial.suggest_categorical("model_name", ["VGG19", "AlexNet"])  # Project requirement: both models
    lr = trial.suggest_float("lr", 1e-5, 5e-4, log=True)  # Reduced max LR for stability (was 1e-2)
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "SGD", "AdamW"])
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64])  # Added 8 for smaller GPUs
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-2, log=True)
    # Momentum is only sampled (and therefore only stored in the trial params) for SGD
    momentum = trial.suggest_float("momentum", 0.8, 0.99) if optimizer_name == "SGD" else 0.0
    freeze_features = trial.suggest_categorical("freeze_features", [True, False])
    dropout = trial.suggest_float("dropout", 0.0, 0.7)  # Added dropout search

    # Create DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    run = wandb.init(
        project="VanGogh_Classifier",
        name=f"trial_{trial.number}_{model_name}",
        config={"model_name": model_name, "lr": lr, "optimizer": optimizer_name, "batch_size": batch_size,
                "weight_decay": weight_decay, "momentum": momentum,
                "freeze_features": freeze_features, "dropout": dropout},
        reinit=True
    )

    # try/finally: the W&B run is closed on every exit path — normal return, pruning,
    # or an unexpected error — so a failed trial cannot leave a run open
    try:
        model = create_model(model_name=model_name, freeze_features=freeze_features, dropout=dropout)
        criterion = nn.CrossEntropyLoss()
        # Frozen parameters are excluded from the optimizer
        trainable_params = [p for p in model.parameters() if p.requires_grad]

        if optimizer_name == "SGD":
            optimizer = torch.optim.SGD(trainable_params, lr=lr, momentum=momentum, weight_decay=weight_decay)
        elif optimizer_name == "AdamW":
            optimizer = torch.optim.AdamW(trainable_params, lr=lr, weight_decay=weight_decay)
        else:
            optimizer = torch.optim.Adam(trainable_params, lr=lr, weight_decay=weight_decay)

        # Train for a few epochs (quick evaluation for hyperparameter search)
        num_epochs = 3  # Reduced for faster trials (30-60 min total search time)
        best_val_acc = 0.0
        for epoch in range(num_epochs):
            train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
            val_loss, val_acc = eval_one_epoch(model, val_loader, criterion, device)
            best_val_acc = max(best_val_acc, val_acc)
            wandb.log({"epoch": epoch, "train_loss": train_loss, "val_loss": val_loss,
                       "val_acc": val_acc, "best_val_acc": best_val_acc})
            trial.report(val_acc, epoch)
            if trial.should_prune():  # Stop bad trials early
                raise optuna.exceptions.TrialPruned()
        return best_val_acc
    finally:
        run.finish()

print("Optuna objective function defined ‚úì (supports VGG19 and AlexNet)")


In [None]:
# Run hyperparameter search with Optuna
# Project requirement: must take at least 30 minutes, max 60 minutes
import time

N_TRIALS = 10            # 10 trials for 30-60 min window
SEARCH_TIMEOUT_S = 3600  # No new trial is started after 60 minutes

print("Starting hyperparameter search...")
print("="*60)

study = optuna.create_study(
    direction="maximize",
    # Seeded sampler: without a seed the suggested hyperparameters differ on every run
    sampler=optuna.samplers.TPESampler(seed=SEED),
    # The objective reports steps 0..2 (3 epochs). With n_warmup_steps=2 pruning could only
    # trigger after the last epoch, i.e. after all the work was done; 1 allows pruning
    # after the second epoch.
    pruner=optuna.pruners.MedianPruner(n_startup_trials=3, n_warmup_steps=1)
)

# Track start time
start_time = time.time()

# Run optimization (stops after N_TRIALS trials or at the timeout, whichever comes first)
study.optimize(objective, n_trials=N_TRIALS, timeout=SEARCH_TIMEOUT_S, show_progress_bar=True)

# Calculate elapsed time
elapsed_time = time.time() - start_time
elapsed_minutes = elapsed_time / 60

# study.trials also contains pruned and failed trials, so count the states separately
completed_trials = study.get_trials(deepcopy=False, states=[optuna.trial.TrialState.COMPLETE])
pruned_trials = study.get_trials(deepcopy=False, states=[optuna.trial.TrialState.PRUNED])

print("\n" + "="*60)
print("HYPERPARAMETER SEARCH COMPLETE!")
print("="*60)
print(f"\n‚è±Ô∏è  Time taken: {elapsed_minutes:.2f} minutes ({elapsed_time:.0f} seconds)")
print(f"üìä Completed trials: {len(completed_trials)} / {N_TRIALS} "
      f"(pruned: {len(pruned_trials)}, started: {len(study.trials)})")

# Check project requirements
if elapsed_minutes < 30:
    print(f"\n‚ö†Ô∏è  WARNING: Search took only {elapsed_minutes:.2f} minutes!")
    print("   Project requires at least 30 minutes. Consider increasing n_trials.")
elif elapsed_minutes > 60:
    print(f"\n‚ö†Ô∏è  WARNING: Search took {elapsed_minutes:.2f} minutes (exceeded 60 min limit)")
else:
    print(f"\n‚úì Time requirement met: {elapsed_minutes:.2f} minutes (30-60 min range)")

# best_value / best_params are only defined once at least one trial has completed
if not completed_trials:
    raise RuntimeError("No Optuna trial completed successfully; cannot select best hyperparameters.")

print(f"\nBest validation accuracy: {study.best_value:.4f}")
print(f"\nBest hyperparameters:")
for key, value in study.best_params.items():
    print(f"  {key}: {value}")


## 7. Train Final Model


In [None]:
# Train final model with best hyperparameters
best_params = study.best_params
print("Training final model with best parameters:")
for k, v in best_params.items():
    print(f"  {k}: {v}")

# Setup: DataLoaders, model, optimizer, scheduler
final_train_loader = DataLoader(train_dataset, batch_size=best_params['batch_size'], shuffle=True, num_workers=2)
final_val_loader = DataLoader(val_dataset, batch_size=best_params['batch_size'], shuffle=False, num_workers=2)

final_model = create_model(
    model_name=best_params.get('model_name', 'VGG19'),
    freeze_features=best_params.get('freeze_features', False),
    dropout=best_params.get('dropout', 0.5)
)
criterion = nn.CrossEntropyLoss()
# Frozen parameters are excluded from the optimizer
trainable_params = [p for p in final_model.parameters() if p.requires_grad]

# 'momentum' is only present in best_params when SGD was selected by the search
if best_params['optimizer'] == "SGD":
    final_optimizer = torch.optim.SGD(trainable_params, lr=best_params['lr'],
                                       momentum=best_params.get('momentum', 0.9),
                                       weight_decay=best_params.get('weight_decay', 1e-4))
elif best_params['optimizer'] == "AdamW":
    final_optimizer = torch.optim.AdamW(trainable_params, lr=best_params['lr'],
                                         weight_decay=best_params.get('weight_decay', 1e-4))
else:
    final_optimizer = torch.optim.Adam(trainable_params, lr=best_params['lr'],
                                        weight_decay=best_params.get('weight_decay', 1e-4))

# The scheduler patience must be smaller than the early-stopping patience below. With both
# set to 3, training stopped on the 3rd epoch without improvement while the scheduler would
# only have acted on the 4th, so the learning rate was effectively never reduced.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(final_optimizer, mode='max', factor=0.5, patience=1)

# Training loop with early stopping
run = wandb.init(project="VanGogh_Classifier", name="final_model_training",
                 config={**best_params, "training_type": "final"}, reinit=True)

num_epochs = 15  # Reduced for time efficiency
# Start below any possible accuracy so the first epoch always yields a checkpoint
# (with 0.0, a run that never exceeded 0% accuracy left best_model_state as None)
best_val_acc = float("-inf")
best_model_state = None
patience = 3  # Early stopping patience
epochs_without_improvement = 0
train_losses, val_losses, val_accuracies = [], [], []

print("\nStarting training...")
print("="*60)

# try/finally: the W&B run is closed even when training is interrupted or fails
try:
    for epoch in range(num_epochs):
        train_loss = train_one_epoch(final_model, final_train_loader, final_optimizer, criterion, device)
        val_loss, val_acc = eval_one_epoch(final_model, final_val_loader, criterion, device)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        # mode='max': the scheduler tracks validation accuracy
        scheduler.step(val_acc)
        wandb.log({"epoch": epoch, "train_loss": train_loss, "val_loss": val_loss,
                   "val_acc": val_acc, "lr": final_optimizer.param_groups[0]['lr']})

        print(f"Epoch {epoch+1:02d}/{num_epochs} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # Snapshot on the CPU so the checkpoint does not occupy GPU memory
            best_model_state = {k: v.cpu().clone() for k, v in final_model.state_dict().items()}
            epochs_without_improvement = 0
            print(f"  ‚úì New best model! (Val Acc: {best_val_acc:.4f})")
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= patience:
            print(f"\nEarly stopping after {epoch+1} epochs")
            break

    # Restore the weights of the best epoch before evaluating / saving
    final_model.load_state_dict(best_model_state)
    print(f"\n{'='*60}")
    print(f"Training complete! Best validation accuracy: {best_val_acc:.4f}")

    torch.save({'model_state_dict': best_model_state, 'best_params': best_params,
                'best_val_acc': best_val_acc}, 'best_vangogh_classifier.pth')
    print("Model saved to 'best_vangogh_classifier.pth'")
finally:
    run.finish()


## 8. Visualizations


In [None]:
# Plot training curves: loss on the left panel, validation accuracy on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
loss_ax, acc_ax = axes

# Left panel — train vs. validation loss per epoch
for loss_series, legend_text, line_color in (
    (train_losses, 'Train Loss', '#1f77b4'),
    (val_losses, 'Validation Loss', '#ff7f0e'),
):
    loss_ax.plot(loss_series, label=legend_text, color=line_color, linewidth=2)
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Validation Loss', fontweight='bold')
loss_ax.legend()
loss_ax.grid(True, alpha=0.3)

# Right panel — validation accuracy, with the best value marked as a dashed line
acc_ax.plot(val_accuracies, label='Validation Accuracy', color='#2ca02c', linewidth=2, marker='o')
acc_ax.axhline(y=best_val_acc, color='r', linestyle='--', label=f'Best: {best_val_acc:.4f}')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Validation Accuracy', fontweight='bold')
acc_ax.legend()
acc_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('training_curves.png', dpi=150)
plt.show()
print("Saved: training_curves.png")


## 9. Test Set Evaluation


In [None]:
# Held-out evaluation: run the trained model once over the test split (final performance metric)
test_loader = DataLoader(test_dataset, batch_size=best_params['batch_size'], shuffle=False, num_workers=2)

final_model.eval()
all_preds, all_labels, all_probs = [], [], []

with torch.no_grad():
    for images, labels in tqdm(test_loader, desc="Evaluating"):
        logits = final_model(images.to(device))
        class_probs = torch.softmax(logits, dim=1)
        all_preds.extend(logits.argmax(dim=1).cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
        # Column 1 is the probability assigned to class 1 (Van Gogh)
        all_probs.extend(class_probs[:, 1].cpu().numpy())

all_preds = np.array(all_preds)
all_labels = np.array(all_labels)
all_probs = np.array(all_probs)

# Calculate metrics (the AUC uses the probabilities, everything else the hard predictions)
test_accuracy = accuracy_score(all_labels, all_preds)
test_precision = precision_score(all_labels, all_preds)
test_recall = recall_score(all_labels, all_preds)
test_f1 = f1_score(all_labels, all_preds)
test_auc = roc_auc_score(all_labels, all_probs)

print("="*60)
print("TEST SET RESULTS")
print("="*60)
print(f"\nüìä Metrics:")
print(f"   Accuracy:  {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print(f"   Precision: {test_precision:.4f}")
print(f"   Recall:    {test_recall:.4f}")
print(f"   F1-Score:  {test_f1:.4f}")
print(f"   AUC-ROC:   {test_auc:.4f}")

print(f"\nüìã Classification Report:")
print(classification_report(all_labels, all_preds, target_names=['Not Van Gogh', 'Van Gogh']))


In [None]:
# Confusion Matrix (left panel) and ROC Curve (right panel) for the test set
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
cm_ax, roc_ax = axes
class_names = ['Not Van Gogh', 'Van Gogh']

# Left panel — rows are the actual classes, columns the predicted ones
cm = confusion_matrix(all_labels, all_preds)
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    ax=cm_ax,
    xticklabels=class_names,
    yticklabels=class_names,
    annot_kws={'size': 14},
)
cm_ax.set_xlabel('Predicted')
cm_ax.set_ylabel('Actual')
cm_ax.set_title('Confusion Matrix', fontweight='bold')

# Right panel — ROC curve with the diagonal (dashed) as chance-level reference
fpr, tpr, _ = roc_curve(all_labels, all_probs)
roc_ax.plot(fpr, tpr, color='#1f77b4', linewidth=2, label=f'ROC (AUC = {test_auc:.4f})')
roc_ax.plot([0, 1], [0, 1], color='gray', linestyle='--')
roc_ax.fill_between(fpr, tpr, alpha=0.2, color='#1f77b4')
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('ROC Curve', fontweight='bold')
roc_ax.legend()
roc_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('confusion_matrix_roc.png', dpi=150)
plt.show()
print("Saved: confusion_matrix_roc.png")


In [None]:
# Visualize False Positives (images predicted as Van Gogh but are not)
# This is important for analysis in Part 3
# (`plt` and `Image` are already imported in the import cell at the top of the notebook.)

# Predictions were produced in the order of test_dataset (loader with shuffle=False).
# VanGoghDataset drops rows whose file is missing and re-indexes its frame, so prediction i
# belongs to test_dataset.df.iloc[i] — which can differ from test_df.iloc[i].
fp_frame = test_dataset.df

# Find False Positives: predicted Van Gogh (1) while the true label is 0
false_positives = [
    (idx, prob)
    for idx, (pred, label, prob) in enumerate(zip(all_preds, all_labels, all_probs))
    if pred == 1 and label == 0
]

# Sort by confidence (highest probability first)
false_positives.sort(key=lambda x: x[1], reverse=True)

# Display top False Positives on a fixed 3 x 4 grid
GRID_ROWS, GRID_COLS = 3, 4
num_to_show = min(GRID_ROWS * GRID_COLS, len(false_positives))
if num_to_show > 0:
    fig, axes = plt.subplots(GRID_ROWS, GRID_COLS, figsize=(16, 12))
    fig.suptitle('False Positives: Predicted as Van Gogh (but are not)', fontsize=16, fontweight='bold')

    grid_axes = list(axes.flat)  # row-major order: left to right, top to bottom
    for ax in grid_axes:
        ax.axis('off')  # also hides the panels that stay empty

    for ax, (idx, prob) in zip(grid_axes, false_positives[:num_to_show]):
        record = fp_frame.iloc[idx]
        # Context manager closes the file handle; convert() yields an in-memory RGB copy
        with Image.open(record['filepath']) as source_image:
            ax.imshow(source_image.convert("RGB"))
        ax.set_title(f'Conf: {prob:.3f}\n{record["artist"]}', fontsize=10)

    plt.tight_layout()
    plt.savefig('false_positives.png', dpi=150)
    plt.show()
    print(f"Saved: false_positives.png")
    print(f"\nTotal False Positives: {len(false_positives)} / {len(all_preds)}")
else:
    print("No False Positives found!")


## 10. Final Summary


In [None]:
# Final Summary: dataset size, selected hyperparameters, headline metrics and saved artefacts
banner = "="*60
print(banner)
print("PART A - VAN GOGH CLASSIFIER - SUMMARY")
print(banner)

print("\nüìÅ Dataset:")
print(f"   Total: {len(df)} | Van Gogh: {df['is_van_gogh'].sum()} | Other: {len(df)-df['is_van_gogh'].sum()}")
print(f"   Split: 70% train / 15% val / 15% test")

print("\nüîß Best Hyperparameters:")
for k, v in best_params.items():
    print(f"   {k}: {v}")

print("\nüìä Performance:")
print(f"   Best Val Acc:  {best_val_acc:.4f} ({best_val_acc*100:.2f}%)")
print(f"   Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print(f"   Test F1-Score: {test_f1:.4f}")
print(f"   Test AUC-ROC:  {test_auc:.4f}")

print("\nüíæ Saved Files:")
for saved_line in (
    "   best_vangogh_classifier.pth",
    "   training_curves.png",
    "   confusion_matrix_roc.png",
):
    print(saved_line)

print("\n‚úÖ Part A Complete!")
print(banner)
