In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

# --- Hyperparameters and device selection ---
LEARNING_RATE = 1e-4
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {DEVICE}")

# --- Loss function ---
# CrossEntropyLoss takes raw logits plus integer class targets, which matches
# the two-logit head installed on the model below.
criterion = nn.CrossEntropyLoss()

# --- Model ---
# Start from a pretrained DenseNet-121 (torchvision's DEFAULT weights).
model = models.densenet121(weights=models.DenseNet121_Weights.DEFAULT)

# Width of the feature vector that feeds the stock classifier head.
num_features = model.classifier.in_features

# Swap the stock head for a fresh two-logit linear layer (binary task).
model.classifier = nn.Linear(in_features=num_features, out_features=2)

# Parameters must live on the same device as the batches they will see.
model = model.to(DEVICE)

# --- Optimizer ---
# Adam over every parameter: the whole network is fine-tuned, not just the head.
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

print("✅ Model, loss function, and optimizer are ready.")

# Training and Evaluation Functions
import time
import torch
import torch.nn as nn
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

def train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Module to optimise; it is switched to training mode.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor pairs.
            Batches whose first dimension is 0 are skipped.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        tuple[float, float]: ``(epoch_loss, epoch_acc)`` where the loss is the
        sample-weighted mean of the batch losses and the accuracy is the
        fraction of correct argmax predictions. If no sample was processed
        (empty loader, or only empty batches) the result is ``(nan, 0.0)``
        instead of a division-by-zero error.
    """
    model.train()

    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for inputs, labels in dataloader:
        # An empty batch carries nothing to learn from; skip it before paying
        # for a device transfer or a forward pass.
        if inputs.size(0) == 0:
            continue
        inputs, labels = inputs.to(device), labels.to(device)

        # Gradients accumulate by default, so clear them for every batch.
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Statistics. The loss is scaled by the batch size so that a smaller
        # final batch does not skew the epoch average.
        running_loss += loss.item() * inputs.size(0)
        preds = outputs.argmax(dim=1)
        # Accumulate a plain Python int so the counter has the same type
        # whether or not any batch was processed.
        correct_predictions += (preds == labels).sum().item()
        total_samples += labels.size(0)

    if total_samples == 0:
        # Nothing was processed: report an undefined loss and zero accuracy.
        return float("nan"), 0.0

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    return epoch_loss, epoch_acc

def evaluate(model, dataloader, criterion, device):
    """Compute loss and accuracy of ``model`` over ``dataloader`` without training.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor pairs.
            Batches whose first dimension is 0 are skipped.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        tuple[float, float]: ``(epoch_loss, epoch_acc)`` where the loss is the
        sample-weighted mean of the batch losses and the accuracy is the
        fraction of correct argmax predictions. If no sample was processed
        (empty loader, or only empty batches) the result is ``(nan, 0.0)``
        instead of a division-by-zero error.
    """
    model.eval()

    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for inputs, labels in dataloader:
            # Skip empty batches before any device transfer or forward pass.
            if inputs.size(0) == 0:
                continue
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Scale by batch size so a smaller final batch does not skew the mean.
            running_loss += loss.item() * inputs.size(0)
            preds = outputs.argmax(dim=1)
            # Accumulate a plain Python int so the counter has the same type
            # whether or not any batch was processed.
            correct_predictions += (preds == labels).sum().item()
            total_samples += labels.size(0)

    if total_samples == 0:
        # Nothing was processed: report an undefined loss and zero accuracy.
        return float("nan"), 0.0

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    return epoch_loss, epoch_acc

# Code below is copied from 04_data_augmentation.ipynb

import torch
from torchvision import transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from PIL import Image
from torch.utils.data import WeightedRandomSampler
import time

# Building blocks shared by both pipelines. Each helper returns freshly
# constructed transform objects, so the two pipelines never share an instance.
def _resize_and_gray():
    """Return the leading steps: resize to 224x224, then 3-channel grayscale."""
    return [
        transforms.Resize((224, 224)),
        transforms.Grayscale(num_output_channels=3),
    ]


def _to_normalized_tensor():
    """Return the trailing steps: tensor conversion, then per-channel normalisation."""
    return [
        transforms.ToTensor(),
        # The same mean/std is applied to all three (identical) channels.
        transforms.Normalize(mean=[0.485] * 3, std=[0.229] * 3),
    ]


# 1. Transformations for the TRAINING set: shared steps with random
#    augmentation (flip, rotation, affine jitter, brightness/contrast) in between.
train_transforms = transforms.Compose(
    _resize_and_gray()
    + [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(15),
        transforms.RandomAffine(degrees=10, translate=(0.2, 0.2), scale=(0.8, 1.2)),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
    ]
    + _to_normalized_tensor()
)

# 2. Transformations for the VALIDATION set: shared steps only, no randomness.
val_transforms = transforms.Compose(_resize_and_gray() + _to_normalized_tensor())

# 3. Custom Dataset class with filtering of bad samples inside __getitem__
class MammogramDataset(Dataset):
    """Dataset of mammogram images described by the rows of a DataFrame.

    Each row is read through its ``'image_path'`` and ``'label'`` columns; the
    label must be ``'benign'`` or ``'malignant'``. A sample that cannot be
    loaded, transformed or labelled is reported on stdout and returned as
    ``(None, None)`` so that a collate function can drop it.
    """

    def __init__(self, df, transform=None):
        """Store the sample table and the optional per-image transform.

        Args:
            df: DataFrame with ``'image_path'`` and ``'label'`` columns.
            transform: Optional callable applied to the loaded PIL image.
        """
        # Re-index to 0..n-1 so the integer ``idx`` handed to ``__getitem__``
        # is always a valid label for ``.loc``, even for a sliced DataFrame.
        self.df = df.reset_index(drop=True)
        self.transform = transform
        self.label_map = {'benign': 0, 'malignant': 1}

    def __len__(self):
        """Return the number of rows (including any that may fail to load)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``, or ``(None, None)`` on failure."""
        row = self.df.loc[idx]
        try:
            # The context manager releases the underlying file as soon as the
            # pixel data has been copied out by ``convert``.
            with Image.open(row['image_path']) as source:
                img = source.convert('L')
            if self.transform is not None:
                img = self.transform(img)
            label = self.label_map[row['label']]
            return img, label
        except Exception as e:
            # Deliberately broad: one bad sample must not abort a whole epoch.
            print(f"⚠️  Error loading image {row['image_path']} at index {idx}: {e}")
            # Return None to signal bad sample, handle in collate_fn or dataloader
            return None, None

# 4. Custom collate_fn to filter out None samples
def collate_fn(batch):
    """Collate a batch while dropping samples flagged as bad.

    Args:
        batch: List of ``(image, label)`` pairs; a pair whose image is ``None``
            marks a sample that failed to load and is discarded.

    Returns:
        The default collation of the remaining samples. If every sample was
        discarded, a pair of empty tensors (first dimension 0) is returned so
        the caller can detect and skip the batch; the empty label tensor uses
        an integer dtype, like real labels.
    """
    # ``default_collate`` is a function of ``torch.utils.data``, not an
    # attribute of the ``DataLoader`` class. Imported locally so this function
    # does not depend on the file's import block.
    from torch.utils.data import default_collate

    valid_samples = [sample for sample in batch if sample[0] is not None]
    if not valid_samples:
        return torch.tensor([]), torch.empty(0, dtype=torch.long)

    return default_collate(valid_samples)

# 5. Load the processed CSVs and carve a validation set out of the training data
import pandas as pd
from sklearn.model_selection import GroupShuffleSplit

BASE_DIR = r'C:\Users\aryan\OneDrive\Desktop\hopescan_project\data'
train_df = pd.read_csv(f'{BASE_DIR}/processed_train_data.csv')
test_df = pd.read_csv(f'{BASE_DIR}/processed_test_data.csv')

print("➡️ Splitting data based on 'patient_id' to prevent data leakage...")
# Grouping by patient keeps every row of a given patient on one side of the split.
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
patient_groups = train_df['patient_id']
# n_splits=1, so the first pair of index arrays produced is the only one.
train_indices, val_indices = next(splitter.split(train_df, groups=patient_groups))

# 6. Create the final training and validation DataFrames (positional selection)
train_df_split, val_df_split = train_df.iloc[train_indices], train_df.iloc[val_indices]

print(f"Training samples: {len(train_df_split)} | Validation samples: {len(val_df_split)}")
print(f"Number of unique patients in training set: {train_df_split['patient_id'].nunique()}")
print(f"Number of unique patients in validation set: {val_df_split['patient_id'].nunique()}")

# 7. Wrap each DataFrame in a Dataset. Only the training set is augmented;
#    validation and test share the deterministic pipeline.
train_dataset = MammogramDataset(train_df_split, transform=train_transforms)
val_dataset = MammogramDataset(val_df_split, transform=val_transforms)
test_dataset = MammogramDataset(test_df, transform=val_transforms)

print("➡️ Creating weighted sampler to address class imbalance...")
class_counts = train_df_split['label'].value_counts()
# Inverse-frequency weights, ordered to match label_map (benign=0, malignant=1).
class_weights = torch.tensor(
    [1.0 / class_counts[name] for name in ('benign', 'malignant')]
).float()
print(f"Class counts: {class_counts.to_dict()}")
print(f"Class weights: {class_weights}")

# 8. One weight per training row, looked up from that row's class
print("➡️ Creating sample weights...")
labels = train_df_split['label'].map(train_dataset.label_map).tolist()
_weight_by_class = class_weights.tolist()
sample_weights = [_weight_by_class[label] for label in labels]


# Draw as many samples per epoch as there are training rows, with replacement,
# so rows of the rarer class are drawn more often.
sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True,
)

print("✅ WeightedRandomSampler created successfully.")

# 9. Create DataLoaders

print("➡️ Creating DataLoaders...")
# Settings common to all three loaders.
# NOTE(review): BASE_DIR suggests this runs on Windows, and this looks like a
# notebook; worker processes (num_workers > 0) may be unable to load a Dataset
# class or collate_fn defined in the notebook itself — confirm, and try
# num_workers=0 if iteration hangs or fails.
_loader_kwargs = dict(batch_size=16, num_workers=4, collate_fn=collate_fn)

# Training order comes from the weighted sampler; validation and test keep
# their dataset order.
train_loader = DataLoader(train_dataset, sampler=sampler, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

# These counts are dataset lengths; no image has been opened at this point.
print(f"Valid training samples: {len(train_dataset)}")
print(f"Valid validation samples: {len(val_dataset)}")
print(f"Valid test samples: {len(test_dataset)}")
print("✅ Setup complete, ready to train!")

# Training Loop
NUM_EPOCHS = 5  # Number of full passes over the training data
best_val_acc = 0.0  # Highest validation accuracy seen so far

print("🚀 Starting model training...")
start_time = time.time()

for epoch in range(NUM_EPOCHS):
    epoch_start_time = time.time()

    # Run one epoch of training, then measure on the validation set
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, DEVICE)
    val_loss, val_acc = evaluate(model, val_loader, criterion, DEVICE)

    epoch_duration = time.time() - epoch_start_time

    print(f"\n--- Epoch {epoch+1}/{NUM_EPOCHS} ---")
    print(f"Time: {epoch_duration:.2f}s")
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.4f}")

    # Checkpoint only on a strict improvement, so the file always holds the
    # weights of the best-scoring epoch so far.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        # Save the model's state dictionary (weights only, not the optimizer)
        torch.save(model.state_dict(), 'best_model_checkpoint.pth')
        print("✅ New best model saved!")

total_training_time = time.time() - start_time
print(f"\nTraining finished in {total_training_time / 60:.2f} minutes.")
print(f"Best validation accuracy: {best_val_acc:.4f}")

Using device: cuda
✅ Model, loss function, and optimizer are ready.
➡️ Splitting data based on 'patient_id' to prevent data leakage...
Training samples: 2277 | Validation samples: 585
Number of unique patients in training set: 998
Number of unique patients in validation set: 250
➡️ Creating weighted sampler to address class imbalance...
Class counts: {'benign': 1340, 'malignant': 937}
Class weights: tensor([0.0007, 0.0011])
➡️ Creating sample weights...
✅ WeightedRandomSampler created successfully.
➡️ Creating DataLoaders...
Valid training samples: 2277
Valid validation samples: 585
Valid test samples: 704
✅ Setup complete, ready to train!
🚀 Starting model training...
