In [1]:
import pandas as pd
import torch
from torch.utils.data import DataLoader
from sklearn.model_selection import StratifiedKFold
from dataset_class import MessidorOpenCVDataset
from preprocess_class import OpenCV_DR_Preprocessor
from transforms import light_transform, heavy_transform, test_transform
from train import train_model
from utils.class_weights import get_class_weights
from models.ResNet import get_resnet18_model  # or your model of choice


In [2]:
# 2. Set up
root_dir = '/workspace/DR_Training/MESSIDOR'
preprocessor = OpenCV_DR_Preprocessor(apply_clahe=True, apply_roi_mask=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = 2
n_splits = 5

# 3. Load full dataframe
full_dataset = MessidorOpenCVDataset(
    root_dir=root_dir,
    preprocessor=preprocessor,
    light_transform=light_transform,
    heavy_transform=heavy_transform,
    minority_classes=[1]
)
df = full_dataset.data

# 4. Set up cross-validation
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
all_fold_metrics = []

In [3]:
for fold, (train_idx, val_idx) in enumerate(skf.split(df, df['Retinopathy grade'])):
    print(f"\n--- Fold {fold+1}/{n_splits} ---")
    train_df = df.iloc[train_idx].reset_index(drop=True)
    val_df = df.iloc[val_idx].reset_index(drop=True)

    train_dataset = MessidorOpenCVDataset(
        root_dir=root_dir,
        preprocessor=preprocessor,
        light_transform=light_transform,
        heavy_transform=heavy_transform,
        minority_classes=[1]
    )
    train_dataset.data = train_df

    val_dataset = MessidorOpenCVDataset(
        root_dir=root_dir,
        preprocessor=preprocessor,
        light_transform=test_transform,
        heavy_transform=test_transform,
        minority_classes=[1]
    )
    val_dataset.data = val_df

    # Create loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4, pin_memory=True)

    # Compute class weights for this fold
    class_weights = get_class_weights(train_loader.dataset, num_classes=num_classes, device=str(device))

    # Initialize your model
    model = get_resnet18_model(num_classes=num_classes)

    # Train
    trained_model = train_model(
        model, train_loader, val_loader,
        num_epochs=10,  # or your preferred number
        learning_rate=2e-4,
        device=str(device),
        class_weights=class_weights,
        model_id=f'resnet_18_fold_{fold+1}'
    )

    # After training, read the CSV and store the best epoch's metrics
    fold_log = pd.read_csv(f'logs/fold_{fold+1}.csv')
    best_row = fold_log.loc[fold_log['val_loss'].idxmin()]  # or use best val_auc, etc.
    all_fold_metrics.append(best_row)


--- Fold 1/5 ---




Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 181MB/s]


Epoch 1/10, Loss: 0.2427, Val Loss: 0.4244, Val Accuracy: 0.8292, Val AUC: 0.9226
✅ Validation loss improved, saving best model.
Epoch 2/10, Loss: 0.1125, Val Loss: 0.7729, Val Accuracy: 0.7875, Val AUC: 0.9287
⚠️ No improvement in val loss for 1 epoch(s).
Epoch 3/10, Loss: 0.1717, Val Loss: 0.8498, Val Accuracy: 0.6458, Val AUC: 0.9348
⚠️ No improvement in val loss for 2 epoch(s).
Epoch 4/10, Loss: 0.1528, Val Loss: 0.4541, Val Accuracy: 0.8625, Val AUC: 0.9544
⚠️ No improvement in val loss for 3 epoch(s).
Epoch 5/10, Loss: 0.1023, Val Loss: 0.3008, Val Accuracy: 0.8917, Val AUC: 0.9477
✅ Validation loss improved, saving best model.
Epoch 6/10, Loss: 0.1369, Val Loss: 0.4080, Val Accuracy: 0.8833, Val AUC: 0.9540
⚠️ No improvement in val loss for 1 epoch(s).
Epoch 7/10, Loss: 0.1020, Val Loss: 0.8625, Val Accuracy: 0.8000, Val AUC: 0.9529
⚠️ No improvement in val loss for 2 epoch(s).
Epoch 8/10, Loss: 0.0927, Val Loss: 0.8230, Val Accuracy: 0.8208, Val AUC: 0.9668
⚠️ No improvement in

FileNotFoundError: [Errno 2] No such file or directory: 'logs/fold_1.csv'

In [None]:
metrics_df = pd.DataFrame(all_fold_metrics)
print("\n=== Cross-Validation Results ===")
print(metrics_df.describe())  # mean, std, etc. for each metric

# Optionally, print just the mean for each metric
print("\nMean metrics across folds:")
print(metrics_df.mean())

In [5]:
import torch

torch.save(trained_model.state_dict(), "saved_models/resnet_binary_lr2e4_messidor.pth")
print("Model saved successfully!")

RuntimeError: Parent directory saved_models does not exist.