In [3]:
%load_ext autoreload
%autoreload 2
from prepare.preprocessing import process_all_scans
from prepare.patch_extraction import extract_patches_neg, extract_patches_pos
from prepare.build_training_index import create_balanced_training_csv

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
'''
[LUNA_PATH]/
            annotations.csv
            candidates.csv
            
            candidates_V2/
                        candidates_V2.csv
                        
            subset0/
                        *.mhd
                        *.raw
            subset1/
                        *.mhd
                        *.raw

'''

# Every path the pipeline stages consume is defined in this one cell.
# The string above sketches the directory layout expected under LUNA_PATH.
# All locations are absolute Windows paths specific to one machine.

# INPUT: dataset root plus the annotation / candidate CSVs beneath it.
LUNA_PATH = r"D:\archive"
ANNOTATION_FILE = rf"{LUNA_PATH}\annotations.csv"
CANDIDATES_FILE = rf"{LUNA_PATH}\candidates_V2\candidates_V2.csv"

# OUTPUT: directory handed to Stage 1 as the preprocessing destination.
OUTPUT_PATH = r"D:\fill_lung_true\output"
PREPROCESSED_OUTPUT = rf"{OUTPUT_PATH}\preprocessed_luna16"

# OUTPUT: patch folder, metadata CSV and patch index share one project folder.
# NOTE(review): this root differs from OUTPUT_PATH above — confirm that the
# metadata CSV is really expected here and not next to PREPROCESSED_OUTPUT.
_PROJECT_OUTPUT = r"C:\Users\azizd\python\pytorch\final_project\output"
PATCH_OUTPUT = rf"{_PROJECT_OUTPUT}\patches"
METADATA_FILE = rf"{_PROJECT_OUTPUT}\preprocessed_metadata.csv"
PATCH_CSV = rf"{_PROJECT_OUTPUT}\patch_data.csv"

# Relative path: the checkpoint is written to the kernel's working directory.
CHECKPOINT_PATH = "best_model.pt"

In [18]:
# --- Stage 1: Preprocess All Scans ---
# Hands the dataset root and the preprocessing output directory to the helper.
# NOTE(review): the output saved with this cell is a pandas
# "truth value of a Series is ambiguous" ValueError raised from inside the
# call; the offending comparison is not visible in this notebook.
process_all_scans(
    LUNA_PATH,
    PREPROCESSED_OUTPUT,
    fill_lung_structures=True,
)

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [None]:
# --- Stage 2.a: Extract 3D Positive Patches for Training ---
# Every argument is passed by keyword; patches of size 32 are requested with
# augmentation switched on, and results go to PATCH_OUTPUT.
extract_patches_pos(
    metadata_csv=METADATA_FILE,
    annotation_csv=ANNOTATION_FILE,
    output_folder=PATCH_OUTPUT,
    patch_size=32,
    augmentation=True,
)

In [None]:
# --- Stage 2.b: Extract 3D Negative Patches for Training ---
# Uses `extract_patches_neg`, the name this notebook imports from
# prepare.patch_extraction. The previous call target,
# `extract_negative_patches_from_candidates`, is never imported, so the cell
# raised NameError on a fresh kernel.
# NOTE(review): the keyword names below are carried over unchanged — confirm
# they match the signature of extract_patches_neg.
extract_patches_neg(
    candidates_csv=CANDIDATES_FILE,
    annotations_csv=ANNOTATION_FILE,
    metadata_csv=METADATA_FILE,
    output_folder=PATCH_OUTPUT,
    patch_size=32,
    max_negatives_per_scan=5,
)

In [12]:
# --- Stage 3: Create Patches CSV File for Training ---
# Writes the patch index to PATCH_CSV. With these flags the saved output
# reports 2372 positive / 3696 negative patches before balancing and
# 2372 / 2372 afterwards.
create_balanced_training_csv(
    patch_folder=PATCH_OUTPUT,
    output_csv=PATCH_CSV,
    downsample_neg=True,
    oversample_pos=False,
)

Original: 2372 positive, 3696 negative
Balanced dataset saved: C:\Users\azizd\python\pytorch\final_project\output\patch_data.csv
Final counts → Positive: 2372, Negative: 2372


In [6]:
# --- Stage 4: Training ---
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split

from prepare.classes import LunaPatchDataset, Advanced3DAugment
from model.model import Luna3DCNN, run_training 

In [7]:
# --- Hyperparams ---
# Batch size given to both the training and the validation DataLoader.
BATCH_SIZE: int = 16
# Number of passes of the training loop; also the `epochs` value of the
# OneCycleLR schedule.
NUM_EPOCHS: int = 20
# Learning rate passed to Adam when the optimizer is constructed.
LR: float = 1e-4

In [13]:
# --- Stratified Split ---
# Load the patch index written by Stage 3.
df = pd.read_csv(PATCH_CSV)

# 80/20 split that preserves the label ratio; the fixed seed keeps the
# partition identical between runs.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df["label"],
    random_state=42,
)

# Indices are reset so each dataset sees a contiguous 0..n-1 index.
# No transform is applied to either split.
train_dataset = LunaPatchDataset(train_df.reset_index(drop=True), transform=None)
val_dataset = LunaPatchDataset(val_df.reset_index(drop=True), transform=None)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# --- Model ---
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = Luna3DCNN().to(device)

# --- Loss & Optimizer ---
# BCEWithLogitsLoss takes raw logits; sigmoid is applied only for metrics.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LR,
    weight_decay=1e-5,
)
# The schedule length is epochs * steps_per_epoch, so the scheduler must be
# stepped once per training batch.
# NOTE(review): OneCycleLR derives its own learning-rate trajectory from
# max_lr, so the `lr=LR` given to Adam does not bound the rate actually used —
# confirm that max_lr=1e-3 is the intended peak.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=1e-3,
    epochs=NUM_EPOCHS,
    steps_per_epoch=len(train_loader),
)

In [14]:
# --- Train/Val Loop ---
def run_epoch(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader`` and return the loss plus classification metrics.

    Passing an ``optimizer`` selects training mode: weights are updated after
    every batch and the notebook-level ``scheduler`` is stepped. Without an
    optimizer the model is put in eval mode and the forward pass runs with
    gradient tracking disabled.

    Relies on two notebook globals: ``device`` (where each batch is moved) and
    ``scheduler`` (used only in training mode).

    Args:
        model: Network invoked as ``model(x)``. Its output must have the same
            shape as the ``(-1, 1)`` target so ``criterion`` accepts the pair.
        loader: Iterable yielding ``(x, y)`` tensor batches.
        criterion: Loss invoked as ``criterion(logits, y)``.
        optimizer: Optimizer to step, or ``None`` for an evaluation pass.

    Returns:
        Tuple ``(mean_loss, accuracy, auc, precision, recall, f1, best_thresh)``.
        AUC is computed on the sigmoid probabilities. The other metrics use
        ``best_thresh``: the threshold in 0.20..0.80 (step 0.05) with the
        highest F1 on this pass's own predictions, or 0.5 when no threshold
        reaches an F1 above zero.
    """
    is_train = optimizer is not None
    if is_train:
        model.train()
    else:
        model.eval()

    losses, all_labels, all_preds = [], [], []

    for x, y in tqdm(loader, desc="Train" if is_train else "Val"):
        x = x.to(device).float()
        y = y.to(device).float().view(-1, 1)

        if is_train:
            optimizer.zero_grad()

        # Track gradients only when they will be used, so an evaluation pass
        # does not build an autograd graph.
        with torch.set_grad_enabled(is_train):
            logits = model(x)
            loss = criterion(logits, y)

        if is_train:
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0)
            optimizer.step()
            scheduler.step()  # per-batch step; never reached during evaluation

        probs = torch.sigmoid(logits).detach().cpu().numpy()
        all_preds.extend(probs)
        all_labels.extend(y.cpu().numpy())
        losses.append(loss.item())

    # Each collected item is a length-1 row; flatten to 1-D for the metrics.
    y_true = np.array(all_labels).ravel()
    y_pred = np.array(all_preds).ravel()

    # Choose the decision threshold that maximises F1 on this pass.
    # zero_division=0 keeps thresholds with no predicted positives silent.
    best_f1, best_thresh = 0, 0.5
    for t in np.arange(0.2, 0.81, 0.05):
        y_bin = (y_pred > t).astype(int)
        f1 = f1_score(y_true, y_bin, zero_division=0)
        if f1 > best_f1:
            best_f1, best_thresh = f1, t

    y_pred_bin = (y_pred > best_thresh).astype(int)
    auc = roc_auc_score(y_true, y_pred)
    acc = accuracy_score(y_true, y_pred_bin)
    precision = precision_score(y_true, y_pred_bin, zero_division=0)
    recall = recall_score(y_true, y_pred_bin, zero_division=0)
    f1 = f1_score(y_true, y_pred_bin, zero_division=0)

    return np.mean(losses), acc, auc, precision, recall, f1, best_thresh

In [15]:
# --- Training ---
# Highest validation AUC seen so far; a checkpoint is written whenever a
# later epoch strictly beats it.
best_auc = 0
for epoch in range(NUM_EPOCHS):
    print(f"\nEpoch {epoch+1}/{NUM_EPOCHS}")

    # Passing the optimizer makes this an updating pass; omitting it below
    # makes the second call evaluation-only.
    train_loss, train_acc, train_auc, _, _, _, _ = run_epoch(
        model, train_loader, criterion, optimizer
    )
    (
        val_loss,
        val_acc,
        val_auc,
        val_prec,
        val_rec,
        val_f1,
        best_thresh,
    ) = run_epoch(model, val_loader, criterion)

    print(f"[Train] Loss: {train_loss:.4f} | Acc: {train_acc:.4f} | AUC: {train_auc:.4f}")
    print(f"[Val]   Loss: {val_loss:.4f} | Acc: {val_acc:.4f} | AUC: {val_auc:.4f} | F1: {val_f1:.4f} | P: {val_prec:.4f} | R: {val_rec:.4f} | T: {best_thresh:.2f}")

    # No strict improvement: keep the previous checkpoint.
    if not (val_auc > best_auc):
        continue

    best_auc = val_auc
    torch.save(model.state_dict(), CHECKPOINT_PATH)
    print(f"✅ Saved new best model with AUC {best_auc:.4f}")


Epoch 1/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:13<00:00, 17.43it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:02<00:00, 26.16it/s]


[Train] Loss: 0.6946 | Acc: 0.5001 | AUC: 0.5031
[Val]   Loss: 0.7077 | Acc: 0.4995 | AUC: 0.7443 | F1: 0.6662 | P: 0.4995 | R: 1.0000 | T: 0.20
✅ Saved new best model with AUC 0.7443

Epoch 2/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:08<00:00, 26.66it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 51.34it/s]


[Train] Loss: 0.5993 | Acc: 0.6300 | AUC: 0.7330
[Val]   Loss: 56.4623 | Acc: 0.5005 | AUC: 0.5000 | F1: 0.0000 | P: 0.0000 | R: 0.0000 | T: 0.50

Epoch 3/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:08<00:00, 26.63it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 54.33it/s]


[Train] Loss: 0.5308 | Acc: 0.7257 | AUC: 0.7936
[Val]   Loss: 26.4377 | Acc: 0.4995 | AUC: 0.5000 | F1: 0.6662 | P: 0.4995 | R: 1.0000 | T: 0.20

Epoch 4/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:08<00:00, 26.56it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 56.86it/s]


[Train] Loss: 0.5096 | Acc: 0.7531 | AUC: 0.8039
[Val]   Loss: 858.1126 | Acc: 0.4995 | AUC: 0.5000 | F1: 0.6662 | P: 0.4995 | R: 1.0000 | T: 0.20

Epoch 5/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:08<00:00, 26.57it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 57.61it/s]


[Train] Loss: 0.4425 | Acc: 0.7929 | AUC: 0.8627
[Val]   Loss: 99.6546 | Acc: 0.5005 | AUC: 0.5000 | F1: 0.0000 | P: 0.0000 | R: 0.0000 | T: 0.50

Epoch 6/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:08<00:00, 26.46it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 56.88it/s]


[Train] Loss: 0.4149 | Acc: 0.8129 | AUC: 0.8810
[Val]   Loss: 274.6281 | Acc: 0.4995 | AUC: 0.5000 | F1: 0.6662 | P: 0.4995 | R: 1.0000 | T: 0.20

Epoch 7/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:08<00:00, 26.65it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 57.78it/s]


[Train] Loss: 0.4223 | Acc: 0.8013 | AUC: 0.8819
[Val]   Loss: 134.4920 | Acc: 0.5005 | AUC: 0.5000 | F1: 0.0000 | P: 0.0000 | R: 0.0000 | T: 0.50

Epoch 8/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:09<00:00, 26.28it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 55.91it/s]


[Train] Loss: 0.3724 | Acc: 0.8430 | AUC: 0.9082
[Val]   Loss: 1.2754 | Acc: 0.5026 | AUC: 0.5928 | F1: 0.6676 | P: 0.5011 | R: 1.0000 | T: 0.80

Epoch 9/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:09<00:00, 26.17it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 57.32it/s]


[Train] Loss: 0.3606 | Acc: 0.8432 | AUC: 0.9142
[Val]   Loss: 96.6387 | Acc: 0.4995 | AUC: 0.5000 | F1: 0.6662 | P: 0.4995 | R: 1.0000 | T: 0.20

Epoch 10/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:09<00:00, 26.05it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 57.28it/s]


[Train] Loss: 0.3483 | Acc: 0.8582 | AUC: 0.9231
[Val]   Loss: 24.7655 | Acc: 0.4995 | AUC: 0.5000 | F1: 0.6662 | P: 0.4995 | R: 1.0000 | T: 0.20

Epoch 11/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:09<00:00, 26.06it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 57.52it/s]


[Train] Loss: 0.3202 | Acc: 0.8653 | AUC: 0.9293
[Val]   Loss: 118.8609 | Acc: 0.4995 | AUC: 0.5000 | F1: 0.6662 | P: 0.4995 | R: 1.0000 | T: 0.20

Epoch 12/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:09<00:00, 25.84it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 56.78it/s]


[Train] Loss: 0.3186 | Acc: 0.8682 | AUC: 0.9321
[Val]   Loss: 28.8939 | Acc: 0.5005 | AUC: 0.7515 | F1: 0.0000 | P: 0.0000 | R: 0.0000 | T: 0.50
✅ Saved new best model with AUC 0.7515

Epoch 13/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:09<00:00, 25.91it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 57.25it/s]


[Train] Loss: 0.3160 | Acc: 0.8735 | AUC: 0.9333
[Val]   Loss: 3.9233 | Acc: 0.5005 | AUC: 0.8075 | F1: 0.0000 | P: 0.0000 | R: 0.0000 | T: 0.50
✅ Saved new best model with AUC 0.8075

Epoch 14/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:09<00:00, 25.92it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 56.65it/s]


[Train] Loss: 0.2927 | Acc: 0.8862 | AUC: 0.9435
[Val]   Loss: 41.7573 | Acc: 0.5005 | AUC: 0.7334 | F1: 0.0000 | P: 0.0000 | R: 0.0000 | T: 0.50

Epoch 15/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:09<00:00, 25.76it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 57.13it/s]


[Train] Loss: 0.2783 | Acc: 0.8914 | AUC: 0.9469
[Val]   Loss: 24.3827 | Acc: 0.5005 | AUC: 0.7950 | F1: 0.0000 | P: 0.0000 | R: 0.0000 | T: 0.50

Epoch 16/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:09<00:00, 25.79it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 57.45it/s]


[Train] Loss: 0.2620 | Acc: 0.8964 | AUC: 0.9522
[Val]   Loss: 10.9188 | Acc: 0.5005 | AUC: 0.5239 | F1: 0.0000 | P: 0.0000 | R: 0.0000 | T: 0.50

Epoch 17/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:09<00:00, 25.70it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 57.18it/s]


[Train] Loss: 0.2480 | Acc: 0.9004 | AUC: 0.9561
[Val]   Loss: 5.0144 | Acc: 0.5005 | AUC: 0.7785 | F1: 0.0000 | P: 0.0000 | R: 0.0000 | T: 0.50

Epoch 18/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:09<00:00, 25.74it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 57.38it/s]


[Train] Loss: 0.2396 | Acc: 0.9057 | AUC: 0.9585
[Val]   Loss: 1.7500 | Acc: 0.5205 | AUC: 0.9356 | F1: 0.0771 | P: 1.0000 | R: 0.0401 | T: 0.20
✅ Saved new best model with AUC 0.9356

Epoch 19/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:09<00:00, 25.62it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 56.45it/s]


[Train] Loss: 0.2344 | Acc: 0.9117 | AUC: 0.9615
[Val]   Loss: 0.9793 | Acc: 0.7882 | AUC: 0.9573 | F1: 0.8194 | P: 0.7136 | R: 0.9620 | T: 0.80
✅ Saved new best model with AUC 0.9573

Epoch 20/20


Train: 100%|█████████████████████████████████████████████████████████████████████████| 238/238 [00:09<00:00, 25.56it/s]
Val: 100%|█████████████████████████████████████████████████████████████████████████████| 60/60 [00:01<00:00, 56.27it/s]

[Train] Loss: 0.2130 | Acc: 0.9204 | AUC: 0.9677
[Val]   Loss: 0.2167 | Acc: 0.9241 | AUC: 0.9642 | F1: 0.9198 | P: 0.9741 | R: 0.8713 | T: 0.60
✅ Saved new best model with AUC 0.9642



