In [None]:
import sys
sys.path.append("../")

import numpy as np
import pandas as pd
import torch
from tqdm import tqdm

# Import from src
from src import SERIES_DIR, TRAIN_CSV
from src.bricks import Predictor
from src.models import UNet3DClassifier

# Error Analysis and Hard Negatives

This notebook uses the Predictor class to identify:
- False negatives (missed aneurysms)
- False positives (incorrect detections)
- Hard negatives for model improvement

In [None]:
# Read the training metadata table from TRAIN_CSV (the message below treats
# each row as one series).
df_train = pd.read_csv(TRAIN_CSV)
print(f"Loaded {df_train.shape[0]} series")

## Load Model

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Path to a trained checkpoint (a state_dict written with torch.save).
# NOTE(review): the notebook ships without a checkpoint path. While this stays
# None the classifier keeps whatever weights its constructor produces
# (presumably random initialisation), so the error analysis below does not
# describe a trained model. Set it before trusting any result.
MODEL_WEIGHTS_PATH = None  # e.g. "path/to/model.pth"

model = UNet3DClassifier(in_ch=1, base_ch=16).to(device)
if MODEL_WEIGHTS_PATH is not None:
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(MODEL_WEIGHTS_PATH, map_location=device))
model.eval()  # evaluation mode: no training-time behaviour during prediction

if MODEL_WEIGHTS_PATH is None:
    # Be explicit instead of claiming a model was "loaded".
    print(f"WARNING: no weights loaded; model on {device} is untrained")
else:
    print(f"Model loaded on {device}")

## Initialize Predictor

In [None]:
# Build the Predictor around the single model, registered under the key
# "default". cube_size and stride are given per axis: 48 and 24 on each of
# the three axes.
predictor = Predictor(
    model_dict=dict(default=model),
    cube_size=(48,) * 3,
    stride=(24,) * 3,
    device=device,
)
print("Predictor initialized")

## Analyze Errors

Run predictions on every series that is available locally, then record the false positives and false negatives at a probability threshold of 0.5.

In [None]:
import os

# Names of the sub-directories of SERIES_DIR; plain files are skipped.
available_series = list(
    filter(
        lambda name: os.path.isdir(os.path.join(SERIES_DIR, name)),
        os.listdir(SERIES_DIR),
    )
)

# Keep only the rows whose SeriesInstanceUID matches one of those directory
# names. The copy detaches df_local from df_train.
df_local = df_train.loc[df_train["SeriesInstanceUID"].isin(available_series)].copy()

print(f"Available series: {len(df_local)}")
print(f"With aneurysm: {df_local['Aneurysm Present'].sum()}")
print(f"Without aneurysm: {(1 - df_local['Aneurysm Present']).sum()}")

In [None]:
# Position in the predictor output that is compared against the
# "Aneurysm Present" label.
# NOTE(review): 13 is carried over unchanged from the original cell; confirm
# it against the label order returned by Predictor.predict_series.
ANEURYSM_PRESENT_INDEX = 13

threshold = 0.5  # probabilities strictly above this count as a positive call

false_negatives = []  # label 1, predicted 0
false_positives = []  # label 0, predicted 1
failed_series = []    # series whose prediction raised; kept so they are not silently lost

# Only these two columns are read, so iterate over them directly instead of
# materialising a full row object per series.
labelled_series = zip(df_local["SeriesInstanceUID"], df_local["Aneurysm Present"])

# Gradients are never used in this analysis. Disabling autograd here is
# harmless if Predictor already does so, and otherwise avoids retaining
# graphs for every volume.
with torch.no_grad():
    for series_uid, true_label in tqdm(labelled_series, total=len(df_local), desc="Analyzing"):
        series_path = os.path.join(SERIES_DIR, series_uid)

        try:
            predictions = predictor.predict_series(series_path)
            pred_prob = predictions[ANEURYSM_PRESENT_INDEX]
            pred_label = 1 if pred_prob > threshold else 0

            if true_label == 1 and pred_label == 0:
                false_negatives.append({"series_uid": series_uid, "probability": pred_prob})
            elif true_label == 0 and pred_label == 1:
                false_positives.append({"series_uid": series_uid, "probability": pred_prob})
        except Exception as e:
            # Best effort: one unreadable series must not abort the whole run,
            # but remember it so the totals below can be qualified.
            print(f"Error on {series_uid}: {e}")
            failed_series.append({"series_uid": series_uid, "error": str(e)})

print("\nResults:")
print(f"False negatives: {len(false_negatives)}")
print(f"False positives: {len(false_positives)}")
if failed_series:
    print(f"Failed series (excluded from the counts above): {len(failed_series)}")

## Save Results

In [None]:
def _save_error_table(records, csv_path, label):
    """Write one list of misclassified series to CSV and return it as a DataFrame.

    Args:
        records: list of dicts with "series_uid" and "probability" keys.
        csv_path: destination file; it is overwritten on every call.
        label: human-readable name used in the confirmation message.

    Returns:
        The DataFrame that was written. With no records it is empty but still
        has the two column headers.
    """
    table = pd.DataFrame(records, columns=["series_uid", "probability"])
    table.to_csv(csv_path, index=False)
    print(f"Saved {len(records)} {label}")
    return table


# Always rewrite both files, even when a list is empty. Otherwise a CSV left
# over from an earlier run would stay on disk and look like the current result.
fn_df = _save_error_table(false_negatives, "false_negatives.csv", "false negatives")
fp_df = _save_error_table(false_positives, "false_positives.csv", "false positives")