In [1]:
import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms
from tqdm.auto import tqdm
from sklearn.metrics import roc_auc_score, f1_score, precision_recall_curve
import numpy as np
from torchvision.models import densenet121, DenseNet121_Weights, efficientnet_b2, EfficientNet_B2_Weights, efficientnet_b3, EfficientNet_B3_Weights
import matplotlib.pyplot as plt
import glob, json
import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
CONFIG = {
    'batch_size': 8,
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    'data_dir': './processed_images/data',
    'models_dir': './Final Models',
    'seed': 42
}
disease_list = [
    'Atelectasis','Cardiomegaly','Consolidation','Edema','Effusion',
    'Emphysema','Fibrosis','Hernia','Infiltration','Mass',
    'Nodule','Pleural_Thickening','Pneumonia','Pneumothorax'
]
print('Device:', CONFIG['device'])
print('Models dir:', CONFIG['models_dir'])

Device: cuda
Models dir: ./Final Models


In [3]:
transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])
def get_label_vector(labels_str):
    labels = labels_str.split('|')
    if labels == ['No Finding']:
        return [0]*len(disease_list)
    return [1 if d in labels else 0 for d in disease_list]
class CheXNetDataset(Dataset):
    def __init__(self, df, image_to_folder, transform=None):
        self.df = df.reset_index(drop=True)
        self.image_to_folder = image_to_folder
        self.transform = transform
    def __len__(self):
        return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = row['Image Index']
        folder = self.image_to_folder[img]
        path = os.path.join(folder, img)
        image = Image.open(path).convert('RGB')
        if self.transform: image = self.transform(image)
        labels = torch.tensor(get_label_vector(row['Finding Labels']), dtype=torch.float)
        return image, labels

In [4]:
# Load CSV and map images
data_path = CONFIG['data_dir']
csv_file = os.path.join(data_path, 'Data_Entry_2017.csv')
df = pd.read_csv(csv_file)
image_folders = [os.path.join(data_path, f'images_{str(i).zfill(3)}','images') for i in range(1,13)]
image_to_folder = {}
for folder in image_folders:
    if os.path.exists(folder):
        for f in os.listdir(folder):
            if f.endswith('.png'): image_to_folder[f]=folder
df = df[df['Image Index'].isin(image_to_folder.keys())].reset_index(drop=True)
# split same as other notebook
unique_patients = df['Patient ID'].unique()
train_val_patients, test_patients = train_test_split(unique_patients, test_size=0.02, random_state=CONFIG['seed'])
train_patients, val_patients = train_test_split(train_val_patients, test_size=0.052, random_state=CONFIG['seed'])
test_df = df[df['Patient ID'].isin(test_patients)]
val_df = df[df['Patient ID'].isin(val_patients)]
valloader = DataLoader(CheXNetDataset(val_df, image_to_folder, transform=transform_test), batch_size=CONFIG['batch_size'], shuffle=False, num_workers=0)
testloader = DataLoader(CheXNetDataset(test_df, image_to_folder, transform=transform_test), batch_size=CONFIG['batch_size'], shuffle=False, num_workers=0)
print('Test size:', len(test_df),'Val size:', len(val_df))

Test size: 2299 Val size: 5974


In [5]:
def create_densenet_model():
    model = densenet121(weights=DenseNet121_Weights.IMAGENET1K_V1)
    model.classifier = nn.Linear(model.classifier.in_features, 14)
    return model

def create_efficientnet_b2_model():
    model = efficientnet_b2(weights=EfficientNet_B2_Weights.IMAGENET1K_V1)
    num_features = model.classifier[1].in_features
    model.classifier = nn.Linear(num_features, 14)
    return model

def create_efficientnet_b3_model():
    model = efficientnet_b3(weights=EfficientNet_B3_Weights.IMAGENET1K_V1)
    num_features = model.classifier[1].in_features
    model.classifier = nn.Linear(num_features, 14)
    return model

# Attention mechanism classes (moved to global scope for proper pickling)
class ChannelAttention(nn.Module):
    def __init__(self, in_planes, ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
        max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
        out = avg_out + max_out
        return self.sigmoid(out)

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        padding = 3 if kernel_size == 7 else 1
        
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)

class CBAM(nn.Module):
    def __init__(self, in_planes, ratio=16, kernel_size=7):
        super(CBAM, self).__init__()
        self.ca = ChannelAttention(in_planes, ratio)
        self.sa = SpatialAttention(kernel_size)

    def forward(self, x):
        x = x * self.ca(x)
        x = x * self.sa(x)
        return x

class DenseNetCBAM(nn.Module):
    def __init__(self, num_classes=14):
        super(DenseNetCBAM, self).__init__()
        # Load pretrained DenseNet121
        self.densenet = densenet121(weights=DenseNet121_Weights.IMAGENET1K_V1)
        
        # Get the number of features from the last layer
        num_features = self.densenet.classifier.in_features
        
        # Remove the classifier layer
        self.densenet.classifier = nn.Identity()
        
        # Add CBAM attention after feature extraction
        self.attention = CBAM(num_features)
        
        # Add global average pooling
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        
        # Add dropout and final classifier
        self.dropout = nn.Dropout(0.2)
        self.classifier = nn.Linear(num_features, num_classes)
        
    def forward(self, x):
        # Extract features using DenseNet backbone
        features = self.densenet.features(x)
        
        # Apply attention mechanism
        attended_features = self.attention(features)
        
        # Global average pooling
        pooled = self.global_pool(attended_features)
        pooled = pooled.view(pooled.size(0), -1)
        
        # Apply dropout and classifier
        output = self.dropout(pooled)
        output = self.classifier(output)
        
        return output

def create_densenet_attention_model():
    return DenseNetCBAM()


def detect_model_architecture(model_path):
    try:
        state_dict = torch.load(model_path, map_location='cpu')
        
        # Check for attention model
        attention_keys = ['attention.ca.fc1.weight', 'attention.ca.fc2.weight', 'attention.sa.conv1.weight']
        densenet_prefixed_keys = ['densenet.features.conv0.weight', 'densenet.features.norm0.weight']
        if all(key in state_dict for key in attention_keys) and any(key in state_dict for key in densenet_prefixed_keys):
            return 'densenet_attention'
        
        # Check for standard models
        densenet_keys = ['features.conv0.weight', 'features.denseblock1.denselayer1.norm1.weight']
        if all(key in state_dict for key in densenet_keys):
            return 'densenet'
        
        # For EfficientNet: use channel-based detection for accurate variant identification
        efficientnet_keys = ['features.0.0.weight', 'features.1.0.block.0.0.weight']
        if any(key in state_dict for key in efficientnet_keys):
            variant = detect_efficientnet_variant(model_path)
            if variant in ['efficientnet_b2', 'efficientnet_b3']:
                return variant
            else:
                print(f"Detected EfficientNet but unknown variant: {variant}")
                return 'efficientnet_unknown'
        
        return 'unknown'
    except Exception as e:
        print(f"Error: {e}")
        return 'unknown'

In [6]:
# Helper: flexible state_dict loader
def load_state_dict_flexible(model, checkpoint_path, device='cpu'):
    """Load checkpoint into model by matching keys and shapes where possible.
    Returns list of skipped keys and mismatched keys for debugging.
    """
    checkpoint = torch.load(checkpoint_path, map_location=device)
    if 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    else:
        state_dict = checkpoint

    model_state = model.state_dict()
    loaded_keys = []
    skipped_missing = []
    skipped_shape = []

    for k, v in state_dict.items():
        if k in model_state:
            if model_state[k].shape == v.shape:
                model_state[k] = v
                loaded_keys.append(k)
            else:
                skipped_shape.append((k, v.shape, model_state[k].shape))
        else:
            skipped_missing.append(k)

    # Load updated state dict into model
    model.load_state_dict(model_state)

    return {
        'loaded_keys': loaded_keys,
        'skipped_missing': skipped_missing,
        'skipped_shape': skipped_shape
    }

# Helper: detect exact EfficientNet variant from checkpoint
def detect_efficientnet_variant(checkpoint_path):
    """Detect EfficientNet variant (B2, B3, etc.) by inspecting first conv layer channels"""
    try:
        checkpoint = torch.load(checkpoint_path, map_location='cpu')
        if 'state_dict' in checkpoint:
            state_dict = checkpoint['state_dict']
        else:
            state_dict = checkpoint
        
        # Look for first conv layer to determine variant
        first_conv_key = 'features.0.0.weight'
        if first_conv_key in state_dict:
            out_channels = state_dict[first_conv_key].shape[0]
            
            # EfficientNet channel mappings (first conv output channels)
            variant_map = {
                32: 'efficientnet_b2',  # B2 starts with 32 channels
                40: 'efficientnet_b3',  # B3 starts with 40 channels
                48: 'efficientnet_b4',  # B4 starts with 48 channels (future support)
                # Add more variants as needed
            }
            
            if out_channels in variant_map:
                return variant_map[out_channels]
            else:
                print(f"Warning: Unknown EfficientNet variant with {out_channels} channels")
                return 'efficientnet_unknown'
        
        return 'not_efficientnet'
    except Exception as e:
        print(f"Error detecting EfficientNet variant: {e}")
        return 'error'

In [7]:
# Load models
model_paths = glob.glob(os.path.join(CONFIG['models_dir'],'*.pth'))
models=[]; model_names=[]
for p in model_paths:
    try:
        arch = detect_model_architecture(p)
        if arch=='densenet': m=create_densenet_model(); n='DenseNet121'
        elif arch=='densenet_attention': m=create_densenet_attention_model(); n='DenseNet+CBAM'
        elif arch=='efficientnet_b2': m=create_efficientnet_b2_model(); n='EfficientNet-B2'
        elif arch=='efficientnet_b3': m=create_efficientnet_b3_model(); n='EfficientNet-B3'
        else: print('Skip',os.path.basename(p)); continue
        try:
            # Try strict load first
            m.load_state_dict(torch.load(p,map_location=CONFIG['device']))
        except Exception as e:
            print('Strict load failed for',os.path.basename(p),"-> trying flexible loader. Error:",e)
            info = load_state_dict_flexible(m, p, device=CONFIG['device'])
            print('Flexible loader report:', f"loaded {len(info['loaded_keys'])} keys, skipped {len(info['skipped_missing'])} missing, {len(info['skipped_shape'])} shape-mismatches")
            if len(info['loaded_keys'])==0:
                print('No compatible keys loaded; skipping', os.path.basename(p))
                continue
        m.to(CONFIG['device']); m.eval()
        models.append(m); model_names.append(os.path.basename(p))
        print('Loaded',os.path.basename(p),n)
    except Exception as e:
        print('Fail',os.path.basename(p),e)
print('Total loaded',len(models))

Loaded seed 22 - densnet 121 - focal loss.pth DenseNet121
Loaded seed 22 - densnet 121 - ZLPR loss.pth DenseNet121
Loaded seed 22 - densnet 121 - ZLPR loss.pth DenseNet121
Loaded seed 32 - densenet 121 - ZLPR loss.pth DenseNet121
Loaded seed 32 - densenet 121 - ZLPR loss.pth DenseNet121
Loaded seed 32- densnet121 - focal loss.pth DenseNet121
Loaded seed 32- densnet121 - focal loss.pth DenseNet121
Loaded seed 42 - densnet 121 - focal loss.pth DenseNet121
Loaded seed 42 - densnet 121 - focal loss.pth DenseNet121
Loaded seed 42 - densnet 121 - ZLPR loss.pth DenseNet121
Loaded seed 42 - densnet 121 - ZLPR loss.pth DenseNet121
Loaded seed 42 - densnet121- Attention - focal loss.pth DenseNet+CBAM
Loaded seed 42 - densnet121- Attention - focal loss.pth DenseNet+CBAM
Loaded seed 42 - efficienet b3 - focal loss.pth EfficientNet-B3
Loaded seed 42 - efficienet b3 - focal loss.pth EfficientNet-B3
Loaded seed 42 - efficinet b2 - focal loss.pth EfficientNet-B2
Total loaded 9
Loaded seed 42 - efficin

In [8]:
def get_predictions(models,dataloader,device):
    preds=[]; labels=[]
    with torch.no_grad():
        for x,y in tqdm(dataloader):
            x=x.to(device)
            b_preds=[]
            for m in models:
                out=m(x); b_preds.append(torch.sigmoid(out).cpu())
            preds.append(torch.stack(b_preds))
            labels.append(y)
    preds=torch.cat(preds,dim=1)
    labels=torch.cat(labels,dim=0)
    return preds.numpy(), labels.numpy()
print('Get val preds')
val_predictions,val_labels=get_predictions(models,valloader,CONFIG['device'])
print('Get test preds')
test_predictions,test_labels=get_predictions(models,testloader,CONFIG['device'])
print('Shapes',val_predictions.shape,test_predictions.shape)

Get val preds


100%|██████████| 747/747 [04:36<00:00,  2.70it/s]
100%|██████████| 747/747 [04:36<00:00,  2.70it/s]


Get test preds


100%|██████████| 288/288 [01:46<00:00,  2.70it/s]

Shapes (9, 5974, 14) (9, 2299, 14)





In [9]:
# Simple average ensemble
def simple_average(predictions):
    # predictions: [num_models, num_samples, num_classes]
    return np.mean(predictions, axis=0)

ensemble_val = simple_average(val_predictions)
ensemble_test = simple_average(test_predictions)

def evaluate(labels,preds):
    thresholds=[]
    for i in range(preds.shape[1]):
        p,r,t=precision_recall_curve(labels[:,i],preds[:,i])
        f1s=2*(p*r)/(p+r+1e-8)
        thresholds.append(t[np.argmax(f1s)] if len(t)>0 else 0.5)
    preds_b=np.zeros_like(preds)
    for i in range(preds.shape[1]): preds_b[:,i]=(preds[:,i]>thresholds[i]).astype(int)
    aucs=[roc_auc_score(labels[:,i],preds[:,i]) for i in range(preds.shape[1])]
    f1s=[f1_score(labels[:,i],preds_b[:,i]) for i in range(preds.shape[1])]
    return {'avg_auc':np.mean(aucs),'avg_f1':np.mean(f1s),'aucs':aucs,'f1s':f1s}
val_res=evaluate(val_labels,ensemble_val)
test_res=evaluate(test_labels,ensemble_test)
print('Val AUC:',val_res['avg_auc'])
print('Test AUC:',test_res['avg_auc'])

Val AUC: 0.8800682123264689
Test AUC: 0.8558600734612247


In [10]:
# Test AUROC and F1 for each individual model (not ensemble) - use model_names for readable output
individual_test_aurocs = []
individual_test_f1s = []
individual_thresholds = []
for i in range(test_predictions.shape[0]):
    preds = test_predictions[i]  # preds shape: [num_samples, num_classes]
    aucs = []
    f1s = []
    thresholds = []
    for j in range(preds.shape[1]):
        p, r, t = precision_recall_curve(test_labels[:, j], preds[:, j])
        f1_scores = 2 * (p * r) / (p + r + 1e-8)
        thresh = float(t[np.argmax(f1_scores)]) if len(t) > 0 else 0.5
        thresholds.append(thresh)
        aucs.append(float(roc_auc_score(test_labels[:, j], preds[:, j])))
        preds_b = (preds[:, j] > thresh).astype(int)
        f1s.append(float(f1_score(test_labels[:, j], preds_b)))
    individual_test_aurocs.append(np.mean(aucs))
    individual_test_f1s.append(np.mean(f1s))
    individual_thresholds.append(thresholds)
# Build DataFrame with actual model names if available
model_display_names = model_names if len(model_names)==len(individual_test_aurocs) else [f'Model_{i+1}' for i in range(len(individual_test_aurocs))]

individual_results_df = pd.DataFrame({
    'Model': model_display_names,
    'Test_AUROC': np.round(individual_test_aurocs,4),
    'Test_F1': np.round(individual_test_f1s,4)
})
print('1. Individual Model Performance on Test Set:')
display(individual_results_df)

1. Individual Model Performance on Test Set:


Unnamed: 0,Model,Test_AUROC,Test_F1
0,seed 22 - densnet 121 - focal loss.pth,0.8475,0.3852
1,seed 22 - densnet 121 - ZLPR loss.pth,0.8468,0.3758
2,seed 32 - densenet 121 - ZLPR loss.pth,0.8479,0.3762
3,seed 32- densnet121 - focal loss.pth,0.8458,0.3679
4,seed 42 - densnet 121 - focal loss.pth,0.8514,0.3803
5,seed 42 - densnet 121 - ZLPR loss.pth,0.8462,0.3621
6,seed 42 - densnet121- Attention - focal loss.pth,0.848,0.3787
7,seed 42 - efficienet b3 - focal loss.pth,0.8117,0.3338
8,seed 42 - efficinet b2 - focal loss.pth,0.8322,0.3528


In [11]:
# Save results
out_dir='avg_ensemble_results'
os.makedirs(out_dir,exist_ok=True)
weights={'method':'simple_average','num_models':len(models)}
# Save mean F1 score as well
with open(os.path.join(out_dir,'summary.json'),'w') as f:
    json.dump({
        'weights':weights,
        'val_auc':float(val_res['avg_auc']),
        'val_f1':float(val_res['avg_f1']),
        'test_auc':float(test_res['avg_auc']),
        'test_f1':float(test_res['avg_f1'])
    },f,indent=2)
print('Saved to',out_dir)

Saved to avg_ensemble_results


In [12]:
# Per-class AUROC, F1, and thresholds after ensembling (formatted table)
perclass_auroc = val_res['aucs']
perclass_f1 = val_res['f1s']
perclass_thresholds = []
for i in range(ensemble_val.shape[1]):
    p, r, t = precision_recall_curve(val_labels[:, i], ensemble_val[:, i])
    f1_scores = 2 * (p * r) / (p + r + 1e-8)
    thresh = float(t[np.argmax(f1_scores)]) if len(t) > 0 else 0.5
    perclass_thresholds.append(thresh)
perclass_df = pd.DataFrame({
    'Disease': disease_list,
    'AUROC': np.round(perclass_auroc,4),
    'F1_Score': np.round(perclass_f1,4),
    'Threshold': np.round(perclass_thresholds,4)
})
print('Per-class performance (validation set, ensemble):')
display(perclass_df)
# Save per-class CSV for later reference
os.makedirs(out_dir, exist_ok=True)
perclass_df.to_csv(os.path.join(out_dir,'per_class_results.csv'), index=False)
print('Saved per-class results to', os.path.join(out_dir,'per_class_results.csv'))

Per-class performance (validation set, ensemble):


Unnamed: 0,Disease,AUROC,F1_Score,Threshold
0,Atelectasis,0.868,0.4641,0.3153
1,Cardiomegaly,0.9219,0.4367,0.2786
2,Consolidation,0.8336,0.3,0.2115
3,Edema,0.9113,0.259,0.2556
4,Effusion,0.8927,0.5373,0.3656
5,Emphysema,0.948,0.5705,0.2631
6,Fibrosis,0.8774,0.2478,0.2368
7,Hernia,0.9934,0.7059,0.2836
8,Infiltration,0.7369,0.4488,0.3081
9,Mass,0.9029,0.4744,0.327


Saved per-class results to avg_ensemble_results\per_class_results.csv


In [13]:
# Create comprehensive results tables and save CSVs
print('=' * 80)
print('ENSEMBLE RESULTS SUMMARY')
print('=' * 80)

# 1. Individual Model Performance Table (with real model names)
individual_results_df = pd.DataFrame({
    'Model': model_display_names,
    'Test_AUROC': np.round(individual_test_aurocs,4),
    'Test_F1': np.round(individual_test_f1s,4)
})
print('\n1. Individual Model Performance on Test Set:')
display(individual_results_df)
# Save CSV
os.makedirs(out_dir, exist_ok=True)
individual_results_df.to_csv(os.path.join(out_dir,'individual_results.csv'), index=False)
print('Saved individual model results to', os.path.join(out_dir,'individual_results.csv'))

# 2. Ensemble vs Best Individual Model Comparison
best_individual_auroc = float(np.max(individual_test_aurocs))
best_individual_f1 = float(np.max(individual_test_f1s))
ensemble_comparison_df = pd.DataFrame({
    'Metric': ['AUROC', 'F1_Score'],
    'Best_Individual': [round(best_individual_auroc,4), round(best_individual_f1,4)],
    'Simple_Average_Ensemble': [round(float(test_res['avg_auc']),4), round(float(test_res['avg_f1']),4)],
})
ensemble_comparison_df['Improvement'] = ensemble_comparison_df['Simple_Average_Ensemble'] - ensemble_comparison_df['Best_Individual']
ensemble_comparison_df['Improvement_Pct'] = (ensemble_comparison_df['Improvement'] / ensemble_comparison_df['Best_Individual'] * 100).round(2)
print('\n2. Ensemble vs Best Individual Model:')
display(ensemble_comparison_df)
ensemble_comparison_df.to_csv(os.path.join(out_dir,'ensemble_vs_individual.csv'), index=False)
print('Saved ensemble comparison to', os.path.join(out_dir,'ensemble_vs_individual.csv'))

# 3. Per-Class Performance Table (already created)
print('\n3. Per-Class Performance (Ensemble):')
display(perclass_df)
perclass_df.to_csv(os.path.join(out_dir,'per_class_results.csv'), index=False)
print('Saved per-class results to', os.path.join(out_dir,'per_class_results.csv'))

# 4. Summary Statistics
summary_stats_df = pd.DataFrame({
    'Statistic': ['Mean', 'Std', 'Min', 'Max'],
    'AUROC': [perclass_df['AUROC'].mean(), perclass_df['AUROC'].std(), perclass_df['AUROC'].min(), perclass_df['AUROC'].max()],
    'F1_Score': [perclass_df['F1_Score'].mean(), perclass_df['F1_Score'].std(), perclass_df['F1_Score'].min(), perclass_df['F1_Score'].max()]
})
summary_stats_df['AUROC'] = summary_stats_df['AUROC'].round(4)
summary_stats_df['F1_Score'] = summary_stats_df['F1_Score'].round(4)
print('\n4. Per-Class Performance Statistics:')
display(summary_stats_df)

print('\n' + '=' * 80)

ENSEMBLE RESULTS SUMMARY

1. Individual Model Performance on Test Set:


Unnamed: 0,Model,Test_AUROC,Test_F1
0,seed 22 - densnet 121 - focal loss.pth,0.8475,0.3852
1,seed 22 - densnet 121 - ZLPR loss.pth,0.8468,0.3758
2,seed 32 - densenet 121 - ZLPR loss.pth,0.8479,0.3762
3,seed 32- densnet121 - focal loss.pth,0.8458,0.3679
4,seed 42 - densnet 121 - focal loss.pth,0.8514,0.3803
5,seed 42 - densnet 121 - ZLPR loss.pth,0.8462,0.3621
6,seed 42 - densnet121- Attention - focal loss.pth,0.848,0.3787
7,seed 42 - efficienet b3 - focal loss.pth,0.8117,0.3338
8,seed 42 - efficinet b2 - focal loss.pth,0.8322,0.3528


Saved individual model results to avg_ensemble_results\individual_results.csv

2. Ensemble vs Best Individual Model:


Unnamed: 0,Metric,Best_Individual,Simple_Average_Ensemble,Improvement,Improvement_Pct
0,AUROC,0.8514,0.8559,0.0045,0.53
1,F1_Score,0.3852,0.3857,0.0005,0.13


Saved ensemble comparison to avg_ensemble_results\ensemble_vs_individual.csv

3. Per-Class Performance (Ensemble):


Unnamed: 0,Disease,AUROC,F1_Score,Threshold
0,Atelectasis,0.868,0.4641,0.3153
1,Cardiomegaly,0.9219,0.4367,0.2786
2,Consolidation,0.8336,0.3,0.2115
3,Edema,0.9113,0.259,0.2556
4,Effusion,0.8927,0.5373,0.3656
5,Emphysema,0.948,0.5705,0.2631
6,Fibrosis,0.8774,0.2478,0.2368
7,Hernia,0.9934,0.7059,0.2836
8,Infiltration,0.7369,0.4488,0.3081
9,Mass,0.9029,0.4744,0.327


Saved per-class results to avg_ensemble_results\per_class_results.csv

4. Per-Class Performance Statistics:


Unnamed: 0,Statistic,AUROC,F1_Score
0,Mean,0.8801,0.4159
1,Std,0.0657,0.1571
2,Min,0.7369,0.1037
3,Max,0.9934,0.7059



