In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from load_data import Data
from models import LogisticRegressor, Model, Analytics, LinearCombo
from sklearn.linear_model import LogisticRegression
import os
import pickle


In [2]:
data = Data()

pc_path = r"/media/joshua/Expansion1/tst/BaseLearnerInference"

training_images = [f"{a:03d}" for a in range(1, 33)]
all_images = [f"{a:03d}" for a in range(1, 41)]
testing_images = [f"{a:03d}" for a in range(33, 41)]

models = ["BasicPlans", "DA5_Segmentations", "LargeEncoder"]

seg_path = os.path.join(pc_path, "Probabilities")
gt_path = os.path.join(pc_path, "GroundTruths")

data.get_simple_segmentations(seg_path, all_images, ".nrrd")
data.get_groundtruths(gt_path)



## **Base Learner Performance**

In [3]:
model_segs = {model: {} for model in models}
for model in models:
    for image in training_images:
        model_segs[model][image] = data.simple_data[image][model]



In [4]:
# Get dice coefficients for each training image and model
print("Dice Coefficients by Training Image and Model:")
print("=" * 60)

for model in models:
    print(f"\n{model}:")
    model_analytics = Analytics(model_segs[model], data.gts)
    dice_scores = model_analytics.separation_distances()
    
    for image, dice in dice_scores.items():
        print(f"  Image {image}: {dice:.4f}")

# Or create a detailed DataFrame for training images
detailed_results = []
for model in models:
    model_analytics = Analytics(model_segs[model], data.gts)
    dice_scores = model_analytics.separation_distances()
    
    for image, dice in dice_scores.items():
        detailed_results.append({
            'Model': model,
            'Image': image, 
            'Dice': dice
        })

detailed_df = pd.DataFrame(detailed_results)
print("\nDetailed Training Results:")
print(detailed_df.to_string(index=False))

# Pivot table for easier comparison
pivot_df = detailed_df.pivot(index='Image', columns='Model', values='Dice')
print("\nPivot Table (Training Images vs Models):")
print(pivot_df.to_string())

Dice Coefficients by Training Image and Model:

BasicPlans:
  Image 001: 0.3379
  Image 002: 0.6080
  Image 003: 0.4917
  Image 004: 0.3967
  Image 005: 0.8895
  Image 006: 0.3586
  Image 007: 0.5854
  Image 008: 0.4396
  Image 009: 0.5667
  Image 010: 0.4604
  Image 011: 0.5568
  Image 012: 0.4326
  Image 013: 0.3394
  Image 014: 0.3762
  Image 015: 0.6884
  Image 016: 0.5745
  Image 017: 0.6979
  Image 018: 0.8891
  Image 019: 0.4462
  Image 020: 0.4814
  Image 021: 0.4555
  Image 022: 0.5795
  Image 023: 0.7454
  Image 024: 0.7168
  Image 025: 0.5840
  Image 026: 0.6880
  Image 027: 1.2401
  Image 028: 0.7404
  Image 029: 0.6770
  Image 030: 0.4822
  Image 031: 0.5823
  Image 032: 0.4727

DA5_Segmentations:
  Image 001: 0.3435
  Image 002: 0.6415
  Image 003: 0.5073
  Image 004: 0.3749
  Image 005: 0.6841
  Image 006: 0.3399
  Image 007: 0.5146
  Image 008: 0.4110
  Image 009: 0.4856
  Image 010: 0.3854
  Image 011: 0.3714
  Image 012: 0.3761
  Image 013: 0.2844
  Image 014: 0.3391


In [5]:
model_hd = {}
model_dc = {}
model_sd = {}

for model in models:
    model_analytics = Analytics(model_segs[model], data.gts)
    mean_dice = np.mean(list(model_analytics.dice_coefficients().values()))
    mean_hd = np.mean(list(model_analytics.maximum_hausdorff_distance().values()))
    model_sd[model] = np.mean(list(model_analytics.separation_distances().values()))
    model_hd[model] = mean_hd
    model_dc[model] = mean_dice



In [8]:
results_df = pd.DataFrame({
    "Model": list(model_dc.keys()),
    "Mean Dice": [model_dc[m] for m in model_dc],
    "Mean Hausdorff": [model_hd[m] for m in model_hd],
    "Mean Separation": [model_sd[m] for m in model_sd]
})

print(results_df.to_string(index=False))

            Model  Mean Dice  Mean Hausdorff  Mean Separation
       BasicPlans   0.967473        9.721207         0.580649
DA5_Segmentations   0.970390        9.057757         0.544874
     LargeEncoder   0.963385       11.351560         0.584953


## **Out Sample Performance**

In [4]:
prob_data = Data()

pc_path = r"/media/joshua/Expansion1/tst/BaseLearnerInference"

training_images = [f"{a:03d}" for a in range(1, 33)]
all_images = [f"{a:03d}" for a in range(1, 41)]
testing_images = [f"{a:03d}" for a in range(33, 41)]

models = ["BasicPlans", "DA5_Segmentations", "LargeEncoder"]

seg_path = os.path.join(pc_path, "Probabilities")
gt_path = os.path.join(pc_path, "GroundTruths")

prob_data.get_simple_segmentations(seg_path, all_images, ".npz")
prob_data.get_groundtruths(gt_path)

In [5]:
logreg = LogisticRegressor(prob_data.simple_data, prob_data.gts, models)

In [6]:
logreg.train_model(training_images, model_params = {"C": 3.1622776601683795e-5})

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,3.1622776601683795e-05
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


In [7]:
logreg_predictions = logreg.predict(testing_images)

In [8]:
model_segs = {model: {} for model in models}
for model in models:
    for image in testing_images:
        model_segs[model][image] = data.simple_data[image][model]



In [9]:
model_segs["Ensemble"] = logreg_predictions

In [12]:
a = Analytics(model_segs["Ensemble"], data.gts)

a.maximum_hausdorff_distance()

{'033': np.float64(2.8284271247461903),
 '034': np.float64(3.0),
 '035': np.float64(13.038404810405298),
 '036': np.float64(2.23606797749979),
 '037': np.float64(3.0),
 '038': np.float64(1.7320508075688772),
 '039': np.float64(2.8284271247461903),
 '040': np.float64(2.0)}

In [17]:
model_hd = {}
model_dc = {}
model_sd = {}

for model in model_segs.keys():
    model_analytics = Analytics(model_segs[model], data.gts)
    mean_dice = np.mean(list(model_analytics.dice_coefficients().values()))
    mean_hd = np.mean(list(model_analytics.maximum_hausdorff_distance().values()))
    model_sd[model] = np.mean(list(model_analytics.separation_distances().values()))
    print(model)
    print(model_analytics.separation_distances())
    model_hd[model] = mean_hd
    model_dc[model] = mean_dice



BasicPlans
{'033': np.float64(0.5284528385529931), '034': np.float64(0.4945121031809155), '035': np.float64(0.6662501186724713), '036': np.float64(0.45826063395033845), '037': np.float64(0.5060217302943121), '038': np.float64(0.294727329166553), '039': np.float64(0.4545599573361448), '040': np.float64(0.3192858277274792)}
DA5_Segmentations
{'033': np.float64(0.4956506934238428), '034': np.float64(0.5216558585710049), '035': np.float64(0.6963480771083121), '036': np.float64(0.5444401735297923), '037': np.float64(0.5020552074521939), '038': np.float64(0.2889709582198416), '039': np.float64(0.42955696396554244), '040': np.float64(0.33193346193411)}
LargeEncoder
{'033': np.float64(0.5532026698776714), '034': np.float64(0.4940214889704276), '035': np.float64(0.5915583778615917), '036': np.float64(0.48195333993287226), '037': np.float64(0.4861864915440528), '038': np.float64(0.3169428986759202), '039': np.float64(0.4919413517983757), '040': np.float64(0.34966415320503347)}
Ensemble
{'033': n

In [20]:
# Get dice coefficients for each training image and model
print("Dice Coefficients by Training Image and Model:")
print("=" * 60)

for model in model_segs.keys():
    print(f"\n{model}:")
    model_analytics = Analytics(model_segs[model], data.gts)
    dice_scores = model_analytics.separation_distances()
    
    for image, dice in dice_scores.items():
        print(f"  Image {image}: {dice:.4f}")

# Or create a detailed DataFrame for training images
detailed_results = []
for model in model_segs.keys():
    model_analytics = Analytics(model_segs[model], data.gts)
    dice_scores = model_analytics.separation_distances()
    
    for image, dice in dice_scores.items():
        detailed_results.append({
            'Model': model,
            'Image': image, 
            'Dice': dice
        })

detailed_df = pd.DataFrame(detailed_results)
print("\nDetailed Training Results:")
print(detailed_df.to_string(index=False))

# Pivot table for easier comparison
pivot_df = detailed_df.pivot(index='Image', columns='Model', values='Dice')
print("\nPivot Table (Training Images vs Models):")
print(pivot_df.to_string())

Dice Coefficients by Training Image and Model:

BasicPlans:
  Image 033: 0.5285
  Image 034: 0.4945
  Image 035: 0.6663
  Image 036: 0.4583
  Image 037: 0.5060
  Image 038: 0.2947
  Image 039: 0.4546
  Image 040: 0.3193

DA5_Segmentations:
  Image 033: 0.4957
  Image 034: 0.5217
  Image 035: 0.6963
  Image 036: 0.5444
  Image 037: 0.5021
  Image 038: 0.2890
  Image 039: 0.4296
  Image 040: 0.3319

LargeEncoder:
  Image 033: 0.5532
  Image 034: 0.4940
  Image 035: 0.5916
  Image 036: 0.4820
  Image 037: 0.4862
  Image 038: 0.3169
  Image 039: 0.4919
  Image 040: 0.3497

Ensemble:
  Image 033: 0.5119
  Image 034: 0.4902
  Image 035: 0.6268
  Image 036: 0.4777
  Image 037: 0.4982
  Image 038: 0.2933
  Image 039: 0.4505
  Image 040: 0.3302

Detailed Training Results:
            Model Image     Dice
       BasicPlans   033 0.528453
       BasicPlans   034 0.494512
       BasicPlans   035 0.666250
       BasicPlans   036 0.458261
       BasicPlans   037 0.506022
       BasicPlans   038 0.29

In [15]:
results_df = pd.DataFrame({
    "Model": list(model_dc.keys()),
    "Mean Dice": [model_dc[m] for m in model_dc],
    "Mean Hausdorff": [model_hd[m] for m in model_hd],
    "Mean Separation": [model_sd[m] for m in model_sd]
})

print(results_df.to_string(index=False))

            Model  Mean Dice  Mean Hausdorff  Mean Separation
       BasicPlans   0.974778        5.547775         0.465259
DA5_Segmentations   0.974412        2.876370         0.476326
     LargeEncoder   0.974332        6.008247         0.470684
         Ensemble   0.975232        3.832922         0.459817
