In [11]:
import os
import sys
import numpy as np
from tqdm.notebook import tqdm
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Root directory for every path below.
# NOTE(review): os.getcwd() is the kernel's working directory, so these paths only
# resolve when the notebook is run from the project root — confirm.
BASE_DIR = os.getcwd()

# Image folder passed to the disparity.* helpers in the driver cells of this notebook.
RECTIFIED_IMAGES = os.path.join(BASE_DIR, "images/augmented/")
# Not referenced anywhere else in the visible part of this notebook.
DISPARITY_IMG_OUTPUT = os.path.join(BASE_DIR, "images/disparity_maps/")
# Not referenced anywhere else in the visible part of this notebook.
CALIBRATION_DATA = os.path.join(BASE_DIR, "data/stereo_calibration_data.npz")
# Folder the "disparities_per_label.npz" / "normalised_disparities_per_label.npz" files are loaded from.
DISPARITY_OUTPUT = os.path.join(BASE_DIR, "data/")
# Second image folder passed to disparity.disparity_depth_estimation.
TEST_IMAGES = os.path.join(BASE_DIR, "images/test/")

In [3]:
# Make the project root importable so the local `src` package resolves.
# Guarded so that re-running this cell does not append duplicate sys.path entries.
if BASE_DIR not in sys.path:
    sys.path.append(BASE_DIR)
from src.disparity import disparity

In [None]:
# disparities_per_label, normalised_disparities_per_label = disparity.compute_disparities_per_label(RECTIFIED_IMAGES, DISPARITY_OUTPUT, stop=None)

In [4]:
# Load the per-label arrays from the .npz archives into plain dicts.
# np.load returns a lazily-read NpzFile that keeps the archive open; the context
# manager closes it once every array has been materialised into the dict.
with np.load(os.path.join(DISPARITY_OUTPUT, "disparities_per_label.npz")) as npz:
    disparities_per_label_file = {k: npz[k] for k in npz.files}

with np.load(os.path.join(DISPARITY_OUTPUT, "normalised_disparities_per_label.npz")) as npz:
    normalised_disparities_per_label_file = {k: npz[k] for k in npz.files}

In [5]:
# Shared fill colour for bars; also re-applied explicitly via update_traces in the plotting cells.
colour = "#cac8c8"  # light grey
# Plotly theme applied to figures with `fig.update_layout(**theme_dict)`.
# The single top-level key "template" becomes layout.template, so everything below
# acts as a default that explicit per-figure settings can override.
theme_dict = {
    "template": {
        # Layout defaults: fonts, backgrounds, margins, legend and axis styling.
        "layout": {
            "font": {"family": "Arial, sans-serif", "size": 32, "color": "black"},
            "title": {"font": {"size": 32, "color": "black"}, "x": 0.0},
            "paper_bgcolor": "white",
            "plot_bgcolor": "white",
            # Make default discrete colors the same subtle grey so PX won't assign
            # a contrasting fill color. Repeat entries for multiple categories if needed.
            "colorway": [colour] * 8,
            "margin": {"l": 60, "r": 20, "t": 42, "b": 36},
            "legend": {"bgcolor": "rgba(0,0,0,0)", "bordercolor": "rgba(0,0,0,0)", "borderwidth": 0, "font": {"size": 32}},
            # Boxed axes (mirror=True) with outside ticks and no grid.
            "xaxis": {
                "title": {"standoff": 6, "font": {"size": 32}},
                "tickfont": {"size": 32},
                "showgrid": False,
                "zeroline": False,
                "linecolor": "black",
                "linewidth": 0.6,
                "mirror": True,
                "ticks": "outside",
            },
            "yaxis": {
                "title": {"standoff": 6, "font": {"size": 32}},
                "tickfont": {"size": 32},
                "showgrid": False,
                # gridcolor/gridwidth only take effect if showgrid is switched on.
                "gridcolor": "#e9e9e9",
                "gridwidth": 0.5,
                "zeroline": False,
                "linecolor": "black",
                "linewidth": 0.6,
                "mirror": True,
                "ticks": "outside",
            },
            "uniformtext": {"mode": "hide", "minsize": 32},
            "annotationdefaults": {"font": {"size": 32, "color": "black"}},
            "hovermode": "closest",
            "barmode": "group",
        },
        # Per-trace-type defaults; only bar traces are themed here.
        "data": {
            "bar": [
                {
                    # explicit fill color (light grey) + thin black outline
                    "marker": {"color": colour, "line": {"color": "black", "width": 0.8}},
                    # text settings:
                    "textfont": {"size": 32, "color": "black"},        # outside text
                    "insidetextfont": {"size": 32, "color": "black"},  # inside text
                    # Ensure automatic placement still uses 'auto' but color is forced black
                    "textposition": "auto",
                    # optional default numeric format:
                    "texttemplate": "%{y:.0f}",
                    "opacity": 1.0,
                }
            ]
        },
    }
}


In [9]:
# Normalised (Bar Plot): one bar chart per label, saved as PDF and shown inline.
order = ["<=3", "4", "5", "6", "7", "8"]

# Offset added to the bin index on the x axis. Defined before the loop so it exists
# for later cells even if every label is skipped.
# NOTE(review): the original comment gives the unit as mm — confirm.
min_dist = 0

for label in tqdm(order, desc="Plotting", unit=" label"):
    raw_hist = normalised_disparities_per_label_file[label]
    if len(raw_hist) == 0 or np.all(raw_hist == 0):
        print(f"[{label}] no valid data, skipping.")
        continue

    # Remove negative values on a copy, so the loaded array is not modified and
    # re-running this (or any other) cell sees the same input data.
    hist_array = np.clip(raw_hist, 0, None)

    # x = disparity values (bin index + offset), y = histogram values
    x = np.arange(hist_array.shape[0]) + min_dist
    y = hist_array

    # Nothing positive left after clipping -> min/max/mean would be undefined.
    nonzero = y > 0
    if not nonzero.any():
        print(f"[{label}] no valid data, skipping.")
        continue

    # Histogram-weighted stats, on the same x values that are plotted
    min_val = x[nonzero].min()
    max_val = x[nonzero].max()
    mean = (x * y).sum() / y.sum()
    var = ((x - mean) ** 2 * y).sum() / y.sum()
    std = np.sqrt(var)

    fig = px.bar(
        x=x, y=y,
        labels={'x': 'Disparity Value', 'y': '#Pixel with Disparity Value'},
        # title=f"Disparity Histogram for “{label}”"
    )
    # Fixed y range so the per-label figures are directly comparable.
    fig.update_layout(yaxis=dict(range=[0, 18_000]))

    # Stats annotation, positioned in paper coordinates and shifted up by yshift pixels
    fig.add_annotation(
        text=f"Min: {min_val:.2f},  Max: {max_val:.2f},\n "
                f"&mu;={mean:.2f}, σ={std:.2f}",
        showarrow=False,
        x=0.5, y=-0.11,
        xref='paper', yref='paper',
        font=dict(size=28),
        yshift=350
    )
    fig.update_layout(**theme_dict)
    # Re-apply the bar styling explicitly so it wins over px defaults.
    fig.update_traces(
        marker=dict(color=colour, line=dict(color="black", width=0.8)),
        textfont=dict(color="black"),
        insidetextfont=dict(color="black"),
        textposition="auto"
    )
    fig.write_image(f"Disparity-Histogram-{label}.pdf")
    fig.show()
    

Plotting:   0%|          | 0/6 [00:00<?, ? label/s]

In [None]:
# Previous palette, kept for reference:
# colours = {"<=3":"#002c4b","4":"#da6600","5":"#0d740d","6":"#b30909","7":"#5e1f99","8":"#e68774"}
# Per-label line colours: colorblind-safe, publication-friendly palette (Okabe–Ito subset).
colours = dict([
    ("<=3", "#000000"),  # black
    ("4", "#E69F00"),    # orange
    ("5", "#56B4E9"),    # light blue
    ("6", "#009E73"),    # bluish green
    ("7", "#0072B2"),    # deep blue
    ("8", "#D55E00"),    # reddish orange
])
# Line width used for the overlaid curves.
thin_width = 0.8

In [None]:
# Overlay all per-label histograms as lines in a single figure.
# NOTE: this cell reads `order` and `min_dist`, which are assigned in the bar-plot
# cell; run that cell first.

# Collect valid arrays and compute per-label stats
valid_labels = []
label_stats = {}   # label -> (min_val, max_val, mean, std)
arrays = {}

for label in tqdm(order, desc="Collecting", unit="label"):
    hist_array = normalised_disparities_per_label_file.get(label, None)
    if hist_array is None or len(hist_array) == 0 or np.all(hist_array == 0):
        print(f"[{label}] no valid data, skipping.")
        continue

    # Work on a copy so the loaded data stays untouched; remove negative values.
    hist = hist_array.copy()
    hist[hist < 0] = 0

    # Keep array
    arrays[label] = hist

    # Histogram-weighted stats (on un-padded data)
    x = np.arange(hist.shape[0]) + min_dist
    nonzero = hist > 0
    if nonzero.any():
        min_val = x[nonzero].min()
        max_val = x[nonzero].max()
        mean = (x * hist).sum() / hist.sum()
        var = ((x - mean) ** 2 * hist).sum() / hist.sum()
        std = np.sqrt(var)
    else:
        # Only non-positive entries were present: stats are undefined.
        min_val = max_val = mean = std = np.nan

    label_stats[label] = (min_val, max_val, mean, std)
    valid_labels.append(label)

if len(valid_labels) == 0:
    raise RuntimeError("No valid label arrays to plot.")

# Zero-pad the arrays on the right so they all share the same x axis
max_len = max(arr.shape[0] for arr in arrays.values())
x = np.arange(max_len) + min_dist
padded = {
    label: np.pad(arr, (0, max_len - arr.shape[0]))
    for label, arr in arrays.items()
}

# Build the combined line figure
fig = go.Figure()

for label in valid_labels:
    # Plain lines: markers would clutter the plot with one point per bin.
    trace_kwargs = dict(
        x=x, y=padded[label],
        mode="lines",
        name=str(label),
        hovertemplate=f"Label={label}<br>Disparity=%{{x:.0f}} mm<br>Frac=%{{y:.6f}}<extra></extra>"
    )
    if colours and label in colours:
        trace_kwargs["line"] = dict(color=colours[label], width=thin_width)
        trace_kwargs["marker"] = dict(color=colours[label])
    else:
        # Unknown label: let Plotly pick the colour, keep the thin line width.
        trace_kwargs["line"] = dict(width=thin_width)

    fig.add_trace(go.Scatter(**trace_kwargs))

# Layout tuning
fig.update_layout(
    # title="Disparity distributions (overlaid lines)",
    xaxis_title="Disparity Value",
    yaxis_title="#Pixel with Disparity Value",
    yaxis=dict(range=[0, 18_000]),   # same range as the per-label bar plots
    legend_title="Label",
    hovermode="x unified",
    **(theme_dict if "theme_dict" in globals() else {})
)

# Per-label mean/std summary, one line per label
stats_lines = []
for label in valid_labels:
    _, _, mean, std = label_stats[label]
    stats_lines.append(f"{label}: μ={mean:.2f}, σ={std:.2f}")
stats_text = "<br>".join(stats_lines)

fig.add_annotation(
    text=stats_text,
    showarrow=False,
    x=0.5, y=0.65,
    xref='paper', yref='paper',
    align='left',
    font=dict(size=28),
    bgcolor="rgba(255,255,255,0.7)"
)

# Save & show
fig.write_image("Disparity-Distributions-Lines.pdf")   # requires kaleido/orca
fig.show()

Collecting:   0%|          | 0/6 [00:00<?, ?label/s]

### Checking Separability

In [24]:
import numpy as np
from scipy.spatial.distance import jensenshannon
from scipy.stats import entropy, wasserstein_distance
import plotly.graph_objects as go
import plotly.express as px

# ---------- USER: put your dict here ----------
# pmfs_dict = normalised_disparities_per_label_file
# min_dist = 100  # same offset you used when plotting (so bin_centers match)
# If pmfs_dict contains counts or fractions of different lengths, the code will align/pad.
# ------------------------------------------------

def prepare_pmfs(pmfs_dict, eps=1e-9):
    """Turn per-label histograms into aligned probability mass functions.

    Each array is converted to float, negative entries are clipped to zero, the
    array is zero-padded on the right to the longest length in ``pmfs_dict``,
    ``eps`` is added to every bin (so no bin is exactly zero) and the result is
    normalised to sum to 1.

    Parameters
    ----------
    pmfs_dict : mapping label -> 1-D array-like of counts or fractions
    eps : float
        Additive smoothing applied to every bin before normalisation.

    Returns
    -------
    (arrs, labels)
        ``arrs`` maps each label to its normalised array; ``labels`` lists the
        keys in the iteration order of ``pmfs_dict``.

    Raises
    ------
    ValueError
        If ``pmfs_dict`` is empty.
    """
    labels = list(pmfs_dict.keys())
    if not labels:
        raise ValueError("pmfs_dict is empty: no histograms to prepare.")

    max_len = max(len(pmfs_dict[k]) for k in labels)
    arrs = {}
    for k in labels:
        # np.array(..., dtype=float) already copies, so the input is never modified.
        a = np.array(pmfs_dict[k], dtype=float)
        a[a < 0] = 0.0
        if a.shape[0] < max_len:
            a = np.pad(a, (0, max_len - a.shape[0]))
        a = a + eps
        arrs[k] = a / a.sum()
    return arrs, labels

def total_variation(p, q):
    """Total variation distance: half of the summed absolute differences of p and q."""
    abs_diff = np.abs(p - q)
    return 0.5 * abs_diff.sum()

def hellinger(p, q):
    """Hellinger distance: sqrt of half the summed squared differences of sqrt(p) and sqrt(q)."""
    root_diff = np.sqrt(p) - np.sqrt(q)
    half_sq_sum = 0.5 * np.sum(root_diff ** 2)
    return np.sqrt(half_sq_sum)

def bhattacharyya_distance(p, q):
    """Bhattacharyya distance: -log of the Bhattacharyya coefficient sum(sqrt(p*q)).

    The coefficient is clipped to [1e-16, 1.0] before the log so the result is
    finite and never negative.
    """
    coefficient = np.sum(np.sqrt(p * q))
    clipped = np.clip(coefficient, 1e-16, 1.0)
    return -np.log(clipped)

def symmetric_kl(p, q):
    """Symmetrised KL divergence: the mean of KL(p||q) and KL(q||p)."""
    forward = entropy(p, q)   # KL(p || q)
    backward = entropy(q, p)  # KL(q || p)
    return 0.5 * (forward + backward)

def jensen_shannon(p, q, base=2):
    """Jensen-Shannon divergence of p and q, with logarithms in the given base.

    scipy's ``jensenshannon`` yields the JS *distance* (the square root of the
    divergence), so the value is squared before being returned.
    """
    js_distance = jensenshannon(p, q, base=base)
    return float(js_distance) ** 2

def pairwise_matrices(pmfs, bin_centers=None):
    """Compute every pairwise distance/divergence between the given PMFs.

    Parameters
    ----------
    pmfs : mapping label -> 1-D probability array (all of equal length)
    bin_centers : array-like or None
        Support values handed to ``wasserstein_distance``; when None the
        Wasserstein matrix is filled with NaN.

    Returns
    -------
    (labels, mats)
        ``labels`` is the key order of ``pmfs``; ``mats`` maps each metric name
        to an (n, n) array indexed by that label order.
    """
    labels = list(pmfs.keys())
    size = len(labels)
    mats = {}
    for metric_name in ('JSD', 'SymKL', 'TV', 'Hellinger', 'Bhattacharyya', 'Wasserstein'):
        mats[metric_name] = np.zeros((size, size))

    # Visit every ordered (row, column) pair, diagonal included.
    for row, col in np.ndindex(size, size):
        p = pmfs[labels[row]]
        q = pmfs[labels[col]]
        mats['JSD'][row, col] = jensen_shannon(p, q)
        mats['SymKL'][row, col] = symmetric_kl(p, q)
        mats['TV'][row, col] = total_variation(p, q)
        mats['Hellinger'][row, col] = hellinger(p, q)
        mats['Bhattacharyya'][row, col] = bhattacharyya_distance(p, q)
        if bin_centers is None:
            mats['Wasserstein'][row, col] = np.nan
        else:
            # the PMFs are passed as weights over the shared support
            mats['Wasserstein'][row, col] = wasserstein_distance(bin_centers, bin_centers, p, q)
    return labels, mats

# Classical (metric) MDS from a distance matrix D (NxN) -> 2D coords
def classical_mds(D, n_components=2):
    """Embed points in ``n_components`` dimensions from a pairwise distance matrix.

    Double-centres the squared distances, eigendecomposes the result and scales
    the leading eigenvectors by the square roots of their eigenvalues. Only
    eigenvalues above 1e-12 are used; missing dimensions are zero-filled.

    Returns an array of shape (n, n_components).
    """
    n = D.shape[0]

    # double centering of the squared distances
    centering = np.eye(n) - np.ones((n, n)) / n
    gram = -0.5 * (centering @ (D ** 2) @ centering)

    # eigendecomposition, re-ordered from largest to smallest eigenvalue
    eigvals, eigvecs = np.linalg.eigh(gram)
    ranking = np.argsort(eigvals)[::-1]
    eigvals, eigvecs = eigvals[ranking], eigvecs[:, ranking]

    # keep (at most n_components) strictly positive eigenpairs
    keep = eigvals > 1e-12
    top_vals = eigvals[keep][:n_components]
    top_vecs = eigvecs[:, keep][:, :n_components]
    coords = top_vecs @ np.diag(np.sqrt(top_vals))

    # zero-fill when fewer components exist than were requested
    missing = n_components - coords.shape[1]
    if missing > 0:
        coords = np.hstack([coords, np.zeros((n, missing))])
    return coords

# ------------------- Main runner -------------------
def compute_and_plot(pmfs_dict, min_dist=100, show_metrics=['JSD','Wasserstein','Hellinger']):
    """Compute pairwise separability metrics between label PMFs and visualise them.

    Shows (1) a heatmap with a dropdown to switch between metrics and (2) a 2-D
    classical-MDS scatter of the labels, then prints the mean and standard
    deviation of the off-diagonal entries of each metric.

    Parameters
    ----------
    pmfs_dict : mapping label -> 1-D array of counts or fractions
        Passed through ``prepare_pmfs`` (pad, clip, smooth, normalise).
    min_dist : int or float
        Offset added to the bin indices to form ``bin_centers``.
    show_metrics : list of str
        Currently unused: every computed metric is offered in the dropdown.
        Kept so existing callers keep working.

    Returns
    -------
    (labels, pmfs, mats, bin_centers)
    """
    pmfs, labels = prepare_pmfs(pmfs_dict, eps=1e-9)
    max_len = len(next(iter(pmfs.values())))
    bin_centers = np.arange(max_len) + min_dist

    labels, mats = pairwise_matrices(pmfs, bin_centers=bin_centers)

    # Build a Plotly heatmap with a dropdown to select metric
    metric_list = list(mats.keys())
    # set JSD as default if available
    default_metric = 'JSD' if 'JSD' in metric_list else metric_list[0]

    z0 = mats[default_metric]
    fig_heat = go.Figure()

    # initial heatmap
    fig_heat.add_trace(go.Heatmap(
        z=z0,
        x=labels, y=labels,
        text=np.round(z0, 4),
        hovertemplate="i: %{y}<br>j: %{x}<br>val: %{z:.4f}<extra></extra>",
        colorbar=dict(title=dict(text=default_metric))
    ))

    # Dropdown buttons. With method='update', args[0] restyles the trace and
    # args[1] relayouts the figure. Button args are sent to plotly.js as-is (no
    # Python-side coercion), so the fully-qualified attribute paths are used.
    buttons = []
    for metric in metric_list:
        z = mats[metric]
        buttons.append(dict(method='update',
                            label=metric,
                            args=[{'z': [z],
                                   'text': [np.round(z, 4)],
                                   # keep the colorbar title in sync with the selected metric
                                   'colorbar.title.text': metric},
                                  {'coloraxis': None,
                                   'annotations': [],
                                   'title.text': f"Pairwise {metric}"}]))
    fig_heat.update_layout(
        updatemenus=[dict(active=metric_list.index(default_metric),
                          buttons=buttons,
                          x=0.01, y=1.15, xanchor='left', yanchor='top')],
        title=f"Pairwise {default_metric}",
        xaxis=dict(side='top'),
        width=700, height=700
    )

    fig_heat.show()

    # MDS scatter using the chosen distance (use Wasserstein if present else JSD)
    preferred = 'Wasserstein' if 'Wasserstein' in mats else default_metric
    D = mats[preferred]
    # Divergences (JSD, KL, ...) are not true metrics; MDS still works on them as
    # long as the matrix is symmetric.
    coords = classical_mds(D)
    df = {
        'label': labels,
        'x': coords[:, 0],
        'y': coords[:, 1]
    }
    fig_mds = px.scatter(df, x='x', y='y', text='label', width=700, height=500,
                         title=f"MDS 2D embedding (based on {preferred})")
    fig_mds.update_traces(textposition='top center')
    fig_mds.show()

    # Print mean off-diagonal for each metric (quick summary numbers)
    print("Metric summary (mean off-diagonal):")
    for name, M in mats.items():
        n = M.shape[0]
        off = M[~np.eye(n, dtype=bool)]
        print(f" - {name}: mean={np.mean(off):.6g}, std={np.std(off):.6g}")

    return labels, pmfs, mats, bin_centers

# Run the separability analysis on the loaded normalised per-label histograms.
# min_dist=0 makes bin_centers equal to the plain bin indices. The returned
# `pmfs` dict is reused by the log-likelihood scoring cells further down.
labels, pmfs, mats, bin_centers = compute_and_plot(normalised_disparities_per_label_file, min_dist=0)



Metric summary (mean off-diagonal):
 - JSD: mean=0.00252601, std=0.00177139
 - SymKL: mean=0.00703754, std=0.00495084
 - TV: mean=0.041075, std=0.0171192
 - Hellinger: mean=0.0386646, std=0.0161294
 - Bhattacharyya: mean=0.00175741, std=0.00123519
 - Wasserstein: mean=2.12349, std=1.05875


In [30]:
import numpy as np

# Build one normalised histogram (PMF) per image, tagged with the image's label.
# Assuming disparity values are in range 0..max_disp (say 128 or 256)
max_disp = 128
bin_edges = np.arange(max_disp + 1)  # integer binning: max_disp bins of width 1

images_histograms = []   # list of (true_label, histogram_vector)

for label, arr in disparities_per_label_file.items():
    # Iterating over the first axis yields one image per step.
    # NOTE(review): arr is presumably shaped (n_samples, H, W) — confirm.
    for img in arr:
        # keep valid disparities only (negative values such as -1 mark invalid pixels)
        valid = img[img >= 0]
        if valid.size == 0:
            continue  # skip empty images

        # build histogram of raw counts
        h, _ = np.histogram(valid, bins=bin_edges, density=False)
        h = h.astype(float)

        # Values outside [0, max_disp] fall outside every bin; if that is true for
        # all valid pixels the histogram is empty and normalising would give NaNs.
        total = h.sum()
        if total == 0:
            continue

        # normalize to PMF and store as (true_label, histogram_vector)
        images_histograms.append((label, h / total))


In [31]:
import numpy as np
from sklearn.metrics import roc_auc_score

def avg_loglik(h, pmf, eps=1e-12):
    """Average log-likelihood of histogram ``h`` under the distribution ``pmf``.

    ``pmf`` is smoothed with ``eps`` and renormalised so that log() never sees
    a zero; the weighted log-probabilities are divided by the total mass of
    ``h`` (plus 1e-12 to avoid dividing by zero).
    """
    smoothed = pmf + eps
    smoothed = smoothed / smoothed.sum()
    weighted_logs = h * np.log(smoothed)
    return weighted_logs.sum() / (h.sum() + 1e-12)

# images_histograms: list of tuples (true_label, hist_vector) for validation/test images
# pmfs: dict label -> pmf
labels = list(pmfs.keys())

# Score matrix: row = image, column = class, value = average log-likelihood
y_true = [true_label for true_label, _ in images_histograms]
scores = np.array([[avg_loglik(h, pmfs[c]) for c in labels]
                   for _, h in images_histograms])  # shape (n_images, n_classes)

# compute AUC for each class (one-vs-rest)
y_true_arr = np.array(y_true)
for k, class_label in enumerate(labels):
    y_bin = (y_true_arr == class_label).astype(int)
    try:
        # Stored as `auc_value`, not `auc`: a later cell imports sklearn.metrics.auc,
        # and a global float named `auc` would shadow it if this cell is re-run.
        auc_value = roc_auc_score(y_bin, scores[:, k])
    except ValueError:
        # roc_auc_score rejects this input (e.g. only one class present)
        auc_value = np.nan
    print(class_label, "AUC:", auc_value)

4 AUC: 0.3782335302542069
5 AUC: 0.4762813909707668
7 AUC: 0.6315995031320968
6 AUC: 0.6277601932567686
<=3 AUC: 0.3257030517463666
8 AUC: 0.6743162393162394


In [32]:
import plotly.express as px

# Log-likelihood difference LL('8') - LL('<=3') per image, coloured by true class
ll_diffs = [
    avg_loglik(hist, pmfs['8']) - avg_loglik(hist, pmfs['<=3'])
    for _, hist in images_histograms
]
labels_list = [image_label for image_label, _ in images_histograms]

fig = px.histogram(
    x=ll_diffs,
    color=labels_list,
    nbins=60,
    labels={'x':'LL(8)-LL(<=3)', 'color':'true class'},
)
fig.show()

In [None]:
import numpy as np
from sklearn.metrics import roc_curve, auc
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from tqdm import tqdm

# Tunables for the ROC analysis
max_images_per_label = None   # cap per label for speed (e.g. 200); None uses all images
n_bins = 200                  # number of histogram bins (tuneable)
eps = 1e-9                    # smoothing to avoid log(0)
# ------------------------------------------------

# 1) Find the largest valid (non-negative) disparity over all labels
all_max = 0.0
for lab, arr in disparities_per_label_file.items():
    valid_values = arr[arr >= 0]
    # -1 acts as "nothing valid here" and can never exceed all_max
    m = valid_values.max() if valid_values.size > 0 else -1
    if m > all_max:
        all_max = float(m)

if all_max <= 0:
    raise ValueError("No valid disparity values found in data.")

# n_bins equal-width bins spanning [0, all_max]
bin_edges = np.linspace(0.0, all_max, n_bins + 1)

# 2) Per-image count histograms, stored as (true_label, counts_vector)
images = []
for label, arr in disparities_per_label_file.items():
    n_samples = arr.shape[0]
    if max_images_per_label is not None and n_samples > max_images_per_label:
        # deterministic subsample: evenly spaced indices
        idxs = np.linspace(0, n_samples-1, max_images_per_label, dtype=int)
    else:
        idxs = np.arange(n_samples)

    for i in idxs:
        img = arr[i]
        vals = img[img >= 0].ravel()   # valid pixels only
        if vals.size > 0:
            counts, _ = np.histogram(vals, bins=bin_edges)
            images.append((label, counts.astype(np.float64)))

if not images:
    raise RuntimeError("No valid images (non-empty) found after preprocessing.")

# Sorted labels with "<=3" (if present) moved to the front
labels = sorted(disparities_per_label_file.keys(), key=lambda name: (name != "<=3", name))
label_to_index = {lab: idx for idx, lab in enumerate(labels)}
n_labels = len(labels)
nbins = n_bins

# 3) Per-class aggregates: summed histogram counts and number of images
agg_counts = {lab: np.zeros(nbins, dtype=np.float64) for lab in labels}
counts_per_class = dict.fromkeys(labels, 0)
for lab, counts in images:
    agg_counts[lab] += counts
    counts_per_class[lab] += 1

# 4) Score every image against every class with the average per-pixel
#    log-likelihood; the image's own class uses a leave-one-out reference.
results = []  # list of (true_label, scores_array_of_length_n_labels)
for true_label, img_counts in tqdm(images, desc="Scoring images"):
    total_pixels = img_counts.sum()
    if total_pixels == 0:
        # should not happen: empty images were filtered out earlier
        continue

    scores = np.zeros(n_labels, dtype=np.float64)
    for j, cls in enumerate(labels):
        # Leave the image out of its own class reference, unless it is the only
        # image of that class (the reference would then be empty).
        if cls == true_label and counts_per_class[cls] > 1:
            ref = agg_counts[cls] - img_counts
            ref[ref < 0] = 0.0   # numeric safety
        else:
            ref = agg_counts[cls].copy()

        # smooth & normalise to a PMF
        ref = ref + eps
        ref = ref / ref.sum()

        # sum_i(count_i * log p_i) / total_pixels
        scores[j] = np.sum(img_counts * np.log(ref)) / total_pixels

    results.append((true_label, scores))

# 5) One-vs-rest ROC curve and AUC per class
auc_by_label = {}
roc_data = {}   # label -> (fpr, tpr, auc)
for k, cls in enumerate(labels):
    # positives are the images whose true label is `cls`
    y_true = [1 if true_label == cls else 0 for true_label, _ in results]
    # score = average log-likelihood under class `cls` (higher = more likely)
    y_score = [float(image_scores[k]) for _, image_scores in results]

    fpr, tpr, _ = roc_curve(y_true, y_score)
    auc_val = auc(fpr, tpr)
    auc_by_label[cls] = float(auc_val)
    roc_data[cls] = (fpr, tpr, auc_val)

# 6) ROC curves, one line per class, plus the chance diagonal
fig_roc = go.Figure()
for cls in labels:
    fpr, tpr, auc_val = roc_data[cls]
    roc_trace = go.Scatter(
        x=fpr,
        y=tpr,
        mode='lines',
        name=f"{cls} (AUC={auc_val:.3f})",
        line=dict(color=colours.get(cls, "#666666"), width=thin_width),
        hovertemplate=f"label={cls}<br>AUC={auc_val:.3f}<br>FPR=%{{x:.3f}}, TPR=%{{y:.3f}}<extra></extra>",
    )
    fig_roc.add_trace(roc_trace)

# diagonal baseline
chance_line = go.Scatter(
    x=[0,1], y=[0,1], mode='lines',
    line=dict(dash='dash', color='black'),
    showlegend=False, hoverinfo='skip',
)
fig_roc.add_trace(chance_line)

# Apply the shared theme only when it has been defined in this session
theme_kwargs = theme_dict if "theme_dict" in globals() else {}
fig_roc.update_layout(
    # title="ROC curves (one-vs-rest) — histogram-based leave-one-out scores",
    xaxis_title="False Positive Rate",
    yaxis_title="True Positive Rate",
    width=1_200, height=1_000,
    **theme_kwargs
)
fig_roc.write_image("ROC curves (one-vs-rest) - histogram-based leave-one-out scores.pdf")
fig_roc.show()

# 7) Bar chart of AUCs (Plotly)
df_auc = pd.DataFrame(sorted(auc_by_label.items(), key=lambda kv: kv[0]), columns=["Label", "AUC"])
# Place "<=3" first if present
if "<=3" in df_auc['Label'].values:
    df_auc = pd.concat([df_auc[df_auc['Label'] == "<=3"],
                        df_auc[df_auc['Label'] != "<=3"]], ignore_index=True)

fig_bar = px.bar(df_auc, x="Label", y="AUC", text="AUC", range_y=[0,1],
                #  title="AUC per class (one-vs-rest, averaged log-likelihood scores)"
                 )
# add horizontal line for chance
fig_bar.add_hline(y=0.5, line_dash="dash", annotation_text="random (0.5)", annotation_position="top left")
fig_bar.update_layout(
    # width=700, height=450,
    **(theme_dict if "theme_dict" in globals() else {})
)
# All trace styling in a single call. textposition is set once to "auto": the
# value that was effective before, when an earlier 'outside' was overridden.
fig_bar.update_traces(
    texttemplate='%{text:.3f}',
    textposition="auto",
    marker=dict(color=colour, line=dict(color="black", width=0.8)),
    textfont=dict(color="black"),
    insidetextfont=dict(color="black"),
)
fig_bar.write_image("AUC per class (one-vs-rest, averaged log-likelihood scores).pdf")
fig_bar.show()

# 8) Print AUC numbers for quick reference
print("AUC per class:")
for lab, a in df_auc.values:
    print(f" - {lab}: {a:.4f}")


Scoring images: 100%|██████████| 2960/2960 [00:00<00:00, 30924.82it/s]


AUC per class:
 - <=3: 0.3144
 - 4: 0.3717
 - 5: 0.4725
 - 6: 0.6337
 - 7: 0.6341
 - 8: 0.6804


In [43]:
# Label string -> integer; "<=3" is the special case and maps to 3,
# the remaining labels are plain digits.
string_to_int = {"<=3": 3}
string_to_int.update((str(value), value) for value in range(4, 9))

# Look up a label; unknown labels give None
parsed_int = string_to_int.get("<=3", None)
print(parsed_int)  # Output: 3

3


----

In [None]:
# Bar plots of the per-label histograms after subtracting each histogram's mean
# bin value: only the part of a bin that exceeds the mean is kept.
order = ["<=3", "4", "5", "6", "7", "8"]

# Number of leading bins to drop; also the x offset of the first kept bin.
# NOTE(review): the original comment gives the unit as mm — confirm.
min_dist = 0

for label in tqdm(order, desc="Plotting", unit=" label"):
    raw_hist = normalised_disparities_per_label_file[label]
    if len(raw_hist) == 0 or np.all(raw_hist == 0):
        print(f"[{label}] no valid data, skipping.")
        continue

    # Subtract the mean occurrence from all values, then remove negative values.
    # The subtraction creates a new array, so the loaded data is not modified.
    hist_array = np.clip(raw_hist - np.mean(raw_hist), 0, None)

    # Only keep values that are far enough away
    hist_array = hist_array[min_dist:]

    # x = disparity values (bin index + offset), y = thresholded histogram values
    x = np.arange(hist_array.shape[0]) + min_dist
    y = hist_array

    # A constant histogram leaves nothing above its mean; stats would be undefined.
    nonzero = y > 0
    if not nonzero.any():
        print(f"[{label}] no valid data, skipping.")
        continue

    # Histogram-weighted stats, on the same x values that are plotted
    min_val = x[nonzero].min()
    max_val = x[nonzero].max()
    mean = (x * y).sum() / y.sum()
    var = ((x - mean) ** 2 * y).sum() / y.sum()
    std = np.sqrt(var)

    fig = px.bar(
        x=x, y=y,
        labels={'x': 'Disparity Value', 'y': 'Fraction of Valid Pixels'},
        title=f"Normalised Disparity Histogram for “{label}”"
    )
    fig.update_layout(yaxis=dict(range=[0, 7_000]))

    # put stats below the chart
    fig.add_annotation(
        text=f"Min: {min_val:.2f},  Max: {max_val:.2f},  "
                f"Mean: {mean:.2f},  Std: {std:.2f}",
        showarrow=False,
        x=0.5, y=-0.11,
        xref='paper', yref='paper',
        font=dict(size=12)
    )

    fig.show()
    

In [None]:
# Call the project's disparity_depth_estimation on the RECTIFIED_IMAGES folder,
# once per mask setting (currently only None).
# NOTE(review): `stop=10` presumably limits how many images are processed — confirm
# against src/disparity.
for mask in [None]:
    print(f"Using mask: {mask}")
    disparity.disparity_depth_estimation(RECTIFIED_IMAGES, stop=10, mask=mask)

In [None]:
# Same call as for the rectified images, but on the TEST_IMAGES folder.
# NOTE(review): `stop=10` presumably limits how many images are processed — confirm
# against src/disparity.
for mask in [None]:
    print(f"Using mask: {mask}")
    disparity.disparity_depth_estimation(TEST_IMAGES, stop=10, mask=mask)

In [None]:
# Call the project's canny_edge_detection on the rectified images once per threshold pair.
# NOTE(review): each tuple is presumably (lower, upper) Canny hysteresis thresholds and
# `stop=4` presumably an image limit — confirm against src/disparity. The spelling
# `threshholds` is the keyword name this call passes, so it must stay as-is here.
for threshholds in [(60, 180), (100, 200)]:
    print(f"Using thresholds: {threshholds}")
    disparity.canny_edge_detection(RECTIFIED_IMAGES, stop=4, threshholds=threshholds)