In [24]:
import pandas as pd
import numpy as np
from IPython.display import display
import os


# Load your data
csv_path = os.path.join("..", "Output", "Pixel Matrix", "Pixel_Matrix_With_Distance_54999_Nye.csv")
xlsx_path = os.path.join("..", "..", "Data", "Raw Data", "2024-04-08 Test data for overbite classification.xlsx")

# Calibration constants (previously inline magic numbers).
# Y_AXIS_MAX is the value the detected Y coordinate is subtracted from to flip
# the axis; presumably the last row index of a 1024-px-high image -- TODO confirm.
Y_AXIS_MAX = 1023
# Factor that converts a vertical pixel distance into millimetres.
MM_PER_PIXEL = 0.08

df_csv = pd.read_csv(csv_path)
df_excel = pd.read_excel(xlsx_path)

# Clean filenames for merge. Both sides are normalised so that a ".png"
# suffix on either file cannot silently prevent rows from matching.
df_excel["Filename"] = df_excel["Filename"].str.replace(".png", "", regex=False)
df_csv["Filename"] = df_csv["Filename"].str.replace(".png", "", regex=False)

# Only use filenames that exist in both files
df_excel_matched = df_excel[df_excel["Filename"].isin(df_csv["Filename"])]

# Merge metadata and predictions
df_merged = pd.merge(df_excel_matched, df_csv, on="Filename", how="left")

# Extract jaw and side from the filename; Base_ID is the first run of
# uppercase letters / digits found in it (e.g. "013FHA7K_upper_left" -> "013FHA7K").
df_merged["Jaw"] = df_merged["Filename"].str.extract(r"_(upper|lower)")
df_merged["Side"] = df_merged["Filename"].str.extract(r"_(left|right)")
df_merged["Base_ID"] = df_merged["Filename"].str.extract(r"([A-Z0-9]+)")

# Flip Y and compute Y* for upper jaw.
# The columns are computed for every row, but only the upper-jaw values are
# used when the pairs are built below.
df_merged["Y_flipped"] = Y_AXIS_MAX - df_merged["Y_Refined"]
df_merged["Y_star"] = df_merged["Y_flipped"] - df_merged["Y_vertical_translate"]

# Separate upper and lower jaw data
df_upper = df_merged[df_merged["Jaw"] == "upper"].copy()
df_lower = df_merged[df_merged["Jaw"] == "lower"].copy()

# Merge by Base_ID and Side only (left-left, right-right).
# pd.merge defaults to an inner join, so an upper image without a lower
# partner on the same side (or vice versa) is dropped from df_pair.
df_pair = pd.merge(
    df_upper[["Base_ID", "Side", "Y_star", "Class"]],
    df_lower[["Base_ID", "Side", "Y_Refined"]],
    on=["Base_ID", "Side"],
    suffixes=("_upper", "_lower")
)

# Calculate overbite in mm: (translated upper Y - lower Y) in pixels, scaled to mm
df_pair["overbite_mm"] = (df_pair["Y_star"] - df_pair["Y_Refined"]) * MM_PER_PIXEL

# Classify overbite
def classify_overbite(mm):
    """Map an overbite measurement in millimetres to a class label.

    Parameters
    ----------
    mm : float
        Overbite in millimetres. May be missing (NaN / None).

    Returns
    -------
    str
        "A" for mm < 1, "B" for 1 <= mm < 2, "C" for 2 <= mm < 3,
        "D" for 3 <= mm < 4, "E" for mm >= 4, and "" when ``mm`` is missing.
    """
    # A missing value fails every "<" comparison below and would otherwise be
    # labelled "E"; return an empty label instead so it can never be counted
    # as a correct prediction.
    if pd.isna(mm):
        return ""
    if mm < 1:
        return "A"
    elif mm < 2:
        return "B"
    elif mm < 3:
        return "C"
    elif mm < 4:
        return "D"
    else:
        return "E"

# Label every upper/lower pair with its predicted overbite class
df_pair["Predicted_Class"] = df_pair["overbite_mm"].apply(classify_overbite)

# Compare to ground truth
df_pair["Match"] = df_pair["Predicted_Class"] == df_pair["Class"]
accuracy = df_pair["Match"].mean()

# Display results
display(df_pair[["Base_ID", "Side", "Y_star", "Y_Refined", "overbite_mm", "Predicted_Class", "Class", "Match"]])
print(f"✅ Classification Accuracy (side-matched): {accuracy:.2%}")
print(f"🔎 Matched image pairs (correct side): {len(df_pair)}")

# Optional: Save to CSV.
# The path is kept in a variable so the confirmation message always names the
# file that was actually written (the previous message cited a different name).
results_path = os.path.join("..", "Output", "Overbite Detection", "overbite_classification_results_54999_Nye.csv")
df_pair.to_csv(results_path, index=False)
print(f"💾 Results saved to {results_path}")


Unnamed: 0,Base_ID,Side,Y_star,Y_Refined,overbite_mm,Predicted_Class,Class,Match
0,013FHA7K,left,390.0,369,1.68,B,B,True
1,013FHA7K,right,413.0,369,3.52,D,D,True
2,013NUWYR,left,305.0,243,4.96,E,E,True
3,013NUWYR,right,248.0,235,1.04,B,A,False
4,013NXP1H,left,368.0,357,0.88,A,A,True
...,...,...,...,...,...,...,...,...
145,LA67YYVJ,right,502.0,436,5.28,E,E,True
146,LGADV15G,left,360.0,350,0.80,A,A,True
147,LGADV15G,right,355.0,353,0.16,A,A,True
148,MNIAB8K3,left,354.0,336,1.44,B,B,True


✅ Classification Accuracy (side-matched): 96.00%
🔎 Matched image pairs (correct side): 150
💾 Results saved to overbite_classification_results_sidematched.csv


# Nye CSV filer, ny metode

In [44]:
import pandas as pd
import numpy as np
import os
from sklearn.metrics import cohen_kappa_score

# === Paths ===
test_path = os.path.join("..", "..", "Data", "Raw Data", "2025-05-08 TRANSLATE_KEY_set4.csv")  # Now using CSV
pred_path = os.path.join("..", "Output", "Pixel Matrix", "Pixel_Matrix_With_Distance_54999_Nye.csv")
summary_path = os.path.join("..", "Output", "Overbite Detection", "patient_level_summary4.csv")

# === Calibration constants (previously inline magic numbers) ===
# Y_AXIS_MAX is the value the detected Y coordinate is subtracted from to flip
# the axis; presumably the last row index of a 1024-px-high image -- TODO confirm.
Y_AXIS_MAX = 1023
# Factor that converts a vertical pixel distance into millimetres.
MM_PER_PIXEL = 0.08

# === Load data (CSV-only now) ===
# The translate-key file is semicolon-separated; the prediction file uses the default comma.
df_test = pd.read_csv(test_path, delimiter=";")
df_pred = pd.read_csv(pred_path)

# === Clean filenames ===
# Strip ".png" on both sides so the two files agree on the key used for matching.
df_test["Filename"] = df_test["Filename"].str.replace(".png", "", regex=False)
df_pred["Filename"] = df_pred["Filename"].str.replace(".png", "", regex=False)

# === Keep only rows where model made a prediction ===
df_test = df_test[df_test["Filename"].isin(df_pred["Filename"])].copy()

# === Merge predictions into test data ===
df = pd.merge(df_test, df_pred[["Filename", "X_Refined", "Y_Refined"]], on="Filename", how="left")

# === Extract metadata ===
# Jaw / Side come from the "_upper|_lower" and "_left|_right" filename tokens;
# Base_ID is the first run of uppercase letters / digits in the filename.
df["Jaw"] = df["Filename"].str.extract(r"_(upper|lower)")
df["Side"] = df["Filename"].str.extract(r"_(left|right)")
df["Base_ID"] = df["Filename"].str.extract(r"([A-Z0-9]+)")

# === Compute Y_star for upper jaw ===
# Computed for every row; only the upper-jaw rows are used below.
df["Y_flipped"] = Y_AXIS_MAX - df["Y_Refined"]
df["Y_star"] = df["Y_flipped"] - df["Y_vertical_translate"]

# === Compute overbite per side ===
df_upper = df[df["Jaw"] == "upper"].copy()
df_lower = df[df["Jaw"] == "lower"][["Base_ID", "Side", "Y_Refined"]].rename(columns={"Y_Refined": "Y_Refined_lower"})
# Left join: an upper image without a same-side lower partner is kept, with a
# missing Y_Refined_lower and therefore a missing overbite_mm.
df_upper = pd.merge(df_upper, df_lower, on=["Base_ID", "Side"], how="left")
df_upper["overbite_mm"] = (df_upper["Y_star"] - df_upper["Y_Refined_lower"]) * MM_PER_PIXEL

# === Classify overbite
def classify_overbite(mm):
    """Return the overbite class label for a measurement in millimetres.

    Missing values map to the empty string. Otherwise the label is picked by
    the first upper bound the value falls below: <1 "A", <2 "B", <3 "C",
    <4 "D"; anything from 4 mm upwards is "E".
    """
    if pd.isna(mm):
        return ""

    # (exclusive upper bound in mm, label), checked in ascending order
    upper_bounds = ((1, "A"), (2, "B"), (3, "C"), (4, "D"))
    for limit, label in upper_bounds:
        if mm < limit:
            return label
    return "E"

# === Per-image class label (one row per upper-jaw image)
df_upper["Predicted_Class"] = df_upper["overbite_mm"].apply(classify_overbite)

# === Average overbite per patient (include all, even NONE)
df_upper["Base_ID"] = df_upper["Filename"].str.extract(r"([A-Z0-9]+)")
df_avg_all = (
    df_upper
    .groupby("Base_ID", as_index=False)
    .agg(
        overbite_mm=("overbite_mm", "mean"),
        Class_FINAL=("Class_FINAL", "first"),
    )
)
# Re-classify on the averaged value and express the same average in pixels
df_avg_all["Predicted_Class"] = df_avg_all["overbite_mm"].apply(classify_overbite)
df_avg_all["Overbite_pixel_AVG"] = df_avg_all["overbite_mm"] / 0.08


# === Match column
def _match_flag(row):
    """Return whether prediction equals the true class, or "" when there is no usable true class."""
    true_class = row["Class_FINAL"]
    if pd.isna(true_class) or true_class == "NONE":
        return ""
    return row["Predicted_Class"] == true_class


df_avg_all["Match"] = df_avg_all.apply(_match_flag, axis=1)

# === Format final patient-level summary
summary_renames = {
    "Base_ID": "Filename",
    "overbite_mm": "Overbite_mm_AVG",
    "Class_FINAL": "True Class",
}
summary_columns = ["Filename", "Overbite_pixel_AVG", "Overbite_mm_AVG", "Predicted_Class", "True Class", "Match"]
df_patient_summary = df_avg_all.rename(columns=summary_renames)[summary_columns]

# === Add Overbite_AVG from test data for comparison
# One Overbite_AVG value per patient, taken from the first upper-jaw row that has one.
is_upper_row = df["Jaw"] == "upper"
df_overbite_avg = (
    df.loc[is_upper_row, ["Base_ID", "Overbite_AVG"]]
    .dropna()
    .drop_duplicates(subset="Base_ID")
)
df_patient_summary = df_patient_summary.merge(
    df_overbite_avg.rename(columns={"Base_ID": "Filename"}),
    on="Filename",
    how="left",
)
df_patient_summary["Overbite_pixel_DIFF"] = (
    df_patient_summary["Overbite_pixel_AVG"] - df_patient_summary["Overbite_AVG"]
)

# === Reorder columns
final_column_order = [
    "Filename", "Overbite_pixel_AVG", "Overbite_AVG", "Overbite_pixel_DIFF",
    "Overbite_mm_AVG", "Predicted_Class", "True Class", "Match",
]
df_patient_summary = df_patient_summary[final_column_order]

# === Save final summary CSV
df_patient_summary.to_csv(summary_path, index=False)
print(f"Patient-level summary saved to: {summary_path}")

# === Accuracy and kappa (on evaluable cases only)
# A patient is evaluable when it has a true class other than "NONE" and the
# model produced a (non-empty) predicted class.
true_class = df_patient_summary["True Class"]
has_true_class = true_class.notna() & true_class.ne("NONE")
has_prediction = df_patient_summary["Predicted_Class"].ne("")
df_eval = df_patient_summary[has_true_class & has_prediction]

is_correct = df_eval["Predicted_Class"] == df_eval["True Class"]
accuracy = is_correct.mean()

# Quadratic weights penalise a disagreement more the further apart the two
# classes are in the ordered label list.
kappa = cohen_kappa_score(
    df_eval["True Class"],
    df_eval["Predicted_Class"],
    labels=["A", "B", "C", "D", "E"],
    weights="quadratic",
)

n_evaluated = len(df_eval)
n_correct = df_eval["Match"].sum()
n_excluded_none = df_patient_summary["True Class"].eq("NONE").sum()

print(f"\nToothset Classification Accuracy: {accuracy:.2%} ({n_correct} out of {n_evaluated})")
print(f"Weighted Cohen’s Kappa: {kappa:.4f}")
print(f"Total patients evaluated: {n_evaluated}")
print(f"Patients excluded (NONE): {n_excluded_none}")

Patient-level summary saved to: ..\Output\Overbite Detection\patient_level_summary4.csv

Toothset Classification Accuracy: 95.77% (68 out of 71)
Weighted Cohen’s Kappa: 0.9905
Total patients evaluated: 71
Patients excluded (NONE): 3


# Resultater
2025-05-08 TRANSLATE_KEY_set1.csv

| Field       | Value                 |
| ----------- | --------------------- |
| `Toothset Classification Accuracy`  | `94.37% (67 out of 71)` |
| `Weighted Cohen’s Kappa` | `0.9852`                 |
| `Total patients evaluated`  | `71` |
| `Patients excluded (NONE)` | `3`                 |

2025-05-08 TRANSLATE_KEY_set2.csv

| Field       | Value                 |
| ----------- | --------------------- |
| `Toothset Classification Accuracy`  | `95.77% (68 out of 71)` |
| `Weighted Cohen’s Kappa` | `0.9879`                 |
| `Total patients evaluated`  | `71` |
| `Patients excluded (NONE)` | `3`                 |

2025-05-08 TRANSLATE_KEY_set3.csv

| Field       | Value                 |
| ----------- | --------------------- |
| `Toothset Classification Accuracy`  | `97.22% (70 out of 72)` |
| `Weighted Cohen’s Kappa` | `0.9936`                 |
| `Total patients evaluated`  | `72` |
| `Patients excluded (NONE)` | `2`                 |


2025-05-08 TRANSLATE_KEY_set4.csv

| Field       | Value                 |
| ----------- | --------------------- |
| `Toothset Classification Accuracy`  | `95.77% (68 out of 71)` |
| `Weighted Cohen’s Kappa` | `0.9905`                 |
| `Total patients evaluated`  | `71` |
| `Patients excluded (NONE)` | `3`                 |



# Udregning

Patient = 013FHA7K

\------------------------------

Filename	013FHA7K_upper_left

Y_Refined	276

\------------------------------

Y_flipped	747

Y_vertical_translate	347

Y_target	400

Class_FINAL	C

\------------------------------

Y_flipped = 1023 - Y_Refined = 1023 - 276 = 747

Y_star = Y_flipped - Y_vertical_translate = 747 - 347 = 400


| Field       | Value                 |
| ----------- | --------------------- |
| `Filename`  | `013FHA7K_lower_left` |
| `Y_Refined` | `369`                 |

overbite_pixels = Y_star - Y_Refined_lower = 400 - 369 = 31

overbite_mm = 31 * 0.08 = 2.48


013FHA7K_upper_right.png

| Field      | Value |
| ---------- | ----- |
| Y\_Refined | `276` |

| Field                  | Value |
| ---------------------- | ----- |
| Y\_flipped             | `747` |
| Y\_vertical\_translate | `342` |


Y_flipped = 1023 - 276 = 747

Y_star = 747 - 342 = 405


013FHA7K_lower_right.png

overbite_pixels = Y_star - Y_Refined_lower = 405 - 369 = 36

overbite_mm = 36 * 0.08 = 2.88


avg_overbite = (2.48 + 2.88) / 2 = 2.68 mm

Predicted_Class = "C"  # Because 2 mm ≤ x < 3 mm
