# Stage 4: Predicting sound type

In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes, so edits to imported project modules are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# ===== Standard Library =====
import os
import sys

# Add ../src to the module search path. The path is relative, so it resolves
# against the kernel's current working directory. This must run before the
# `prediction_pipeline` import below (presumably that module lives in ../src
# -- confirm).
sys.path.append("../src")
# Absolute path of the parent of the current working directory.
ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))

# ===== Third-party =====
from sklearn.model_selection import KFold

# ===== Project-local =====
from prediction_pipeline import (
    Organ,
    SingleSourceDataset,
    MixedSourceDataset,
    load_single_source_dataset,
    load_mixed_dataset,
    random_forest_feature_regressor,
    train_classifier,
    GMMClassifier,
    Model,
    run_pipeline
)

In [3]:
import numpy as np
import pandas as pd

def results_to_dataframe(results):
    """Tabulate per-fold metrics and average the per-fold confusion matrices.

    Parameters
    ----------
    results : mapping
        Must provide the keys ``"feature_r2"``, ``"f1"``, ``"accuracy"`` and
        ``"confusion_matrices"``. The three metric entries are sequences with
        one value per fold; ``"confusion_matrices"`` is a sequence of
        equally-shaped matrices, one per fold.

    Returns
    -------
    tuple
        ``(table, mean_cm)`` where ``table`` is a DataFrame with the columns
        ``fold``, ``feature_r2``, ``f1`` and ``accuracy`` -- one row per fold
        (numbered from 1) followed by a final row labelled ``"avg"`` holding
        the mean of each metric -- and ``mean_cm`` is the element-wise mean
        of the confusion matrices across folds.
    """
    metric_names = ("feature_r2", "f1", "accuracy")
    per_fold = {name: results[name] for name in metric_names}

    # Element-wise mean over the fold axis.
    mean_cm = np.mean(results["confusion_matrices"], axis=0)

    n_folds = len(per_fold["feature_r2"])
    fold_table = pd.DataFrame({"fold": list(range(1, n_folds + 1)), **per_fold})

    # Summary row: the mean of every metric, labelled "avg" in the fold column.
    summary = {
        "fold": "avg",
        **{name: np.mean(values) for name, values in per_fold.items()},
    }
    table = pd.concat([fold_table, pd.DataFrame([summary])], ignore_index=True)

    return table, mean_cm


In [4]:
# Run the pipeline for the heart organ with the KNN model, then convert the
# returned results into a per-fold metrics table and the confusion matrix
# averaged element-wise across folds (see results_to_dataframe).
results = run_pipeline(Organ.HEART, model=Model.KNN)
df_results, avg_confusion_matrix = results_to_dataframe(results)
# Bare last expression so the notebook displays the averaged matrix.
avg_confusion_matrix

Loaded 50 entries from /Users/xanderyoon/School/Fall '25/ISYE 6416/Final Project copy/data/heart_sounds/HS.csv
Loaded 50 entries from /Users/xanderyoon/School/Fall '25/ISYE 6416/Final Project copy/data/lung_sounds/LS.csv
Loaded 145 entries from /Users/xanderyoon/School/Fall '25/ISYE 6416/Final Project copy/data/mixed_sounds/heart_ref/Mix_HS.csv
Loaded 145 entries from /Users/xanderyoon/School/Fall '25/ISYE 6416/Final Project copy/data/mixed_sounds/lung_ref/Mix_LS.csv
Loaded 145 entries from /Users/xanderyoon/School/Fall '25/ISYE 6416/Final Project copy/data/mixed_sounds/mixed_ref/Mix.csv


Extracting heart_sound_type: 100%|██████████| 50/50 [00:01<00:00, 26.39it/s]


Loaded 50 entries from /Users/xanderyoon/School/Fall '25/ISYE 6416/Final Project copy/data/heart_sounds/HS.csv
Loaded 50 entries from /Users/xanderyoon/School/Fall '25/ISYE 6416/Final Project copy/data/lung_sounds/LS.csv
Loaded 145 entries from /Users/xanderyoon/School/Fall '25/ISYE 6416/Final Project copy/data/mixed_sounds/heart_ref/Mix_HS.csv
Loaded 145 entries from /Users/xanderyoon/School/Fall '25/ISYE 6416/Final Project copy/data/mixed_sounds/lung_ref/Mix_LS.csv
Loaded 145 entries from /Users/xanderyoon/School/Fall '25/ISYE 6416/Final Project copy/data/mixed_sounds/mixed_ref/Mix.csv


Extracting mixed dataset (heart): 100%|██████████| 145/145 [00:05<00:00, 28.95it/s]
Folds: 100%|██████████| 5/5 [00:01<00:00,  2.89it/s]


array([[0. , 0.2, 0. , 0.2, 0. , 0.6, 1.4, 0. , 0. , 0.2],
       [0.4, 0.4, 0. , 0. , 0. , 0.2, 1.8, 0. , 0. , 0.2],
       [0. , 0.2, 0. , 1.2, 0.2, 0.2, 0.6, 0. , 0.2, 0. ],
       [0.2, 0.4, 0.2, 0.2, 0.2, 0.6, 0.6, 0. , 0. , 0.2],
       [0. , 0.6, 0. , 0.6, 0. , 0.2, 1. , 0.2, 0. , 0.8],
       [0.2, 0.6, 0. , 0.6, 0. , 1. , 0.2, 0. , 0. , 0.2],
       [0.6, 0.2, 0. , 0.2, 0. , 0.4, 0.8, 0.2, 0. , 0.2],
       [0. , 0. , 0. , 1. , 0. , 0.6, 1.2, 0.2, 0. , 0. ],
       [0. , 0.4, 0. , 0.2, 0. , 1. , 0.8, 0.4, 0. , 0.4],
       [0. , 0.4, 0. , 0.2, 0.6, 0.6, 1.2, 0. , 0.2, 0. ]])

In [5]:
# Display the per-fold metrics table; its final row ("avg") holds the mean of each metric.
df_results

Unnamed: 0,fold,feature_r2,f1,accuracy
0,1,-0.040246,0.037618,0.068966
1,2,0.033894,0.0,0.0
2,3,-0.181576,0.038177,0.068966
3,4,-0.083021,0.163383,0.241379
4,5,-0.010828,0.03183,0.068966
5,avg,-0.056356,0.054202,0.089655
