In [2]:
# 1) Setup & Imports
import os
import pandas as pd
import numpy as np
from pathlib import Path

pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 160)
print("Environment ready.")

Environment ready.


In [3]:
# 2) Paths & Helpers
BASE = Path('.')
COMBINED = BASE / 'combined_summary_old'

def load_csv(path: Path, required: bool = True) -> pd.DataFrame:
    if path.exists():
        try:
            df = pd.read_csv(path)
            print(f"Loaded: {path} -> {df.shape}")
            return df
        except Exception as e:
            print(f"Failed to load {path}: {e}")
            if required:
                raise
            return pd.DataFrame()
    else:
        msg = f"Missing: {path}"
        print(msg)
        if required:
            raise FileNotFoundError(msg)
        return pd.DataFrame()

In [None]:
# 2b) Base directory for raw results 
BASE_RESULTS = Path('7-9 January Results')

# Helpers to collect per-fold accuracies from JSON summaries
import json

def collect_json_per_seed(kfold_dir: Path, model_label: str, noise: str) -> list:
    results = []
    # file names that may exist
    candidates = ['best_per_seed.json', 'bilstm_best_per_seed.json', 'transformer_best_per_seed.json']
    for name in candidates:
        fp = kfold_dir / name
        if fp.exists():
            with open(fp, 'r') as f:
                data = json.load(f)
            # Expect list of records with keys: seed, fold, sr, val_acc
            for rec in data:
                sr = int(rec.get('sr')) if rec.get('sr') is not None else None
                acc = float(rec.get('val_acc')) if rec.get('val_acc') is not None else None
                if acc is not None and sr is not None:
                    results.append({
                        'model': model_label,
                        'noise': noise,
                        'sr': sr,
                        'val_acc': acc,
                        'seed': rec.get('seed'),
                        'fold': rec.get('fold'),
                    })
            break
    return results

# Helpers for BiLSTM-Transformer: parse history.csv per seed/fold
import csv

def collect_bt_history(kfold_sr_dir: Path, model_label: str, noise: str, sr_value: int) -> list:
    results = []
    per_seed_and_fold = kfold_sr_dir / 'per_seed_and_fold'
    if not per_seed_and_fold.exists():
        return results
    for item in per_seed_and_fold.iterdir():
        if item.is_dir():
            hist = item / 'history.csv'
            if hist.exists():
                # take best val_acc over epochs for that seed/fold
                best_acc = None
                with open(hist, 'r', newline='') as f:
                    reader = csv.DictReader(f)
                    for row in reader:
                        try:
                            acc = float(row.get('val_acc'))
                            if best_acc is None or acc > best_acc:
                                best_acc = acc
                        except Exception:
                            continue
                if best_acc is not None:
                    # parse seed/fold from folder name if possible
                    name = item.name
                    seed = None
                    fold = None
                    # expected pattern like ..._seedXX_foldYY_srZZZZ
                    parts = name.split('_')
                    for i,p in enumerate(parts):
                        if p.startswith('seed'):
                            try:
                                seed = int(p.replace('seed',''))
                            except:
                                pass
                        if p.startswith('fold'):
                            try:
                                fold = int(p.replace('fold',''))
                            except:
                                pass
                    results.append({
                        'model': model_label,
                        'noise': noise,
                        'sr': sr_value,
                        'val_acc': best_acc,
                        'seed': seed,
                        'fold': fold,
                    })
    return results

In [4]:
# 3) Load Combined Summaries
model_summary = load_csv(COMBINED / '25_model_summary_grand.csv')
stats_holm = load_csv(COMBINED / '07_stats_tests_holm.csv', required=False)
class_acc_pivot = load_csv(COMBINED / '12c_class_accuracy_pivot.csv', required=False)
efficiency_agg = load_csv(COMBINED / '10_efficiency_agg.csv', required=False)
param_counts = load_csv(COMBINED / '08_param_counts.csv', required=False)
per_fold = load_csv(COMBINED / '01_per_fold_all_models.csv', required=False)

# Optional: detailed inference logs (real-time)
inference_logs = load_csv(BASE / 'inference_log_detailed.csv', required=False)

Loaded: combined_summary_old\25_model_summary_grand.csv -> (3, 7)
Loaded: combined_summary_old\07_stats_tests_holm.csv -> (12, 6)
Loaded: combined_summary_old\12c_class_accuracy_pivot.csv -> (32, 6)
Loaded: combined_summary_old\10_efficiency_agg.csv -> (8, 6)
Loaded: combined_summary_old\08_param_counts.csv -> (6, 3)
Loaded: combined_summary_old\01_per_fold_all_models.csv -> (180, 6)
Loaded: inference_log_detailed.csv -> (96, 9)


In [22]:
# 3b) Build per-fold dataset from 7-9 January Results (MFCC = 40 only)
per_fold_rows = []

# BiLSTM 40
for noise in ['Pink','White']:
    folder = BASE_RESULTS / 'Bilstm' / f'Bilstm {noise} 40' / 'kfold_outputs'
    if folder.exists():
        per_fold_rows += collect_json_per_seed(folder, 'LSTM', noise)

# Transformer 40
for noise in ['Pink','White']:
    folder = BASE_RESULTS / 'Transformer' / f'Transformer {noise} 40' / 'kfold_outputs'
    if folder.exists():
        per_fold_rows += collect_json_per_seed(folder, 'Transformer', noise)

# BiLSTM-Transformer 40
for noise in ['Pink','White']:
    for sr_tag, sr_val, sub in [('8K',8000,'kfold_outputs_sr8000'), ('16K',16000,'kfold_outputs_sr16000')]:
        folder = BASE_RESULTS / 'Bilstm-Transformer' / f'Bilstm-Transformer {sr_tag} {noise} 40' / sub
        if folder.exists():
            per_fold_rows += collect_bt_history(folder, 'BiLSTM+Transformer', noise, sr_val)

per_fold = pd.DataFrame(per_fold_rows)
print("Built per_fold from raw results:", per_fold.shape)
print(per_fold.head())

Built per_fold from raw results: (84, 6)
  model noise     sr   val_acc  seed  fold
0  LSTM  Pink   8000  0.966820    36     1
1  LSTM  Pink   8000  0.965177    38     4
2  LSTM  Pink   8000  0.963535    42     2
3  LSTM  Pink  16000  0.966820    36     3
4  LSTM  Pink  16000  0.965834    38     2


In [23]:
# 4) Overall Performance (Tabel 1) — recomputed from per_fold (MFCC 40)
if not per_fold.empty:
    g = per_fold.groupby('model')['val_acc'].agg(['mean','std','min','max']).reset_index()
    g = g.rename(columns={'model':'Model','mean':'Grand_MeanAcc_%','std':'Std_Across_Noise_SR_%','min':'Min_Accuracy_%','max':'Max_Accuracy_%'})
    # convert to %
    for c in ['Grand_MeanAcc_%','Std_Across_Noise_SR_%','Min_Accuracy_%','Max_Accuracy_%']:
        g[c] = g[c] * 100
    tbl1 = g
else:
    tbl1 = pd.DataFrame()

print("\nTabel 1 — Perbandingan Kinerja Model Secara Keseluruhan (Akurasi, MFCC=40)")
print(tbl1)


Tabel 1 — Perbandingan Kinerja Model Secara Keseluruhan (Akurasi, MFCC=40)
                Model  Grand_MeanAcc_%  Std_Across_Noise_SR_%  Min_Accuracy_%  Max_Accuracy_%
0  BiLSTM+Transformer        97.569934               0.261892       97.076216       98.061761
1                LSTM        96.528690               0.257660       96.024967       96.977661
2         Transformer        95.551220               0.298255       94.972067       95.992116


In [None]:
# 5) Statistical Significance (Tabel 2) — recomputed from per_fold (MFCC 40)
from scipy import stats

def holm_bonferroni(pvals):
    m = len(pvals)
    ranked = sorted(enumerate(pvals), key=lambda x: x[1])
    adjusted = [None]*m
    for i,(idx,p) in enumerate(ranked, start=1):
        adjusted[idx] = min(p * (m - i + 1), 1.0)
    return adjusted

rows = []
if not per_fold.empty:
    for sr in sorted(per_fold['sr'].unique()):
        for (a,b) in [('BiLSTM+Transformer','BiLSTM'), ('BiLSTM+Transformer','Transformer'), ('BiLSTM','Transformer')]:
            da = per_fold[(per_fold['sr']==sr) & (per_fold['model']==a)]['val_acc'].values
            db = per_fold[(per_fold['sr']==sr) & (per_fold['model']==b)]['val_acc'].values
            if len(da)>1 and len(db)>1:
                # two-sample t-test (independent)
                t_p = stats.ttest_ind(da, db, equal_var=False).pvalue
                # Wilcoxon (paired not appropriate; use Mann-Whitney U)
                w_p = stats.mannwhitneyu(da, db, alternative='two-sided').pvalue
                rows.append({'sr': sr, 'model_a': a, 'model_b': b, 'test': 'ttest_p', 'p_raw': t_p})
                rows.append({'sr': sr, 'model_a': a, 'model_b': b, 'test': 'wilcoxon_p', 'p_raw': w_p})

    # Holm-Bonferroni correction per SR
    df_stats = pd.DataFrame(rows)
    tbl2_parts = []
    for sr in sorted(df_stats['sr'].unique()):
        df_sr = df_stats[df_stats['sr']==sr].copy()
        adjusted = holm_bonferroni(df_sr['p_raw'].tolist())
        df_sr['p_holm'] = adjusted
        tbl2_parts.append(df_sr)
    tbl2 = pd.concat(tbl2_parts, ignore_index=True)
else:
    tbl2 = pd.DataFrame()

print("\nTabel 2 — Uji Signifikansi Statistik (Holm-Bonferroni, MFCC=40)")
print(tbl2)


Tabel 2 — Uji Signifikansi Statistik (Holm-Bonferroni, MFCC=40)
       sr             model_a      model_b        test     p_raw    p_holm
0    8000  BiLSTM+Transformer         LSTM     ttest_p  0.000004  0.000021
1    8000  BiLSTM+Transformer         LSTM  wilcoxon_p  0.000144  0.000432
2    8000  BiLSTM+Transformer  Transformer     ttest_p  0.000002  0.000014
3    8000  BiLSTM+Transformer  Transformer  wilcoxon_p  0.000144  0.000576
4    8000                LSTM  Transformer     ttest_p  0.000198  0.000396
5    8000                LSTM  Transformer  wilcoxon_p  0.004998  0.004998
6   16000  BiLSTM+Transformer         LSTM     ttest_p  0.000022  0.000109
7   16000  BiLSTM+Transformer         LSTM  wilcoxon_p  0.000144  0.000431
8   16000  BiLSTM+Transformer  Transformer     ttest_p  0.000002  0.000013
9   16000  BiLSTM+Transformer  Transformer  wilcoxon_p  0.000144  0.000287
10  16000                LSTM  Transformer     ttest_p  0.000059  0.000236
11  16000                LSTM  Tran

In [25]:
# 6) Robustness Under Noise (Tabel 3) — recomputed from per_fold (MFCC 40)
if not per_fold.empty:
    g = per_fold.groupby(['model','noise','sr'])['val_acc']
    tbl3 = g.agg(['mean','std','min','max','count']).reset_index()
else:
    tbl3 = pd.DataFrame()

print("\nTabel 3 — Analisis Ketahanan terhadap Kebisingan (MFCC 40)")
print(tbl3)


Tabel 3 — Analisis Ketahanan terhadap Kebisingan (MFCC 40)
                 model  noise     sr      mean       std       min       max  count
0   BiLSTM+Transformer   Pink   8000  0.975513  0.002832  0.971091  0.979304     15
1   BiLSTM+Transformer   Pink  16000  0.976345  0.002643  0.972405  0.980618     15
2   BiLSTM+Transformer  White   8000  0.974287  0.002433  0.970762  0.978640     15
3   BiLSTM+Transformer  White  16000  0.976652  0.002094  0.972733  0.980289     15
4                 LSTM   Pink   8000  0.965177  0.001643  0.963535  0.966820      3
5                 LSTM   Pink  16000  0.967477  0.002052  0.965834  0.969777      3
6                 LSTM  White   8000  0.962659  0.002187  0.960250  0.964520      3
7                 LSTM  White  16000  0.965834  0.002608  0.962878  0.967806      3
8          Transformer   Pink   8000  0.955869  0.002466  0.953022  0.957293      3
9          Transformer   Pink  16000  0.955541  0.003619  0.951380  0.957950      3
10         Trans

In [26]:
# 7) Sampling Rate Effects (Tabel 4) — recomputed from per_fold (MFCC 40)
if not per_fold.empty:
    sr_mean = per_fold.groupby(['model','sr'], as_index=False)['val_acc'].mean()
    piv = sr_mean.pivot(index='model', columns='sr', values='val_acc').reset_index()
    piv.columns = ['Model'] + [f"{int(c)}Hz" if str(c).isdigit() else str(c) for c in piv.columns[1:]]
    if 'BiLSTM+Transformer' in piv['Model'].values:
        h = piv[piv['Model']=='BiLSTM+Transformer'].iloc[0]
        for c in piv.columns[1:]:
            piv[f"Gap_{c}_vs_Hybrid"] = piv[c] - h[c]
    tbl4 = piv
else:
    tbl4 = pd.DataFrame()

print("\nTabel 4 — Dampak Sampling Rate dan Selisih Akurasi (MFCC 40)")
print(tbl4)


Tabel 4 — Dampak Sampling Rate dan Selisih Akurasi (MFCC 40)
                Model    8000Hz   16000Hz  Gap_8000Hz_vs_Hybrid  Gap_16000Hz_vs_Hybrid
0  BiLSTM+Transformer  0.974900  0.976499              0.000000               0.000000
1                LSTM  0.963918  0.966656             -0.010982              -0.009843
2         Transformer  0.954662  0.956362             -0.020238              -0.020136


In [27]:
# 8) Training Stability (Tabel 5) — recomputed from per_fold (MFCC 40)
# Approximated cross-seed stability: compute per (model,sr,seed) mean, then std across seeds
if not per_fold.empty:
    seed_means = per_fold.groupby(['model','sr','seed'])['val_acc'].mean().reset_index()
    stab = seed_means.groupby(['model','sr'])['val_acc'].agg(['mean','std']).reset_index()
    stab = stab.rename(columns={'mean':'mean_of_seed_means','std':'sd_of_seed_means'})
    stab['stability_score'] = (1.0 / (stab['sd_of_seed_means'] + 1e-9)) * 1000
    tbl5 = stab
else:
    tbl5 = pd.DataFrame()

print("\nTabel 5 — Analisis Stabilitas Pelatihan (MFCC 40)")
print(tbl5)


Tabel 5 — Analisis Stabilitas Pelatihan (MFCC 40)
                model     sr  mean_of_seed_means  sd_of_seed_means  stability_score
0  BiLSTM+Transformer   8000            0.974900          0.000281     3.562511e+06
1  BiLSTM+Transformer  16000            0.976499          0.001385     7.221832e+05
2                LSTM   8000            0.963918          0.001904     5.252699e+05
3                LSTM  16000            0.966656          0.001731     5.778471e+05
4         Transformer   8000            0.954662          0.002966     3.371797e+05
5         Transformer  16000            0.956362          0.002546     3.927059e+05


In [None]:
# 9) Efficiency & Parameters (Tabel 6)
print("\nTabel 6 — Kompleksitas Model dan Efisiensi Pelatihan")
print("(Diabaikan untuk saat ini; sumber valid throughput/parameter tidak tersedia dari raw folder)")


Tabel 6 — Kompleksitas Model dan Efisiensi Pelatihan
                model     sr   n_params
0  BiLSTM+Transformer   8000  1687303.0
1  BiLSTM+Transformer  16000  1687303.0
2                LSTM   8000   560644.0
3                LSTM  16000   560644.0
4         Transformer   8000  1687303.0
5         Transformer  16000  1687303.0
                model     sr  mean_throughput  avg_epoch_time_sec  total_time_sec  n_folds
0  BiLSTM+Transformer   8000       302.107158           40.539121    12161.736424       30
1  BiLSTM+Transformer  16000       356.209157           34.298491    10289.547351       30
2                LSTM   8000       137.823368           88.522673    26556.801814       30
3                LSTM  16000       183.208741           66.555773    19966.732026       30
4         Transformer   8000       328.522483           37.152917      371.529170       15
5         Transformer   8000       323.397524           37.753824      377.538244       15
6         Transformer  16000 

In [16]:
# 10) Per-Class Performance
if not class_acc_pivot.empty:
    print("\nPerforma Per-Kelas — ringkasan pivot")
    print(class_acc_pivot)
else:
    print("\nPerforma Per-Kelas — data pivot tidak tersedia")


Performa Per-Kelas — ringkasan pivot
          model     sr  noise  class  n_classes  class_accuracy
0        BiLSTM   8000   Pink      0          4        0.956768
1        BiLSTM   8000   Pink      1          4        0.952907
2        BiLSTM   8000   Pink      2          4        0.957295
3        BiLSTM   8000   Pink      3          4        0.964633
4        BiLSTM   8000  White      0          4        0.956768
5        BiLSTM   8000  White      1          4        0.952907
6        BiLSTM   8000  White      2          4        0.957295
7        BiLSTM   8000  White      3          4        0.964633
8        BiLSTM  16000   Pink      0          4        0.961023
9        BiLSTM  16000   Pink      1          4        0.952469
10       BiLSTM  16000   Pink      2          4        0.961796
11       BiLSTM  16000   Pink      3          4        0.964455
12       BiLSTM  16000  White      0          4        0.961023
13       BiLSTM  16000  White      1          4        0.952469
14

In [17]:
# 11) Real-Time Integration (Tabel 7)
if not inference_logs.empty:
    cols = inference_logs.columns
    def find_col(prefixes):
        for p in prefixes:
            for c in cols:
                if p in c.lower():
                    return c
        return None
    inf = find_col(['inference','infer'])
    tr = find_col(['transport','tcp'])
    tot = find_col(['total'])
    metrics = {
        'Inference Time (ms)': float(inference_logs[inf].mean()) if inf else np.nan,
        'Transport Latency (ms)': float(inference_logs[tr].mean()) if tr else np.nan,
        'Total Response Time (ms)': float(inference_logs[tot].mean()) if tot else np.nan,
    }
    tbl7 = pd.DataFrame([metrics])
else:
    tbl7 = pd.DataFrame()

print("\nTabel 7 — Penilaian Kinerja Real-Time (Integrasi Endless Runner)")
print(tbl7)


Tabel 7 — Penilaian Kinerja Real-Time (Integrasi Endless Runner)
   Inference Time (ms)  Transport Latency (ms)  Total Response Time (ms)
0                  NaN                     NaN                       NaN


## Sub-bab: Perbandingan Koefisien MFCC dan Rekomendasi
Ringkas akurasi lintas jumlah koefisien MFCC (13, 20, 25, 30, 40), stabilitas (CV), dan skor rekomendasi untuk transparansi pemilihan (highest is best).

In [18]:
# 12) MFCC Comparison & Recommendation
mfcc_scores = load_csv(BASE / 'mfcc_recommendation_scores.csv', required=False)
mfcc_acc_pivot = load_csv(BASE / 'mfcc_accuracy_pivot.csv', required=False)
mfcc_stability = load_csv(BASE / 'mfcc_stability_analysis.csv', required=False)

print("\nSkor Rekomendasi MFCC (Highest is Best)")
print(mfcc_scores.round(4) if not mfcc_scores.empty else "(no data)")

print("\nAkurasi Rata-rata per Model × MFCC")
print(mfcc_acc_pivot.round(4) if not mfcc_acc_pivot.empty else "(no data)")

print("\nStabilitas (CV%) per Model × MFCC")
print(mfcc_stability.round(4) if not mfcc_stability.empty else "(no data)")

print("\nMetode Skor Rekomendasi:")
print("  - Accuracy: min–max normalisasi rata-rata akurasi → 0..100")
print("  - Stability: 100 − rata-rata CV% (semakin kecil CV%, skor semakin tinggi)")
print("  - Fairness: min–max normalisasi akurasi minimum → 0..100")
print("  - Final: 0.5*Accuracy + 0.3*Stability + 0.2*Fairness (weights dapat disesuaikan)")

Loaded: mfcc_recommendation_scores.csv -> (5, 8)
Loaded: mfcc_accuracy_pivot.csv -> (3, 6)
Loaded: mfcc_stability_analysis.csv -> (15, 8)

Skor Rekomendasi MFCC (Highest is Best)
   mfcc_coefficients  avg_accuracy  avg_cv_percent  min_accuracy  accuracy_score  stability_score  fairness_score  final_score
0                 40        0.9655          0.1362        0.9555         52.6816          99.8638          9.5668      58.2133
1                 25        0.9655          0.1288        0.9554         52.4709          99.8712          9.1001      58.0168
2                 20        0.9652          0.1488        0.9549         51.1795          99.8512          6.7197      56.8890
3                 30        0.9648          0.1625        0.9540         49.5460          99.8375          3.0673      55.3377
4                 13        0.9649          0.0874        0.9533         50.0350          99.9126          0.0000      54.9913

Akurasi Rata-rata per Model × MFCC
                model  

In [28]:
# 13) Save Selected Tables for Reuse (MFCC 40)
out_dir = BASE / 'bab4_outputs'
os.makedirs(out_dir, exist_ok=True)

for name, df in [
    ('tabel1_overall.csv', 'tbl1'),
    ('tabel2_holm.csv', 'tbl2'),
    ('tabel3_noise.csv', 'tbl3'),
    ('tabel4_sr.csv', 'tbl4'),
    ('tabel5_stability.csv', 'tbl5'),
]:
    if name and df in globals() and isinstance(globals()[df], pd.DataFrame) and not globals()[df].empty:
        globals()[df].to_csv(out_dir / name, index=False)
        print(f"Saved: {name}")

# Also save per_fold backing data
if not per_fold.empty:
    per_fold.to_csv(out_dir / 'per_fold_mfcc40.csv', index=False)
    print("Saved: per_fold_mfcc40.csv")

Saved: tabel1_overall.csv
Saved: tabel2_holm.csv
Saved: tabel3_noise.csv
Saved: tabel4_sr.csv
Saved: tabel5_stability.csv
Saved: per_fold_mfcc40.csv


In [11]:
# Inspect per_fold columns
print(per_fold.columns.tolist())
print(per_fold.head())

['seed', 'fold', 'sr', 'val_acc', 'model', 'noise']
   seed  fold    sr   val_acc        model noise
0    36     1  8000  0.949737  Transformer  Pink
1    36     2  8000  0.946781  Transformer  Pink
2    36     3  8000  0.949737  Transformer  Pink
3    36     4  8000  0.942181  Transformer  Pink
4    36     5  8000  0.943477  Transformer  Pink


In [7]:
# Inspect model_summary columns and head
print(model_summary.columns.tolist())
print(model_summary.head())

['Rank', 'Model', 'Grand_Mean_Accuracy_%', 'Std_Across_Noise_SR', 'Min_Accuracy_%', 'Max_Accuracy_%', 'Total_Rows']
   Rank               Model  Grand_Mean_Accuracy_%  Std_Across_Noise_SR  Min_Accuracy_%  Max_Accuracy_%  Total_Rows
0     1  BiLSTM+Transformer                  97.52                 0.04           97.49           97.56           4
1     2                LSTM                  95.89                 0.10           95.79           95.99           4
2     3         Transformer                  94.82                 0.08           94.74           94.89           4
