# CallGenes Benchmark Notebook

This notebook recreates the benchmarking visualizations. Run the cells in order to refresh the plots after generating new data.

## 1. Benchmarking (run_benchmarking_all.sh)

Reads `../benchmarking_results_all/summary_results.csv`, sums the TP/FP/FN counts per tool, and computes tool-level precision/recall for starts and stops from those totals.

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Per-genome benchmarking summary; the path is resolved relative to the
# current working directory.
summary_path = Path('..') / 'benchmarking_results_all' / 'summary_results.csv'
if not summary_path.exists():
    raise FileNotFoundError(f'Missing {summary_path}. Run run_benchmarking_all.sh first.')


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is zero.

    Mirrors the zero guard used by the multiplier-sweep cell so that a tool
    with no predictions (TP + FP == 0) or no reference genes (TP + FN == 0)
    reports 0 instead of NaN plus a NumPy runtime warning.
    """
    return numerator / denominator if denominator else 0


df = pd.read_csv(summary_path)
tool_metrics = []
for tool, group in df.groupby('Tool'):
    # Sum the raw counts over all rows for this tool first, then derive the
    # ratios from the totals (micro-average) rather than averaging per-row ratios.
    agg = group[['TP_Start','TP_Stop','FP_Start','FP_Stop','FN_Start','FN_Stop']].sum()
    precision_start = _safe_ratio(agg['TP_Start'], agg['TP_Start'] + agg['FP_Start'])
    recall_start = _safe_ratio(agg['TP_Start'], agg['TP_Start'] + agg['FN_Start'])
    precision_stop = _safe_ratio(agg['TP_Stop'], agg['TP_Stop'] + agg['FP_Stop'])
    recall_stop = _safe_ratio(agg['TP_Stop'], agg['TP_Stop'] + agg['FN_Stop'])
    tool_metrics.append({
        'Tool': tool,
        'TP_Start': agg['TP_Start'],
        'FP_Start': agg['FP_Start'],
        'FN_Start': agg['FN_Start'],
        'TP_Stop': agg['TP_Stop'],
        'FP_Stop': agg['FP_Stop'],
        'FN_Stop': agg['FN_Stop'],
        'Precision_Start': precision_start,
        'Recall_Start': recall_start,
        'Precision_Stop': precision_stop,
        'Recall_Stop': recall_stop
    })

tool_df = pd.DataFrame(tool_metrics)
if not tool_df.empty:
    # Best stop recall first; an empty frame has no 'Recall_Stop' column to sort on.
    tool_df = tool_df.sort_values('Recall_Stop', ascending=False)
display(tool_df)

# Skip plotting when the CSV had no rows, matching the multiplier-sweep cell.
if not tool_df.empty:
    fig, axes = plt.subplots(1, 2, figsize=(14,5), sharey=False)
    tool_df.plot(x='Tool', y=['Precision_Stop','Recall_Stop'], kind='bar', ax=axes[0], title='Stop precision/recall')
    tool_df.plot(x='Tool', y=['Precision_Start','Recall_Start'], kind='bar', ax=axes[1], title='Start precision/recall')
    for ax in axes:
        # Precision and recall are fractions, so pin both panels to [0, 1].
        ax.set_ylim(0,1)
        ax.legend(loc='lower left')
    plt.tight_layout()
    plt.show()


## 2. CallGenes multiplier sweep

Aggregates metrics from `callgenes_mode_results_*` folders to compare multiplier strategies (ratio, advisory, exponential, direct_only, etc.).

In [None]:

from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

# Result folders are looked up one level above the working directory.
base = Path('..')
multiplier_modes = ['ratio','ratio_pow2','ratio_pow3','advisory','sigmoid','exponential','direct_only']
passes = ['single_pass','single_pass_nn','two_pass','two_pass_nn']
count_columns = ['TP_Start','TP_Stop','FP_Start','FP_Stop','FN_Start','FN_Stop']


def _fraction(hits, others):
    """Return hits / (hits + others), or 0 when that denominator is zero."""
    total = hits + others
    return hits / total if total else 0


records = []
for mode in multiplier_modes:
    csv_path = base / f'callgenes_mode_results_{mode}' / 'summary_results_callgenes.csv'
    # Modes that have not been run yet simply contribute no rows.
    if csv_path.exists():
        mode_table = pd.read_csv(csv_path)
        for pass_name in passes:
            pass_rows = mode_table[mode_table['Mode'] == pass_name]
            if not pass_rows.empty:
                # Total the raw counts for this mode/pass, then derive the ratios.
                totals = pass_rows[count_columns].sum()
                tp_start, fp_start, fn_start = totals['TP_Start'], totals['FP_Start'], totals['FN_Start']
                tp_stop, fp_stop, fn_stop = totals['TP_Stop'], totals['FP_Stop'], totals['FN_Stop']
                records.append({
                    'mode': mode,
                    'pass': pass_name,
                    'TP_Start': tp_start,
                    'FP_Start': fp_start,
                    'FN_Start': fn_start,
                    'TP_Stop': tp_stop,
                    'FP_Stop': fp_stop,
                    'FN_Stop': fn_stop,
                    'precision_start': _fraction(tp_start, fp_start),
                    'recall_start': _fraction(tp_start, fn_start),
                    'precision_stop': _fraction(tp_stop, fp_stop),
                    'recall_stop': _fraction(tp_stop, fn_stop),
                })

metrics = pd.DataFrame(records)
display(metrics)

if not metrics.empty:
    # One panel per stop metric; bars are grouped by mode and coloured by pass.
    fig, axes = plt.subplots(1, 2, figsize=(14,5), sharey=True)
    panel_metrics = ['recall_stop','precision_stop']
    for ax, metric in zip(axes, panel_metrics):
        by_mode_and_pass = metrics.pivot(index='mode', columns='pass', values=metric)
        by_mode_and_pass.plot(kind='bar', ax=ax, title=f'{metric} by mode/pass')
        ax.set_ylabel(metric)
        # The y-axis is zoomed to 0.75-1.0; values below 0.75 fall outside the view.
        ax.set_ylim(0.75, 1.0)
        ax.legend(loc='lower left', fontsize='small')
    plt.tight_layout()
    plt.show()


## 3. Score Modifier Histograms

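This cell scans every `scores.csv` found under the `callgenes_mode_results_*` folders (the file is produced when `log=t truegenes=...` is set) and summarizes the ratio `Modified_Score / Original_Score` by status (TP/FP) per multiplier mode. Rows whose `Original_Score` is 0 are skipped because the ratio is undefined for them.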

In [None]:

from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

# Result folders are looked up one level above the working directory.
base = Path('..')
multiplier_modes = ['ratio','ratio_pow2','ratio_pow3','advisory','sigmoid','exponential','direct_only']
# A scores file is only usable when all of these columns are present.
required_columns = {'Original_Score', 'Modified_Score', 'Status'}
rows = []
for mode in multiplier_modes:
    mode_dir = base / f'callgenes_mode_results_{mode}'
    if not mode_dir.exists():
        continue
    # rglob yields files in filesystem order; sort so the table is reproducible.
    for scores_path in sorted(mode_dir.rglob('scores.csv')):
        df = pd.read_csv(scores_path)
        if not required_columns.issubset(df.columns):
            continue
        # Drop rows where the ratio is undefined (missing or zero original score).
        # A boolean filter keeps the column numeric, whereas replacing 0 with
        # pd.NA can turn it into an object column.
        usable = df['Original_Score'].notna() & (df['Original_Score'] != 0)
        df = df[usable].copy()
        df['multiplier'] = df['Modified_Score'] / df['Original_Score']
        for status, group in df.groupby('Status'):
            rows.append({
                'mode': mode,
                # Every match is literally named 'scores.csv', so record the path
                # relative to the mode folder to tell the files apart.
                'scores_path': scores_path.relative_to(mode_dir).as_posix(),
                'status': status,
                'count': len(group),
                'avg_multiplier': group['multiplier'].mean(),
                'median_multiplier': group['multiplier'].median()
            })
mult_df = pd.DataFrame(rows)
display(mult_df)
if not mult_df.empty:
    # pivot_table's default aggregation is the mean, so each bar is the unweighted
    # mean of the per-file averages (files are not weighted by their row count).
    pivot = mult_df.pivot_table(index='mode', columns='status', values='avg_multiplier')
    pivot.plot(kind='bar', figsize=(10,5), title='Average multiplier by mode/status')
    plt.ylabel('Modified/Original')
    plt.show()
