# by_fd_graph.ipynb — port of `by_fd_graph.R` to Python

This notebook implements a matplotlib equivalent of the R helper `by_fd_graph(obj)` used when `by_fd=True`.

## Inputs expected (mirroring our Python wrapper object)
`obj` (dict) should contain:
- `args["estimator"]`: list like ["aoss","waoss"] or ["ivwaoss"]
- `by_levels`: list of bin labels
- `quantiles`: 2 x (K+1) array-like (row0 = cdf, row1 = value thresholds)
- `switchers_df`: DataFrame with columns:
    - `N_partition_XX` (number of switchers in each bin)
    - `Med_delta_pre_XX` (median |ΔD| or |ΔZ| in each bin)
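- `results_by_1`, …, `results_by_K` (one entry per bin, where K is the number of `by_levels`): each is a dict containing `table` (the estimates table) and `pairs` (the number of table rows in each estimator block)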

The function returns a matplotlib `Figure`, which plays the same role as the ggplot object returned by the R version.


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def _table_row_for_estimator(table, pairs: int, est_index: int):
    '''
    Fetch the first row of one estimator's block from a results table.

    The R code indexes row pairs*(i-1)+1 with a 1-based estimator number i;
    with the 0-based `est_index` used here that becomes pairs*est_index.
    The position is clamped into [0, n_rows - 1] before indexing.

    A pandas DataFrame yields a Series (via `.iloc`); anything else is
    converted with `np.asarray` and the matching 2-D row is returned.
    '''
    wanted = pairs * est_index

    def _clamp(n_rows):
        # Same ordering as the original expression: floor at 0, then cap at the last row.
        return min(max(wanted, 0), n_rows - 1)

    if not isinstance(table, pd.DataFrame):
        matrix = np.asarray(table)
        return matrix[_clamp(matrix.shape[0]), :]
    return table.iloc[_clamp(len(table))]


def _pe_lb_ub(row):
    '''
    Return (point estimate, lower bound, upper bound) of one table row as floats.

    For a pandas Series the entries named 'pe' / 'lb' / 'ub' are preferred;
    otherwise positions 0, 2 and 3 are used. A bound is NaN when the row is
    too short to hold it.
    '''
    if isinstance(row, pd.Series):
        pe = float(row.get('pe', row.iloc[0]))
        lb = float(row.get('lb', row.iloc[2] if len(row) > 2 else np.nan))
        ub = float(row.get('ub', row.iloc[3] if len(row) > 3 else np.nan))
        return pe, lb, ub
    pe = float(row[0])
    lb = float(row[2]) if len(row) > 2 else np.nan
    ub = float(row[3]) if len(row) > 3 else np.nan
    return pe, lb, ub


def by_fd_graph(obj: dict):
    '''
    Matplotlib port of `by_fd_graph(obj)`.

    Draws one panel per requested estimator: point estimates with their
    [lb, ub] intervals, positioned on the x axis at each bin's median and
    labelled with the bin's median, cdf range, value range and switcher count.

    Parameters
    ----------
    obj : dict
        Must provide `args['estimator']` (a name or list of names among
        'aoss', 'waoss', 'ivwaoss'), `by_levels`, `quantiles` (at least
        2 rows and len(by_levels) + 1 columns), `switchers_df` (columns
        'N_partition_XX' and 'Med_delta_pre_XX') and `results_by_1` ...
        `results_by_K` entries holding `table` and `pairs`.

    Returns
    -------
    fig : matplotlib.figure.Figure

    Raises
    ------
    ValueError
        If the estimator list or `by_levels` is empty, `switchers_df` is
        missing, `quantiles` has the wrong shape, or no row could be collected.
    '''
    args = obj.get('args', {}) or {}
    raw_ests = args.get('estimator') or []
    if isinstance(raw_ests, str):
        # A bare name would otherwise be split into single characters by list().
        raw_ests = [raw_ests]
    # Drop repeated names (keeping first-seen order) so no estimator is collected twice.
    ests = list(dict.fromkeys(raw_ests))
    if len(ests) == 0:
        raise ValueError("by_fd_graph: obj['args']['estimator'] is empty.")

    models = ['aoss', 'waoss', 'ivwaoss']
    est_to_i = {m: i for i, m in enumerate(models)}  # 0,1,2

    raw_levels = obj.get('by_levels', None)
    by_levels = [] if raw_levels is None else list(raw_levels)
    if len(by_levels) == 0:
        raise ValueError('by_fd_graph: missing by_levels.')

    switchers_df = obj.get('switchers_df', None)
    if switchers_df is None:
        raise ValueError('by_fd_graph: missing switchers_df (needed for nswitchers/median).')

    K = len(by_levels)

    quantiles = np.asarray(obj.get('quantiles'))
    if quantiles.ndim != 2 or quantiles.shape[0] < 2:
        raise ValueError("by_fd_graph: 'quantiles' must be a 2 x (K+1) array-like.")
    if quantiles.shape[1] < K + 1:
        # Bin j reads columns j and j + 1, so K bins need K + 1 thresholds.
        raise ValueError(
            f"by_fd_graph: 'quantiles' needs at least {K + 1} columns for {K} bins, "
            f"got {quantiles.shape[1]}."
        )

    rows = []
    for est in ests:
        if est not in est_to_i:
            continue
        est_index = est_to_i[est]
        for j in range(K):
            subobj = obj.get(f'results_by_{j+1}', None)
            if subobj is None:
                continue
            table = subobj.get('table', None)
            pairs = int(subobj.get('pairs', 1) or 1)
            if table is None:
                continue

            row = _table_row_for_estimator(table, pairs=pairs, est_index=est_index)
            pe, lb, ub = _pe_lb_ub(row)

            # NOTE(review): label-based lookup — assumes switchers_df carries a
            # default 0..K-1 index; confirm against the wrapper that builds it.
            nswitch = float(switchers_df.loc[j, 'N_partition_XX'])
            median = float(switchers_df.loc[j, 'Med_delta_pre_XX'])

            rows.append({
                'model': est,
                'pe': pe, 'lb': lb, 'ub': ub,
                'nswitchers': nswitch,
                'median': median,
                'lbin': float(quantiles[1, j]),
                'ubin': float(quantiles[1, j + 1]),
                'lbin_cdf': float(quantiles[0, j] * 100.0),
                'ubin_cdf': float(quantiles[0, j + 1] * 100.0),
                # Only the first bin is closed on the left.
                'include': '[' if (j == 0) else '(',
                'j': j + 1,
            })

    pe_set = pd.DataFrame(rows)
    if pe_set.empty:
        raise ValueError('by_fd_graph: no rows collected (check obj structure).')

    collected = set(pe_set['model'])
    var_gr = 'Z' if ('ivwaoss' in collected) else 'D'

    # Only estimators that actually produced rows get a panel; unknown names or
    # estimators without any results would otherwise yield an empty subplot.
    plot_ests = [est for est in ests if est in collected]

    # Tick positions/labels depend on the bin only, so take the first collected
    # row of every bin instead of relying on one particular estimator.
    base = pe_set.drop_duplicates(subset='j').sort_values('j')
    ticks = base['median'].to_list()
    labels = [
        f"{m:.2f}\n({l:.0f}%-{u:.0f}%)\n{inc}{lbv:.2f},{ubv:.2f}]\nN={n:.0f}"
        for m, l, u, inc, lbv, ubv, n in zip(
            base['median'], base['lbin_cdf'], base['ubin_cdf'], base['include'],
            base['lbin'], base['ubin'], base['nswitchers']
        )
    ]

    ncols = len(plot_ests)
    # squeeze=False always returns a 2-D array of Axes, even for a single panel.
    fig, axes_grid = plt.subplots(
        1, ncols, figsize=(6.5 * ncols, 5.5), sharey=True, squeeze=False
    )
    axes = list(axes_grid[0])

    for ax, est in zip(axes, plot_ests):
        d = pe_set.loc[pe_set['model'] == est].sort_values('median')
        ax.errorbar(
            d['median'], d['pe'],
            yerr=[d['pe'] - d['lb'], d['ub'] - d['pe']],
            fmt='o', capsize=0
        )
        ax.plot(d['median'], d['pe'], linestyle='dashed', linewidth=0.8)
        ax.axhline(0.0, linewidth=0.8)

        ax.set_xlabel(f"|Δ{var_gr}| - {est.upper()}")
        ax.set_ylabel('')

        ax.set_xticks(ticks)
        ax.set_xticklabels(labels)
        ax.tick_params(axis='x', labelrotation=0)

    # Common y-range: lowest lower bound to highest upper bound across panels.
    # Non-finite bounds are ignored; a side with no finite value is passed as
    # None, which leaves that limit to matplotlib's autoscaling.
    lows = pe_set['lb'].to_numpy(dtype=float)
    highs = pe_set['ub'].to_numpy(dtype=float)
    lows = lows[np.isfinite(lows)]
    highs = highs[np.isfinite(highs)]
    y0 = float(lows.min()) if lows.size else None
    y1 = float(highs.max()) if highs.size else None
    if y0 is not None or y1 is not None:
        for ax in axes:
            ax.set_ylim(y0, y1)

    fig.tight_layout()
    return fig


# Script entry point: currently a no-op, so running this file directly does nothing.
if __name__ == '__main__':
    pass
