In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import MinMaxScaler



In [None]:
# Read the BBAG CSV, drop every row whose country is Slovakia, and renumber
# the index so it is consecutive after the filter.
# NOTE(review): the reason for excluding Slovakia is not recorded here.
data = (
    pd.read_csv('data/BBAG-cross.csv')
    .loc[lambda frame: frame.country != 'Slovakia']
    .reset_index(drop=True)
)

## Permutation test

In [None]:

# Seed value handed to NumPy's legacy global RNG on the last line of this cell.
SEED = 42
# Default (min, max) interval used as MinMaxScaler's feature_range when a
# predictor is rescaled before the logistic fit.
SCALE_RANGE = (0.05, 0.95)
# Seeds NumPy's legacy global RNG only. The permutation routine in this
# notebook builds its own Generator with np.random.default_rng(seed), so its
# draws do not depend on this call.
np.random.seed(SEED)

def fit_logit_scaled_single(df, predictor, y, scale_range=SCALE_RANGE):
    """Fit a logistic regression of ``y`` on one min-max scaled predictor.

    Parameters
    ----------
    df : DataFrame containing the column named by ``predictor``.
    predictor : name of the single explanatory column; it is cast to float.
    y : outcome values passed straight to ``sm.Logit`` as the endog.
    scale_range : ``(min, max)`` interval given to ``MinMaxScaler`` as
        ``feature_range``.

    Returns
    -------
    The result object returned by ``sm.Logit(...).fit(disp=0)``.

    The scaler is fitted on the data it transforms, so the predictor's own
    minimum and maximum map to the ends of ``scale_range``.
    """
    feature = df[[predictor]].astype(float).to_numpy()
    feature_scaled = MinMaxScaler(feature_range=scale_range).fit_transform(feature)

    # has_constant="add" always adds the intercept column, even when the
    # scaled predictor happens to be constant itself.
    design = sm.add_constant(feature_scaled, has_constant="add")

    # disp=0 silences the optimizer's convergence printout.
    return sm.Logit(y, design).fit(disp=0)


def permute_within_country_pvalue_single(
    df: pd.DataFrame,
    predictor: str,
    n_perm: int = 1000,
    seed: int = 42,
    ycol: str = "GAP_bin",
    country_col: str = "country",
    scale_range=SCALE_RANGE,
):
    """Permutation p-value for one predictor, shuffling labels within country.

    Rows with a missing value in ``country_col``, ``predictor`` or ``ycol``
    are dropped. The observed coefficient comes from
    ``fit_logit_scaled_single`` on the remaining rows. For each of ``n_perm``
    permutations the outcome labels are shuffled separately inside every
    country (so each country keeps its own set of labels) and the model is
    refitted.

    Parameters
    ----------
    df : input data holding the three columns named below.
    predictor : explanatory column to test.
    n_perm : number of permutations.
    seed : seed for the ``np.random.default_rng`` generator used for shuffling.
    ycol : outcome column; cast to ``int`` before fitting.
    country_col : column defining the blocks within which labels are shuffled.
    scale_range : forwarded to ``fit_logit_scaled_single``.

    Returns
    -------
    (p_value, beta_obs, betas_perm)
        ``p_value`` is two-sided: the share of permuted coefficients whose
        absolute value is at least ``|beta_obs|``, with 1 added to both the
        count and the denominator. ``beta_obs`` is the observed coefficient
        and ``betas_perm`` the array of ``n_perm`` permuted coefficients.
    """
    generator = np.random.default_rng(seed)

    complete = df[[country_col, predictor, ycol]].dropna().reset_index(drop=True)
    labels = complete[ycol].astype(int).to_numpy()

    def slope(outcome):
        # params[0] is the intercept added by the helper; params[1] is the
        # coefficient of the scaled predictor.
        fitted = fit_logit_scaled_single(complete, predictor, outcome, scale_range)
        return float(fitted.params[1])

    beta_obs = slope(labels)

    # Positional row indices of each country, in order of first appearance.
    # The order matters: it fixes the sequence in which the generator is
    # consumed and therefore the exact permutations drawn.
    blocks = [
        np.where(complete[country_col].to_numpy() == name)[0]
        for name in complete[country_col].unique()
    ]

    betas_perm = np.empty(n_perm, dtype=float)
    for draw in range(n_perm):
        shuffled = labels.copy()
        for block_rows in blocks:
            shuffled[block_rows] = generator.permutation(shuffled[block_rows])
        betas_perm[draw] = slope(shuffled)

    n_extreme = np.sum(np.abs(betas_perm) >= abs(beta_obs))
    p_value = (n_extreme + 1) / (n_perm + 1)

    return p_value, beta_obs, betas_perm


In [None]:
# Predictors tested one at a time against the default outcome column of
# permute_within_country_pvalue_single.
predictors = ["Mono", "One", "Two", "Three", "Total"]

# Number of label permutations per predictor.
N_PERM = 1000

rows = []
for pred in predictors:
    # Use the notebook-level SEED rather than repeating the literal, so that
    # changing the seed in the configuration cell actually changes this
    # analysis. Every predictor is run with the same seed.
    p_value, beta_obs, betas_perm = permute_within_country_pvalue_single(
        data,
        predictor=pred,
        n_perm=N_PERM,
        seed=SEED,
    )
    rows.append({
        "Predictor": pred,
        "p_value": p_value
    })

# One row per predictor, smallest permutation p-value first.
perm_results = pd.DataFrame(rows).sort_values("p_value").reset_index(drop=True)
perm_results
