In [None]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind

# Load the four result tables in one pass; names are bound in the same order
# as the paths are listed.
# NOTE(review): the `*_std` frames are read from the March out-of-sample
# directory -- presumably this is the "standardized" run; confirm.
bf_april, sqa_april, bf_std, sqa_std = map(
    pd.read_csv,
    (
        "../../oos_test_april/output/bf_results_april.csv",
        "../../oos_test_april/output/sqa_results_april.csv",
        "../../oos_test_march/output/bf_results_march.csv",
        "../../oos_test_march/output/sqa_results_march.csv",
    ),
)

# Welch's (unequal-variance) t-test on a single shared column
def welch_ttest(df1, df2, column):
    """Run Welch's two-sample t-test on ``column`` of two tables.

    Args:
        df1: First table; ``df1[column]`` is the first sample.
        df2: Second table; ``df2[column]`` is the second sample.
        column: Key used to pull the sample out of both tables.

    Returns:
        The result object from ``scipy.stats.ttest_ind`` (exposes
        ``.statistic`` and ``.pvalue``), computed with ``equal_var=False``.
    """
    first_sample = df1[column]
    second_sample = df2[column]
    return ttest_ind(first_sample, second_sample, equal_var=False)

# Cohen's d function for unequal sample sizes
def cohens_d(x1, x2):
    """Return Cohen's d for two independent samples using the pooled SD.

    The effect size is ``(mean(x1) - mean(x2)) / pooled_sd``, where the pooled
    standard deviation weights each sample variance (``ddof=1``) by its
    degrees of freedom, so the samples may differ in size.

    Args:
        x1: First sample (any 1-D array-like of numbers, e.g. a list,
            ndarray or pandas Series).
        x2: Second sample, same kind of input as ``x1``.

    Returns:
        The effect size as a float. It is negative when ``x1`` has the smaller
        mean. The result is NaN if either sample contains NaN.
    """
    # Coerce to plain float arrays first. When handed a pandas Series,
    # np.mean/np.var delegate to the Series methods, which skip NaN, while
    # len() still counts the NaN rows -- the pooled weights would then be
    # silently wrong. On ndarrays NaN propagates instead, so the sample size,
    # mean and variance always describe the same set of observations.
    sample1 = np.asarray(x1, dtype=float)
    sample2 = np.asarray(x2, dtype=float)

    n1, n2 = sample1.size, sample2.size
    var1 = np.var(sample1, ddof=1)
    var2 = np.var(sample2, ddof=1)

    # Pooled variance: each sample variance weighted by its degrees of freedom.
    pooled_var = ((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2)
    return (np.mean(sample1) - np.mean(sample2)) / np.sqrt(pooled_var)

# Columns to compare, paired position-by-position with the labels used as
# row names in the printed report.
columns = ["profit_pct", "step_runtime_sec", "cycle_length"]
labels = ["Profit %", "Runtime (s)", "Cycle Length"]

# Each comparison as (row-name prefix, first sample table, second sample table).
# Both statistics are "first minus second", so a negative value means the
# bf_* table has the lower mean for that column.
comparisons = [
    ("April", bf_april, sqa_april),
    ("Standardized", bf_std, sqa_std),
]

# Perform Welch's t-test and Cohen's d
results = {}
for col, label in zip(columns, labels):
    for prefix, bf_df, sqa_df in comparisons:
        # Run the test once and read both fields from the same result object.
        test = welch_ttest(bf_df, sqa_df, col)
        results[f"{prefix} - {label}"] = {
            "Welch t-stat": test.statistic,
            "p-value": test.pvalue,
            "Cohen's d": cohens_d(bf_df[col], sqa_df[col]),
        }

# Display results: transpose so each comparison is a row and each statistic
# is a column.
results_df = pd.DataFrame(results).T
print(results_df)


                             Welch t-stat        p-value  Cohen's d
April - Profit %                -6.677064   4.419881e-11  -0.382461
Standardized - Profit %         -5.052119   5.926432e-07  -0.394420
April - Runtime (s)           -108.363936   0.000000e+00  -4.804205
Standardized - Runtime (s)     -81.502437  1.863451e-227  -5.577363
April - Cycle Length           -13.659535   5.797840e-37  -0.919621
Standardized - Cycle Length     -7.254555   1.463112e-12  -0.599347
