In [1]:
import numpy as np
import matplotlib.pyplot as plt
import eat
import pandas as pd
from models.modelsFDH import FDH

single output

In [1]:
import time
import numpy as np
import pandas as pd
import eat
from models.modelsFDH import FDH

TRIALS = 20
NS_VALUES = [10, 15, 20]
SAMPLE_SIZES = [25, 50, 75]

SCENARIOS = {
    1: {"inputs": 1, "outputs": 1,
        "func": lambda x: np.log(x) + 3,
        "inefficiency": lambda n: np.abs(np.random.normal(0, 0.4, size=n))},
    2: {"inputs": 2, "outputs": 1,
        "func": lambda x1, x2: 0.1*x1 + 0.1*x2 + 0.3*((x1 * x2)**0.5),
        "inefficiency": lambda n: np.abs(np.random.normal(0, 0.4, size=n))},
    3: {"inputs": 3, "outputs": 1,
        "func": lambda x1, x2, x3: 0.1*x1 + 0.1*x2 + 0.1*x3 + 0.3*(x1*x2*x3)**(1/3),
        "inefficiency": lambda n: np.abs(np.random.normal(0, 0.4, size=n))},
    4: {"inputs": 9, "outputs": 1,
        "func": lambda x: np.prod(x**0.11, axis=1),
        "inefficiency": lambda n: np.abs(np.random.normal(0, 0.4, size=n))},
}


In [2]:
def generate_data(scenario, n):
    x = np.random.uniform(1, 10, size=(n, scenario["inputs"]))
    if scenario == SCENARIOS[1]:
        y_true = scenario["func"](x[:, 0])
    elif scenario == SCENARIOS[2]:
        y_true = scenario["func"](x[:, 0], x[:, 1])
    elif scenario == SCENARIOS[3]:
        y_true = scenario["func"](x[:, 0], x[:, 1], x[:, 2])
    elif scenario == SCENARIOS[4]:
        y_true = scenario["func"](x)
    else:
        raise NotImplementedError("NOT SUPPORTED.")

    inefficiency = scenario["inefficiency"](n)
    y_obs = y_true - inefficiency

    y_obs = y_obs.reshape(n, scenario["outputs"])
    y_obs[y_obs<0] = 1e-6

    return x, y_obs, y_true

def calculate_metrics(true_values, estimates):
    mse = np.mean((estimates - true_values) ** 2)
    bias = np.mean(estimates - true_values)
    abs_bias = np.mean(np.abs(estimates - true_values))
    return mse, bias, abs_bias


In [3]:
def run_monte_carlo():

    records = []
    time_records = []

    for NS in NS_VALUES:
        start_time = time.time()

        for scenario_id, scenario in SCENARIOS.items():
            for n in SAMPLE_SIZES:

                mse_fdh_list, mse_eat_list = [], []
                bias_fdh_list, bias_eat_list = [], []
                abs_fdh_list, abs_eat_list = [], []

                for trial in range(TRIALS):
                    x, y_obs, y_true = generate_data(scenario, n)

                    print('NS', NS,"scen", scenario_id, "n", n, "Trial", trial)

                    fdh = FDH(x, y_obs)
                    df_fdh = fdh.fdh_output_vrs()
                    y_fdh = y_obs * df_fdh['efficiency'].values.reshape(-1, 1)

                    df = pd.DataFrame(x, columns=[f"x[{i}]" for i in range(scenario['inputs'])])
                    df[[f"y[{i}]" for i in range(scenario['outputs'])]] = y_obs
                    eat_model = eat.EAT(df,
                                         [f"x[{i}]" for i in range(scenario['inputs'])],
                                         [f"y[{i}]" for i in range(scenario['outputs'])],
                                         numStop=NS, fold=5)
                    eat_model.fit()
                    y_eat = eat_model.predict(df.loc[:, [f"x[{i}]" for i in range(scenario['inputs'])]],
                                              [f"x[{i}]" for i in range(scenario['inputs'])])
                    y_eat = y_eat[[f"p_y[{i}]" for i in range(scenario['outputs'])]].values

                    mse_fdh, bias_fdh, abs_fdh = calculate_metrics(y_true, y_fdh)
                    mse_eat, bias_eat, abs_eat = calculate_metrics(y_true, y_eat)

                    mse_fdh_list.append(mse_fdh)
                    bias_fdh_list.append(bias_fdh)
                    abs_fdh_list.append(abs_fdh)
                    mse_eat_list.append(mse_eat)
                    bias_eat_list.append(bias_eat)
                    abs_eat_list.append(abs_eat)

                mean_fdh_mse = np.mean(mse_fdh_list)
                mean_eat_mse = np.mean(mse_eat_list)
                mean_fdh_bias = np.mean(bias_fdh_list)
                mean_eat_bias = np.mean(bias_eat_list)
                mean_fdh_abs = np.mean(abs_fdh_list)
                mean_eat_abs = np.mean(abs_eat_list)

                ratio_mse = mean_eat_mse / mean_fdh_mse if mean_fdh_mse != 0 else np.nan
                ratio_bias = mean_eat_bias / mean_fdh_bias if mean_fdh_bias != 0 else np.nan
                ratio_abs = mean_eat_abs / mean_fdh_abs if mean_fdh_abs != 0 else np.nan

                records.append({
                    'NS': NS,
                    'Scenario': scenario_id,
                    'SampleSize': n,
                    'FDH_MSE': round(mean_fdh_mse, 4),
                    'EAT_MSE': round(mean_eat_mse, 4),
                    'Ratio_MSE': round(ratio_mse, 4),
                    'FDH_Bias': round(mean_fdh_bias, 4),
                    'EAT_Bias': round(mean_eat_bias, 4),
                    'Ratio_Bias': round(ratio_bias, 4),
                    'FDH_AbsBias': round(mean_fdh_abs, 4),
                    'EAT_AbsBias': round(mean_eat_abs, 4),
                    'Ratio_AbsBias': round(ratio_abs, 4)
                })

        elapsed = time.time() - start_time
        time_records.append({'NS': NS, 'ElapsedSec': elapsed})
        print(f"NS={NS} completed in {elapsed:.2f} seconds")

    df_details = pd.DataFrame(records)
    df_summary = df_details.groupby('NS').agg({
        'FDH_MSE':'mean','EAT_MSE':'mean','Ratio_MSE':'mean',
        'FDH_Bias':'mean','EAT_Bias':'mean','Ratio_Bias':'mean',
        'FDH_AbsBias':'mean','EAT_AbsBias':'mean','Ratio_AbsBias':'mean'
    }).reset_index()
    df_times = pd.DataFrame(time_records)

    df_details = df_details.round(4)
    df_summary = df_summary.round(4)
    df_times = df_times.round(4)

    df_details.to_csv('monte_carlo_results_detailed.csv', index=False, float_format='%.4f')
    df_summary.to_csv('monte_carlo_results_summary.csv', index=False, float_format='%.4f')
    df_times.to_csv('monte_carlo_times.csv', index=False, float_format='%.4f')

    with pd.ExcelWriter('monte_carlo_full_results.xlsx') as writer:
        df_details.to_excel(writer, sheet_name='Detailed', index=False)
        df_summary.to_excel(writer, sheet_name='Summary', index=False)
        df_times.to_excel(writer, sheet_name='Times', index=False)

    return df_details, df_summary, df_times


In [4]:
detailed_df, summary_df, times_df = run_monte_carlo()

NS 10 scen 1 n 25 Trial 0
Set parameter WLSAccessID
Set parameter WLSSecret
Set parameter LicenseID to value 2579375
Academic license 2579375 - for non-commercial use only - registered to ma___@iitd.ac.in
NS 10 scen 1 n 25 Trial 1
NS 10 scen 1 n 25 Trial 2
NS 10 scen 1 n 25 Trial 3
NS 10 scen 1 n 25 Trial 4
NS 10 scen 1 n 25 Trial 5
NS 10 scen 1 n 25 Trial 6
NS 10 scen 1 n 25 Trial 7
NS 10 scen 1 n 25 Trial 8
NS 10 scen 1 n 25 Trial 9
NS 10 scen 1 n 25 Trial 10
NS 10 scen 1 n 25 Trial 11
NS 10 scen 1 n 25 Trial 12
NS 10 scen 1 n 25 Trial 13
NS 10 scen 1 n 25 Trial 14
NS 10 scen 1 n 25 Trial 15
NS 10 scen 1 n 25 Trial 16
NS 10 scen 1 n 25 Trial 17
NS 10 scen 1 n 25 Trial 18
NS 10 scen 1 n 25 Trial 19
NS 10 scen 1 n 50 Trial 0
NS 10 scen 1 n 50 Trial 1
NS 10 scen 1 n 50 Trial 2
NS 10 scen 1 n 50 Trial 3
NS 10 scen 1 n 50 Trial 4
NS 10 scen 1 n 50 Trial 5
NS 10 scen 1 n 50 Trial 6
NS 10 scen 1 n 50 Trial 7
NS 10 scen 1 n 50 Trial 8
NS 10 scen 1 n 50 Trial 9
NS 10 scen 1 n 50 Trial 10
NS 1

In [8]:
detailed_df

Unnamed: 0,NS,Scenario,SampleSize,FDH_MSE,EAT_MSE,Ratio_MSE,FDH_Bias,EAT_Bias,Ratio_Bias,FDH_AbsBias,EAT_AbsBias,Ratio_AbsBias
0,10,1,25,0.7763,0.6889,0.8874,-0.1866,-0.0058,0.031,0.686,0.6514,0.9495
1,10,1,50,0.7489,0.6995,0.934,-0.1418,-0.0339,0.2389,0.6768,0.6608,0.9764
2,10,1,75,0.7393,0.7185,0.9719,-0.1241,-0.0485,0.3907,0.672,0.6695,0.9963
3,10,2,25,1.6536,1.809,1.094,-0.2997,0.3923,-1.3089,1.0266,1.0752,1.0473
4,10,2,50,1.7797,2.0053,1.1268,-0.2958,0.4072,-1.3768,1.0686,1.1453,1.0718
5,10,2,75,1.8531,2.3082,1.2456,-0.2794,0.3848,-1.3772,1.0956,1.2353,1.1276
6,10,3,25,1.7061,1.7083,1.0013,-0.3237,0.3737,-1.1545,1.0394,1.0318,0.9927
7,10,3,50,1.8595,2.093,1.1256,-0.3181,0.4242,-1.3334,1.0873,1.1597,1.0666
8,10,3,75,1.8312,2.2885,1.2497,-0.3114,0.6002,-1.9273,1.0863,1.2284,1.1308
9,10,4,25,1.7037,1.6288,0.9561,-0.3249,0.388,-1.1942,1.0194,0.9773,0.9587


In [9]:
times_df

Unnamed: 0,NS,ElapsedSec
0,10,482.5201
1,15,401.3482
2,20,348.6176


In [10]:
summary_df

Unnamed: 0,NS,FDH_MSE,EAT_MSE,Ratio_MSE,FDH_Bias,EAT_Bias,Ratio_Bias,FDH_AbsBias,EAT_AbsBias,Ratio_AbsBias
0,10,1.5249,1.7086,1.0908,-0.2718,0.3485,-1.0809,0.9675,1.0212,1.0472
1,15,1.5883,1.7983,1.0944,-0.2739,0.4728,-1.5149,0.9885,1.0486,1.0494
2,20,1.5212,1.6985,1.0773,-0.2651,0.5377,-1.8331,0.9686,1.0196,1.0408


In [11]:
detailed_df.to_csv('resss/monte_carlo_results_detailed.csv', index=False, float_format='%.4f')
summary_df.to_csv('resss/monte_carlo_results_summary.csv', index=False, float_format='%.4f')
times_df.to_csv('resss/monte_carlo_times.csv', index=False, float_format='%.4f')


In [13]:
detailed_df.columns

Index(['NS', 'Scenario', 'SampleSize', 'FDH_MSE', 'EAT_MSE', 'Ratio_MSE',
       'FDH_Bias', 'EAT_Bias', 'Ratio_Bias', 'FDH_AbsBias', 'EAT_AbsBias',
       'Ratio_AbsBias'],
      dtype='object')

In [21]:
detailed_df = detailed_df.drop(['Ratio_AbsBias', 'Ratio_Bias'], axis=1)

In [25]:
dfA = detailed_df.groupby(['NS'], as_index=False).mean()
dfA

Unnamed: 0,NS,Scenario,SampleSize,FDH_MSE,EAT_MSE,Ratio_MSE,FDH_Bias,EAT_Bias,FDH_AbsBias,EAT_AbsBias
0,10,2.5,50.0,1.524858,1.708633,1.090842,-0.271842,0.3485,0.967525,1.021217
1,15,2.5,50.0,1.58825,1.798325,1.094408,-0.273908,0.472842,0.988483,1.048625
2,20,2.5,50.0,1.521192,1.6985,1.077275,-0.265117,0.537658,0.968625,1.01965


In [26]:
dfA['EAT_MSE']/dfA['FDH_MSE']

0    1.120519
1    1.132268
2    1.116559
dtype: float64

In [27]:
dfB = detailed_df.groupby(['Scenario'], as_index=False).mean()
dfB

Unnamed: 0,Scenario,NS,SampleSize,FDH_MSE,EAT_MSE,Ratio_MSE,FDH_Bias,EAT_Bias,FDH_AbsBias,EAT_AbsBias
0,1,15.0,50.0,0.766211,0.674044,0.881422,-0.153756,0.026811,0.683111,0.644778
1,2,15.0,50.0,1.837322,2.122189,1.156067,-0.286889,0.517844,1.087833,1.176678
2,3,15.0,50.0,1.809767,2.0736,1.144244,-0.317778,0.580067,1.074011,1.153867
3,4,15.0,50.0,1.765767,2.070778,1.1683,-0.322733,0.687278,1.054556,1.144


In [28]:
dfB['EAT_MSE']/dfB['FDH_MSE']

0    0.879711
1    1.155044
2    1.145783
3    1.172736
dtype: float64

In [29]:
dfC = detailed_df.groupby(['SampleSize'], as_index=False).mean()
dfC

Unnamed: 0,SampleSize,NS,Scenario,FDH_MSE,EAT_MSE,Ratio_MSE,FDH_Bias,EAT_Bias,FDH_AbsBias,EAT_AbsBias
0,25,15.0,2.5,1.514825,1.50975,0.975075,-0.283975,0.427225,0.963417,0.955433
1,50,15.0,2.5,1.573908,1.811675,1.1102,-0.268317,0.45975,0.983892,1.053342
2,75,15.0,2.5,1.545567,1.884033,1.17725,-0.258575,0.472025,0.977325,1.080717


In [30]:
dfC['EAT_MSE']/dfC['FDH_MSE']

0    0.996650
1    1.151068
2    1.218992
dtype: float64

MULTI OUTPUT

In [38]:
TRIALS = 20
NS = 10
SAMPLE_SIZES = [25, 50, 75]

SCENARIOS = {
    5: {
        "inputs": 2,
        "outputs": 2,
        "func": None,
        "inefficiency": lambda n: np.abs(np.random.normal(0, 0.4, size=(n, 2))),
    },
}


In [39]:
def generate_multi_data(scenario, n, p):
    x = np.random.uniform(1, 10, size=(n, scenario["inputs"]))
    ln_y2_y1 = np.random.uniform(-1.5, 1.5, size=n)

    ln_y1 = -1 + 0.5 * ln_y2_y1 + 0.25 * (ln_y2_y1 ** 2) \
            - 1.5 * np.log(x[:, 0]) - 0.6 * np.log(x[:, 1]) + 0.2 * (np.log(x[:, 0]) ** 2) + 0.05 * (np.log(x[:, 1]) ** 2) \
            - 0.1 * np.log(x[:, 0]) * np.log(x[:, 1]) + 0.05 * np.log(x[:, 0]) * ln_y2_y1 - 0.05 * np.log(x[:, 1]) * ln_y2_y1

    min_ln_y1 = np.min(ln_y1)
    if min_ln_y1 < 0:
        ln_y1 += np.abs(min_ln_y1) + 0.1

    ln_y2 = ln_y1 + ln_y2_y1
    y1 = np.exp(ln_y1)
    y2 = np.exp(ln_y2)
    y_true = np.column_stack((y1, y2))

    inefficiency = scenario["inefficiency"](n)
    
    num_efficient = int(p * n)
    efficient_indices = np.random.choice(n, num_efficient, replace=False)
    inefficiency[efficient_indices] = 0.0

    y_obs = y_true - inefficiency
    y_obs = y_obs.reshape(n, scenario["outputs"])
    y_obs[y_obs < 0] = 1e-6

    return x, y_obs, y_true


In [42]:
import time
import numpy as np
import pandas as pd

P_VALUES = [0, 0.1, 0.25]

def monte_carlo_testing():
    records = []
    time_records = []

    for p in P_VALUES:
        start_time = time.time()
        for scenario_id, scenario in SCENARIOS.items():
            for n in SAMPLE_SIZES:
                mse_fdh_list, mse_eat_list = [], []
                bias_fdh_list, bias_eat_list = [], []
                abs_fdh_list, abs_eat_list = [], []

                for trial in range(TRIALS):
                    print(f"p={p} scenario={scenario_id} n={n} trial={trial}")
                    x, y_obs, y_true = generate_multi_data(scenario, n, p)

                    # FDH
                    fdh = FDH(x, y_obs)
                    df_fdh = fdh.fdh_output_vrs()
                    y_fdh = y_obs * df_fdh['efficiency'].values.reshape(-1, 1)

                    # EAT
                    df = pd.DataFrame(x, columns=[f"x[{i}]" for i in range(scenario["inputs"])])
                    df[[f"y[{i}]" for i in range(scenario["outputs"])]] = y_obs
                    eat_model = eat.EAT(
                        df,
                        [f"x[{i}]" for i in range(scenario["inputs"])],
                        [f"y[{i}]" for i in range(scenario["outputs"])],
                        numStop=NS,
                        fold=5
                    )
                    eat_model.fit()
                    X_pred = df[[f"x[{i}]" for i in range(scenario["inputs"])]]
                    y_eat = eat_model.predict(X_pred, [f"x[{i}]" for i in range(scenario["inputs"])])
                    y_eat = y_eat[[f"p_y[{i}]" for i in range(scenario["outputs"])]].values

                    # metrics
                    y_true = y_true.reshape(-1, scenario["outputs"])
                    mse_fdh, bias_fdh, abs_fdh = calculate_metrics(y_true, y_fdh)
                    mse_eat, bias_eat, abs_eat = calculate_metrics(y_true, y_eat)

                    mse_fdh_list.append(mse_fdh)
                    bias_fdh_list.append(bias_fdh)
                    abs_fdh_list.append(abs_fdh)
                    mse_eat_list.append(mse_eat)
                    bias_eat_list.append(bias_eat)
                    abs_eat_list.append(abs_eat)

                # aggregate per (scenario, p, n)
                records.append({
                    'p': p,
                    'Scenario': scenario_id,
                    'SampleSize': n,
                    'FDH_MSE': np.mean(mse_fdh_list),
                    'EAT_MSE': np.mean(mse_eat_list),
                    'FDH_Bias': np.mean(bias_fdh_list),
                    'EAT_Bias': np.mean(bias_eat_list),
                    'FDH_AbsBias': np.mean(abs_fdh_list),
                    'EAT_AbsBias': np.mean(abs_eat_list)
                })

        elapsed = time.time() - start_time
        time_records.append({'p': p, 'ElapsedSec': elapsed})
        print(f"p={p} done in {elapsed:.1f}s")

    df_details = pd.DataFrame(records).round(4)
    df_summary = (
        df_details
        .groupby(['p','Scenario','SampleSize'])
        .agg({
            'FDH_MSE':'mean','EAT_MSE':'mean',
            'FDH_Bias':'mean','EAT_Bias':'mean',
            'FDH_AbsBias':'mean','EAT_AbsBias':'mean'
        })
        .reset_index()
        .round(4)
    )
    df_times = pd.DataFrame(time_records).round(4)

    df_details.to_csv('monte_carlo_p_detailed.csv', index=False)
    df_summary.to_csv('monte_carlo_p_summary.csv', index=False)
    df_times.to_csv('monte_carlo_p_times.csv', index=False)

    with pd.ExcelWriter('monte_carlo_p_full.xlsx') as writer:
        df_details.to_excel(writer, sheet_name='Detailed', index=False)
        df_summary.to_excel(writer, sheet_name='Summary', index=False)
        df_times.to_excel(writer, sheet_name='Times', index=False)

    return df_details, df_summary, df_times


In [46]:
detailed_df, summary_df, times_df = monte_carlo_testing()

p=0 scenario=5 n=25 trial=0
p=0 scenario=5 n=25 trial=1
p=0 scenario=5 n=25 trial=2
p=0 scenario=5 n=25 trial=3
p=0 scenario=5 n=25 trial=4
p=0 scenario=5 n=25 trial=5
p=0 scenario=5 n=25 trial=6
p=0 scenario=5 n=25 trial=7
p=0 scenario=5 n=25 trial=8
p=0 scenario=5 n=25 trial=9
p=0 scenario=5 n=25 trial=10
p=0 scenario=5 n=25 trial=11
p=0 scenario=5 n=25 trial=12
p=0 scenario=5 n=25 trial=13
p=0 scenario=5 n=25 trial=14
p=0 scenario=5 n=25 trial=15
p=0 scenario=5 n=25 trial=16
p=0 scenario=5 n=25 trial=17
p=0 scenario=5 n=25 trial=18
p=0 scenario=5 n=25 trial=19
p=0 scenario=5 n=50 trial=0
p=0 scenario=5 n=50 trial=1
p=0 scenario=5 n=50 trial=2
p=0 scenario=5 n=50 trial=3
p=0 scenario=5 n=50 trial=4
p=0 scenario=5 n=50 trial=5
p=0 scenario=5 n=50 trial=6
p=0 scenario=5 n=50 trial=7
p=0 scenario=5 n=50 trial=8
p=0 scenario=5 n=50 trial=9
p=0 scenario=5 n=50 trial=10
p=0 scenario=5 n=50 trial=11
p=0 scenario=5 n=50 trial=12
p=0 scenario=5 n=50 trial=13
p=0 scenario=5 n=50 trial=14
p=0 s

In [47]:
# 21 min 28 sec

In [48]:
detailed_df

Unnamed: 0,p,Scenario,SampleSize,FDH_MSE,EAT_MSE,FDH_Bias,EAT_Bias,FDH_AbsBias,EAT_AbsBias
0,0.0,5,25,796.2539,3417.5218,14.8729,33.6072,15.0525,33.6636
1,0.0,5,50,2523.495,11390.774,33.178,72.8867,33.287,72.911
2,0.0,5,75,3281.6168,13934.5125,40.9525,88.9573,41.0368,88.9769
3,0.1,5,25,725.8711,3510.6539,16.0609,38.7242,16.2373,38.7718
4,0.1,5,50,2345.5657,10317.9623,31.8929,68.4702,31.9968,68.5021
5,0.1,5,75,5833.5852,26348.5539,53.1919,117.7364,53.2676,117.7555
6,0.25,5,25,2744.6018,14322.2101,25.7015,59.9335,25.8358,59.9657
7,0.25,5,50,5558.863,24644.7234,41.3274,88.6159,41.4129,88.6388
8,0.25,5,75,6351.75,27427.9803,49.566,108.9218,49.6313,108.9389


In [49]:
times_df

Unnamed: 0,p,ElapsedSec
0,0.0,449.3288
1,0.1,387.6438
2,0.25,450.9132


In [50]:
summary_df

Unnamed: 0,p,Scenario,SampleSize,FDH_MSE,EAT_MSE,FDH_Bias,EAT_Bias,FDH_AbsBias,EAT_AbsBias
0,0.0,5,25,796.2539,3417.5218,14.8729,33.6072,15.0525,33.6636
1,0.0,5,50,2523.495,11390.774,33.178,72.8867,33.287,72.911
2,0.0,5,75,3281.6168,13934.5125,40.9525,88.9573,41.0368,88.9769
3,0.1,5,25,725.8711,3510.6539,16.0609,38.7242,16.2373,38.7718
4,0.1,5,50,2345.5657,10317.9623,31.8929,68.4702,31.9968,68.5021
5,0.1,5,75,5833.5852,26348.5539,53.1919,117.7364,53.2676,117.7555
6,0.25,5,25,2744.6018,14322.2101,25.7015,59.9335,25.8358,59.9657
7,0.25,5,50,5558.863,24644.7234,41.3274,88.6159,41.4129,88.6388
8,0.25,5,75,6351.75,27427.9803,49.566,108.9218,49.6313,108.9389


In [51]:
detailed_df.to_csv('resss2/Pmonte_carlo_results_detailed.csv', index=False, float_format='%.4f')
summary_df.to_csv('resss2/monte_carlo_results_summary.csv', index=False, float_format='%.4f')
times_df.to_csv('resss2/monte_carlo_times.csv', index=False, float_format='%.4f')


In [56]:
detailed_df.columns

Index(['p', 'Scenario', 'SampleSize', 'FDH_MSE', 'EAT_MSE', 'FDH_Bias',
       'EAT_Bias', 'FDH_AbsBias', 'EAT_AbsBias'],
      dtype='object')

In [57]:
dfA = detailed_df.groupby(['p'], as_index=False).mean()
dfA

Unnamed: 0,p,Scenario,SampleSize,FDH_MSE,EAT_MSE,FDH_Bias,EAT_Bias,FDH_AbsBias,EAT_AbsBias
0,0.0,5.0,50.0,2200.455233,9580.9361,29.6678,65.1504,29.7921,65.183833
1,0.1,5.0,50.0,2968.340667,13392.390033,33.715233,74.976933,33.8339,75.0098
2,0.25,5.0,50.0,4885.0716,22131.637933,38.864967,85.823733,38.96,85.8478


In [58]:
dfA['EAT_MSE']/dfA['FDH_MSE']

0    4.354070
1    4.511743
2    4.530463
dtype: float64

In [60]:
dfB = detailed_df.groupby(['SampleSize'], as_index=False).mean()
dfB

Unnamed: 0,SampleSize,p,Scenario,FDH_MSE,EAT_MSE,FDH_Bias,EAT_Bias,FDH_AbsBias,EAT_AbsBias
0,25,0.116667,5.0,1422.242267,7083.461933,18.878433,44.0883,19.041867,44.1337
1,50,0.116667,5.0,3475.974567,15451.153233,35.4661,76.6576,35.565567,76.683967
2,75,0.116667,5.0,5155.650667,22570.3489,47.903467,105.205167,47.978567,105.223767


In [61]:
dfB['EAT_MSE']/dfB['FDH_MSE']

0    4.980489
1    4.445128
2    4.377789
dtype: float64