In [14]:
import numpy as np
import matplotlib.pyplot as plt
import eat
import pandas as pd
from models.modelsFDH import FDH

In [15]:
TRIALS = 1
SAMPLE_SIZES = [10]

SCENARIOS = {
    1: {"inputs": 1,
        "outputs": 1,
        "func": lambda x: np.log(x) + 3,
        "inefficiency": lambda n: np.abs(np.random.normal(0, 0.4, size=n))},
    2: {"inputs": 2,
        "outputs": 1,
        "func": lambda x1, x2: 0.1*x1 + 0.1*x2 + 0.3*((x1 * x2)**(1/2)),
        "inefficiency": lambda n: np.abs(np.random.normal(0, 0.4, size=n))},
    3: {"inputs": 3,
        "outputs": 1,
        "func": lambda x1, x2, x3: 0.1*x1 + 0.1*x2 + 0.1*x3 + 0.3*(x1*x2*x3)**(1/3),
        "inefficiency": lambda n: np.abs(np.random.normal(0, 0.4, size=n))},
    4: {"inputs": 9,
        "outputs": 1,
        "func": lambda x: np.prod(x**0.1, axis=1),  # Fixed row-wise product
        "inefficiency": lambda n: np.abs(np.random.normal(0, 0.4, size=n))},
    5: {
        "inputs": 2,
        "outputs": 2,
        "func": lambda x1, x2, y1, y2: (
            -np.log(y1)
            + 0.5 * (np.log(y2) / np.log(y1))
            + 0.25 * (np.log(y2) / np.log(y1)) ** 2
            - 1.5 * np.log(x1)
            - 0.6 * np.log(x2)
            + 0.2 * (np.log(x1)) ** 2
            + 0.05 * (np.log(x2)) ** 2
            - 0.1 * np.log(x1) * np.log(x2)
            + 0.05 * np.log(x1) * (np.log(y2) / np.log(y1))
            - 0.05 * np.log(x2) * (np.log(y2) / np.log(y1))
        ),
        "inefficiency": lambda n: np.abs(np.random.normal(0, 0.4, size=(n, 2))),
    },
}


In [16]:
def generate_data(scenario, n):
    x = np.random.uniform(1, 10, size=(n, scenario["inputs"]))
    if scenario == SCENARIOS[5]:  

        ln_y2_y1 = np.random.uniform(-1.5, 1.5, size=n)

        ln_y1 = -1 + 0.5 * ln_y2_y1 + 0.25 * (ln_y2_y1 ** 2) \
                - 1.5 * np.log(x[:, 0]) - 0.6 * np.log(x[:, 1]) + 0.2 * (np.log(x[:, 0]) ** 2) + 0.05 * (np.log(x[:, 1]) ** 2) \
                - 0.1 * np.log(x[:, 0]) * np.log(x[:, 1]) + 0.05 * np.log(x[:, 0]) * ln_y2_y1 - 0.05 * np.log(x[:, 1]) * ln_y2_y1

        min_ln_y1 = np.min(ln_y1)
        if min_ln_y1 < 0:
            ln_y1 += np.abs(min_ln_y1) + 0.1  

        ln_y2 = ln_y1 + ln_y2_y1

        y1 = np.exp(ln_y1)
        y2 = np.exp(ln_y2)

        y = np.column_stack((y1, y2))

    else:
        if scenario["inputs"] == 1:
            y = scenario["func"](x[:, 0])
        elif scenario["inputs"] == 2:
            y = scenario["func"](x[:, 0], x[:, 1])
        elif scenario["inputs"] == 3:
            y = scenario["func"](x[:, 0], x[:, 1], x[:, 2])
        elif scenario["inputs"] == 9:
            y = scenario["func"](x)
        else:
            raise NotImplementedError("Only 1, 2, 3, and 9 inputs are supported.")
        
    print(y.shape)
    inefficiency = scenario["inefficiency"](n)
    print(inefficiency.shape)
    output = y - inefficiency

    output = output.reshape(n, scenario["outputs"])

    return x, output, inefficiency

In [17]:
def monte_carlo_testing():
    results = {}
    for scenario_id, scenario in SCENARIOS.items():
        # print(type(scenario_id))
        # print(scenario_id)
        # print(scenario)
        # print(scenario.get('id'))
        for n in SAMPLE_SIZES:
            mse_fdh_list, mse_eat_list = [], []
            bias_fdh_list, bias_eat_list = [], []
            abs_bias_fdh_list, abs_bias_eat_list = [], []

            for _ in range(TRIALS):
                # print("\n\n\n")
                print("scen", scenario_id, "n", n, "_", _)
                x, y, inefficiency = generate_data(scenario, n)

                # print("x",x.shape)
                # print(x)

                # print("y", y.shape)
                # print('id', scenario_id)
                print(y)

                fdh = FDH(x, y)
                df = fdh.fdh_output_vrs()
                y_fdh = y * df["efficiency"].values.reshape(-1, 1)

                # print("df", df.shape)
                # print(df["efficiency"])

                # print("y_fdh", y_fdh.shape)
                # print(y_fdh)

                df = pd.DataFrame(
                    x, columns=[f"x[{i}]" for i in range(scenario["inputs"])]
                )
                df[[f"y[{i}]" for i in range(scenario["outputs"])]] = y

                # print("df2", df.shape)
                # print(df)

                eat_model = eat.EAT(
                    df,
                    [f"x[{i}]" for i in range(scenario["inputs"])],
                    [f"y[{i}]" for i in range(scenario["outputs"])],
                    5, 5)
                eat_model.fit()

                x_p = [f"x[{i}]" for i in range(scenario["inputs"])]
                # print("x_p", x_p)
                data_pred = df.loc[:, x_p]
                y_eat = eat_model.predict(data_pred, x_p)

                y_eat = y_eat[[f"p_y[{i}]" for i in range(scenario["outputs"])]]

                # print("y_eat", y_eat.shape)
                # print(y_eat)

                mse_fdh, bias_fdh, abs_bias_fdh = calculate_metrics(y, y_fdh)
                mse_fdh_list.append(mse_fdh)
                bias_fdh_list.append(bias_fdh)
                abs_bias_fdh_list.append(abs_bias_fdh)

                mse_eat, bias_eat, abs_bias_eat = calculate_metrics(y, y_eat)
                mse_eat_list.append(mse_eat)
                bias_eat_list.append(bias_eat)
                abs_bias_eat_list.append(abs_bias_eat)

            results[(scenario_id, n)] = {
                "FDH_MSE": np.mean(mse_fdh_list),
                "EAT_MSE": np.mean(mse_eat_list),
                "FDH_Bias": np.mean(bias_fdh_list),
                "EAT_Bias": np.mean(bias_eat_list),
                "FDH_AbsBias": np.mean(abs_bias_fdh_list),
                "EAT_AbsBias": np.mean(abs_bias_eat_list),
            }

    return results

def calculate_metrics(true_values, estimates):
    mse = np.mean((estimates - true_values) ** 2)
    bias = np.mean(estimates - true_values)
    abs_bias = np.mean(np.abs(estimates - true_values))

    return mse, bias, abs_bias

In [18]:
results = monte_carlo_testing()

scen 1 n 10 _ 0
(10,)
(10,)
[[4.10491914]
 [4.68801675]
 [4.56110963]
 [3.07686084]
 [3.98039714]
 [3.34152392]
 [3.61706766]
 [3.22939142]
 [4.0493708 ]
 [3.12695892]]
scen 2 n 10 _ 0
(10,)
(10,)
[[3.50345474]
 [1.33050111]
 [2.87279893]
 [1.98738178]
 [2.94629272]
 [2.01695349]
 [2.1626102 ]
 [1.76196759]
 [1.54953733]
 [1.48101469]]
scen 3 n 10 _ 0
(10,)
(10,)
[[2.58486454]
 [3.35576491]
 [2.34570568]
 [2.95189776]
 [2.98324809]
 [1.18979067]
 [1.88439032]
 [1.35484599]
 [1.72352371]
 [2.11011165]]
scen 4 n 10 _ 0
(10,)
(10,)
[[3.25253436]
 [2.41237925]
 [2.93055579]
 [3.11599226]
 [4.05796119]
 [2.89676282]
 [2.4529    ]
 [4.32373485]
 [4.07708125]
 [3.4388354 ]]
scen 5 n 10 _ 0
(10, 2)
(10, 2)
[[ 1.951196    0.90516893]
 [ 4.98072839  9.68831322]
 [ 2.91608721  1.21543093]
 [ 6.19949637 20.73866942]
 [11.74202864 18.9550964 ]
 [ 0.94528519  0.47202064]
 [ 2.06608911  1.28301827]
 [ 3.09972452  2.86300964]
 [12.97138119 57.66732716]
 [ 1.75548764  4.44823276]]


In [19]:
for (scenario_id, n), metrics in results.items():
    print(f"Scenario {scenario_id}, Sample size {n}:")
    print(f"  FDH MSE: {metrics['FDH_MSE']:.4f}, EAT MSE: {metrics['EAT_MSE']:.4f}")
    print(f"  FDH Bias: {metrics['FDH_Bias']:.4f}, EAT Bias: {metrics['EAT_Bias']:.4f}")
    print(f"  FDH AbsBias: {metrics['FDH_AbsBias']:.4f}, EAT AbsBias: {metrics['EAT_AbsBias']:.4f}")
    print()

Scenario 1, Sample size 10:
  FDH MSE: 0.0144, EAT MSE: 0.2023
  FDH Bias: 0.0837, EAT Bias: 0.3750
  FDH AbsBias: 0.0837, EAT AbsBias: 0.3750

Scenario 2, Sample size 10:
  FDH MSE: 0.0000, EAT MSE: 0.5846
  FDH Bias: 0.0000, EAT Bias: 0.5990
  FDH AbsBias: 0.0000, EAT AbsBias: 0.5990

Scenario 3, Sample size 10:
  FDH MSE: 0.0000, EAT MSE: 0.1844
  FDH Bias: 0.0000, EAT Bias: 0.3217
  FDH AbsBias: 0.0000, EAT AbsBias: 0.3217

Scenario 4, Sample size 10:
  FDH MSE: 0.0000, EAT MSE: 0.4418
  FDH Bias: 0.0000, EAT Bias: 0.4923
  FDH AbsBias: 0.0000, EAT AbsBias: 0.4923

Scenario 5, Sample size 10:
  FDH MSE: 97.7246, EAT MSE: 618.0592
  FDH Bias: 5.9551, EAT Bias: 16.9908
  FDH AbsBias: 5.9551, EAT AbsBias: 16.9908

