In [1]:
import numpy as np
import matplotlib.pyplot as plt
import eat
import pandas as pd
from models.modelsFDH import FDH

In [2]:
TRIALS = 1
SAMPLE_SIZES = [15, 30, 14]

SCENARIOS = {
    1: {"inputs": 1,
        "outputs": 1,
        "func": lambda x: np.log(x) + 3,
        "inefficiency": lambda n: np.abs(np.random.normal(0, 0.4, size=n))},
    2: {"inputs": 2,
        "outputs": 1,
        "func": lambda x1, x2: 0.1*x1 + 0.1*x2 + 0.3*((x1 * x2)**(1/2)),
        "inefficiency": lambda n: np.abs(np.random.normal(0, 0.4, size=n))},
    3: {"inputs": 3,
        "outputs": 1,
        "func": lambda x1, x2, x3: 0.1*x1 + 0.1*x2 + 0.1*x3 + 0.3*(x1*x2*x3)**(1/3),
        "inefficiency": lambda n: np.abs(np.random.normal(0, 0.4, size=n))},
    4: {"inputs": 9,
        "outputs": 1,
        "func": lambda x: np.prod(x**0.1, axis=1),  # Fixed row-wise product
        "inefficiency": lambda n: np.abs(np.random.normal(0, 0.4, size=n))},
    5: {
        "inputs": 2,
        "outputs": 2,
        "func": lambda x1, x2, y1, y2: (
            -np.log(y1)
            + 0.5 * (np.log(y2) / np.log(y1))
            + 0.25 * (np.log(y2) / np.log(y1)) ** 2
            - 1.5 * np.log(x1)
            - 0.6 * np.log(x2)
            + 0.2 * (np.log(x1)) ** 2
            + 0.05 * (np.log(x2)) ** 2
            - 0.1 * np.log(x1) * np.log(x2)
            + 0.05 * np.log(x1) * (np.log(y2) / np.log(y1))
            - 0.05 * np.log(x2) * (np.log(y2) / np.log(y1))
        ),
        "inefficiency": lambda n: np.abs(np.random.normal(0, 0.4, size=(n, 2))),
    },
}


In [3]:
def generate_data(scenario, n):
    x = np.random.uniform(1, 10, size=(n, scenario["inputs"]))
    if scenario == SCENARIOS[5]:  

        ln_y2_y1 = np.random.uniform(-1.5, 1.5, size=n)

        ln_y1 = -1 + 0.5 * ln_y2_y1 + 0.25 * (ln_y2_y1 ** 2) \
                - 1.5 * np.log(x[:, 0]) - 0.6 * np.log(x[:, 1]) + 0.2 * (np.log(x[:, 0]) ** 2) + 0.05 * (np.log(x[:, 1]) ** 2) \
                - 0.1 * np.log(x[:, 0]) * np.log(x[:, 1]) + 0.05 * np.log(x[:, 0]) * ln_y2_y1 - 0.05 * np.log(x[:, 1]) * ln_y2_y1

        min_ln_y1 = np.min(ln_y1)
        if min_ln_y1 < 0:
            ln_y1 += np.abs(min_ln_y1) + 0.1  

        ln_y2 = ln_y1 + ln_y2_y1

        y1 = np.exp(ln_y1)
        y2 = np.exp(ln_y2)

        y = np.column_stack((y1, y2))

    else:
        if scenario["inputs"] == 1:
            y = scenario["func"](x[:, 0])
        elif scenario["inputs"] == 2:
            y = scenario["func"](x[:, 0], x[:, 1])
        elif scenario["inputs"] == 3:
            y = scenario["func"](x[:, 0], x[:, 1], x[:, 2])
        elif scenario["inputs"] == 9:
            y = scenario["func"](x)
        else:
            raise NotImplementedError("Only 1, 2, 3, and 9 inputs are supported.")
        
    inefficiency = scenario["inefficiency"](n)
    output = y - inefficiency

    output = output.reshape(n, scenario["outputs"])

    return x, output, inefficiency

In [4]:
def monte_carlo_testing():
    results = {}
    for scenario_id, scenario in SCENARIOS.items():

        for n in SAMPLE_SIZES:
            mse_fdh_list, mse_eat_list = [], []
            bias_fdh_list, bias_eat_list = [], []
            abs_bias_fdh_list, abs_bias_eat_list = [], []

            for _ in range(TRIALS):

                print("scen", scenario_id, "n", n, "Trial", _)
                x, y, inefficiency = generate_data(scenario, n)
                fdh = FDH(x, y)
                df = fdh.fdh_output_vrs()
                y_fdh = y * df["efficiency"].values.reshape(-1, 1)

                df = pd.DataFrame(
                    x, columns=[f"x[{i}]" for i in range(scenario["inputs"])]
                )
                df[[f"y[{i}]" for i in range(scenario["outputs"])]] = y

                eat_model = eat.EAT(
                    df,
                    [f"x[{i}]" for i in range(scenario["inputs"])],
                    [f"y[{i}]" for i in range(scenario["outputs"])],
                    5, 5)
                eat_model.fit()

                x_p = [f"x[{i}]" for i in range(scenario["inputs"])]

                data_pred = df.loc[:, x_p]
                y_eat = eat_model.predict(data_pred, x_p)

                y_eat = y_eat[[f"p_y[{i}]" for i in range(scenario["outputs"])]]

                mse_fdh, bias_fdh, abs_bias_fdh = calculate_metrics(y, y_fdh)
                mse_fdh_list.append(mse_fdh)
                bias_fdh_list.append(bias_fdh)
                abs_bias_fdh_list.append(abs_bias_fdh)

                mse_eat, bias_eat, abs_bias_eat = calculate_metrics(y, y_eat)
                mse_eat_list.append(mse_eat)
                bias_eat_list.append(bias_eat)
                abs_bias_eat_list.append(abs_bias_eat)

            results[(scenario_id, n)] = {
                "FDH_MSE": np.mean(mse_fdh_list),
                "EAT_MSE": np.mean(mse_eat_list),
                "FDH_Bias": np.mean(bias_fdh_list),
                "EAT_Bias": np.mean(bias_eat_list),
                "FDH_AbsBias": np.mean(abs_bias_fdh_list),
                "EAT_AbsBias": np.mean(abs_bias_eat_list),
            }

    return results

def calculate_metrics(true_values, estimates):
    mse = np.mean((estimates - true_values) ** 2)
    bias = np.mean(estimates - true_values)
    abs_bias = np.mean(np.abs(estimates - true_values))

    return mse, bias, abs_bias

In [5]:
results = monte_carlo_testing()

scen 1 n 15 Trial 0
Set parameter WLSAccessID
Set parameter WLSSecret
Set parameter LicenseID to value 2579375
Academic license 2579375 - for non-commercial use only - registered to ma___@iitd.ac.in
scen 1 n 30 Trial 0
scen 1 n 14 Trial 0
scen 2 n 15 Trial 0
scen 2 n 30 Trial 0
scen 2 n 14 Trial 0
scen 3 n 15 Trial 0
scen 3 n 30 Trial 0
scen 3 n 14 Trial 0
scen 4 n 15 Trial 0
scen 4 n 30 Trial 0
scen 4 n 14 Trial 0
scen 5 n 15 Trial 0
scen 5 n 30 Trial 0
scen 5 n 14 Trial 0


In [6]:
for (scenario_id, n), metrics in results.items():
    print(f"Scenario {scenario_id}, Sample size {n}:")
    print(f"  FDH MSE: {metrics['FDH_MSE']:.4f}, EAT MSE: {metrics['EAT_MSE']:.4f}")
    print(f"  FDH Bias: {metrics['FDH_Bias']:.4f}, EAT Bias: {metrics['EAT_Bias']:.4f}")
    print(f"  FDH AbsBias: {metrics['FDH_AbsBias']:.4f}, EAT AbsBias: {metrics['EAT_AbsBias']:.4f}")
    print()

Scenario 1, Sample size 15:
  FDH MSE: 0.0534, EAT MSE: 0.2606
  FDH Bias: 0.1344, EAT Bias: 0.3639
  FDH AbsBias: 0.1344, EAT AbsBias: 0.3639

Scenario 1, Sample size 30:
  FDH MSE: 0.1398, EAT MSE: 0.4558
  FDH Bias: 0.2137, EAT Bias: 0.5417
  FDH AbsBias: 0.2137, EAT AbsBias: 0.5417

Scenario 1, Sample size 14:
  FDH MSE: 0.0123, EAT MSE: 0.1389
  FDH Bias: 0.0545, EAT Bias: 0.2769
  FDH AbsBias: 0.0545, EAT AbsBias: 0.2769

Scenario 2, Sample size 15:
  FDH MSE: 0.0229, EAT MSE: 0.1218
  FDH Bias: 0.0682, EAT Bias: 0.2588
  FDH AbsBias: 0.0682, EAT AbsBias: 0.2588

Scenario 2, Sample size 30:
  FDH MSE: 0.0046, EAT MSE: 0.3366
  FDH Bias: 0.0191, EAT Bias: 0.4188
  FDH AbsBias: 0.0191, EAT AbsBias: 0.4188

Scenario 2, Sample size 14:
  FDH MSE: 0.0000, EAT MSE: 0.9314
  FDH Bias: 0.0000, EAT Bias: 0.7780
  FDH AbsBias: 0.0000, EAT AbsBias: 0.7780

Scenario 3, Sample size 15:
  FDH MSE: 0.0000, EAT MSE: 0.8353
  FDH Bias: 0.0000, EAT Bias: 0.7195
  FDH AbsBias: 0.0000, EAT AbsBias: 