In [None]:
import pandas as pd
import numpy as np
import torch
import random
import matplotlib as mpl
from utils.neural_nets import NN_Module_2C
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import TransformedTargetRegressor
from skorch import NeuralNetRegressor
from tqdm import tqdm
from utils.metrics import mean_absolute_errors, mean_relative_errors
from utils.params import tab_dir

%config InlineBackend.figure_format ='retina'
mpl.style.use("ggplot")
torch.manual_seed(42)
torch.cuda.manual_seed(42)
random.seed(42)
np.random.seed(42)

In [None]:
# Read the flow dataset, then cast every column to float32
# (presumably to match the dtype of the torch module's parameters -- confirm).
df = pd.read_csv("data/flow_data_2c_1.csv")
df = df.astype(np.float32)

# Show (rows, columns) as the cell output.
df.shape

In [None]:
# For each exclusion threshold t: keep only the rows whose phi_L lies in
# [t, 1 - t], draw a fixed-size sample from them, train a freshly constructed
# network on that sample, and store the held-out targets and the predictions
# under logs/phi_l/ (file suffix = position of the threshold in the list).
thresholds = [0, 0.05, 0.1, 0.15, 0.2, 0.25]

for i, threshold in enumerate(tqdm(thresholds)):
    # Vectorised range filter, inclusive on both ends. This replaces a
    # row-by-row `iterrows()` scan, which is orders of magnitude slower on a
    # frame of this size; the boolean mask selects the same rows in the same
    # order as collecting the matching index labels would.
    in_range = df["phi_L"].between(threshold, 1 - threshold)

    # Fixed seed so the same 200k rows are drawn on every run.
    # NOTE: `sample` raises if fewer than 200,000 rows pass the filter.
    sample = df.loc[in_range].sample(200_000, random_state=42)

    # The last 140 columns are used as model inputs, the first 5 as targets.
    X_train, X_test, Y_train, Y_test = train_test_split(
        sample.iloc[:, -140:],
        sample.iloc[:, :5],
        test_size=0.1,
        random_state=42,
    )
    # Persist the held-out targets before training so evaluation can be done
    # later from disk alone (np.save appends the ".npy" extension).
    np.save(f"logs/phi_l/Y_test_{i}", Y_test, allow_pickle=False)

    # A new, untrained regressor is built for every threshold.
    net = NeuralNetRegressor(
        module=NN_Module_2C,
        module__num_targets=5,
        criterion=torch.nn.MSELoss,
        optimizer=torch.optim.AdamW,
        lr=2e-05,
        max_epochs=100,
        verbose=0,
        device="cuda" if torch.cuda.is_available() else "cpu",
    )
    # Targets are min-max scaled for training and un-scaled again on predict;
    # the inputs get their own min-max scaling as the first pipeline step.
    tt = TransformedTargetRegressor(regressor=net, transformer=MinMaxScaler())
    pipe = Pipeline(
        [
            ("scale", MinMaxScaler()),
            ("tt", tt),
        ]
    )

    pipe.fit(X_train, Y_train)

    Y_pred = pipe.predict(X_test)
    np.save(f"logs/phi_l/Y_pred_{i}", Y_pred, allow_pickle=False)

In [None]:
# Build one row of error metrics per threshold run from the arrays saved by
# the training loop. Row layout: the per-target mean relative errors, then
# their average, then the mean absolute error of the target at index 4.
errors = []
for i in range(6):
    Y_test = np.load(f"logs/phi_l/Y_test_{i}.npy", allow_pickle=False)
    Y_pred = np.load(f"logs/phi_l/Y_pred_{i}.npy", allow_pickle=False)

    # Copy into a fresh list so the appends below never touch an object owned
    # by the helper. The helper is evaluated once and its result reused for
    # the average (assumes mean_relative_errors is deterministic).
    errs = list(mean_relative_errors(Y_test, Y_pred))
    errs.append(np.mean(errs))
    errs.append(mean_absolute_errors(Y_test, Y_pred)[4])

    errors.append(errs)

# File index i corresponds to the i-th threshold of the training loop, which
# iterates the thresholds in ascending order -- the same order in which the
# results table labels its rows. The rows are therefore kept as loaded; the
# previous permutation [0, 3, 1, 4, 2, 5] paired results with the wrong
# phi^l range labels.
errors = np.array(errors)

In [None]:
# Turn the numeric error matrix into LaTeX-ready strings and export it as a
# table. Columns 0-4 are relative errors shown as percentages with two
# decimals; the remaining column is an absolute error shown with three
# decimals. The smallest value of every column is wrapped in \green{...}.
# All LaTeX fragments are raw strings so that backslashes are never parsed as
# (invalid) Python escape sequences.
df_errors = pd.DataFrame(errors).astype(str)

# Per-column minima, computed once instead of once per cell.
col_minima = errors.min(axis=0)

for index, row in df_errors.iterrows():
    for j in df_errors.columns:
        value = float(row[j])
        if j < 5:
            text = rf"{value*100:.2f}\%"
        else:
            text = f"{value:.3f}"

        # Highlight the best (lowest) entry of the column.
        if errors[index, j] == col_minima[j]:
            text = r"\green{" + text + "}"

        df_errors.at[index, j] = text

# Row labels: the admissible phi^l interval of each run, in row order.
df_errors.index = [
    r"$\phi^l \in$ [0, 1]",
    r"$\phi^l \in$ [0.05, 0.95]",
    r"$\phi^l \in$ [0.1, 0.9]",
    r"$\phi^l \in$ [0.15, 0.85]",
    r"$\phi^l \in$ [0.2, 0.8]",
    r"$\phi^l \in$ [0.25, 0.75]",
]

# escape=False keeps the LaTeX markup in cells, labels and headers verbatim.
df_errors.to_latex(
    buf=tab_dir + "/phi_l_errors.tex",
    header=[
        r"MRE ($M_w^s$)",
        r"MRE ($PDI^s$)",
        r"MRE ($M_w^l$)",
        r"MRE ($PDI^l$)",
        r"Avg. MRE",
        r"MAE ($\phi^l$)",
    ],
    column_format="lrrrrrr",
    index=True,
    escape=False,
    caption=r"Mean relative error (MRE) and the averaged MRE of the $M_w^s$, $PDI^s$, $M_w^l$ and $PDI^l$ target attributes, as well as the mean absolute error (MAE) of $\phi^l$ using various ranges of valid $\phi^l$ values (180,000 training and 20,000 testing instances, $\frac{M_w^l}{M_w^s}>PDI_{max}^{1}$ bimodal dataset)",
    label="tab:phi_l_errors",
    position="htb",
)