In [None]:
import pandas as pd
import numpy as np
import torch
import random
import matplotlib as mpl
import matplotlib.pyplot as plt
from utils.neural_nets import NN_Module_2C
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import TransformedTargetRegressor
from skorch import NeuralNetRegressor
from skorch.callbacks import EarlyStopping
from utils.charts import loss_plot, abs_error_plot, rel_error_plot
from utils.metrics import mean_absolute_errors, mean_relative_errors
from utils.params import fig_dir, tab_dir

%config InlineBackend.figure_format ='retina'
mpl.style.use("ggplot")
torch.manual_seed(42)
torch.cuda.manual_seed(42)
random.seed(42)
np.random.seed(42)

In [None]:
# Read the CSV and cast every column to float32; the cell displays the frame's shape.
flow_data_path = "data/flow_data_2c_1.csv"
df = pd.read_csv(flow_data_path).astype(np.float32)
df.shape

In [None]:
# Keep only the rows inside the reduced target range. All conditions are
# evaluated column-wise as a single boolean mask rather than row by row with
# `iterrows`; this selects the same rows in the same order without a
# Python-level loop over the whole frame.
in_reduced_range = (
    df["phi_L"].between(0.1, 0.9)  # inclusive on both ends: 0.1 <= phi_L <= 0.9
    & (df["PDI_S"] >= 2)
    & (df["PDI_L"] >= 2)
    & (df["M_W_S"] >= 1_287_000)
    & (df["M_W_L"] >= 1_287_000)
)

# Draw 45,000 of the remaining rows at random. No `random_state` is passed, so
# the draw is governed by NumPy's global RNG state (seeded in the setup cell).
df = df.loc[in_reduced_range].sample(45_000)

In [None]:
# Model inputs are the last 140 columns of the frame, targets are the first 5.
features = df.iloc[:, -140:]
targets = df.iloc[:, :5]

# Hold out one ninth of the rows for testing, with a fixed seed for the split.
X_train, X_test, Y_train, Y_test = train_test_split(
    features, targets, test_size=1 / 9, random_state=42
)

print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

In [None]:
# Persist the held-out targets as a plain array so later cells can reload them
# with np.load (np.save appends the ".npy" extension to the given path).
np.save("data/predictions/nn/2c_best/Y_test_2", Y_test, allow_pickle=False)

In [None]:
# Positions of the feature columns fed to the network: 0-49 and 70-119
# (100 columns in total, matching `module__num_features` below).
col_indices = np.concatenate([np.arange(50), np.arange(70, 120)])

# skorch regressor wrapping the project's network module. Training runs for at
# most 500 epochs and is cut short by the EarlyStopping callback (patience 10).
device = "cuda" if torch.cuda.is_available() else "cpu"
net = NeuralNetRegressor(
    module=NN_Module_2C,
    module__num_targets=5,
    module__num_features=100,
    criterion=torch.nn.MSELoss,
    optimizer=torch.optim.AdamW,
    lr=2e-05,
    max_epochs=500,
    callbacks=[EarlyStopping(patience=10)],
    device=device,
)

# Targets are min-max scaled around the regressor; inputs are min-max scaled
# by the first pipeline step.
tt = TransformedTargetRegressor(regressor=net, transformer=MinMaxScaler())
pipe = Pipeline(steps=[("scale", MinMaxScaler()), ("tt", tt)])

pipe.fit(X_train.iloc[:, col_indices], Y_train)

In [None]:
# Training log of the fitted network (second pipeline step), reduced to the
# loss and duration columns and indexed by epoch.
history = (
    pd.DataFrame(pipe.named_steps["tt"].regressor_.history)
    .loc[:, ["epoch", "train_loss", "valid_loss", "dur"]]
    .set_index("epoch")
)

# Plot training against validation loss and write the figure to disk.
loss_plot(history["train_loss"], history["valid_loss"])
plt.savefig(fig_dir + "/nn_2c_loss.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Predict the held-out rows using the same feature columns the pipeline was fitted on.
X_test_selected = X_test.iloc[:, col_indices]
Y_pred = pipe.predict(X_test_selected)

In [None]:
# Write the predictions to disk (np.save appends the ".npy" extension).
np.save(file="data/predictions/nn/2c_best/Y_pred_2", arr=Y_pred, allow_pickle=False)

In [None]:
# Start with an empty collection of error rows.
errors = list()

In [None]:
# Build one row of error metrics per saved prediction set, in the order the
# results table labels its rows: first the files without a suffix, then the
# "_2" files written by this notebook.
# NOTE(review): assumes the unsuffixed Y_test.npy / Y_pred.npy hold the
# full-dataset run -- confirm.
# The list is rebuilt from scratch so re-running the cell never duplicates rows
# and a fresh "Run All" always yields exactly two rows.
errors = []
for suffix in ("", "_2"):
    Y_test = np.load(
        f"data/predictions/nn/2c_best/Y_test{suffix}.npy", allow_pickle=False
    )
    Y_pred = np.load(
        f"data/predictions/nn/2c_best/Y_pred{suffix}.npy", allow_pickle=False
    )

    # Row layout: the values returned by mean_relative_errors, then their mean,
    # then the entry at index 4 of mean_absolute_errors.
    errs = mean_relative_errors(Y_test, Y_pred)
    errs.append(np.mean(errs))  # mean is evaluated before the append
    errs.append(mean_absolute_errors(Y_test, Y_pred)[4])
    errors.append(errs)

In [None]:
# Turn every error row into strings for the LaTeX table: the first five entries
# become percentages with two decimals followed by a LaTeX-escaped percent sign
# (backslash + %), any further entry is printed with three decimals.
# The backslash is written as "\\" because "\%" is not a valid Python escape.
mod_errors = [
    [
        f"{error * 100:.2f}\\%" if j < 5 else f"{error:.3f}"
        for j, error in enumerate(row)
    ]
    for row in errors
]
mod_errors

In [None]:
# One table row per entry of `mod_errors`; the two labels below become the row
# headers, so `mod_errors` must contain exactly two rows.
df_errors = pd.DataFrame(mod_errors, dtype=str)
df_errors.index = ["Full Dataset", "Reduced Target Range"]

# Write the table as a LaTeX file. Cells and headers already contain LaTeX
# markup, hence escape=False. All header strings are raw so that backslashes
# (e.g. in \phi) reach LaTeX verbatim instead of being read as Python escapes.
df_errors.to_latex(
    buf=tab_dir + "/2c_best_errors_2.tex",
    header=[
        r"MRE ($M_w^s$)",
        r"MRE ($PDI^s$)",
        r"MRE ($M_w^l$)",
        r"MRE ($PDI^l$)",
        r"Avg. MRE",
        r"MAE ($\phi^l$)",
    ],
    column_format="lrrrrrr",
    index=True,
    escape=False,
    bold_rows=True,
    caption=r"Mean relative error (MRE) and the averaged MRE of the $M_w^s$, $PDI^s$, $M_w^l$ and $PDI^l$ target attributes, as well as the mean absolute error (MAE) of the best performing bimodal models with and without restricting the target ranges by $M_w \geq$ 1,287,000 and $PDI \geq$ 2 (360,000 training and 40,000 testing instances for the full dataset, 40,000 training and 5,000 testing instances for the reduced target range, using only the first 50 features each for $G'$ and $G''$, $\phi^l \in$ [0.1, 0.9], $\frac{M_w^l}{M_w^s}>PDI_{max}^{1}$ bimodal dataset)",
    label="tab:2c_best_errors",
    position="htb",
)

In [None]:
# Reload stored targets and predictions from disk as NumPy arrays.
# NOTE(review): these are the files without the "_2" suffix, while the figures
# saved further down carry "_2" in their names -- confirm this is the intended pair.
Y_test, Y_pred = (
    np.load(npy_path, allow_pickle=False)
    for npy_path in (
        "data/predictions/nn/2c_best/Y_test.npy",
        "data/predictions/nn/2c_best/Y_pred.npy",
    )
)

In [None]:
# np.asarray accepts both a DataFrame (straight from the train/test split) and
# an ndarray (reloaded with np.load); the latter has no `.values` attribute.
y_true = np.asarray(Y_test)

# Row layout: the values returned by mean_relative_errors, then their mean,
# then the entry at index 4 of mean_absolute_errors.
errs = mean_relative_errors(y_true, Y_pred)
errs.append(np.mean(errs))  # mean is evaluated before the append
errs.append(mean_absolute_errors(y_true, Y_pred)[4])

# First five entries as percentages with a LaTeX-escaped percent sign
# (backslash + %, written "\\%" since "\%" is not a valid Python escape),
# anything after that with three decimals.
errors = [
    f"{error * 100:.2f}\\%" if i < 5 else f"{error:.3f}"
    for i, error in enumerate(errs)
]
errors

In [None]:
# Axis labels, one per target, in mathtext. The last one is a raw string so the
# backslash in \phi is passed through verbatim instead of being read as a
# Python escape sequence.
labels = [
    "$M_w^s$ [$g/mol$]",
    "$PDI^s$",
    "$M_w^l$ [$g/mol$]",
    "$PDI^l$",
    r"$\phi^l$",
]
fig, ax = plt.subplots(figsize=(8, 10.6))  # width, height in inches

# np.asarray works whether Y_test is still a DataFrame or an ndarray reloaded
# with np.load (which has no `.values`).
abs_error_plot(np.asarray(Y_test), Y_pred, labels)
plt.subplots_adjust(wspace=0.15, hspace=0.42)

plt.savefig(
    fig_dir + "/nn_2c_2_abs_errors.png",
    dpi=300,
    bbox_inches="tight",
    pad_inches=0,
)

In [None]:
# Axis labels for the four targets shown in the relative-error figure.
labels = ["$M_w^s$ [$g/mol$]", "$PDI^s$", "$M_w^l$ [$g/mol$]", "$PDI^l$"]

fig, ax = plt.subplots(figsize=(8, 9.8))  # width, height in inches

# np.asarray works whether Y_test is still a DataFrame or an ndarray reloaded
# with np.load (which has no `.values`).
rel_error_plot(np.asarray(Y_test), Y_pred, labels)
plt.subplots_adjust(wspace=0.35, hspace=0.46)

plt.savefig(
    fig_dir + "/nn_2c_2_rel_errors.png",
    dpi=300,
    bbox_inches="tight",
    pad_inches=0,
)

In [None]:
# Mean of the values returned by mean_relative_errors. np.asarray is used
# because Y_test may be an ndarray reloaded with np.load, which has no `.values`.
np.mean(mean_relative_errors(np.asarray(Y_test), Y_pred))