In [None]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from utils.params import tab_dir, fig_dir, label_color
from utils.tables import summary_stats_targets_2c, summary_stats_features_2c
from utils.charts import (
    dual_hist_box_plot,
    feature_attrs_iqr_plot,
    dual_log_boxplot,
    corr_plot,
    datapoints_scatter,
)
# Ask the inline backend to render figures in the high-resolution "retina" format.
%config InlineBackend.figure_format ='retina'
# Apply the "ggplot" style sheet to every matplotlib figure created below.
mpl.style.use("ggplot")

In [None]:
# Load the "None" (no restrictions) 2c flow dataset; the trailing expression
# displays its (rows, columns) shape as the cell output.
df = pd.read_csv(filepath_or_buffer="data/flow_data_2c_None.csv")
df.shape

In [None]:
# Split the frame by column position: the first 5 columns are the targets,
# positions 5-74 are used as G' and positions 75-144 as G''.
targets = df.iloc[:, :5]
storage_modulus = df.iloc[:, 5:75]
loss_modulus = df.iloc[:, 75:145]
# Flatten each modulus block into one long column so that
# summary_stats_features_2c receives a two-column frame.
features = pd.DataFrame(
    {
        "G'": storage_modulus.values.flatten(),
        "G''": loss_modulus.values.flatten(),
    }
)

# Export the target summary table as LaTeX. Every string containing a LaTeX
# command is a raw string so that backslashes (e.g. \phi) are never parsed as
# invalid Python escape sequences.
summary_stats_targets_2c(targets).to_latex(
    buf=tab_dir + "/describe_targets_2c_None.tex",
    header=[
        r"$M_w^s$ [$\nicefrac{g}{mol}$]",
        "$PDI^s$",
        r"$M_w^l$ [$\nicefrac{g}{mol}$]",
        "$PDI^l$",
        r"$\phi^l$",
    ],
    column_format="lrrrrr",
    index=True,
    escape=False,  # headers/captions already contain LaTeX markup
    bold_rows=True,
    caption=r"Summary statistics for the $M_w^s$, $PDI^s$, $M_w^l$, $PDI^l$, and $\phi^l$ target attributes (no restrictions bimodal dataset)",
    label="tab:describe_targets_2c_None",
    position="htb",
)

# Export the feature (G', G'') summary table as LaTeX.
summary_stats_features_2c(features).to_latex(
    buf=tab_dir + "/describe_features_2c_None.tex",
    header=["$G'$ [Pa]", "$G''$ [Pa]"],
    column_format="lrr",
    index=True,
    escape=False,
    bold_rows=True,
    caption=r"Summary statistics for the $G'$ and $G''$ features (no restrictions bimodal dataset)",
    label="tab:describe_features_2c_None",
    position="htb",
)

In [None]:
# Plot the M_W_S and M_W_L target columns side by side and save the figure.
dual_hist_box_plot(
    df["M_W_S"],
    df["M_W_L"],
    "$M_w^s$ [$g/mol$]",
    "$M_w^l$ [$g/mol$]",
)
plt.savefig(f"{fig_dir}/box_hist_2c_None_mw.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Plot the PDI_S and PDI_L target columns side by side and save the figure.
dual_hist_box_plot(
    df["PDI_S"],
    df["PDI_L"],
    "$PDI^s$",
    "$PDI^l$",
)
plt.savefig(f"{fig_dir}/box_hist_2c_None_pdi.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Plot the phi_L target on its own (no second series, hence the None arguments).
# The label is a raw string so "\p" is not treated as an invalid escape sequence.
dual_hist_box_plot(df["phi_L"], None, r"$\phi^l$", None)
plt.savefig(
    fig_dir + "/box_hist_2c_None_phi.png",
    dpi=300,
    bbox_inches="tight",
    pad_inches=0,
)

In [None]:
# Re-derive the two feature blocks by column position (5-74 and 75-144) so this
# cell does not depend on the table cell having been run.
storage_modulus, loss_modulus = df.iloc[:, 5:75], df.iloc[:, 75:145]

feature_attrs_iqr_plot(storage_modulus, loss_modulus)
plt.savefig(f"{fig_dir}/feature_attrs_iqr_2c_None.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Columns 5 and 75 are the first columns of the storage/loss modulus blocks;
# 62 and 132 lie 57 positions further into each block.
# NOTE(review): the two numeric lists look like per-panel tick values - confirm
# against utils.charts.dual_log_boxplot.
dual_log_boxplot(
    *(df.iloc[:, col] for col in (5, 75, 62, 132)),
    list(range(-10, 5, 2)),
    [5.2, 5.3, 5.4, 5.5, 5.6],
    "Frequency = $10^{-6}$ $s^{-1}$",
    r"Frequency $\approx 10^{4}$ $s^{-1}$",
)
plt.savefig(f"{fig_dir}/boxplot_freq_2c_None.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Correlation matrix over every column from position 5 onward (targets excluded).
corr_plot(df.iloc[:, 5:].corr())
plt.savefig(
    f"{fig_dir}/corr_2c_None.png",
    dpi=300,
    bbox_inches="tight",
    pad_inches=0,
)

In [None]:
# Create the 7.5 x 5 inch figure in one call instead of resizing it afterwards.
fig, ax = plt.subplots(figsize=(7.5, 5))
# Plot a random subsample of at most 20000 rows. The fixed random_state makes
# the saved figure reproducible across runs, and capping n at len(df) keeps
# sampling from raising if the dataset has fewer rows than requested.
datapoints_scatter(df.sample(n=min(20000, len(df)), random_state=42))
plt.title("No restrictions", color=label_color, size=12)
plt.savefig(
    fig_dir + "/datapoints_scatter_2c_None.png",
    dpi=300,
    bbox_inches="tight",
    pad_inches=0,
)

In [None]:
# Load the PDI_max^1 dataset and keep a random subsample of at most 200000 rows.
# The fixed random_state makes every table and figure derived from `df` below
# reproducible; capping n at the frame length avoids a ValueError if the file
# holds fewer rows than requested.
df = pd.read_csv("data/flow_data_2c_1.csv")
df = df.sample(n=min(200_000, len(df)), random_state=42)
df.shape

In [None]:
# Split the frame by column position: the first 5 columns are the targets,
# positions 5-74 are used as G' and positions 75-144 as G''.
targets = df.iloc[:, :5]
storage_modulus = df.iloc[:, 5:75]
loss_modulus = df.iloc[:, 75:145]
# Flatten each modulus block into one long column so that
# summary_stats_features_2c receives a two-column frame.
features = pd.DataFrame(
    {
        "G'": storage_modulus.values.flatten(),
        "G''": loss_modulus.values.flatten(),
    }
)

# Export the target summary table as LaTeX. Every string containing a LaTeX
# command is a raw string so that backslashes (e.g. \phi) are never parsed as
# invalid Python escape sequences.
summary_stats_targets_2c(targets).to_latex(
    buf=tab_dir + "/describe_targets_2c_1.tex",
    header=[
        r"$M_w^s$ [$\nicefrac{g}{mol}$]",
        "$PDI^s$",
        r"$M_w^l$ [$\nicefrac{g}{mol}$]",
        "$PDI^l$",
        r"$\phi^l$",
    ],
    column_format="lrrrrr",
    index=True,
    escape=False,  # headers/captions already contain LaTeX markup
    bold_rows=True,
    caption=r"Summary statistics for the $M_w^s$, $PDI^s$, $M_w^l$, $PDI^l$, and $\phi^l$ target attributes ($\frac{M_w^l}{M_w^s}>PDI_{max}^{1}$ bimodal dataset)",
    label="tab:describe_targets_2c_1",
    position="htb",
)

# Export the feature (G', G'') summary table as LaTeX.
summary_stats_features_2c(features).to_latex(
    buf=tab_dir + "/describe_features_2c_1.tex",
    header=["$G'$ [Pa]", "$G''$ [Pa]"],
    column_format="lrr",
    index=True,
    escape=False,
    bold_rows=True,
    caption=r"Summary statistics for the $G'$ and $G''$ features ($\frac{M_w^l}{M_w^s}>PDI_{max}^{1}$ bimodal dataset)",
    label="tab:describe_features_2c_1",
    position="htb",
)

In [None]:
# Plot the M_W_S and M_W_L target columns side by side and save the figure.
dual_hist_box_plot(
    df["M_W_S"],
    df["M_W_L"],
    "$M_w^s$ [$g/mol$]",
    "$M_w^l$ [$g/mol$]",
)
plt.savefig(f"{fig_dir}/box_hist_2c_1_mw.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Plot the PDI_S and PDI_L target columns side by side and save the figure.
dual_hist_box_plot(
    df["PDI_S"],
    df["PDI_L"],
    "$PDI^s$",
    "$PDI^l$",
)
plt.savefig(f"{fig_dir}/box_hist_2c_1_pdi.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Plot the phi_L target on its own (no second series, hence the None arguments).
# The label is a raw string so "\p" is not treated as an invalid escape sequence.
dual_hist_box_plot(df["phi_L"], None, r"$\phi^l$", None)
plt.savefig(
    fig_dir + "/box_hist_2c_1_phi.png",
    dpi=300,
    bbox_inches="tight",
    pad_inches=0,
)

In [None]:
# Re-derive the two feature blocks by column position (5-74 and 75-144) so this
# cell does not depend on the table cell having been run.
storage_modulus, loss_modulus = df.iloc[:, 5:75], df.iloc[:, 75:145]

feature_attrs_iqr_plot(storage_modulus, loss_modulus)
plt.savefig(f"{fig_dir}/feature_attrs_iqr_2c_1.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Columns 5 and 75 are the first columns of the storage/loss modulus blocks;
# 62 and 132 lie 57 positions further into each block.
# NOTE(review): the two numeric lists look like per-panel tick values - confirm
# against utils.charts.dual_log_boxplot.
dual_log_boxplot(
    *(df.iloc[:, col] for col in (5, 75, 62, 132)),
    list(range(-12, 5, 2)),
    [5.1, 5.2, 5.3, 5.4, 5.5, 5.6],
    "Frequency = $10^{-6}$ $s^{-1}$",
    r"Frequency $\approx 10^{4}$ $s^{-1}$",
)
plt.savefig(f"{fig_dir}/boxplot_freq_2c_1.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Correlation matrix over every column from position 5 onward (targets excluded).
corr_plot(df.iloc[:, 5:].corr())
plt.savefig(
    f"{fig_dir}/corr_2c_1.png",
    dpi=300,
    bbox_inches="tight",
    pad_inches=0,
)

In [None]:
# Create the 7.5 x 5 inch figure in one call instead of resizing it afterwards.
fig, ax = plt.subplots(figsize=(7.5, 5))
# Plot a random subsample of at most 20000 rows. The fixed random_state makes
# the saved figure reproducible across runs, and capping n at len(df) keeps
# sampling from raising if the dataset has fewer rows than requested.
datapoints_scatter(df.sample(n=min(20000, len(df)), random_state=42))
plt.title(r"$\frac{M_w^l}{M_w^s}>PDI_{max}^{1}$", color=label_color, size=12)
plt.savefig(
    fig_dir + "/datapoints_scatter_2c_1.png",
    dpi=300,
    bbox_inches="tight",
    pad_inches=0,
)

In [None]:
# Replace `df` with the PDI_max^1.5 dataset; the trailing expression displays
# its (rows, columns) shape as the cell output.
df = pd.read_csv(filepath_or_buffer="data/flow_data_2c_1.5.csv")
df.shape

In [None]:
# Split the frame by column position: the first 5 columns are the targets,
# positions 5-74 are used as G' and positions 75-144 as G''.
targets = df.iloc[:, :5]
storage_modulus = df.iloc[:, 5:75]
loss_modulus = df.iloc[:, 75:145]
# Flatten each modulus block into one long column so that
# summary_stats_features_2c receives a two-column frame.
features = pd.DataFrame(
    {
        "G'": storage_modulus.values.flatten(),
        "G''": loss_modulus.values.flatten(),
    }
)

# Export the target summary table as LaTeX. Every string containing a LaTeX
# command is a raw string so that backslashes (e.g. \phi) are never parsed as
# invalid Python escape sequences.
summary_stats_targets_2c(targets).to_latex(
    buf=tab_dir + "/describe_targets_2c_1_5.tex",
    header=[
        r"$M_w^s$ [$\nicefrac{g}{mol}$]",
        "$PDI^s$",
        r"$M_w^l$ [$\nicefrac{g}{mol}$]",
        "$PDI^l$",
        r"$\phi^l$",
    ],
    column_format="lrrrrr",
    index=True,
    escape=False,  # headers/captions already contain LaTeX markup
    bold_rows=True,
    caption=r"Summary statistics for the $M_w^s$, $PDI^s$, $M_w^l$, $PDI^l$, and $\phi^l$ target attributes ($\frac{M_w^l}{M_w^s}>PDI_{max}^{1.5}$ bimodal dataset)",
    label="tab:describe_targets_2c_1_5",
    position="htb",
)

# Export the feature (G', G'') summary table as LaTeX.
summary_stats_features_2c(features).to_latex(
    buf=tab_dir + "/describe_features_2c_1_5.tex",
    header=["$G'$ [Pa]", "$G''$ [Pa]"],
    column_format="lrr",
    index=True,
    escape=False,
    bold_rows=True,
    caption=r"Summary statistics for the $G'$ and $G''$ features ($\frac{M_w^l}{M_w^s}>PDI_{max}^{1.5}$ bimodal dataset)",
    label="tab:describe_features_2c_1_5",
    position="htb",
)

In [None]:
# Plot the M_W_S and M_W_L target columns side by side and save the figure.
dual_hist_box_plot(
    df["M_W_S"],
    df["M_W_L"],
    "$M_w^s$ [$g/mol$]",
    "$M_w^l$ [$g/mol$]",
)
plt.savefig(f"{fig_dir}/box_hist_2c_1_5_mw.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Plot the PDI_S and PDI_L target columns side by side and save the figure.
dual_hist_box_plot(
    df["PDI_S"],
    df["PDI_L"],
    "$PDI^s$",
    "$PDI^l$",
)
plt.savefig(f"{fig_dir}/box_hist_2c_1_5_pdi.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Plot the phi_L target on its own (no second series, hence the None arguments).
# The label is a raw string so "\p" is not treated as an invalid escape sequence.
dual_hist_box_plot(df["phi_L"], None, r"$\phi^l$", None)
plt.savefig(
    fig_dir + "/box_hist_2c_1_5_phi.png",
    dpi=300,
    bbox_inches="tight",
    pad_inches=0,
)

In [None]:
# Re-derive the two feature blocks by column position (5-74 and 75-144) so this
# cell does not depend on the table cell having been run.
storage_modulus, loss_modulus = df.iloc[:, 5:75], df.iloc[:, 75:145]

feature_attrs_iqr_plot(storage_modulus, loss_modulus)
plt.savefig(f"{fig_dir}/feature_attrs_iqr_2c_1_5.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Columns 5 and 75 are the first columns of the storage/loss modulus blocks;
# 62 and 132 lie 57 positions further into each block.
# NOTE(review): the two numeric lists look like per-panel tick values - confirm
# against utils.charts.dual_log_boxplot.
dual_log_boxplot(
    *(df.iloc[:, col] for col in (5, 75, 62, 132)),
    list(range(-12, 5, 2)),
    [5.1, 5.2, 5.3, 5.4, 5.5, 5.6],
    "Frequency = $10^{-6}$ $s^{-1}$",
    r"Frequency $\approx 10^{4}$ $s^{-1}$",
)
plt.savefig(f"{fig_dir}/boxplot_freq_2c_1_5.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Correlation matrix over every column from position 5 onward (targets excluded).
corr_plot(df.iloc[:, 5:].corr())
plt.savefig(
    f"{fig_dir}/corr_2c_1_5.png",
    dpi=300,
    bbox_inches="tight",
    pad_inches=0,
)

In [None]:
# Replace `df` with the PDI_max^2 dataset; the trailing expression displays
# its (rows, columns) shape as the cell output.
df = pd.read_csv(filepath_or_buffer="data/flow_data_2c_2.csv")
df.shape

In [None]:
# Split the frame by column position: the first 5 columns are the targets,
# positions 5-74 are used as G' and positions 75-144 as G''.
targets = df.iloc[:, :5]
storage_modulus = df.iloc[:, 5:75]
loss_modulus = df.iloc[:, 75:145]
# Flatten each modulus block into one long column so that
# summary_stats_features_2c receives a two-column frame.
features = pd.DataFrame(
    {
        "G'": storage_modulus.values.flatten(),
        "G''": loss_modulus.values.flatten(),
    }
)

# Export the target summary table as LaTeX. Every string containing a LaTeX
# command is a raw string so that backslashes (e.g. \phi) are never parsed as
# invalid Python escape sequences.
summary_stats_targets_2c(targets).to_latex(
    buf=tab_dir + "/describe_targets_2c_2.tex",
    header=[
        r"$M_w^s$ [$\nicefrac{g}{mol}$]",
        "$PDI^s$",
        r"$M_w^l$ [$\nicefrac{g}{mol}$]",
        "$PDI^l$",
        r"$\phi^l$",
    ],
    column_format="lrrrrr",
    index=True,
    escape=False,  # headers/captions already contain LaTeX markup
    bold_rows=True,
    caption=r"Summary statistics for the $M_w^s$, $PDI^s$, $M_w^l$, $PDI^l$, and $\phi^l$ target attributes ($\frac{M_w^l}{M_w^s}>PDI_{max}^{2}$ bimodal dataset)",
    label="tab:describe_targets_2c_2",
    position="htb",
)

# Export the feature (G', G'') summary table as LaTeX.
summary_stats_features_2c(features).to_latex(
    buf=tab_dir + "/describe_features_2c_2.tex",
    header=["$G'$ [Pa]", "$G''$ [Pa]"],
    column_format="lrr",
    index=True,
    escape=False,
    bold_rows=True,
    caption=r"Summary statistics for the $G'$ and $G''$ features ($\frac{M_w^l}{M_w^s}>PDI_{max}^{2}$ bimodal dataset)",
    label="tab:describe_features_2c_2",
    position="htb",
)

In [None]:
# Plot the M_W_S and M_W_L target columns side by side and save the figure.
dual_hist_box_plot(
    df["M_W_S"],
    df["M_W_L"],
    "$M_w^s$ [$g/mol$]",
    "$M_w^l$ [$g/mol$]",
)
plt.savefig(f"{fig_dir}/box_hist_2c_2_mw.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Plot the PDI_S and PDI_L target columns side by side and save the figure.
dual_hist_box_plot(
    df["PDI_S"],
    df["PDI_L"],
    "$PDI^s$",
    "$PDI^l$",
)
plt.savefig(f"{fig_dir}/box_hist_2c_2_pdi.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Plot the phi_L target on its own (no second series, hence the None arguments).
# The label is a raw string so "\p" is not treated as an invalid escape sequence.
dual_hist_box_plot(df["phi_L"], None, r"$\phi^l$", None)
plt.savefig(
    fig_dir + "/box_hist_2c_2_phi.png",
    dpi=300,
    bbox_inches="tight",
    pad_inches=0,
)

In [None]:
# Re-derive the two feature blocks by column position (5-74 and 75-144) so this
# cell does not depend on the table cell having been run.
storage_modulus, loss_modulus = df.iloc[:, 5:75], df.iloc[:, 75:145]

feature_attrs_iqr_plot(storage_modulus, loss_modulus)
plt.savefig(f"{fig_dir}/feature_attrs_iqr_2c_2.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Columns 5 and 75 are the first columns of the storage/loss modulus blocks;
# 62 and 132 lie 57 positions further into each block.
# NOTE(review): the two numeric lists look like per-panel tick values - confirm
# against utils.charts.dual_log_boxplot.
dual_log_boxplot(
    *(df.iloc[:, col] for col in (5, 75, 62, 132)),
    list(range(-12, 5, 2)),
    [5.1, 5.2, 5.3, 5.4, 5.5, 5.6],
    "Frequency = $10^{-6}$ $s^{-1}$",
    r"Frequency $\approx 10^{4}$ $s^{-1}$",
)
plt.savefig(f"{fig_dir}/boxplot_freq_2c_2.png", dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Correlation matrix over every column from position 5 onward (targets excluded).
corr_plot(df.iloc[:, 5:].corr())
plt.savefig(
    f"{fig_dir}/corr_2c_2.png",
    dpi=300,
    bbox_inches="tight",
    pad_inches=0,
)

In [None]:
# Load the PDI_max^1.5 and PDI_max^2 datasets into df1 and df2 for the
# side-by-side scatter figure in the next cell.
df1, df2 = map(
    pd.read_csv,
    ("data/flow_data_2c_1.5.csv", "data/flow_data_2c_2.csv"),
)

In [None]:
# Start from an empty 7.5 x 2.5 inch figure. plt.subplots() would also create a
# full-size Axes; recent Matplotlib versions no longer remove it automatically
# when subplot2grid places the two panels on top of it, so it would show
# through behind them in the saved image.
fig = plt.figure(figsize=(7.5, 2.5))

# Left panel: PDI_max^1.5 dataset. Sampling uses a fixed random_state so the
# saved figure is reproducible, and n is capped at the frame length so a
# dataset with fewer than 20000 rows does not raise.
plt.subplot2grid(shape=(1, 2), loc=(0, 0), colspan=1)
plt.title(r"$\frac{M_w^l}{M_w^s}>PDI_{max}^{1.5}$", color=label_color, size=12)
datapoints_scatter(
    df1.sample(n=min(20000, len(df1)), random_state=42),
    dual=True,
    s=0.5,
    legend=False,
)

# Right panel: PDI_max^2 dataset, drawn with yaxis=False.
plt.subplot2grid(shape=(1, 2), loc=(0, 1), colspan=1)
plt.title(r"$\frac{M_w^l}{M_w^s}>PDI_{max}^{2}$", color=label_color, size=12)
datapoints_scatter(
    df2.sample(n=min(20000, len(df2)), random_state=42),
    dual=True,
    s=0.5,
    yaxis=False,
)

# Nearly close the horizontal gap between the two panels.
plt.subplots_adjust(wspace=0.019)
plt.savefig(
    fig_dir + "/datapoints_scatter_2c_1.5_2.png",
    dpi=300,
    bbox_inches="tight",
    pad_inches=0,
)