In [None]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from utils.params import fig_dir, tab_dir, blue_main_color
from utils.tables import summary_stats_1c
from utils.charts import (
    dual_hist_box_plot,
    feature_attrs_iqr_plot,
    dual_log_boxplot,
    corr_plot,
)
# Ask the IPython inline backend to emit figures in the 'retina' format.
%config InlineBackend.figure_format ='retina'
# Apply matplotlib's "ggplot" style sheet to all figures created after this point.
mpl.style.use("ggplot")

In [None]:
# Load the dataset from its CSV file (path is relative to the working directory).
# The bare `df.shape` on the last line is displayed as the cell output.
df = pd.read_csv(filepath_or_buffer="data/flow_data_1c.csv")
df.shape

In [None]:
# Positional column slices of `df`: the first two columns are used as targets,
# columns 2-41 are labelled G' and columns 72-111 are labelled G'' below.
# NOTE(review): the slice bounds are hard-coded and assume a fixed column
# order in the CSV — confirm against the data file.
targets = df.iloc[:, :2]
storage_modulus = df.iloc[:, 2:42]
loss_modulus = df.iloc[:, 72:112]

# Flatten each 2-D block into a single long column so that G' and G'' are each
# summarised as one variable.
features = pd.DataFrame(
    {
        label: block.to_numpy().flatten()
        for label, block in (("G'", storage_modulus), ("G''", loss_modulus))
    }
)

# LaTeX column headers and caption for the exported table (escape=False below
# keeps the math markup intact).
latex_headers = [
    r"$M_w$ [$\nicefrac{g}{mol}$]",
    "$PDI$",
    "$G'$ [Pa]",
    "$G''$ [Pa]",
]
latex_caption = "Summary statistics for the $M_w$ and $PDI$ target attributes, as well as the $G'$ and $G''$ features (unimodal dataset)"

# Build the summary with the project helper and write it to a .tex file in tab_dir.
stats_table = summary_stats_1c(targets, features)
stats_table.to_latex(
    buf=tab_dir + "/describe_1c_diff.tex",
    header=latex_headers,
    column_format="lrrrr",
    index=True,
    escape=False,
    bold_rows=True,
    caption=latex_caption,
    label="tab:describe_1c",
    position="htb",
)

In [None]:
# Call the project chart helper on the two feature blocks, then write the
# current matplotlib figure to fig_dir as a tightly-cropped 300-dpi PNG.
# NOTE(review): presumably the helper draws onto the current figure — confirm
# in utils.charts.
feature_attrs_iqr_plot(storage_modulus, loss_modulus)
iqr_figure_path = fig_dir + "/feature_attrs_iqr_1c.png"
plt.savefig(iqr_figure_path, dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Four single columns of `df`, picked by position and handed to the helper in
# exactly this order, followed by two lists of numbers and two title strings.
# NOTE(review): the meaning of each positional argument is defined by
# dual_log_boxplot in utils.charts — the lists look like axis tick positions;
# confirm there.
boxplot_columns = [df.iloc[:, position] for position in (2, 72, 59, 129)]
dual_log_boxplot(
    *boxplot_columns,
    [-12, -10, -8, -6, -4, -2, 0, 2, 4],
    [5.1, 5.2, 5.3, 5.4, 5.5, 5.6],
    "Frequency = $10^{-6}$ $s^{-1}$",
    r"Frequency $\approx 10^{4}$ $s^{-1}$",
)

# Write the current figure to fig_dir as a tightly-cropped 300-dpi PNG.
boxplot_figure_path = fig_dir + "/boxplot_freq_1c.png"
plt.savefig(boxplot_figure_path, dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Pairwise correlation matrix of every column from position 2 onwards
# (i.e. everything except the first two columns), passed to the project
# plotting helper.
feature_corr = df.iloc[:, 2:].corr()
corr_plot(feature_corr)

# Write the current figure to fig_dir as a tightly-cropped 300-dpi PNG.
corr_figure_path = fig_dir + "/corr_1c.png"
plt.savefig(corr_figure_path, dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Pass the two target columns and their axis labels to the project chart helper.
mw_values, pdi_values = df["M_W"], df["PDI"]
dual_hist_box_plot(mw_values, pdi_values, "$M_w$ [$g/mol$]", "$PDI$")

# Write the current figure to fig_dir as a tightly-cropped 300-dpi PNG.
box_hist_figure_path = fig_dir + "/box_hist_1c.png"
plt.savefig(box_hist_figure_path, dpi=300, bbox_inches="tight", pad_inches=0)

In [None]:
# Down-sample to at most 40000 rows before the scatter plot below.
# - random_state pins the draw so Restart & Run All reproduces the same
#   subset (and therefore the same saved figure) every time.
# - min(...) avoids the ValueError that DataFrame.sample raises when asked for
#   more rows than the frame holds (sampling without replacement).
# The name `df` is reused on purpose: the following cell reads it.
df = df.sample(n=min(40000, len(df)), random_state=42)

In [None]:
# Scatter plot of the two target columns, drawn through the explicit
# figure/axes objects rather than the pyplot state machine.
fig, ax = plt.subplots(figsize=(7.5, 5))  # width x height in inches
ax.scatter(
    df["M_W"], df["PDI"], s=1.5, color=blue_main_color, edgecolors="none"
)
ax.set_xlabel("$M_w$ [$g/mol$]")
ax.set_ylabel("$PDI$")

# Turn the grid off. The flag is passed positionally because the keyword was
# renamed from `b` to `visible` in matplotlib 3.5 and `b=` is rejected by
# newer releases; the positional form works on both old and new versions.
ax.grid(False)

# Small margins so the points are not clipped at the axes edges.
ax.set_xmargin(0.01)
ax.set_ymargin(0.017)

# Write the figure to fig_dir as a tightly-cropped 300-dpi PNG.
fig.savefig(
    fig_dir + "/datapoints_scatter_1c.png",
    dpi=300,
    bbox_inches="tight",
    pad_inches=0,
)