In [None]:
import os
import matplotlib
import plotly
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
import pandas as pd
import numpy as np

In [None]:
# Experiment configuration: DOE identifier (used to build the results file
# name), the factor columns to analyse, and the response column.
DOE_NAME = "doe4"
factors = [
    "max_time",
    "batch_size",
    "seq_len",
    "seq_est_len",
    "est_type",
]
response = "FIT"

In [None]:
# Load the results table of the selected DOE, i.e. "<DOE_NAME>_res.csv"
# (doe4_res.csv when DOE_NAME is "doe4").
df_res = pd.read_csv("{}_res.csv".format(DOE_NAME))

In [None]:
# Order the runs from highest to lowest response, so head()/tail() show the extremes.
df_res = df_res.sort_values(by=response, ascending=False)

# Missing RMSE values are replaced by a large sentinel (1000).
df_res["RMSE"] = df_res["RMSE"].fillna(1000)

# Alternative kept for reference: minimum fit to 0% (easier to interpret).
#df_res["FIT"] = df_res["FIT"] * (df_res["FIT"] > 0)

# Missing FIT values are placed 0.1 below the lowest observed FIT.
fit_floor = df_res["FIT"].min() - 0.1
df_res["FIT"] = df_res["FIT"].fillna(fit_floor)

In [None]:
# Cast every factor column to the pandas "category" dtype in one astype call.
df_res = df_res.astype({factor: "category" for factor in factors})

In [None]:
# First 20 rows; df_res is sorted by `response` in descending order earlier in the notebook.
df_res.head(n=20)

In [None]:
# Last 20 rows; df_res is sorted by `response` in descending order earlier in the notebook.
df_res.tail(n=20)

In [None]:
# All runs: point plot of the response against each factor (one panel per factor).
g = sns.PairGrid(
    data=df_res,
    x_vars=factors,
    y_vars=response,
    height=5,
    aspect=0.5,
)
g.map(sns.pointplot, scale=1.3)
sns.despine(fig=g.fig, left=True)

# Observation: the biggest effect seems to be max_time.

In [None]:
# All runs: box plot of the response against each factor, y-axis limited to 0-100.
g = sns.PairGrid(
    data=df_res,
    x_vars=factors,
    y_vars=response,
    height=5,
    aspect=0.5,
)
g.map(sns.boxplot)
g.set(ylim=(0, 100))
sns.despine(fig=g.fig, left=True)

In [None]:
# All runs: same box plots as the 0-100 view, zoomed to the 80-100 band.
g = sns.PairGrid(
    data=df_res,
    x_vars=factors,
    y_vars=response,
    height=5,
    aspect=0.5,
)
g.map(sns.boxplot)
g.set(ylim=(80, 100))
sns.despine(fig=g.fig, left=True)

In [None]:
# Interactive (plotly) box plots of FIT against each factor, one subplot per
# factor laid out in a single row. The number of columns follows len(factors)
# so the figure stays valid if the factor list changes.
fig = make_subplots(rows=1, cols=len(factors))
for col, factor in enumerate(factors, start=1):
    # plotly subplot columns are 1-based, hence enumerate(..., start=1).
    fig.add_box(y=df_res["FIT"], x=df_res[factor], row=1, col=col, name=factor)
    # Zoom each subplot's y-axis to the 80-100 band.
    fig.update_yaxes(range=[80, 100], row=1, col=col)
fig.show()

In [None]:
#fig.select_xaxes

In [None]:
# Median, mean and standard deviation of FIT for each seq_len level.
(
    df_res
    .groupby("seq_len")["FIT"]
    .agg(["median", "mean", "std"])
)
# Same kind of summary for an `lr` column (not among this DOE's factors), kept for reference:
#df_res.groupby("lr")["FIT"].agg(["mean", "std"])

In [None]:
#pd.pivot_table(df_res, index=["seq_len"], columns=["est_type"])

In [None]:
# All runs: FIT scatter, faceted by est_type (columns) and est_direction (rows),
# coloured by seq_len, with max_time and seq_est_len in the hover box.
# NOTE(review): est_direction is not listed in `factors`; presumably it is a
# column of the results CSV - confirm.
fig = px.scatter(
    data_frame=df_res,
    y="FIT",
    facet_col="est_type",
    facet_row="est_direction",
    color="seq_len",
    hover_data=["max_time", "seq_est_len"],
)
fig.show()

In [None]:
# Keep every run whose max_time is not 300.
# NOTE(review): the name suggests the remaining runs are the 30-minute ones - confirm.
# NOTE(review): max_time is categorical, so the 300 level stays as an unused
# category in this subset; consider .cat.remove_unused_categories() if an
# empty slot shows up in plots.
df_30min = df_res.loc[df_res["max_time"] != 300]

In [None]:
# Subset df_30min (max_time != 300): point plot of the response against each factor.
g = sns.PairGrid(
    data=df_30min,
    x_vars=factors,
    y_vars=response,
    height=5,
    aspect=0.5,
)
g.map(sns.pointplot, scale=1.3)
sns.despine(fig=g.fig, left=True)

In [None]:
# Runs with max_time other than 300 that also use the "ZERO" est_type.
df_zero = df_res.loc[
    (df_res["max_time"] != 300) & (df_res["est_type"] == "ZERO")
]

In [None]:
# Subset df_zero (max_time != 300, est_type == "ZERO"): point plot of the
# response against each factor.
g = sns.PairGrid(
    data=df_zero,
    x_vars=factors,
    y_vars=response,
    height=5,
    aspect=0.5,
)
g.map(sns.pointplot, scale=1.3)
sns.despine(fig=g.fig, left=True)

In [None]:
# Keep only the runs whose max_time equals 300.
# NOTE(review): the name suggests max_time is in seconds (300 s = 5 min) - confirm.
df_5min = df_res.loc[df_res["max_time"] == 300]

In [None]:
# Subset df_5min (max_time == 300): point plot of the response against each factor.
g = sns.PairGrid(
    data=df_5min,
    x_vars=factors,
    y_vars=response,
    height=5,
    aspect=0.5,
)
g.map(sns.pointplot, scale=1.3)
sns.despine(fig=g.fig, left=True)

In [None]:
# Runs with max_time other than 300 that also use the "FF" est_type.
df_good = df_res.loc[
    (df_res["max_time"] != 300) & (df_res["est_type"] == "FF")
]

In [None]:
# Subset df_good (max_time != 300, est_type == "FF"): point plot of the
# response against each factor.
g = sns.PairGrid(
    data=df_good,
    x_vars=factors,
    y_vars=response,
    height=5,
    aspect=0.5,
)
g.map(sns.pointplot, scale=1.3)
sns.despine(fig=g.fig, left=True)

In [None]:
# Subset df_30min (max_time != 300): FIT scatter, faceted by est_type (columns)
# and est_direction (rows), coloured by seq_len, with max_time and seq_est_len
# in the hover box.
# NOTE(review): est_direction is not listed in `factors`; presumably it is a
# column of the results CSV - confirm.
fig = px.scatter(
    data_frame=df_30min,
    y="FIT",
    facet_col="est_type",
    facet_row="est_direction",
    color="seq_len",
    hover_data=["max_time", "seq_est_len"],
)
fig.show()

In [None]:
# Subset df_30min (max_time != 300): point plot of the response against each
# factor. NOTE(review): this repeats the earlier df_30min point-plot cell.
g = sns.PairGrid(
    data=df_30min,
    x_vars=factors,
    y_vars=response,
    height=5,
    aspect=0.5,
)
g.map(sns.pointplot, scale=1.3)
sns.despine(fig=g.fig, left=True)

In [None]:
# All runs: box plot of the response against each factor, y-axis limited to
# 80-100. NOTE(review): this repeats the earlier 80-100 box-plot cell on df_res.
g = sns.PairGrid(
    data=df_res,
    x_vars=factors,
    y_vars=response,
    height=5,
    aspect=0.5,
)
g.map(sns.boxplot)
g.set(ylim=(80, 100))
sns.despine(fig=g.fig, left=True)

In [None]:
# Subset df_30min (max_time != 300): box plot of the response against each
# factor, y-axis limited to 90-100.
g = sns.PairGrid(
    data=df_30min,
    x_vars=factors,
    y_vars=response,
    height=5,
    aspect=0.5,
)
g.map(sns.boxplot)
g.set(ylim=(90, 100))
sns.despine(fig=g.fig, left=True)

In [None]:
# First 20 rows of the max_time == 300 subset.
df_5min.head(n=20)

In [None]:
# Keep only the runs whose seq_est_len equals 10, then preview the first 20 rows.
df_short_est = df_res.loc[df_res["seq_est_len"] == 10]
df_short_est.head(n=20)

In [None]:

# Subset df_short_est (seq_est_len == 10): point plot of the response against each factor.
g = sns.PairGrid(
    data=df_short_est,
    x_vars=factors,
    y_vars=response,
    height=5,
    aspect=0.5,
)
g.map(sns.pointplot, scale=1.3)
sns.despine(fig=g.fig, left=True)

In [None]:
# Subset df_short_est (seq_est_len == 10): box plot of the response against
# each factor, y-axis limited to 90-100.
g = sns.PairGrid(
    data=df_short_est,
    x_vars=factors,
    y_vars=response,
    height=5,
    aspect=0.5,
)
g.map(sns.boxplot)
g.set(ylim=(90, 100))
sns.despine(fig=g.fig, left=True)

In [None]:
# Keep only the runs with seq_len == 40 and seq_est_len == 10, then preview the first 20 rows.
df_short_train = df_res.loc[
    (df_res["seq_len"] == 40) & (df_res["seq_est_len"] == 10)
]
df_short_train.head(n=20)

In [None]:
# Subset df_short_train (seq_len == 40, seq_est_len == 10): box plot of the
# response against each factor, y-axis limited to 90-100.
g = sns.PairGrid(
    data=df_short_train,
    x_vars=factors,
    y_vars=response,
    height=5,
    aspect=0.5,
)
g.map(sns.boxplot)
g.set(ylim=(90, 100))
sns.despine(fig=g.fig, left=True)