# Visualizing the log data:

#### Importing packages

In [None]:
from glob import glob
import warnings 
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import re

import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

from polyphys.visualize import tuner
from polyphys.visualize import plotter
from polyphys.manage.parser import SumRuleCyl, TransFociCyl

### Importing datasets

In [None]:
# Root folder that holds the merged ("allInOne") datasets of every project.
# NOTE(review): this is an absolute, machine-specific path -- consider making
# it configurable.
database = '/Users/amirhsi_mini/OneDrive - University of Waterloo/PhD Research/Jupyter/Datasets/'
project = 'TransFociCub'
# 'HnsCub' stores its merged data in an "-ens1_2-allInOne/" folder; every
# other project uses the plain "-allInOne/" suffix.
df_path = (
    f"{database}{project}-ens1_2-allInOne/"
    if project == 'HnsCub'
    else f"{database}{project}-allInOne/"
)
# Three tables per project: thermo output, per-run statistics and wall times.
thermo = pd.read_parquet(f"{df_path}allInOne-{project}-thermo.parquet.brotli")
run_stat = pd.read_csv(f"{df_path}allInOne-{project}-runStat.csv")
wall_time = pd.read_csv(f"{df_path}allInOne-{project}-wallTimeStat.csv")

### Visualizing the run stats: 

In [None]:
## This is for the SumRuleCyl project:
# Drop problematic (broken, restarted, faked) run stats and keep the complete
# ones, i.e. the rows whose lineage_name ends in "ens1".."ens8" or "j01"/"j02".
# The index is reset by re-assignment rather than in place, because the
# filtered frame is a slice of `run_stat`.
run_stat_comp = run_stat[
    run_stat['lineage_name'].str.contains('ens[1-8]$|j0[12]$')
].reset_index(drop=True)
# Keep data from the production phase, in which loop_timesteps is equal to or
# larger than 5000000. The line that defined `run_stat_prod` was commented
# out, so the aggregation below raised a NameError on a fresh kernel.
# NOTE(review): the filter is restored only when the column is present;
# otherwise all complete runs are used -- confirm this is the intended subset.
if 'loop_timesteps' in run_stat_comp.columns:
    run_stat_prod = run_stat_comp[run_stat_comp['loop_timesteps'] >= 5000000]
else:
    run_stat_prod = run_stat_comp
# Columns holding the percentage of the total time spent in each category.
sel_cols = ['Pair_total_pct', 'Bond_total_pct', 'Neigh_total_pct',
            'Comm_total_pct', 'Output_total_pct', 'Modify_total_pct',
            'Other_total_pct']
# The string alias gives the same result as np.mean and avoids the warning
# newer pandas versions emit when NumPy callables are passed to `agg`.
agg_funcs = {key: 'mean' for key in sel_cols}
# Mean percentage per (n_cores, n_atoms) combination.
run_stats_per_cor_per_atoms = (
    run_stat_prod
    .groupby(['n_cores', 'n_atoms'])[sel_cols]
    .agg(agg_funcs)
    .reset_index()
)
# Long format: one row per (n_cores, n_atoms, category).
run_stats_per_cor_per_atoms_per_cat = pd.melt(
    run_stats_per_cor_per_atoms,
    id_vars=['n_cores', 'n_atoms'],
    value_vars=sel_cols,
    var_name='category',
    value_name='pct_of_total_time'
)

In [None]:
# Columns holding the percentage of the total time spent in each category.
sel_cols = [
    'Pair_total_pct', 'Bond_total_pct', 'Neigh_total_pct', 'Comm_total_pct',
    'Output_total_pct', 'Modify_total_pct', 'Other_total_pct'
]
# Every category is averaged within a (n_cores, n_atoms) group.
agg_funcs = dict.fromkeys(sel_cols, np.mean)
run_stats_per_cor_per_atoms = (
    run_stat
    .groupby(['n_cores', 'n_atoms'])[sel_cols]
    .agg(agg_funcs)
    .reset_index()
)
# Long format: one row per (n_cores, n_atoms, category).
run_stats_per_cor_per_atoms_per_cat = run_stats_per_cor_per_atoms.melt(
    id_vars=['n_cores', 'n_atoms'],
    value_vars=sel_cols,
    var_name='category',
    value_name='pct_of_total_time'
)

In [None]:
# Output settings for the timing-breakdown figure.
fontsize = 14
save_to = "./"
ext = 'pdf'
output = "-".join(["runStat", project]) + "." + ext
# Labels meant for the custom legend that is disabled further down; unused
# while that code stays commented out.
new_labels = [
    'Category', 'Pair', 'Bond', 'Neigh', 'Comm', 'Output', 'Modify', 'Other',
    '# of cores', '1', '4', '8', '16'
]
fig, ax = plt.subplots(figsize=(8,4.5))
sns.set_context(
    font_scale=3,
    rc={
        'font.family': "Times New Roman",
        'mathtext.default': 'regular',
        "text.usetex": True,
        "font.size": fontsize
    }
)
# One line per timing category (hue) and per core count (style/marker).
ax_sns = sns.lineplot(
    data=run_stats_per_cor_per_atoms_per_cat,
    x='n_atoms',
    y='pct_of_total_time',
    hue='category',
    style='n_cores',
    markers=True,
    legend='full',
    ax=ax
)
ax_sns.set_xscale('log')
ax_sns.set_xlabel(r"# of particles")
ax_sns.set_ylabel(r"% of the total time")
#handles, labels = ax_sns.get_legend_handles_labels()
#ax_sns.legend(handles = handles, labels=new_labels, frameon=False)
# Place the legend outside the axes, anchored at its upper-right corner.
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
fig.savefig(save_to + output, bbox_inches='tight')

In [None]:
# Grouping keys plus the two per-run metrics that get aggregated.
sel_cols = ['n_cores', 'n_atoms', 'timestep_sec', 'dangerous_builds']
# 'timestep_sec' is averaged and 'dangerous_builds' is summed per group.
agg_funcs = {
    'timestep_sec': np.mean,
    'dangerous_builds': np.sum,
}

In [None]:
# Aggregate the selected metrics per (n_cores, n_atoms) combination and turn
# the group keys back into ordinary columns.
step_per_sec_stats = (
    run_stat
    .groupby(['n_cores', 'n_atoms'])[sel_cols]
    .agg(agg_funcs)
    .reset_index()
)

In [None]:
# Divide the aggregated 'timestep_sec' by the core count to get a per-core
# value.
step_per_sec_stats['timestep_sec_core'] = step_per_sec_stats['timestep_sec'].div(
    step_per_sec_stats['n_cores']
)

In [None]:
# --- Output and layout settings ---
# Several of these (fontsize, ylabel_pad, fig_title_kws, loc, font_family,
# move_legend_kws) are not used by the plotting calls in this cell.
fontsize = 14
save_to = './'
ext = 'pdf'
#new_labels = ['Category', 'Pair', 'Bond', 'Neigh', 'Comm', 'Output', 'Modify', 'Other', '# of cores', '1', '4', '8', '16']
plot_context = 'talk'
font_scale = 2
height = 6
aspect = 1.618
ylabel_pad = 50
loc = 'lower left'
font_family = 'sans-serif'
fig_title_kws = dict(fontsize=34, x=0.5, y=0.93)
facet_kws = dict(sharey=False, sharex=False, legend_out=True)
move_legend_kws = dict(
    ncol=1,
    bbox_to_anchor=(1., 0.95),
    frameon=True,
    facecolor='aliceblue',
    #borderpad=0.1,
    markerscale=1.5
)
rc_params = {
    'axes.facecolor': 'aliceblue',
    'mathtext.default': 'regular',
    'text.usetex': True,
    'axes.grid': True,
    'axes.grid.axis': 'both',
    #'axes.grid.which': 'both'
}
# Facet column attribute and the label used for it in the panel titles.
col_attr = 'n_cores'
attr_labels = {
    'n_cores': r"$n_{{core}}$"
}

sns.set_theme(
    context=plot_context,
    style='ticks',
  #  palette=color_palette,
    font='Times New Roman',
    font_scale=font_scale,
    rc=rc_params
)
# One panel per core count, two panels per row; axes are not shared.
stat_grid = sns.relplot(
    data=step_per_sec_stats,
    kind='line',
    x='n_atoms',
    y='timestep_sec_core',
    col='n_cores',
    col_wrap=2,
  #  marker='s',
    legend='full',
    height=height,
    aspect=aspect,
    facet_kws=facet_kws,
)
stat_grid.set_xlabels(r"$n_{atoms}$")
stat_grid.set_ylabels(r"${steps}/{sec.n_{core}}$")
# `{col_name}` is filled in by seaborn with each panel's column value.
stat_grid.set_titles(attr_labels[col_attr] + r"$={col_name}$")
stat_grid.tight_layout(w_pad=0)
#sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
#output = "-".join(["runStat", project_name]) + "." + ext
#fig.savefig(save_to + output, bbox_inches='tight')

### Visualizing wall-time stats

In [None]:
# Show the column labels of the wall-time table as the cell output.
wall_time.columns

In [None]:
# Total wall time of each 'whole': sum 'wall_time_hr' over its rows and keep
# the last recorded ensemble, n_cores and n_atoms.
# The original selected the columns with a bare tuple
# (`groupby(...)['a', 'b', ...]`), which pandas deprecated in 1.0 and rejects
# in 2.0; named aggregation with `as_index=False` gives the same table
# without selecting the grouping key as a value column.
wall_time_per_whole = (
    wall_time
    .groupby('whole', as_index=False)
    .agg(
        ensemble=('ensemble', 'last'),
        n_cores=('n_cores', 'last'),
        n_atoms=('n_atoms', 'last'),
        wall_time_hr=('wall_time_hr', 'sum'),
    )
)[['ensemble', 'whole', 'n_cores', 'n_atoms', 'wall_time_hr']]
# Mean wall time per ensemble, averaged over its 'whole' rows.
wall_time_per_ensemble = (
    wall_time_per_whole
    .groupby('ensemble', as_index=False)
    .agg(
        n_cores=('n_cores', 'last'),
        n_atoms=('n_atoms', 'last'),
        wall_time_hr=('wall_time_hr', 'mean'),
    )
)
# Sort by core count, then by system size. The earlier standalone
# `sort_values('n_cores')` call discarded its result and was removed.
wall_time_per_ensemble_sorted = wall_time_per_ensemble.sort_values(
    by=['n_cores', 'n_atoms']
)
wall_time_per_ensemble_sorted

### Filtered data based on n_cores

In [None]:
# Restrict the per-ensemble wall times to a single core count.
n_core = 32
filtered = wall_time_per_ensemble_sorted[
    wall_time_per_ensemble_sorted['n_cores'] == n_core
]

# Output settings; the file name carries the selected core count.
fontsize = 12
save_to = "./"
ext = 'pdf'
output = "-".join(["wallTimeStat", project, f'nCores{n_core}']) + "." + ext
#new_labels = ['Category', 'Pair', 'Bond', 'Neigh', 'Comm', 'Output', 'Modify', 'Other', '# of cores', '1', '4', '8', '16']
fig, ax = plt.subplots(figsize=(8,6))
sns.set_theme(
    context='paper',
    style='ticks',
    font='Times New Roman',
    font_scale=2,
    rc={
        'mathtext.default': 'regular',
        "text.usetex": True,
        "font.size": fontsize,
        'axes.grid': True,
        'axes.grid.axis': 'both',
        'axes.grid.which': 'both'
    }
)
# Scatter of wall time against system size with seaborn's regression fit.
ax_sns = sns.regplot(
    data=filtered,
    x='n_atoms',
    y='wall_time_hr',
    marker='s',
    ax=ax
)
ax_sns.set(xlabel="Number of particles", ylabel="Wall time (hours)")
#sns.move_legend(ax_sns, title="# of cores", loc= "upper left", bbox_to_anchor=(1, 1))
fig.savefig(save_to + output, bbox_inches='tight')
#plt.close()

### Wall stats:

In [None]:
# Output settings for the figure covering all core counts.
fontsize = 12
save_to = "./"
ext = 'pdf'
output = "-".join(["wallTimeStat", project]) + "." + ext
#new_labels = ['Category', 'Pair', 'Bond', 'Neigh', 'Comm', 'Output', 'Modify', 'Other', '# of cores', '1', '4', '8', '16']
fig, ax = plt.subplots(figsize=(12,9))
sns.set_theme(
    context='paper',
    style='ticks',
    font='Times New Roman',
    font_scale=2,
    rc={
        'mathtext.default': 'regular',
        "text.usetex": True,
        "font.size": fontsize,
        'axes.grid': True,
        'axes.grid.axis': 'both',
        'axes.grid.which': 'both'
    }
)
# One line per core count, distinguished by both color and line style.
ax_sns = sns.lineplot(
    data=wall_time_per_ensemble_sorted,
    x='n_atoms',
    y='wall_time_hr',
    hue='n_cores',
    style='n_cores',
    marker='s',
    ax=ax
)
ax_sns.set(xlabel="Number of particles", ylabel="Wall time (hours)")
# Move the legend outside the axes and give it a title.
sns.move_legend(
    ax_sns,
    loc="upper left",
    bbox_to_anchor=(1, 1),
    title="Number of cores"
)
fig.savefig(save_to + output, bbox_inches='tight')
#plt.close()

### Visualizing thermodynamic data

In [None]:
# Show the column labels of the thermo table as the cell output.
thermo.columns

In [None]:
# Column list previously used with the 'multi' thermo style (kept for
# reference):
#sel_cols = ['Step', 'TotEng', 'KinEng', 'Temp', 'PotEng', 'E_bond', 'E_angle',
#       'E_dihed', 'E_impro', 'E_vdwl', 'E_coul', 'E_long', 'Press',
#        'whole', 'ensemble_long', 'ensemble', 'space',
#        'dcyl', 'dmon_large', 'nmon_large',
#        'nmon_small', 'dcrowd', 'dt', 'bdump',
#       'adump', 'ensemble_id',  'phi_c_bulk'] # multi
thermo_style = 'one' # 'multi'
# Columns to keep from the thermo table, keyed by project name and then by
# thermo style.
sel_cols = {
    'SumRuleCyl': {
        'one': [
            'Step', 'Temp', 'E_pair', 'E_mol', 'TotEng', 'Press',
            'lineage_name', 'whole', 'ensemble_long', 'ensemble',
            'space', 'nmon', 'epsilon', 'dcyl', 'lcyl', 'dcrowd',
            'ncrowd', 'dt', 'bdump', 'adump', 'ensemble_id',
            'phi_m_bulk', 'rho_m_bulk', 'phi_c_bulk', 'rho_c_bulk'
        ]
    },
    'HnsCub': {
        'one': ['Step',
            'KinEng', 'PotEng', 'E_tail', 'Ecouple', 'Econserve', 'E_vdwl',
            'E_bond', 'E_angle', 'E_mol', 'Temp', 'Press', 'nhns', 'dcrowd',
            'ensemble_id', 'phi_c_bulk', 'lineage_name', 'whole',
            'ensemble_long', 'space', 'E_pair', 'TotEng'
        ]
    }
}
# `sel_cols[project][thermo_style]` raised a KeyError for any project without
# an entry above (the dataset cell sets project = 'TransFociCub'). Fall back
# to every column in that case. An explicit copy is taken so that columns
# can be added to `thermo_equil` later without touching `thermo`.
thermo_cols = sel_cols.get(project, {}).get(thermo_style)
if thermo_cols is None:
    thermo_equil = thermo.copy()
else:
    thermo_equil = thermo.loc[:, thermo_cols].copy()

In [None]:
# Bin width used to group nearby phi_c_bulk values.
round_to = 0.025


def rounding_func(x, round_to):
    """Snap ``x`` to the nearest multiple of ``round_to``, rounded to 3 decimals."""
    n_steps = np.rint(x / round_to)
    return np.round(n_steps * round_to, 3)


# Apply the rounding element by element and store it in a new column.
thermo_equil['phi_c_bulk_round'] = thermo_equil['phi_c_bulk'].apply(
    rounding_func, args=(round_to,)
)

In [None]:
# E_bond against Step: one panel per rounded phi_c_bulk value (three panels
# per row, independent y axes) and one line per ensemble_id.
sns.relplot(
    data=thermo_equil,
    kind="line",
    x="Step",
    y="E_bond",
    hue='ensemble_id',
    col='phi_c_bulk_round',
    col_wrap=3,
    facet_kws={"sharey": False},
    legend="full",
    alpha=0.5,
    ci=None
)

In [None]:
# Press against Step: one panel per rounded phi_c_bulk value (three panels
# per row, independent y axes) and one line per ensemble_id.
# The data frame was misspelled as `thermos_equil`, a name no earlier cell
# defines; the frame prepared above is `thermo_equil`.
sns.relplot(
    x="Step",
    y="Press",
    col='phi_c_bulk_round',
    col_wrap=3,
    hue='ensemble_id',
    ci=None,
    alpha=0.5,
    legend="full",
    kind="line",
    facet_kws={"sharey": False},
    data=thermo_equil
)

### Filter thermo data by time step

In [None]:
# NOTE(review): presumably the spacing between thermo records, in time steps,
# and the length of the equilibration phase -- confirm against the run setup.
thermp_freq = 10000
equilibration_total_steps = 1000000
# First step after the equilibration phase.
sampling_first_step = equilibration_total_steps + thermp_freq
# The original cell reset the index of `thermos_equil`, which no visible cell
# defines (the filter that created it was commented out), so it raised a
# NameError and left `sampling_first_step` unused. The filter is restored on
# `thermo_equil`, keeping rows with Step >= sampling_first_step, and the
# index is reset by re-assignment.
# NOTE(review): confirm the author did not disable the filter on purpose.
thermo_equil = thermo_equil.loc[
    thermo_equil["Step"] >= sampling_first_step, :
].reset_index(drop=True)

## Not finished: 

In [None]:
# Unfinished cell: draws the 'press' column against the 'step' column of each
# thermo dataset in its own panel, plus a dashed horizontal line at the mean
# taken from row `sampling_first_row` onward.
# NOTE(review): `thermos` and `TransFoci` are not defined or imported in any
# visible cell (the import cell brings in `SumRuleCyl` and `TransFociCyl`).
# The loop treats `thermos` as a mapping whose items are
# (data path, DataFrame) pairs -- confirm before running.
# NOTE(review): `colors` has two entries, so `zip` yields at most two panels.
colors = ['royalblue', 'firebrick']
lineage = 'whole'
geometry = 'biaxial'

# Number of columns of the two-row grid: len(thermos) / 2, rounded up.
if len(thermos) % 2 == 0:
    n_cols = len(thermos)//2
else:
    n_cols = len(thermos)//2 + 1
plt.rcParams.update({
    "text.usetex": True, 
})
mpl.rcParams['font.family'] = "Times New Roman"
mpl.rcParams['mathtext.default'] = "regular"
plt.rcParams['font.size'] = 14
fig, axes = plt.subplots(2, n_cols, sharex=False, figsize=(9,12))
xdata ='step'
ydata= 'press'
thermp_freq = 10000
equilibration_steps = 1000000
sampling_first_step = equilibration_steps + thermp_freq
# With the values above this is row 101 (1010000 // 10000).
sampling_first_row = sampling_first_step // thermp_freq
for idx, (ax, color, (data_path, data_org)) in enumerate(zip(axes.flat, colors, thermos.items())):
    # Object built from the data path; its `phi_c_bulk` and `space` attributes
    # are read below.
    data_info = TransFoci(
            data_path,
            geometry=geometry,
            group='bug',
            lineage=lineage
        )
    # Work on a de-duplicated copy with a fresh 0..n-1 index, so the
    # label-based `.loc` slices below start at row `sampling_first_row`.
    data = data_org.copy()
    data.drop_duplicates(inplace=True)
    data.reset_index(inplace=True, drop=True)
    #data = data.iloc[sampling_first_row:,:]
    y_mean = data.loc[sampling_first_row:,ydata].mean()
    ax.set_title(fr"$\phi_c^{{bulk}}={np.round(data_info.phi_c_bulk,3)}$")
    # Dashed horizontal line at the mean of the plotted rows.
    ax.axhline(
        y=y_mean, alpha=0.8, ls='--',
        c=color, 
        label=fr"$\bar{{P}}={np.round(y_mean,3)}$"
        )
    ax.plot(
        data.loc[sampling_first_row:,xdata],
        data.loc[sampling_first_row:,ydata], color=color, alpha=0.7,
            label=r"$P(t)$"
    )    
    ax.set_ylabel(r"Pressure, $P(t)$")
    #ax.set_xlabel(r"Time, ${t}/{\Delta t}$")
    ax.set_xlabel(r"Time Step")
    ax.legend()
# `data_info` here is the object left over from the last loop iteration.
fname = ydata+"-time-"+data_info.space+"."+'pdf'
fig.tight_layout()
plt.savefig(fname, bbox_inches='tight')
#plt.close()

In [None]:
# Unfinished cell: runs `analyzer.error_calc_block` on the 'eVdwl' column of
# each thermo dataset and plots result['si'] with error bars against
# result['ntransfroms'].
# NOTE(review): `analyzer` is not imported in any visible cell, and `colors`
# and `thermos` come from other cells -- confirm they exist before running.
_, axes = plt.subplots(nrows=2, ncols=1, figsize=(16, 9))
for idx, (ax, color, (data_path, data_org)) in enumerate(zip(axes.flat, colors, thermos.items())):
    #transition_time = 5500
    data = data_org.copy()
    data.drop_duplicates(inplace=True)
    # No `drop=True` here, so the old index is kept as a column.
    data.reset_index(inplace=True)
    #Use of the following options:
    #data = gyr_t[:transition_time] # before transition at t~5500
    data = data['eVdwl'].to_numpy() # after transition at t~5500
    # Skip the first 101 values.
    # NOTE(review): presumably the equilibration rows (the neighbouring cells
    # compute sampling_first_row = 101) -- confirm.
    data = data[101:]
    #data = gyr_t # whole date
    # The second argument is passed as a path-like string; its use is not
    # visible from this notebook.
    result = analyzer.error_calc_block(data, './block_analysis') 
    #ax.plot(result['si'])
    ax.grid(True, which="both")
    ax.errorbar(result['ntransfroms'], result['si'], yerr=result['si_err'], fmt='--o')
    ax.set_xlabel(r"Number of transformation, $n_{block}$")
    ax.set_ylabel(r"Statistical inefficiency, $s(n_{block})$")

In [None]:
# Unfinished cell: same layout as the pressure time-series cell, but calls
# `correlations(...)` on the 'press' column instead of plotting the series.
# NOTE(review): `thermos`, `TransFoci` and `correlations` are not defined or
# imported in any visible cell -- confirm before running.
# NOTE(review): `colors` has two entries, so `zip` yields at most two panels.
colors = ['royalblue', 'firebrick']
lineage = 'whole'
geometry = 'biaxial'

# Number of columns of the two-row grid: len(thermos) / 2, rounded up.
if len(thermos) % 2 == 0:
    n_cols = len(thermos)//2
else:
    n_cols = len(thermos)//2 + 1
plt.rcParams.update({
    "text.usetex": True, 
})
mpl.rcParams['font.family'] = "Times New Roman"
mpl.rcParams['mathtext.default'] = "regular"
plt.rcParams['font.size'] = 14
fig, axes = plt.subplots(2, n_cols, sharex=False, figsize=(9,12))
xdata ='step'
ydata= 'press'
thermp_freq = 10000
equilibration_steps = 1000000
sampling_first_step = equilibration_steps + thermp_freq
# With the values above this is row 101 (1010000 // 10000).
sampling_first_row = sampling_first_step // thermp_freq
for idx, (ax, color, (data_path, data_org)) in enumerate(zip(axes.flat, colors, thermos.items())):
    # Object built from the data path; its `phi_c_bulk` and `space` attributes
    # are read below.
    data_info = TransFoci(
            data_path,
            geometry=geometry,
            group='bug',
            lineage=lineage
        )
    # Work on a de-duplicated copy with a fresh 0..n-1 index.
    data = data_org.copy()
    data.drop_duplicates(inplace=True)
    data.reset_index(inplace=True, drop=True)
    #data = data.iloc[sampling_first_row:,:]
    # `y_mean` is computed but not used in this cell.
    y_mean = data.loc[sampling_first_row:,ydata].mean()
    ax.set_title(fr"$\phi_c^{{bulk}}={np.round(data_info.phi_c_bulk,3)}$")
    # NOTE(review): `ax` is not passed to `correlations` and its return value
    # is discarded, so whether anything is drawn on this panel cannot be told
    # from here.
    correlations(data.loc[sampling_first_row:,ydata], lags=40)
    ax.set_ylabel(r"Pressure, $P(t)$")
    #ax.set_xlabel(r"Time, ${t}/{\Delta t}$")
    ax.set_xlabel(r"Time Step")
    ax.legend()
# `data_info` here is the object left over from the last loop iteration.
# NOTE(review): the file name is the same expression as in the pressure
# time-series cell, so this would overwrite that figure -- confirm.
fname = ydata+"-time-"+data_info.space+"."+'pdf'
fig.tight_layout()
plt.savefig(fname, bbox_inches='tight')
#plt.close()
#plt.close()