In [None]:
from glob import glob
import pandas as pd
import seaborn as sns
from PipeLine import *

## Analysis

In [None]:
# Run configuration: geometry label handed to the log parser and the location
# of the raw log groups.
# NOTE(review): `database` is a machine-specific absolute path; consider making
# it configurable before sharing this notebook.
geometry = 'cylindrical'
database = '/Users/amirhsi_mini/logs/'
# `database` already ends with "/", so the pattern below contains a doubled slash.
log_groups = glob(f"{database}/N*-logs/")

In [None]:
# Analyze the log files of every log group found under `database`.
for log_group in log_groups:
    log_paths = glob(log_group + '/N*.log')
    if not log_paths:
        # Without this guard `log_files[0][0]` below raises IndexError and
        # aborts the processing of all remaining groups.
        print(f"No 'N*.log' files found in {log_group}; skipping.")
        continue
    log_files = PipeLine.file_reader(log_paths, extensions=['.log'])
    # File name (directories stripped) of the first entry; it is used to derive
    # the output names for the whole group.
    # NOTE(review): file_reader presumably returns a sequence of tuples of
    # paths -- confirm against PipeLine.
    sample_log = log_files[0][0].split("/")[-1]
    details_out, runtime_out = PipeLine.log_outputs(sample_log, geometry)
    PipeLine.lammps_log_details(log_files, details_out, runtime_out)

## Visualization

In [None]:
# Gather the CSV summaries and hand them to the pipeline's file reader together
# with the two suffixes ('_runtime.csv', '_details.csv') it should look for.
log_pairs = PipeLine.file_reader(
    glob('../sumrule_logs_csvs/*.csv'),
    extensions=['_runtime.csv', '_details.csv'],
)

In [None]:
# Load every (runtime, details) CSV pair and merge each kind into one table.
runtime_dfs = []
details_dfs = []
for log_pair in log_pairs:
    # First element of the pair is read as the runtime table.
    runtime_df = pd.read_csv(log_pair[0])
    # Parse the wall-time strings into timedeltas and derive the duration in hours.
    runtime_df['wall_time'] = pd.to_timedelta(runtime_df['wall_time'])
    runtime_df['total_time_hr'] = runtime_df['wall_time'].dt.total_seconds() / 3600
    runtime_dfs.append(runtime_df)

    # Second element of the pair is read as the details table.
    details_df = pd.read_csv(log_pair[1])
    details_dfs.append(details_df)

if not runtime_dfs:
    # pd.concat would raise a ValueError here anyway; say why instead.
    raise ValueError("No runtime/details CSV pairs were found; nothing to concatenate.")

# ignore_index=True gives the combined frames a fresh 0..n-1 index. Without it
# every file contributes its own 0..k labels, so the merged frames (and the
# index column written to the CSVs) contain duplicate row labels.
df_of_runtimes = pd.concat(runtime_dfs, ignore_index=True)
df_of_runtimes.to_csv('log-runtime-all_in_one.csv')
df_of_details = pd.concat(details_dfs, ignore_index=True)
df_of_details.to_csv('log-details-all_in_one.csv')

In [None]:
# Per-core-count summary statistics of every column in the runtime table.
summary_stats = ['mean', 'median', 'std', 'min', 'max', 'count']
log_stat = df_of_runtimes.groupby('ncores').agg(summary_stats)
# Flatten the (column, statistic) MultiIndex into names such as 'natoms_median'.
log_stat.columns = ['_'.join([column, stat]).strip() for column, stat in log_stat.columns.values]
log_stat = log_stat.reset_index()
log_stat = log_stat.assign(
    natoms_median_per_core=log_stat['natoms_median'] / log_stat['ncores'],
    # NOTE(review): despite its name this is the plain ratio std / mean; it is
    # not multiplied by 100.
    std_percentage=log_stat['total_time_hr_std'] / log_stat['total_time_hr_mean'],
)
log_stat.to_csv('log_stat_summary.csv')

In [None]:
# Wall time against particle count in a single panel, with one regression line
# per core count.
data = df_of_runtimes
facet_grid = sns.lmplot(
    data=data,
    x="natoms",
    y="total_time_hr",
    hue='ncores',
    legend_out=False,
)
facet_grid.tight_layout()
facet_grid.set_axis_labels("Particles", "Wall time (hours)")
facet_grid.legend.set_title("CPUs")
facet_grid.set(xlim=(0, 200000), ylim=(0, 45))
# Relabel the x ticks in thousands of particles, e.g. 50000 -> "50K".
xlabels = [f"{tick / 1000:,.0f}K" for tick in facet_grid.ax.get_xticks()]
facet_grid.set_xticklabels(xlabels)
output = "../walltime_vs_natoms.pdf"
facet_grid.savefig(output)

# Same relation as a row of panels: one regression plot per core count, each
# panel with its own x and y range.
g = sns.FacetGrid(data, col="ncores", sharex=False, sharey=False)
g.map(sns.regplot, "natoms", "total_time_hr")
g.set_titles(col_template="{col_name} CPUs")
g.set_axis_labels("Particles", "Wall time (hours)")

# Fix the range of the first panel BEFORE touching the tick labels, so the
# labels always describe the ticks that are actually drawn.
g.axes[0][0].set_xlim((1000, 5000))

# Label x ticks in thousands of particles. A value-based formatter is used
# instead of a frozen list of label strings, so the labels stay correct when
# the limits change. Iterating over g.axes.flat covers however many panels
# exist, rather than assuming exactly six. The 'g' format keeps fractional
# values (1500 -> "1.5K") instead of rounding them to a neighbouring integer.
# Passing a plain function to set_major_formatter needs Matplotlib >= 3.3.
for ax in g.axes.flat:
    ax.xaxis.set_major_formatter(lambda value, _pos: f"{value / 1000:,g}K")

g.tight_layout()
output = "../walltime_vs_natoms_separate.pdf"
g.savefig(output)