In [1]:
import numpy as np
import pandas as pd
import glob
import sys,os
sys.path.append('..')
sys.path.append('../../../../utils')
sys.path.append('../../../../third')
from multiprocessing import Pool, Manager, cpu_count 
import pymp
import qgrid
import plotly.express as px
import gezi
from gezi import tqdm, line
tqdm.pandas()
from IPython.display import display

import pandas as pd
# Raise pandas' display limits (rows, columns, line width) so the wide
# per-model tables produced later in the notebook are not truncated.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [2]:
%%html
<style>
/* Let output areas grow to their content instead of collapsing into a
   scroll box: height is automatic, capped by a very large max-height. */
.output_wrapper, .output {
    height:auto !important;
    max-height:10000px;  /* your desired max-height here */
}
/* Remove the inset shadow drawn on scrolled outputs. The vendor-prefixed
   property needs its leading dash; without it the rule is ignored. */
.output_scroll {
    -webkit-box-shadow:none !important;
    box-shadow:none !important;
}
</style>

In [3]:
def gen_df(root):
  """Load every ``{root}/*/metrics.csv`` into one concatenated DataFrame.

  Files are read in parallel with ``pymp``. Each frame gets four extra
  columns before concatenation:

  * ``model`` - name of the directory that contains the file
  * ``mtime`` / ``ctime`` - the file's ``os.path.getmtime`` / ``getctime``
  * ``step``  - 1-based row number within that file

  Args:
    root: directory whose immediate sub-directories hold ``metrics.csv``.

  Returns:
    The concatenated DataFrame, with files ordered by ascending mtime, or
    ``None`` when no file matches or every matching file is rejected by
    ``gezi.non_empty``.
  """
  pattern = f'{root}/*/metrics.csv'
  files = glob.glob(pattern)
  if not files:
    return None
  files = sorted(files, key=os.path.getmtime)
  ps = min(len(files), cpu_count())
  # Use the manager as a context manager so its server process is shut down
  # when loading finishes instead of lingering after every call.
  with Manager() as manager:
    shared = manager.list()
    with pymp.Parallel(ps) as p:
      for i in p.range(len(files)):
        file = files[i]
        if not gezi.non_empty(file):
          continue
        df = pd.read_csv(file)
        df['model'] = os.path.basename(os.path.dirname(file))
        df['mtime'] = os.path.getmtime(file)
        df['ctime'] = os.path.getctime(file)
        df['step'] = range(1, len(df) + 1)
        # Keep the file's position: workers finish in arbitrary order, so the
        # append order alone would not preserve the mtime ordering.
        shared.append((i, df))
    # Copy out of the proxy while the manager is still alive.
    collected = list(shared)
  if not collected:
    # Every candidate was skipped; pd.concat([]) would raise ValueError.
    return None
  collected.sort(key=lambda item: item[0])
  return pd.concat([frame for _, frame in collected])

In [4]:
def gen_history(root):
  """Load every ``{root}/*/history.csv`` into one concatenated DataFrame.

  Files are read in parallel with ``pymp``. Each frame gets four extra
  columns before concatenation:

  * ``model`` - name of the directory that contains the file
  * ``mtime`` / ``ctime`` - the file's ``os.path.getmtime`` / ``getctime``
  * ``step``  - 1-based row number within that file

  Args:
    root: directory whose immediate sub-directories hold ``history.csv``.

  Returns:
    The concatenated DataFrame, with files ordered by ascending mtime, or
    ``None`` when no file matches or every matching file is rejected by
    ``gezi.non_empty``.
  """
  pattern = f'{root}/*/history.csv'
  files = glob.glob(pattern)
  if not files:
    return None
  files = sorted(files, key=os.path.getmtime)
  ps = min(len(files), cpu_count())
  # Use the manager as a context manager so its server process is shut down
  # when loading finishes instead of lingering after every call.
  with Manager() as manager:
    shared = manager.list()
    with pymp.Parallel(ps) as p:
      for i in p.range(len(files)):
        file = files[i]
        if not gezi.non_empty(file):
          continue
        df = pd.read_csv(file)
        df['model'] = os.path.basename(os.path.dirname(file))
        df['mtime'] = os.path.getmtime(file)
        df['ctime'] = os.path.getctime(file)
        df['step'] = range(1, len(df) + 1)
        # Keep the file's position: workers finish in arbitrary order, so the
        # append order alone would not preserve the mtime ordering.
        shared.append((i, df))
    # Copy out of the proxy while the manager is still alive.
    collected = list(shared)
  if not collected:
    # Every candidate was skipped; pd.concat([]) would raise ValueError.
    return None
  collected.sort(key=lambda item: item[0])
  return pd.concat([frame for _, frame in collected])

In [25]:
# Selectors for the run to inspect; both are interpolated into the results
# path as ../working/{mark}/{v}.
v = 34  # presumably an experiment/version number - confirm
mark = 'offline'
# mark = 'online'

In [26]:
# Directory whose immediate sub-directories are scanned for metrics.csv
# (gen_df) and history.csv (gen_history).
root = f'../working/{mark}/{v}'

In [27]:
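# Optional: uncomment to load the per-step history; show_loss() reads the
# global `history` and fails with NameError while this stays disabled.
# history = gen_history(root)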

In [28]:
def show_loss():
  """Pivot the mean ``val_loss`` of the global ``history`` by step and model.

  Reads the module-level ``history`` DataFrame, which must provide the
  columns ``step``, ``model`` and ``val_loss``.

  Returns:
    DataFrame indexed by ``step`` with one column per ``model``; each cell is
    the mean ``val_loss`` for that (step, model) pair.
  """
  mean_loss = history.groupby(['step', 'model'])['val_loss'].mean().reset_index()
  # DataFrame.pivot takes keyword-only arguments from pandas 2.0 onwards.
  return mean_loss.pivot(index='step', columns='model', values='val_loss')

In [29]:
# show_loss()
# Model-name filter read by show(); an empty list keeps every model.
models = []

In [30]:
def show(key, action='score', num=100, step=4):
  """Plot and tabulate the metric ``'{key}/{action}'`` for each model.

  Reads the module-level ``df`` (one row per model and step) and the
  module-level ``models`` list; when ``models`` is non-empty only those
  models are kept.

  Displays, in order: the figures returned by ``line`` (drawn from all steps),
  a table of ``model`` and metric sorted ascending by the metric and limited
  to ``num`` rows, and a bar chart of that table labelled with the metric
  formatted to four decimals.

  Args:
    key: metric prefix, e.g. ``'all'``.
    action: metric suffix.
    num: maximum number of rows shown in the table and bar chart.
    step: only rows with this ``step`` value feed the table, bar chart and
      returned pivot; a falsy value keeps every step.

  Returns:
    DataFrame indexed by ``step`` with one column per ``model`` holding the
    mean metric value.
  """
  metric = f'{key}/{action}'
  d = df
  if models:
    d = d[d.model.isin(models)]
  # Build the curves before the step filter so they cover the whole run.
  figs = line(d, metric, x='step', color='model', return_figs=True)
  if step:
    # Filter on the requested step instead of a fixed value.
    d = d[d.step == step]
  # Keyword arguments: DataFrame.pivot is keyword-only from pandas 2.0.
  res = d.groupby(['step', 'model'])[metric].mean().reset_index() \
      .pivot(index='step', columns='model', values=metric)
  for fig in figs:
    display(fig)
  ranked = d[['model', metric]].sort_values([metric], ascending=True) \
      .reset_index(drop=True).head(num)
  display(ranked)
  # assign() returns a new frame, so the displayed table is left untouched and
  # no column is written onto a slice.
  label = f'{metric}_str'
  bars = ranked.assign(**{label: ['%.4f' % x for x in ranked[metric].values]})
  display(px.bar(bars, x='model', y=metric,  color='model', text=label))
  return res

In [31]:
# Load the per-model metrics under `root` into the global frame that show()
# reads; gen_df returns None when no metrics.csv is found.
df = gen_df(root)

In [32]:
# models = ['doc', 'feed', 'feed.freeze']
# Empty filter: compare every model on the 'all/score' metric.
models = []
show('all')

Unnamed: 0,model,all/score
0,pos.action,0.680845
1,pos.action.neg.finish,0.681274
2,pos,0.682432
3,action,0.682843
4,pos.action.neg.finish.today,0.68355
5,baseline,0.683701
6,pos.action.neg,0.685866


model,action,baseline,pos,pos.action,pos.action.neg,pos.action.neg.finish,pos.action.neg.finish.today
step,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
4,0.682843,0.683701,0.682432,0.680845,0.685866,0.681274,0.68355


In [33]:
# Compare models on the 'hotdoc/score' metric.
show('hotdoc')

Unnamed: 0,model,hotdoc/score
0,pos,0.681721
1,pos.action,0.682009
2,pos.action.neg.finish,0.682037
3,action,0.682392
4,baseline,0.683742
5,pos.action.neg.finish.today,0.683755
6,pos.action.neg,0.685326


model,action,baseline,pos,pos.action,pos.action.neg,pos.action.neg.finish,pos.action.neg.finish.today
step,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
4,0.682392,0.683742,0.681721,0.682009,0.685326,0.682037,0.683755


In [34]:
# Compare models on the 'colddoc/score' metric.
show('colddoc')

Unnamed: 0,model,colddoc/score
0,pos.action.neg.finish,0.68409
1,action,0.688053
2,pos.action.neg.finish.today,0.690738
3,pos.action,0.691285
4,pos.action.neg,0.694584
5,pos,0.697374
6,baseline,0.697567


model,action,baseline,pos,pos.action,pos.action.neg,pos.action.neg.finish,pos.action.neg.finish.today
step,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
4,0.688053,0.697567,0.697374,0.691285,0.694584,0.68409,0.690738
