# Imports

In [None]:
#export
import os
import sys
sys.path.append(os.path.join(os.getcwd(),'exp'))

import json
import yaml
import datetime
import collections
from pathlib import Path
from collections import OrderedDict

import numpy as np
import pandas as pd
from nb_base_exp import *#BaseExperiment, BaseProject, load_txt_log
from config import cfg

In [None]:
import plotly.express as px
import shutil
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

# Code

In [None]:
#export
def load_log(path):
    """Load a run's ``log.txt`` into a DataFrame indexed by epoch.

    Args:
        path: ``pathlib.Path``-like object; the first result of
            ``path.rglob('*/*/log.txt')`` is the file that gets read.

    Returns:
        DataFrame indexed by ``epoch`` with columns ``acc``, ``val_acc`` and
        ``duration`` (time elapsed since the first logged row).

    Raises:
        IndexError: if no ``log.txt`` matches the pattern.
    """
    columns = ['date', 'epoch', 'acc', 'val_acc']
    types = [str, np.int32, np.float32, np.float32]
    log_path = list(path.rglob('*/*/log.txt'))[0]

    _, logs = load_txt_log(path=log_path, types=types)

    df = pd.DataFrame(logs, columns=columns)
    df = df.dropna()
    df = df.astype(dict(zip(columns, types)))

    # A val_acc of -1 is turned into NaN and then forward-filled, so such rows
    # carry the previous row's value. Plain assignment is used instead of a
    # chained in-place replace, which is unreliable on a column selection.
    df['val_acc'] = df['val_acc'].replace(-1., np.nan)
    df = df.ffill()
    df = df.set_index('epoch')

    stamps = pd.to_datetime(df['date'].str.strip(), format='%H:%M:%S')
    del df['date']

    # The log stores time of day only. Every backwards jump between
    # consecutive rows is treated as a midnight rollover and shifts the
    # following rows by one more day, keeping durations monotonic.
    rollovers = (stamps.diff() < pd.Timedelta(0)).cumsum()
    stamps = stamps + pd.to_timedelta(rollovers, unit='D')

    # Positional access: the index holds epoch labels, which need not
    # contain 0.
    df['duration'] = stamps - stamps.iloc[0]
    return df

def load_yaml_cfg(path):
    """Read a run's ``train.yaml`` and return it flattened and key-sorted.

    The first result of ``path.rglob('*/*/train.yaml')`` is parsed with
    ``yaml.safe_load`` and passed through ``flatten(..., sep='|')``; the
    resulting items are returned as an ``OrderedDict`` in ascending key order.
    """
    matches = list(path.rglob('*/*/train.yaml'))
    with open(matches[0], 'r') as stream:
        nested = yaml.safe_load(stream)

    flat = flatten(nested, sep='|')

    ordered = OrderedDict()
    for key, value in sorted(flat.items(), key=lambda item: item[0]):
        ordered[key] = value
    return ordered

def load_json_cfg(path):
    """Read a run's ``params.json`` and return it flattened and key-sorted.

    The first result of ``path.rglob('*/*/params.json')`` is parsed with
    ``json.load`` and passed through ``flatten(..., sep='|')``; the resulting
    items are returned as an ``OrderedDict`` in ascending key order.
    """
    matches = list(path.rglob('*/*/params.json'))
    with open(matches[0], 'r') as stream:
        nested = json.load(stream)

    flat = flatten(nested, sep='|')

    ordered = OrderedDict()
    for key, value in sorted(flat.items(), key=lambda item: item[0]):
        ordered[key] = value
    return ordered

def load_cfg(*args, **kwargs):
    """Config reader entry point: forwards every argument to ``load_json_cfg``."""
    return load_json_cfg(*args, **kwargs)

def check_valid(cpath):
    """Return True when a run directory holds both required files.

    Looks for entries named ``output`` anywhere below ``cpath``, takes the
    first one found, and checks that ``log.txt`` and ``params.json`` both
    appear among the entries exactly two levels beneath it.
    """
    required = {'log.txt', 'params.json'}

    output_dirs = list(Path(cpath).rglob('output'))
    if not output_dirs:
        # Nothing to inspect, so the required files cannot be present.
        return False

    found = {entry.name for entry in output_dirs[0].glob('*/*')}
    return required <= found



In [None]:
#export
class GenExperiment(BaseExperiment):
    """Experiment whose log and config are read with ``load_log`` / ``load_cfg``.

    ``start_time`` is parsed from the experiment name at construction.
    """

    def __init__(self, path):
        super().__init__(path, log_reader=load_log, cfg_reader=load_cfg)
        self.start_time = self.parse_name()

    def parse_name(self, prefix='run_'):
        """Parse the run's start time out of ``self.name``.

        Args:
            prefix: leading text removed from the name before parsing.

        Returns:
            ``datetime.datetime`` parsed with ``'%Y_%b_%d_%H_%M_%S_%f'``.

        Raises:
            ValueError: if the remaining text does not match that format.
        """
        date = self.name
        # Remove the prefix as a whole. str.strip(prefix) would instead strip
        # any of the characters 'r', 'u', 'n', '_' from both ends.
        if prefix and date.startswith(prefix):
            date = date[len(prefix):]
        return datetime.datetime.strptime(date, '%Y_%b_%d_%H_%M_%S_%f')

    def total_time(self):
        """Return the ``duration`` value at position ``len(self) - 1`` of the log."""
        # Positional lookup: the log index holds epoch labels, so a label
        # lookup with len - 1 picks the wrong row (or fails) unless the labels
        # are exactly 0..n-1.
        # NOTE(review): assumes len(self) is the number of log rows — confirm
        # against BaseExperiment.__len__.
        return self.log_data['duration'].iloc[len(self) - 1]
    
class GenProject(BaseProject):
    """Project of ``GenExperiment`` runs, filtered with ``check_valid``."""

    def __init__(self, root):
        super().__init__(root, valid_func=check_valid, Experiment=GenExperiment)

    def extend_base(self):
        """Return the base table extended with per-run summary columns.

        Columns added for each run:
            name -- run name
            tt   -- ``run.total_time()``
            t2b  -- ``duration`` of the first row returned by
                    ``run.best(num=3, col='val_acc')``
            ba   -- mean ``acc`` over those rows, rounded to 3 places
            bva  -- mean ``val_acc`` over those rows, rounded to 3 places
            la   -- mean ``acc`` over the last 10 log rows, rounded to 3 places

        The result is indexed by each run's ``start_time``.
        """
        rows = {}
        for exp in self.exps:
            top = exp.best(num=3, col='val_acc')
            top_mean = top.mean()
            rows[exp.name] = {
                'start': exp.start_time,
                'name': exp.name,
                'tt': exp.total_time(),
                't2b': top['duration'].values[0],
                'ba': round(top_mean[['acc']].values[0], 3),
                'bva': round(top_mean[['val_acc']].values[0], 3),
                'la': round(exp.log_data['acc'][-10:].mean(), 3),
            }

        # One row per run, keyed by run name, then joined column-wise with the
        # base table.
        table = pd.DataFrame(rows).T
        table = pd.concat([table, self.base_table()], axis=1)
        table.set_index('start', drop=True, inplace=True)

        # The transposed frame holds generic objects; make the metric columns
        # numeric so they can be rounded.
        for metric in ('ba', 'la', 'bva'):
            table[metric] = table[metric].astype(float)

        return table.round(4)

In [None]:
def plot(cols=None, window=1, limits=(-1, -1), ax=None, x=None, exp=None):
    """Plot rolling means of log columns for one experiment.

    Args:
        cols: column names to plot; defaults to ``['acc']``. The caller's
            list is not modified.
        window: rolling-mean window applied to every selected column except
            ``'duration'``.
        limits: ``(left, right)`` positional row bounds; ``left`` is clamped
            to 0 and ``right <= 0`` means "to the end of the log".
        ax: matplotlib axes to draw on; a new figure is created when omitted.
        x: optional column used as the x axis; it is added to the selection.
        exp: object with a ``log_data`` DataFrame. When omitted, the global
            ``e`` is used, as this function did before the parameter existed.
    """
    if ax is None:
        _, ax = plt.subplots(1)

    # Work on a copy so appending ``x`` never changes the caller's list.
    cols = ['acc'] if cols is None else list(cols)
    if x is not None and x not in cols:
        cols.append(x)

    if exp is None:
        exp = e  # notebook-global fallback; raises NameError if undefined
    data = exp.log_data

    llim, rlim = limits
    rlim = len(data) if rlim <= 0 else rlim
    llim = max(0, llim)

    # Explicit copy: the smoothed values are written into the selection, not
    # into the experiment's log.
    d = data.iloc[llim:rlim, :].loc[:, cols].copy()
    for c in cols:
        if c != 'duration':
            d[c] = d[c].rolling(window).mean()

    d.plot(figsize=(12, 7), ax=ax, x=x)
        
def mod_df(df, col='acc', window=1, div=1):
    """Return a two-column copy of ``df`` prepared for plotting.

    Args:
        df: DataFrame with a timedelta ``'duration'`` column and column ``col``.
        col: name of the value column to keep and smooth.
        window: rolling-mean window applied to ``col``.
        div: divisor applied to the duration in seconds (e.g. 60 for minutes).

    Returns:
        New DataFrame with ``col`` (rolling mean) and ``'duration'`` (total
        elapsed seconds divided by ``div``, as float). ``df`` is not modified.
    """
    d = df.loc[:, [col, 'duration']].copy()

    # total_seconds() keeps the days part of the timedelta; ``.seconds`` only
    # holds the sub-day remainder and so wraps around after 24 hours.
    # Whole-column assignment replaces the timedelta column with floats
    # instead of trying to fit floats into the existing dtype.
    d['duration'] = pd.to_timedelta(d['duration']).dt.total_seconds() / div

    d[col] = d[col].rolling(window).mean()
    return d

def plot_run(run, col, ax, window=5):
    """Draw one run's smoothed ``col`` against ``'duration'`` on ``ax``.

    The run's ``log_data`` is prepared with ``mod_df`` using the given
    rolling ``window`` before plotting.
    """
    smoothed = mod_df(run.log_data, col=col, window=window)
    smoothed.plot(x='duration', ax=ax)

# Tests

In [None]:
# Build the project from the runs directory named in the config, then compute
# the per-run summary table used by the cells below.
root = Path(cfg.DAG.RUNS)
#root = Path('../__crsch_cycle/cycler_runs/')

bp = GenProject(root)
df = bp.extend_base()

In [None]:
#df.sort_index(ascending=False).head(5)
df.drop(['name'], axis=1).sort_values('bva', ascending=False).head(10)

In [None]:
# Print the mean 'bva' over all runs, then plot its 3-run rolling mean with
# the runs ordered by the table index.
s = df['bva']
print(s.mean())
s = s.sort_index()
plt.plot(s.rolling(3).mean().values)

In [None]:
df['tt'].sum()

In [None]:
# df = df[df['ba']>.3]
# df.shape

In [None]:
# df = df[df['crossover_chance']==1.99]
# df.shape
# names = df.sort_index()['name'][:30].values
# p = Path('../__crsch_cycle/cycler_runs/')
# for name in names:
#     pi = p/name
#     shutil.rmtree(pi)

In [None]:
# Parallel-coordinates view of the listed hyper-parameter columns together
# with 'bva'; lines are colored by column k.
k = 'bva'
tdf = df.sort_index(ascending=False)#[:80]
fig = px.parallel_coordinates(tdf, color=k,
              dimensions=['genom|combine_chance', 'genom|crossover_chance', 'genom|mutate_chance', 'post|exp_power',
                         #'dec_f0','dec_f1','dec_f2','dec_f3',
                          'bva'],
              color_continuous_scale=px.colors.diverging.Tealrose,
              # Color scale runs from 0.45 up to the largest value of k.
              range_color=[0.45,df[k].max()],
              color_continuous_midpoint=.5)
fig.show()

In [None]:
# Pick the runs to plot: the first row of the table plus the rows at
# positions 16 and 17 when sorted by 'bva' in descending order.
plot_df = df[:1]
top = df.sort_values(by='bva', ascending=False)[16:18]
plot_df = pd.concat([plot_df,top])

In [None]:
# Plot the smoothed 'acc' curve of every experiment selected in plot_df on one
# shared axes.
rs = bp.exps
f, a = plt.subplots(1, figsize=(15,8))
#l_lim, r_lim = 0, 1000

# Build the name lookup once instead of scanning the column for every run.
selected = set(plot_df['name'])
rsp = [r for r in rs if r.name in selected]

# Plain loop: plot_run is called for its drawing side effect only, so there is
# no list worth building.
for r in rsp:
    plot_run(r, col='acc', ax=a, window=25)

a.legend([r.name for r in rsp])
#a.legend(['logs'])

#a.set_xticks(np.arange(l_lim, 700, 10))
#a.set_yticks(np.arange(0.1, .9, 0.05))
plt.ylim(0.0,.6)
plt.grid()

# More tests

In [None]:
root = Path('./results/bo3/')
bp = GenProject(root)
df = bp.extend_base()

In [None]:
df.sort_values('bva', ascending=False).head(5)

In [None]:
df.shape

In [None]:
class HyperSet:
    """A set of hyper-parameter values taken from a config, plus a target score."""

    def __init__(self, cfg, param_list, target):
        """
        Args:
            cfg: mapping from parameter name to value.
            param_list: names of the parameters to extract from ``cfg``.
            target: score associated with this parameter set.
        """
        self.cfg = cfg
        self.params = param_list
        self.target = target

    def read_cfg(self, map_names=None):
        """Return ``{mapped_name: cfg[name]}`` for every name in ``self.params``.

        Args:
            map_names: mapping from parameter name to output key. ``None``
                (the default, matching ``create_record``) keeps the names
                unchanged.

        Raises:
            KeyError: if a parameter is missing from ``map_names`` or from
                the config.
        """
        if map_names is None:
            map_names = {p: p for p in self.params}
        return {map_names[p]: self.cfg[p] for p in self.params}

    def create_record(self, map_names=None):
        """Return ``{'points': <read_cfg result>, 'target': <target>}``."""
        return {
            'points': self.read_cfg(map_names),
            'target': self.target,
        }

In [None]:
base_e = bp.exps[0]
base_e.cfg_data

In [None]:
from exp.nb_base_exp import flatten

In [None]:
# Merge a list of ('|'-separated key, value) pairs into a single dict `d`,
# one pair at a time.
pairs = [('TRAIN|DS', ['data1']), ('TRAIN|LR',1e-5), ('MODEL|ARCH|n1', 8)]
sep = '|'  # NOTE(review): not used anywhere in this cell

d = {}
for p in pairs:
    # NOTE(review): create_nested, dict_merge and defaultdict are not imported
    # by name in this file; presumably they arrive through the star import of
    # nb_base_exp — confirm.
    di = create_nested(defaultdict(dict), *p)
    dict_merge(d, di)
d

In [None]:
with open('./train.yaml', 'r') as f:
    d = yaml.safe_load(f)

In [None]:
flat_d = flatten(d, sep='|')

In [None]:
k, v = list(flat_d.items())[17]
k, v

In [None]:
with open('./train_mod.yaml', 'w') as f:
    yaml.safe_dump(d, f)

In [None]:
# Collect one record per experiment: the selected config values (under their
# short names) and the best value of the target column.
bo_params = ['combine_chance', 'crossover_chance', 'mutate_chance']
lat_names = ['cc', 'cr','mc']
map_names = dict(zip(bo_params,lat_names))

bo_target = 'val_acc'

hps = []
for e in bp.exps:
    target = e.log_data[bo_target].max()
    # Named exp_cfg rather than cfg so the `cfg` object imported from config
    # at the top of the notebook is not overwritten; cells that read
    # cfg.DAG.RUNS can still be re-run after this one.
    exp_cfg = e.cfg_data
    hp = HyperSet(exp_cfg, bo_params, target)
    rec = hp.create_record(map_names)
    hps.append(rec)


In [None]:
hps

In [None]:
# Collect, for every experiment, the max of the bo_target log column and the
# config values of the bo_params names (keys kept as-is) as plain dicts.
hp_warm = []
for e in bp.exps:
    target = e.log_data[bo_target].max()
    points = {}
    for p in bo_params:
        points[p] = e.cfg_data[p]
    hp_warm.append({'target': target,
                    'points': points})

In [None]:
hp_warm

# Exports

In [None]:
!python3 extra/n2s.py gen_exp.ipynb 