In [None]:
import os
from glob import glob
import json
import random

import numpy as np
import pandas as pd
import torch
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

%matplotlib inline

In [None]:
def _read_json(path):
    """Load and return the JSON document stored at ``path``."""
    # ``with`` guarantees the handle is closed even if parsing fails.
    with open(path) as fh:
        return json.load(fh)


def _resolve_model_name(cfg_dict):
    """Map a run's configured model to the name used in the result tables.

    Returns ``None`` for models that are not reported, which tells the caller
    to skip the run.
    """
    model = cfg_dict['model']
    if model in ('DAN_sim', 'DAN_alt'):
        # A DAN run with alpha == 0 is reported as the plain CNN baseline.
        return 'CNN' if cfg_dict['alpha'] == 0 else 'DAN'
    if model == 'AFLAC':
        # Only the 'dependent_y' variant keeps the AFLAC name; any other
        # ``p_d`` setting is reported as the ablation.
        return 'AFLAC' if cfg_dict['p_d'] == 'dependent_y' else 'AFLAC-Abl'
    if model == 'CIDDG':
        return 'CIDDG'
    return None


def create_df(files, v_tag, t_tags, extract_func, drop):
    """Collect one row per completed experiment into a DataFrame.

    Args:
        files: iterable of TensorBoard event-file paths. ``config.json`` and
            ``run.json`` are read from the ``1`` sub-directory of the
            directory that contains each event file.
        v_tag: name of the validation scalar. Its maximum (times 100) is
            stored under this key, and the index of that maximum is passed to
            ``extract_func``.
        t_tags: scalar tags forwarded to ``extract_func``.
        extract_func: callable ``(event, cfg_dict, v_tag, t_tags, idx)`` that
            returns the (updated) config dict with score entries added.
        drop: column names to remove from the result; names that are not
            present are ignored.

    Returns:
        A DataFrame with the merged ``train_cfg``/``meta_cfg`` entries and the
        extracted scores, missing values replaced by the string ``'NA'``.
        Runs whose status is not ``"COMPLETED"`` (their config path is
        printed) and runs with an unreported model are left out.
    """
    records = []
    for exp_path in files:
        run_dir = os.path.join(os.path.dirname(exp_path), '1')

        # load config.json
        cfg_path = os.path.join(run_dir, 'config.json')
        cfg = _read_json(cfg_path)

        # extract config (meta_cfg wins when both sections share a key)
        cfg_dict = {}
        cfg_dict.update(cfg['train_cfg'])
        cfg_dict.update(cfg['meta_cfg'])

        # load run.json
        run = _read_json(os.path.join(run_dir, 'run.json'))
        if run['status'] != "COMPLETED":
            print(cfg_path)
            continue

        # extract model name
        model_name = _resolve_model_name(cfg_dict)
        if model_name is None:
            continue
        cfg_dict['model'] = model_name

        # extract scores
        event = EventAccumulator(exp_path)
        event.Reload()
        if v_tag in event.Tags()['scalars']:
            # add validation
            w_times, step_nums, vals = zip(*event.Scalars(v_tag))
            cfg_dict[v_tag] = max(vals) * 100
            idx_by_vtag = np.argmax(vals)

            # add scores
            cfg_dict = extract_func(event, cfg_dict, v_tag, t_tags, idx_by_vtag)
        # Runs without the validation scalar are still recorded, just without
        # any score entries.
        records.append(cfg_dict)

    normalize = getattr(pd, 'json_normalize', None)
    if normalize is None:
        # pandas < 1.0 only exposes the function under pandas.io.json.
        normalize = pd.io.json.json_normalize
    df = normalize(records)
    df = df.fillna('NA')
    # Only drop the requested columns that actually exist.
    present = list(set(df.columns) & set(drop))
    return df.drop(present, axis=1)

def summary(df, v_tag, t_tags):
    """Aggregate scores over seeds and keep the best configuration per model.

    Every column other than the score tags and ``'seed'`` is treated as a
    grouping key. For each group the mean and standard error of the score
    columns are computed; groups are ranked by mean validation score
    (descending) and only the top-ranked group of each model is kept.

    Args:
        df: frame with a ``'model'`` column, a ``'seed'`` column, the score
            columns named by ``v_tag`` / ``t_tags``, and any further
            configuration columns. It is not modified.
        v_tag: name of the validation score column used for ranking.
        t_tags: list of test score column names.

    Returns:
        A DataFrame with two-level columns: the grouping keys (second level
        ``''``) followed by ``(tag, 'mean')`` / ``(tag, 'sem')`` pairs.
    """
    # De-duplicate while keeping a deterministic column order (v_tag may also
    # appear in t_tags).
    score_cols = list(dict.fromkeys(t_tags + [v_tag]))
    group_cols = list(df.columns.drop(score_cols + ['seed']))

    # ``np.float`` no longer exists in current NumPy; the builtin is
    # equivalent. ``astype`` returns a new frame, so the caller's data stays
    # untouched.
    typed = df.astype({col: float for col in score_cols})

    stats = typed.groupby(group_cols)[score_cols].agg(['mean', 'sem'])
    ranked = stats.sort_values((v_tag, 'mean'), ascending=False)

    # Pick the first (best) row of every model from the group index. After
    # reset_index the column labels are tuples such as ('model', ''), so a
    # flat 'model' label is not a reliable key for drop_duplicates.
    first_per_model = ~ranked.index.get_level_values('model').duplicated()
    return ranked.reset_index()[first_per_model]


def extract_by_validation(event, cfg_dict, v_tag, t_tags, idx):
    """Record, for each test tag, the score found at position ``idx``.

    ``event.Scalars(tag)`` is expected to yield 3-field records; the third
    field is taken as the score. The value at ``idx`` is scaled by 100 and
    written into ``cfg_dict`` under the tag name. ``v_tag`` is accepted for
    signature compatibility and is not used here.

    Returns the same ``cfg_dict`` object, updated in place.
    """
    for tag in t_tags:
        records = event.Scalars(tag)
        _wall_times, _steps, scores = zip(*records)
        selected = scores[idx]
        cfg_dict[tag] = selected * 100
    return cfg_dict


def result(df_sum, v_tag, t_name):
    """Format a summary as a single table row of ``mean±sem`` strings.

    Args:
        df_sum: frame as returned by ``summary``: a ``'model'`` column, an
            ``'alpha'`` column, and ``(tag, 'mean')`` / ``(tag, 'sem')``
            column pairs. After removing ``'alpha'`` and ``v_tag`` exactly
            one mean/sem pair must remain.
        v_tag: validation tag whose columns are dropped from the output.
        t_name: label used for the single output row.

    Returns:
        A one-row DataFrame indexed by ``t_name`` (index name ``'Target'``)
        with one column per model in the fixed order CNN, DAN, CIDDG,
        AFLAC-Abl, AFLAC. Cells are strings such as ``'95.1±0.3'``; models
        absent from ``df_sum`` show ``'nan±nan'``.

    Raises:
        ValueError: if the remaining score columns are not exactly one
            ``'mean'`` and one ``'sem'`` column.
    """
    scores = df_sum.set_index('model').drop(['alpha', v_tag], axis=1).round(1)
    scores = scores.reindex(['CNN', 'DAN', 'CIDDG', 'AFLAC-Abl', 'AFLAC'])

    # Keep only the statistic names ('mean' / 'sem') as column labels. This
    # replaces the transpose + groupby(axis=1) construction, which current
    # pandas deprecates.
    stats = scores.copy()
    stats.columns = scores.columns.get_level_values(1)
    if sorted(stats.columns) != ['mean', 'sem']:
        raise ValueError(
            "expected exactly one mean/sem column pair, got %r" % (list(scores.columns),)
        )

    cells = stats['mean'].astype(str) + '±' + stats['sem'].astype(str)
    return pd.DataFrame(
        [cells.values],
        index=pd.Index([t_name], name='Target'),
        columns=cells.index.rename(''),
    )

def aggregate(results):
    """Stack per-target result rows and append an ``'Avg'`` row.

    Args:
        results: list of DataFrames whose cells are ``'mean±sem'`` strings
            (as produced by ``result``).

    Returns:
        The concatenated frame with an extra row labelled ``'Avg'`` holding,
        per column, the average of the mean parts rounded to one decimal.
        The other rows keep their original strings.
    """
    df_result = pd.concat(results)
    # Take the text before '±' in every cell. Column-wise ``.str`` access is
    # used instead of DataFrame.applymap, which newer pandas deprecates.
    means = df_result.apply(lambda col: col.str.split('±').str[0]).astype(float)
    df_result.loc['Avg'] = means.mean(axis=0).round(1)
    return df_result

# Configuration columns passed to create_df for removal from the collected
# frame, so they do not end up among the grouping keys in summary.
drop = [
    'batch_size',
    'lr',
    'dataset_class.py/type',
    'dataset_name',
    'n_iter',
    'train.py/function',
    'validation',
    'alpha_scheduler',
    'lr_scheduler',
    'biased',
    'weight_decay',
    'gpu',
    'num_train_d',
    'n_checks',
    'test_key',
    'p_d',
    'num_train_e',
]

In [None]:
# Build the BMNISTR-1 table: one result row per key, plus the 'Avg' row
# appended by aggregate().
keys = ['0', '15', '30', '45', '60', '75']
results = []
for key in keys:
    domain = 'MNISTR_' + key
    tags = [domain]
    v_tag = domain + '_y_acc_valid'
    t_tags = [domain + '_y_acc_test']

    # Event files of every run logged for this key.
    files = glob('logs/MNISTR' + key + '1_BM/*/*events*')

    df = create_df(files, v_tag, t_tags, extract_by_validation, drop)
    df_sum = summary(df, v_tag, t_tags)
    row = result(df_sum, v_tag, 'M' + key)
    results.append(row)

df_result = aggregate(results)
df_result['Dataset'] = 'BMNISTR-1'
bmnistr1 = df_result.reset_index().set_index(['Dataset', 'Target'])
bmnistr1