In [1]:
#libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as pl
import torch
from STOMPnet import STOMPnet
from utils import MultiChannelNet
import os
import h5py
import yaml
import json

This notebook loads all objects produced by the program for the run identified by the following data and training hashes:

In [2]:
# Identifiers of the run to inspect: data_hash names the data_<hash>/ folder,
# train_hash names the run folder inside its training_results/ directory.
data_hash, train_hash = 'a505fc258b', '8058f0b1ba'

Set paths

In [13]:
# Folder layout: <outdir>/data_<data_hash>/ holds data.h5 and config.yaml;
# each training run sits in <outdir>/data_<data_hash>/training_results/<train_hash>.
outdir = 'output/'
data_dir = f'data_{data_hash}/'

# folder of the selected training run
save_dir = os.path.join(outdir, data_dir, 'training_results/')
train_info_dir = os.path.join(save_dir, train_hash)

# dataset file and the config file stored next to it
data_filename, config_filename = (
    os.path.join(outdir, data_dir, leaf) for leaf in ('data.h5', 'config.yaml')
)

Load dataset objects

In [22]:
# Read every top-level group of the HDF5 file into memory as
# {group_name: {key: ndarray}}.
# NOTE(review): presumably one group per seed (the config reports num_seeds) - confirm.
with h5py.File(data_filename, 'r') as f:
    datasets = {}
    for group_name in f.keys():
        group = f[group_name]
        datasets[group_name] = dict(
            (key, np.array(value)) for key, value in group.items())

# Parse the YAML config, then pretty-print its 'file_attrs' section.
with open(config_filename, 'r') as f:
    config = yaml.load(f, Loader=yaml.FullLoader)
print(json.dumps(config['file_attrs'], sort_keys=True, indent=4))

{
    "A": 2,
    "K": 3,
    "M": 2,
    "N": 4,
    "T": 128,
    "action_selection_method": "greedy",
    "corr": 1.0,
    "ensemble": "sum",
    "env": false,
    "ground_model_name": "bitpop",
    "num_episodes": 1,
    "num_seeds": 10,
    "output": "output/",
    "sps": 16,
    "stablefac": 8.0,
    "timestamp": "20240326_164234"
}


Load training objects

In [28]:
# Training arguments stored with the run, pretty-printed for reference.
with open(train_info_dir + "/args.yaml", 'r') as f:
    training_args = yaml.load(f, Loader=yaml.FullLoader)
print(json.dumps(training_args, sort_keys=True, indent=4))

# results.npy stores a pickled Python object; .item() unwraps it from the
# array np.load returns (here a dict, see the keys printed below).
results = np.load(train_info_dir + "/results.npy", allow_pickle=True).item()
print(results.keys())

# State dict written at the end of training.
state_dict_final = torch.load(train_info_dir + "/state_dict_final.pt")

# Checkpoint files for epochs 5, 10, ... up to and including training_args['epochs'].
state_dict_checkpoints = [
    torch.load(train_info_dir + f"/state_dict_{epoch}.pt")
    for epoch in range(5, training_args['epochs'] + 1, 5)
]

{
    "L": 100,
    "M": 2,
    "P": 100000.0,
    "batch_size": 16,
    "data_dir": "data_a505fc258b/",
    "data_seed": 0,
    "epochs": 20,
    "interval": 5,
    "learning_rate": 5e-05,
    "model_name": "stomp",
    "n_features": 2,
    "num_codebooks": 10,
    "outdir": "output/",
    "seed": 0
}
dict_keys(['loss', 'accuracy'])


Bring all of the loading steps above into a single function

In [3]:
def get_all(data_hash, train_hash, outdir='output/', checkpoint_interval=5):
    """Load every object stored for one dataset / training-run pair.

    Parameters
    ----------
    data_hash : str
        Hash naming the dataset folder ``<outdir>/data_<data_hash>/``.
    train_hash : str
        Hash naming the run folder inside that dataset's ``training_results/``.
    outdir : str, optional
        Root output folder. Defaults to ``'output/'``.
    checkpoint_interval : int, optional
        Epoch spacing of the ``state_dict_<epoch>.pt`` files to load; files for
        epochs ``interval, 2*interval, ...`` up to ``training_args['epochs']``
        are read. Defaults to 5.
        NOTE(review): args.yaml carries an 'interval' entry that may be this
        same quantity - confirm before reading it from there.

    Returns
    -------
    tuple
        ``(data_config, datasets, training_args, results, state_dict_final,
        state_dict_checkpoints)`` where ``datasets`` maps each HDF5 group name
        to a ``{key: ndarray}`` dict and ``state_dict_checkpoints`` is a list
        ordered by epoch.

    Notes
    -----
    ``yaml.FullLoader`` and ``np.load(..., allow_pickle=True)`` can construct
    arbitrary Python objects; only point this function at run folders you trust.
    """
    # resolve the folder layout once
    data_root = os.path.join(outdir, f'data_{data_hash}/')
    train_info_dir = os.path.join(data_root, 'training_results/', train_hash)

    # copy every HDF5 group into memory so nothing refers to the closed file
    with h5py.File(os.path.join(data_root, 'data.h5'), 'r') as h5_file:
        datasets = {
            group_name: {key: np.array(value) for key, value in group.items()}
            for group_name, group in h5_file.items()
        }

    # configuration file stored next to the data
    with open(os.path.join(data_root, 'config.yaml'), 'r') as config_file:
        data_config = yaml.load(config_file, Loader=yaml.FullLoader)

    # arguments stored with the training run
    with open(os.path.join(train_info_dir, 'args.yaml'), 'r') as args_file:
        training_args = yaml.load(args_file, Loader=yaml.FullLoader)

    # results.npy stores a pickled object; .item() unwraps it from the array
    results = np.load(os.path.join(train_info_dir, 'results.npy'),
                      allow_pickle=True).item()

    # final state dict
    state_dict_final = torch.load(
        os.path.join(train_info_dir, 'state_dict_final.pt'))

    # intermediate checkpoints, one file per saved epoch
    checkpoint_epochs = range(checkpoint_interval,
                              training_args['epochs'] + 1,
                              checkpoint_interval)
    state_dict_checkpoints = [
        torch.load(os.path.join(train_info_dir, f'state_dict_{epoch}.pt'))
        for epoch in checkpoint_epochs
    ]

    return (data_config, datasets, training_args, results,
            state_dict_final, state_dict_checkpoints)

In [4]:
# Load the selected run through get_all; this (re)binds datasets, training_args,
# results and both state-dict variables at notebook level.
(data_config,
 datasets,
 training_args,
 results,
 state_dict_final,
 state_dict_checkpoints) = get_all(data_hash, train_hash)

Now, load data from multiple runs

In [None]:
def get_perf_data(train_hash, data_hash, outdir='output/'):
    """Load only the stored results object of one training run.

    Note that the argument order (``train_hash`` first) is the reverse of
    ``get_all``.

    Parameters
    ----------
    train_hash : str
        Hash naming the run folder inside ``training_results/``.
    data_hash : str
        Hash naming the dataset folder ``<outdir>/data_<data_hash>/``.
    outdir : str, optional
        Root output folder. Defaults to ``'output/'``.

    Returns
    -------
    object
        Whatever was saved in ``results.npy`` (for the run loaded earlier in
        this notebook, a dict with 'loss' and 'accuracy' entries).
    """
    train_info_dir = os.path.join(
        outdir, f'data_{data_hash}/', 'training_results/', train_hash)
    # allow_pickle is needed because the file stores a pickled Python object;
    # only load result files you trust.
    return np.load(os.path.join(train_info_dir, 'results.npy'),
                   allow_pickle=True).item()

def get_data_hash(N, data_paras=data_config):
    """Return the data hash for the given parameters with 'N' replaced by ``N``.

    Parameters
    ----------
    N : number
        Value to store under the 'N' key before hashing.
    data_paras : dict, optional
        Base parameter dict; defaults to the notebook-level ``data_config`` as
        it exists when this cell is executed. It is not modified.

    Returns
    -------
    The value returned by ``data_hash_function`` for the updated parameters.
    """
    # Work on a shallow copy: writing into data_paras directly would modify the
    # shared default (data_config) on every call.
    paras = dict(data_paras)
    # NOTE(review): the config printed earlier keeps N under 'file_attrs';
    # confirm that a top-level 'N' is what the hash function expects.
    paras['N'] = N
    # TODO!!! data_hash_function is not defined or imported in the visible cells.
    return data_hash_function(paras)

def get_train_hash(P, model_name, train_paras=training_args):
    """Return the training hash for the given arguments with 'P' and 'model_name' replaced.

    Parameters
    ----------
    P : number
        Value to store under the 'P' key before hashing.
    model_name : str
        Value to store under the 'model_name' key before hashing.
    train_paras : dict, optional
        Base argument dict; defaults to the notebook-level ``training_args`` as
        it exists when this cell is executed. It is not modified.

    Returns
    -------
    The value returned by ``train_hash_function`` for the updated arguments.
    """
    # Work on a shallow copy: writing into train_paras directly would modify
    # the shared default (training_args) on every call.
    paras = dict(train_paras)
    paras['P'] = P
    paras['model_name'] = model_name
    # TODO!!! train_hash_function is not defined or imported in the visible cells.
    return train_hash_function(paras)

In [None]:
# Sweep grid.
# N values are written as ints to match the type of "N" in the data config;
# 10 and 10.0 address the same dict key, so lookups are unaffected.
# NOTE(review): confirm the hash function expects an int N.
N_list = [10, 100, 1000, 10000]
model_name_list = ['stomp', 'single', 'multis']
P_list = [1e5, 1e6, 1e7, 1e8, 1e9]

# perf_data[N][P][model_name] -> results object of the corresponding run
perf_data = {}
for N in N_list:
    # Separate names keep the notebook-level data_hash / train_hash intact.
    sweep_data_hash = get_data_hash(N, data_config)
    perf_data[N] = {}
    for P in P_list:
        perf_data[N][P] = {}
        for model_name in model_name_list:
            # NOTE(review): training_args also carries a 'data_dir' entry tied
            # to one data hash; if the train hash covers it, it must be updated
            # per N as well - confirm.
            sweep_train_hash = get_train_hash(P, model_name, training_args)
            perf_data[N][P][model_name] = get_perf_data(
                sweep_train_hash, sweep_data_hash)

Plot loss and accuracy over epochs for each model and each value of P

In [None]:
# Loss (top panel) and accuracy (bottom panel) per epoch for a single N.
# Line style encodes the model, colour encodes P.

# Chosen explicitly instead of relying on the loop variable left over from the
# loading cell; the last entry is the value that leftover variable would hold.
N_plot = N_list[-1]

fig, ax = pl.subplots(2, 1, figsize=(10, 6))
line_styles = ['-', '--', ':']  # one per entry of model_name_list
# pl.get_cmap is used because matplotlib.cm.get_cmap is removed in recent releases
colors = pl.get_cmap('viridis')(np.linspace(0, 1, len(P_list)))

for mit, model_name in enumerate(model_name_list):
    for pit, P in enumerate(P_list):
        run = perf_data[N_plot][P][model_name]
        # Label each model once (on the first colour) and each P once (on the
        # first model) so neither legend contains duplicate entries;
        # '_nolegend_' keeps a line out of the legend.
        ax[0].plot(run['loss'], line_styles[mit], color=colors[pit],
                   label=model_name if pit == 0 else '_nolegend_')
        ax[1].plot(run['accuracy'], line_styles[mit], color=colors[pit],
                   label=P if mit == 0 else '_nolegend_')

ax[0].set_ylabel('per-agent action loss')
ax[0].set_xlabel('epoch')
ax[0].set_ylim(0, 0.05)
ax[1].set_ylabel('action prediction accuracy')
ax[1].set_xlabel('epoch')
ax[1].set_ylim(0, 1)

# Legend titles match what the labels contain: model names on top, P values below.
ax[0].legend(frameon=False, bbox_to_anchor=(1.2, 1.05), title='model')
ax[1].legend(frameon=False, bbox_to_anchor=(1.01, 1.05), title='P')

# These two strings are not defined in the visible cells, so minimal versions
# are built here; extend them if more settings should appear in the title.
data_settings_str = f'N={N_plot:g}'
training_settings_str = 'models: ' + ', '.join(model_name_list)
fig.suptitle(data_settings_str + '\n' + training_settings_str)
# NOTE(review): accuracy is presumably the fraction of correct argmax
# predictions, roughly (true == mdl(x).argmax(1)).sum() / true.size(0) -
# confirm against the training code.
fig.tight_layout()
fig.savefig('results_fig.png', transparent=True, bbox_inches="tight", dpi=300)