In [12]:
import os
import sys
# Put the salina_cl directory first on the module search path.
# NOTE(review): hard-coded absolute path — this only resolves on a machine
# that has this exact directory; consider making it configurable.
sys.path.insert(0, os.path.abspath('/private/home/jbgaya/Work/salina/salina_cl/'))

# Loading logs

In [13]:
import salina.logger
# Read the run logs stored under the cartpole_7tasks results directory.
# NOTE(review): hard-coded absolute path; use_bz2=True is passed through to
# salina.logger.read_directory — presumably the logs are bz2-compressed, confirm.
LOGS=salina.logger.read_directory("/private/home/jbgaya/Work/salina/salina_cl/results/run/cartpole_7tasks/",use_bz2=True)

== Read  /private/home/jbgaya/Work/salina/salina_cl/results/run/cartpole_7tasks/ppo_subspace/seed=0
== Read  /private/home/jbgaya/Work/salina/salina_cl/results/run/cartpole_7tasks/ppo_finetune/seed=0
Found 2 logs


# Extracting available scenarios

In [8]:
def extract_scenario(log):
    """Return the entries of ``log.hps`` whose key starts with ``"scenario"``.

    The result is a new dict holding each matching key with its value, in the
    iteration order of ``log.hps``.
    """
    hps = log.hps
    return {name: hps[name] for name in hps if name.startswith("scenario")}

def has_scenario(log,scenario):
    """Tell whether ``log`` was run on ``scenario``.

    The scenario entries extracted from the log and the given ``scenario`` are
    compared through their ``str()`` renderings.
    """
    return str(extract_scenario(log)) == str(scenario)
    
def unique_scenarios(logs):
    """Return the list of distinct scenarios found across ``logs.logs``.

    Scenarios are deduplicated on their ``str()`` rendering; when several logs
    share a rendering, the dict extracted from the last of them is kept, at the
    position where that rendering first appeared.
    """
    by_rendering = {}
    for entry in logs.logs:
        found = extract_scenario(entry)
        by_rendering[str(found)] = found
    return list(by_rendering.values())
# Report how many distinct scenarios the loaded logs contain.
print("Found ",len(unique_scenarios(LOGS))," unique scenarios")

def generate_scenario_html(scenario):
    """Render a scenario dict as an HTML heading followed by a bullet list.

    Each ``key``/``value`` pair of ``scenario`` becomes one ``<li>`` item with
    the key in bold; keys are concatenated as-is (so they must be strings) and
    values go through ``str()``.
    """
    items = "".join(
        "<li><b>" + key + "</b> =" + str(value) + "</li>"
        for key, value in scenario.items()
    )
    return "<h2>Scenario</h2>" + "<ul>" + items + "</ul>"

Found  1  unique scenarios


In [19]:
def generate_hps_html(hps):
    """Render the model hyper-parameters as an HTML heading and bullet list.

    Only the entries of ``hps`` whose key starts with ``"model"`` are listed;
    values go through ``str()``.
    """
    items = "".join(
        "<li><b>" + key + "</b> =" + str(value) + "</li>"
        for key, value in hps.items()
        if key.startswith("model")
    )
    return "<h2>Hyper-parameters</h2>" + "<ul>" + items + "</ul>"

def generate_reward_html(reward_mean,reward_std):
    """Render the task-by-stage reward matrix as an HTML table.

    Args:
        reward_mean: 2-D array indexed as ``[task][stage]``. Only the size of
            its first dimension is read; it is used for both the number of
            rows and the number of columns.
        reward_std: array indexed like ``reward_mean``. An entry is appended
            in italics after the mean whenever it is not equal to 0.

    Returns:
        A string made of an ``<h2>`` heading followed by a ``<table>`` with
        one header row (stage indices) and one row per task.
    """
    n,_=reward_mean.shape
    results=["<h2>Reward</h2>","<table>"]

    # Header row: a corner label followed by one bold stage index per column.
    results.append("<tr><td>Task \\ Stage </td>")
    results.extend("<td><b>"+str(stage)+"</b></td>" for stage in range(n))
    results.append("</tr>")

    for task in range(n):
        results.append("<tr><td><b>"+str(task)+"</b></td>")
        for stage in range(n):
            r=reward_mean[task][stage]
            rs=reward_std[task][stage]
            cell="<td>"+str(r)
            if rs != 0:
                cell+="<i>("+str(rs)+")</i>"
            # Always close the cell: the zero-std case used to leave <td> open,
            # producing malformed markup.
            results.append(cell+"</td>")
        results.append("</tr>")
    results.append("</table>")
    return "".join(results)


# Extracting unique hps

In [20]:
# Remove the run-specific information (seed, device) and extract the remaining hps of each log
import copy
def extract_hps(log):
    """Return a copy of ``log.hps`` without the run-specific entries.

    The ``"model/seed"`` key and every key ending with ``"device"`` are left
    out; all other key/value pairs are kept in their original order.
    """
    return {
        name: value
        for name, value in log.hps.items()
        if name != "model/seed" and not name.endswith("device")
    }
    

# Scenario Analysis

In [21]:
import pandas as pd
import numpy as np
from IPython.display import display, HTML

def analyze_runs(logs):
    """Aggregate the evaluation metrics of a group of runs.

    Every log is converted with ``to_dataframe()``, restricted to the
    ``evaluation*`` columns plus ``iteration``, and the frames are concatenated.
    Mean and standard deviation are then computed per ``iteration``.

    Args:
        logs: non-empty sequence of logs exposing ``hps`` and ``to_dataframe()``.
            The hyper-parameters are taken from the first log only.

    Returns:
        A tuple ``(r_mean, r_std, memory_mean, memory_std, hps)`` where
        ``r_mean``/``r_std`` are ``(n_tasks, n_tasks)`` arrays indexed as
        ``[task][stage]`` (means rounded to 0 decimals), ``memory_mean``/
        ``memory_std`` are ``(n_tasks,)`` arrays indexed by stage, and ``hps``
        is the output of ``extract_hps`` for the first log. ``n_tasks`` is the
        largest remaining ``iteration`` value plus one.
    """
    print("Analyzing ",len(logs)," logs")
    hps=extract_hps(logs[0])
    dfs=[]
    for log in logs:
        df=log.to_dataframe()
        _cols=[c for c in df.columns if c.startswith("evaluation")]+["iteration"]
        df=df[_cols]
        dfs.append(df)

    df=pd.concat(dfs)
    df_mean=df.groupby("iteration",as_index=False).mean()
    df_std=df.groupby("iteration",as_index=False).std()
    columns=[c for c in df_mean.columns if not c=="iteration"]
    # Drop the iterations for which every evaluation column is NaN.
    df_mean=df_mean.dropna(subset=columns,how="all")
    df_std=df_std.dropna(subset=columns,how="all")
    n_tasks=df_mean["iteration"].max()+1
    # Collecting reward and memory statistics
    r_mean=np.zeros((n_tasks,n_tasks))
    r_std=np.zeros((n_tasks,n_tasks))
    memory_mean=np.zeros((n_tasks,))
    memory_std=np.zeros((n_tasks,))
    for task in range(n_tasks):
        for stage in range(n_tasks):
            n="evaluation/"+str(task)+"/avg_reward"
            d=df_mean[df_mean["iteration"]==stage]

            reward_mean=d.iloc[0][n]
            memory_mean[stage]=d.iloc[0]["evaluation/memory/n_parameters"]
            r_mean[task][stage]=round(reward_mean,0)

            d=df_std[df_std["iteration"]==stage]
            try:
                reward_std=d.iloc[0][n]
                memory_std[stage]=d.iloc[0]["evaluation/memory/n_parameters"]
            except (IndexError, KeyError):
                # No std row (or column) for this stage, e.g. a single run whose
                # std is NaN everywhere and was removed by the dropna above.
                # Only lookup failures are handled so that unrelated errors
                # (and KeyboardInterrupt) are no longer silently swallowed.
                reward_std = 0
                # NOTE(review): the std fallback is the *mean* value, whereas the
                # reward fallback is 0 — confirm this is intended.
                memory_std[stage]=memory_mean[stage]

            r_std[task][stage]=reward_std
    return r_mean,r_std,memory_mean,memory_std,hps
        
    
def analyze_scenario(logs,scenario):
    """Display the results of every hyper-parameter setting run on ``scenario``.

    The scenario description is displayed first. The logs of ``logs.logs`` that
    match the scenario are then grouped by the ``str()`` rendering of their
    extracted hps, and for each group the hyper-parameters and the reward table
    returned by ``analyze_runs`` are displayed as HTML.
    """
    display(HTML(generate_scenario_html(scenario)))

    # Group the matching runs by their hps rendering.
    grouped={}
    for run in logs.logs:
        if has_scenario(run,scenario):
            grouped.setdefault(str(extract_hps(run)),[]).append(run)

    print("Found ",len(grouped)," different Hps values")

    for runs in grouped.values():
        # The memory statistics are returned by analyze_runs but not displayed.
        reward_mean,reward_std,_memory_mean,_memory_std,hps=analyze_runs(runs)

        # Generate HTML
        display(HTML(generate_hps_html(hps)))
        display(HTML(generate_reward_html(reward_mean,reward_std)))

# Display the analysis of every distinct scenario found in LOGS.
for scenario in unique_scenarios(LOGS): 
    analyze_scenario(LOGS,scenario)


Found  2  different Hps values
Analyzing  1  logs


0,1,2,3,4,5,6,7
Task \ Stage,0.0,1.0,2.0,3.0,4.0,5.0,6.0
0,43.0,43.0,46.0,44.0,45.0,45.0,45.0
1,42.0,179.0,180.0,181.0,179.0,181.0,181.0
2,22.0,95.0,174.0,179.0,172.0,173.0,175.0
3,34.0,42.0,47.0,67.0,65.0,62.0,66.0
4,44.0,53.0,58.0,60.0,69.0,67.0,74.0
5,83.0,112.0,105.0,105.0,107.0,116.0,116.0
6,44.0,103.0,127.0,136.0,130.0,133.0,155.0


Analyzing  1  logs


0,1,2,3,4,5,6,7
Task \ Stage,0.0,1.0,2.0,3.0,4.0,5.0,6.0
0,43.0,76.0,196.0,200.0,200.0,200.0,200.0
1,37.0,107.0,200.0,200.0,200.0,200.0,200.0
2,20.0,70.0,192.0,119.0,109.0,105.0,125.0
3,35.0,56.0,71.0,167.0,185.0,199.0,123.0
4,44.0,62.0,80.0,169.0,179.0,194.0,116.0
5,81.0,109.0,183.0,170.0,134.0,185.0,182.0
6,42.0,78.0,198.0,200.0,200.0,200.0,200.0


In [27]:
# Scratch cell: a small sample dict.
d = {"avg_reward":100}

In [32]:
# Scratch cell: show the str() rendering of the sample dict.
str(d)

"{'avg_reward': 100}"