In [1]:
import os 
os.chdir("../src")
print(os.getcwd())

import glob
import re
import numpy as np
import pandas as pd
import gym
import matplotlib.pyplot as plt
from IPython.display import display
from IPython.utils import io

from envs.goal_demo_wrapper import GoalDemoWrapper
from utils.load_confs import load_parameters, load_paths

%matplotlib inline
%load_ext autoreload
%autoreload 2

/u/clw4542/research/rl_ifo_mujoco/src


### Visualize Hyperparameter Search Results

In [2]:
params = load_parameters()

def read_eval_results(env_id, log_path:str):
    '''Returns the results at the last evaluation time step
    if the training finished
    '''
    try:
        data = np.load(os.path.join(log_path, "evaluations.npz"))
    except FileNotFoundError:
        return None
    
    if data["timesteps"][-1] == params["sac"][env_id]["max_timesteps"] * 0.2:
        return np.squeeze(data["results"][-1])  # shape (n_eval_pts, 64, 1)
    else: 
        return None

#### Huber

In [3]:
expt_name = "huber+env_time_feat_rl"
hyperparam_dir_base = f"/scratch/cluster/clw4542/hyperparam_search/{expt_name}*"
# hyperparam_dirs = glob.glob(hyperparam_dir_base)

ENV_IDS = ["swimmer", "ant", "halfcheetah", "walker2d", "hopper"]
results = {env_id: {} for env_id in ENV_IDS}

for hyperparam_dirpath in glob.glob(hyperparam_dir_base):
    dirname = os.path.basename(hyperparam_dirpath)
    alpha = re.search( "alpha=\\d{1,3}(\\.\\d{1,3})?", dirname).group().replace("alpha=", "")
    gamma = re.search("gamma=\\d{1,3}(\\.\\d{1,3})?", dirname).group().replace("gamma=", "")
    imit_rew_coef = re.search("imit_rew_coef=\\d{1,3}(\\.\\d{1,3})?", dirname).group().replace("imit_rew_coef=", "")
    if imit_rew_coef == "0.01" or imit_rew_coef == "0.1":
        continue
    log_paths = os.path.join(hyperparam_dirpath, "log", "*")
    for log_path in glob.glob(log_paths):
        env_id = re.search("sac_\w*", log_path).group().replace("sac_", "").lower()
        last_returns = read_eval_results(env_id, log_path)
        if last_returns is None:
            continue
        # add results to dictionary
        paramset_name = f"alpha={alpha}_gamma={gamma}_imit-rew-coef={imit_rew_coef}"
        if paramset_name in results[env_id]:
            results[env_id][paramset_name] = np.concatenate(results[env_id][paramset_name], last_returns)
        else:
            results[env_id][paramset_name] = last_returns
            
for env_id in ENV_IDS: 
    for paramset, returns in results[env_id].items():
        results[env_id][paramset] = np.mean(returns)

In [7]:
best_param_dfs = []
for env_id in ENV_IDS:
    res_df = pd.DataFrame.from_dict(results[env_id], orient='index').sort_values(by=0, ascending=False)
#     best_param_df = res_df
    best_param_df = res_df[res_df[0] == res_df.max()[0]]
    best_param_df[1] = env_id
    best_param_dfs.append(best_param_df)

best_param_dfs = pd.concat(best_param_dfs)
best_param_dfs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/st

Unnamed: 0,0,1
alpha=1_gamma=0.8_imit-rew-coef=0.2,28.27095,swimmer
alpha=0.8_gamma=0.4_imit-rew-coef=0.2,3262.518066,ant
alpha=0.4_gamma=0.6_imit-rew-coef=0.2,7496.855469,halfcheetah
alpha=0.9_gamma=0.01_imit-rew-coef=0.2,3388.210693,walker2d
alpha=0.2_gamma=0.1_imit-rew-coef=0.2,2508.619629,hopper


In [6]:
for env_id in ENV_IDS:
    print(env_id)
    print(best_param_dfs[best_param_dfs[1] == env_id].shape)

swimmer
(95, 2)
ant
(96, 2)
halfcheetah
(98, 2)
walker2d
(50, 2)
hopper
(95, 2)


#### Huber2

In [8]:
expt_name = "huber2+env_time_feat_rl"
hyperparam_dir_base = f"/scratch/cluster/clw4542/hyperparam_search/{expt_name}*"
# hyperparam_dirs = glob.glob(hyperparam_dir_base)

ENV_IDS = ["swimmer", "ant", "halfcheetah", "hopper"]
results = {env_id: {} for env_id in ENV_IDS}

for hyperparam_dirpath in glob.glob(hyperparam_dir_base):
    dirname = os.path.basename(hyperparam_dirpath)
    alpha = re.search( "alpha=\\d{1,3}(\\.\\d{1,3})?", dirname).group().replace("alpha=", "")
    gamma = re.search("gamma=\\d{1,3}(\\.\\d{1,3})?", dirname).group().replace("gamma=", "")
    imit_rew_coef = re.search("imit_rew_coef=\\d{1,3}(\\.\\d{1,3})?", dirname).group().replace("imit_rew_coef=", "")
    if imit_rew_coef == "0.01" or imit_rew_coef == "0.1":
        continue
    log_paths = os.path.join(hyperparam_dirpath, "log", "*")
    for log_path in glob.glob(log_paths):
        env_id = re.search("sac_\w*", log_path).group().replace("sac_", "").lower()
        last_returns = read_eval_results(env_id, log_path)
        if last_returns is None:
            continue
        # add results to dictionary
        paramset_name = f"alpha={alpha}_gamma={gamma}_imit-rew-coef={imit_rew_coef}"
        if paramset_name in results[env_id]:
            results[env_id][paramset_name] = np.concatenate(results[env_id][paramset_name], last_returns)
        else:
            results[env_id][paramset_name] = last_returns
            
for env_id in ENV_IDS: 
    for paramset, returns in results[env_id].items():
        results[env_id][paramset] = np.mean(returns)

In [24]:
best_param_dfs = []
ENV_IDS = ["swimmer", "ant", "halfcheetah", "hopper"]
for env_id in ENV_IDS:
    res_df = pd.DataFrame.from_dict(results[env_id], orient='index').sort_values(by=0, ascending=False)
#     best_param_df = res_df
    best_param_df = res_df[res_df[0] == res_df.max()[0]]
    best_param_df[1] = env_id
    best_param_dfs.append(best_param_df)

best_param_dfs = pd.concat(best_param_dfs)
best_param_dfs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pa

Unnamed: 0,0,1
alpha=1_gamma=0.9_imit-rew-coef=0.2,26.160452,swimmer
alpha=0.1_gamma=0.1_imit-rew-coef=0.2,1154.933594,ant
alpha=0.8_gamma=0.6_imit-rew-coef=0.2,7467.095703,halfcheetah
alpha=0.8_gamma=0.8_imit-rew-coef=0.2,2643.909424,hopper


#### SBS

In [19]:
expt_name = "sbs_time_feat_rl"
hyperparam_dir_base = f"/scratch/cluster/clw4542/hyperparam_search/{expt_name}*"

ENV_IDS = ["swimmer", "ant", "halfcheetah", "walker2d", "hopper"]
results = {env_id: {} for env_id in ENV_IDS}

for hyperparam_dirpath in glob.glob(hyperparam_dir_base):
    dirname = os.path.basename(hyperparam_dirpath)
    dist_scale = re.search( "dist_scale=\\d{1,3}(\\.\\d{1,3})?", dirname).group().replace("dist_scale=", "")
    tau = re.search("tau=\\d{1,3}(\\.\\d{1,3})?", dirname).group().replace("tau=", "")

    log_paths = os.path.join(hyperparam_dirpath, "log", "*")
    for log_path in glob.glob(log_paths):
        env_id = re.search("sac_\w*", log_path).group().replace("sac_", "").lower()
        last_returns = read_eval_results(env_id, log_path)
        if last_returns is None:
            continue
        # add results to dictionary
        paramset_name = f"dist_scale={dist_scale}_tau={tau}"
        if paramset_name in results[env_id]:
            results[env_id][paramset_name] = np.concatenate(results[env_id][paramset_name], last_returns)
        else:
            results[env_id][paramset_name] = last_returns
            
for env_id in ENV_IDS:  
    for paramset, returns in results[env_id].items():
        results[env_id][paramset] = np.mean(returns)

In [20]:
best_param_dfs = []
for env_id in ENV_IDS:
    res_df = pd.DataFrame.from_dict(results[env_id], orient='index').sort_values(by=0, ascending=False)
    best_param_df = res_df[res_df[0] == res_df.max()[0]]
    best_param_df[1] = env_id
    best_param_dfs.append(best_param_df)
    
#     print(f"{env_id} best paramset is")
best_param_dfs = pd.concat(best_param_dfs)
best_param_dfs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/panda

Unnamed: 0,0,1
dist_scale=0.6_tau=0.8,35.280991,swimmer
dist_scale=0.01_tau=0.99,2992.549316,ant
dist_scale=0.99_tau=0.99,10383.414062,halfcheetah
dist_scale=0.4_tau=0.4,4498.157227,walker2d
dist_scale=0.01_tau=0.6,3466.066406,hopper
