In [3]:
import sys
sys.path.append("/home/benjamin/RewardCurriculum/")

import os
# `os.system("export ...")` would only set the variable inside a short-lived
# subshell and leave this process untouched. Assigning through os.environ makes
# the setting visible to the current interpreter and to any child processes.
os.environ["MKL_SERVICE_FORCE_INTEL"] = "1"

import gymnasium as gym
from collections import defaultdict
import panda_gym
from stable_baselines3.common.vec_env import DummyVecEnv
import json
import numpy as np
import tqdm

import os
from utils.configs import get_config
from utils.env_wrappers import make_vec_env, get_env
from argparse import Namespace
import cv2
from IPython.display import HTML
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

In [4]:
import matplotlib.font_manager as font_manager

# Register every font file that matplotlib discovers under `font_dir` so the
# fonts can be referenced by name in later figures.
font_dir = ['.']
local_font_files = font_manager.findSystemFonts(fontpaths=font_dir)
for font in local_font_files:
    font_manager.fontManager.addfont(font)

In [5]:
def rollout_episodes(env, learner, n_episodes=1, max_steps=200, reset_every=20, task=None):
    """Step `learner` through `env` while periodically re-sampling the goal.

    Args:
        env: Vectorised environment. Only the first sub-environment's ``task``
            attribute (``env.get_attr("task")[0]``) is queried and reset.
        learner: Policy exposing ``predict(obs, weights=..., deterministic=...)``
            and ``scheduler.reward_dim``.
        n_episodes: Currently unused; kept so existing callers keep working.
        max_steps: Total number of environment steps to take.
        reset_every: The task is reset right after every step whose index is a
            multiple of this value (this includes step 0).
        task: Index of the reward component that receives weight 1. ``None``
            selects the last component. Previously any non-None value crashed
            because the weight vector was never created.
            NOTE(review): treating `task` as a component index is an
            assumption -- confirm against the scheduler's conventions.

    Returns:
        A ``(rewards, goals)`` tuple. ``rewards`` holds the reward returned by
        each of the ``max_steps`` calls to ``env.step``. ``goals`` holds
        ``get_goal()[3:]`` for the initial goal followed by the goal that is
        active after each step, so it has ``max_steps + 1`` entries.
    """
    # One-hot weighting over the reward components handed to the policy.
    weights = np.zeros((1, learner.scheduler.reward_dim))
    weights[:, -1 if task is None else task] = 1

    goals = []
    rewards = []

    obs = env.reset()
    current_goal = env.get_attr("task")[0].get_goal()
    goals.append(current_goal[3:])

    for step_idx in range(max_steps):
        act = learner.predict(obs, weights=weights, deterministic=False)[0]
        obs, reward, _, info = env.step(act)

        # The reset happens *after* the step, so the reward recorded for this
        # index still belongs to the goal that was active before the reset.
        if step_idx % reset_every == 0:
            env.get_attr("task")[0].reset()
            current_goal = env.get_attr("task")[0].get_goal()

        goals.append(current_goal[3:])
        rewards.append(reward)

    return rewards, goals

In [36]:
# Directory holding one sub-folder per training run of the obstacle task.
results_dir = "/home/benjamin/RewardCurriculum/results/panda_pick_and_place_obstacle_long_v2"
# Switch the working directory to the repository root.
# NOTE(review): presumably needed so relative paths inside saved configs
# resolve -- confirm.
os.chdir("/home/benjamin/RewardCurriculum")

In [None]:
# Collect, per curriculum type, the reward traces of every trained run found
# under `results_dir`.
generalization_results = defaultdict(list)

master_goals = []
master_rewards = []
for subfolder in tqdm.tqdm(os.listdir(results_dir)):
    folder_path = os.path.join(results_dir, subfolder)
    if not os.path.isdir(folder_path):
        continue

    # Read the stored run configuration once and close the handle promptly
    # (the original opened the file twice and never closed either handle).
    with open(os.path.join(folder_path, "config.json"), "r") as config_file:
        saved_config = json.load(config_file)

    args = Namespace()
    args.env_name = saved_config["environment"]["env_name"]
    args.seed = saved_config["seed"]
    args.config_path = os.path.join(folder_path, "config_original.json")
    args.continue_from = None

    # Command-line style override applied on top of the original config.
    remaining_args = ["--environment.wrapper_kwargs.0.reward_threshold", "-0.05"]

    config = get_config(args.config_path, args, remaining_args)
    # Append one more wrapper together with its (empty) kwargs entry so both
    # lists stay aligned.
    config["environment"]["wrappers"] += ["SingleTaskRewardWrapper"]
    config["environment"]["wrapper_kwargs"] += [{}]

    make_env_fn = lambda wrappers, wrapper_kwargs, ignore_keyword="ignore" : get_env(config["environment"]["env_name"], wrappers=wrappers, wrapper_kwargs=wrapper_kwargs, ignore_keyword=ignore_keyword)
    env = make_vec_env(make_env_fn, 
                        n_envs=config["environment"]["n_envs"], 
                        env_kwargs={"wrappers": config["environment"]["wrappers"], "wrapper_kwargs": config["environment"]["wrapper_kwargs"]},
                        monitor_kwargs={"allow_early_resets": True},
                        seed=config["seed"], vec_env_cls=DummyVecEnv)

    try:
        learner = config["learner_class"].load(os.path.join(folder_path, "evaluations", "best_model.zip"), env=env)

        # Derive the dictionary key from the scheduler's string representation.
        cl_type = str(config["learner_kwargs"]["scheduler_class"]).split(" ")[-1][2:-2]
        rewards, _ = rollout_episodes(env, learner, n_episodes=1, max_steps=1000, reset_every=10)
    finally:
        # Release the environment before the next run is loaded, so that
        # environments do not pile up across iterations.
        # NOTE(review): assumes make_vec_env returns an object with the usual
        # VecEnv.close() method -- confirm.
        env.close()

    rewards = np.stack(rewards)

    generalization_results[cl_type].append(rewards)

# Turn each per-type list of reward arrays into a single stacked array.
for cl_type in generalization_results.keys():
    generalization_results[cl_type] = np.stack(generalization_results[cl_type])

In [8]:
# Keep every 10th reward sample, starting at index 9, from the first reward
# column of the SetterSolver runs.
# NOTE(review): presumably these samples act as per-goal success flags -- confirm.
results = generalization_results["SetterSolver"][:, np.arange(9, 1009, 10), 0]
# Average, over runs, of the per-run total of the selected samples.
print(np.mean(np.sum(results, axis=1)))

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20, 10))

# Heat map with one row per run and one column per selected sample.
plt.imshow(results)

NameError: name 'generalization_results' is not defined

In [30]:
import pickle
# Persist the collected results next to the runs. The context manager makes
# sure the file is flushed and closed even if pickling fails part-way.
with open(os.path.join(results_dir, "generalization_perturbation.pkl"), "wb") as results_file:
    pickle.dump(generalization_results, results_file)

# Generate a LaTeX table

In [41]:
import pickle

result_dirs = [
    "/home/benjamin/RewardCurriculum/results/panda_pick_and_place_sphere_long",
    "/home/benjamin/RewardCurriculum/results/panda_pick_and_place_long",
    "/home/benjamin/RewardCurriculum/results/panda_pick_and_place_obstacle_long_v2"
]

# Load one results dictionary per environment, closing each file as soon as it
# has been read.
# SECURITY: pickle.load can execute arbitrary code; only load files that were
# produced by this notebook or another trusted source.
generalization_results = []
for _result_dir in result_dirs:
    with open(os.path.join(_result_dir, "generalization_perturbation.pkl"), "rb") as _results_file:
        generalization_results.append(pickle.load(_results_file))

In [57]:
# Indices of the reward samples that are summed per run: every 10th step,
# starting at index 9.
is_success_indices = np.arange(9, 1009, 10)

algorithms = ["ManualTask", "Random", "SACX", "BiPaRS", "SetterSolver", "CurrOT", "ALPGMM"]

# Print one table cell per algorithm: the mean over environments of the
# per-environment mean of the per-run totals.
for cl_type in algorithms:
    mean_results, std_results, processed_results = [], [], []

    for idx, _generalization_results in enumerate(generalization_results):
        # An algorithm that is missing for an environment is scored as all
        # zeros, using the shape of that environment's SetterSolver entry.
        _fallback = np.zeros_like(_generalization_results["SetterSolver"])
        _results = _generalization_results.get(cl_type, _fallback)[:, is_success_indices, 0]
        _per_run_total = _results.sum(axis=1)

        processed_results.append(_results)
        mean_results.append(_per_run_total.mean())
        std_results.append(_per_run_total.std())

    mean_results = np.mean(mean_results)
    # Per-sample totals across all runs of all environments (not printed).
    processed_results = np.concatenate(processed_results, axis=0).sum(axis=0)
    print(f"${mean_results:.2f}$\\%")

$5.93$\%
$11.77$\%
$23.70$\%
$0.00$\%
$24.23$\%
$30.20$\%
$24.27$\%
