In [28]:
import gymnasium as gym
import numpy as np
from IPython.display import display, Video
from gplearn.genetic import _Program, SymbolicRegressor

from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import pickle

import os
os.environ['MUJOCO_GL']='egl'

In [29]:
import minari
# Load the expert Hopper dataset and keep its first episode; that episode is
# the reference trajectory the benchmark prints next to the live rollout.
dataset = minari.load_dataset("Minimal-Hopper-Expert-v5")
episode = dataset[0]

# Fit a standard scaler on the reference episode's observations.
# `fit` returns the scaler itself, so construction and fitting can be chained;
# the trailing bare name keeps the fitted scaler as the cell's displayed value.
gp_scaler = StandardScaler().fit(episode.observations)
gp_scaler

In [30]:
def benchmark_models(regressors, scaler, piecewise_models, env_name, seed, max_steps=120):
    """Roll out the per-action regressors in a video-recorded environment.

    At every step the current observation is scaled with ``scaler``, each
    action dimension is predicted by its own regressor, the resulting action
    is clipped to [-1, 1] and applied to the environment. The live
    observation / predicted action are printed next to the observation /
    action stored at the same step index in the module-level ``episode``
    (the reference trajectory loaded in an earlier cell).

    Parameters
    ----------
    regressors : mapping
        Indexed with the keys ``"action0"``, ``"action1"``, ...; each value is
        a 2-item sequence whose first item exposes ``predict``. The second
        item is ignored here.
    scaler : object
        Exposes ``transform``; applied to the observation reshaped to one row.
    piecewise_models : object
        Currently unused. It is kept so existing callers keep working.
        TODO: the GP (piecewise symbolic model) comparison that consumed this
        argument was disabled; re-introduce it here once it is needed again.
    env_name : str
        Environment id passed to ``gym.make``.
    seed : int
        Seed passed to ``env.reset``.
    max_steps : int or None, default 120
        Upper bound on the number of environment steps. ``None`` runs until
        the environment reports termination or truncation.

    Returns
    -------
    None
        The total reward is printed; a video is written under ``videos/`` by
        the ``RecordVideo`` wrapper.
    """
    env = gym.make(env_name, render_mode='rgb_array')
    env = gym.wrappers.RecordVideo(env, "videos", episode_trigger=lambda x: True)

    # Always close the environment, even if a prediction or step raises, so
    # the recording wrapper is shut down instead of being left open.
    try:
        # Take the action dimensionality from the environment instead of
        # hard-coding it.
        n_actions = env.action_space.shape[0]

        total_reward = 0.0
        obs = env.reset(seed=seed)[0]
        terminated, truncated = False, False
        t = 0

        while not (terminated or truncated) and (max_steps is None or t < max_steps):
            # The scaled observation is the same for every action dimension,
            # so compute it once per step.
            nn_scaled_obs = scaler.transform(obs.reshape(1, -1))

            predicted_action = np.ones(shape=n_actions)
            for action_idx in range(n_actions):
                regressor, _ = regressors[f"action{action_idx}"]
                predicted_action[action_idx] = regressor.predict(nn_scaled_obs).item()
            predicted_action = np.clip(predicted_action, -1, 1)

            print(t)
            print("      OBS:", [round(o, 2) for o in obs])
            # The reference episode may be shorter than the rollout; skip the
            # comparison lines rather than fail with an IndexError.
            if t < len(episode.observations):
                print("HIST. OBS:", [round(o, 2) for o in episode.observations[t]])
            print("      NN: ", [round(o, 2) for o in predicted_action])
            if t < len(episode.actions):
                print("    TRUE: ", [round(o, 2) for o in episode.actions[t]])
            print()

            obs, reward, terminated, truncated, _ = env.step(predicted_action)

            t += 1
            total_reward += reward

        print(f"Total reward: {total_reward:.3f}")
    finally:
        env.close()

In [31]:
# Locations of the pickled models, relative to the notebook's working directory.
nn_model_path = 'Hopper-nn.pkl'
gp_model_path = 'piecewise-learner-5.pkl'

# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(nn_model_path, 'rb') as nn_file:
    # The NN pickle stores a (regressors, scaler) pair. A large mismatch
    # between this scaler and the GP scaler could be a problem; in practice
    # the two are roughly 99% identical.
    regressors, scaler = pickle.load(nn_file)

with open(gp_model_path, 'rb') as gp_file:
    piecewise_models = pickle.load(gp_file)

# Run the recorded rollout with a fixed seed so it can be repeated.
benchmark_models(
    regressors,
    scaler,
    piecewise_models,
    env_name="Hopper-v5",
    seed=24012000,
)



  logger.warn(


0
      OBS: [1.25, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0, -0.0, -0.0]
HIST. OBS: [1.25, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0, -0.0, -0.0]
      NN:  [-0.87, 0.63, 0.98]
    TRUE:  [-1.0, 0.99, 1.0]

1
      OBS: [1.25, 0.0, -0.01, 0.0, 0.01, 0.04, -0.12, -0.73, -1.36, 0.82, 1.38]
HIST. OBS: [1.25, 0.0, -0.01, 0.0, 0.01, 0.04, -0.12, -0.84, -1.55, 0.93, 1.4]
      NN:  [-0.33, -0.61, 1.0]
    TRUE:  [-0.25, -0.99, 1.0]

2
      OBS: [1.25, -0.01, -0.02, 0.0, 0.02, -0.04, -0.22, -1.43, -1.77, -0.04, 2.85]
HIST. OBS: [1.25, -0.01, -0.02, 0.0, 0.02, -0.07, -0.21, -1.65, -1.8, -0.45, 2.9]
      NN:  [-0.2, -1.0, 0.95]
    TRUE:  [-0.34, -1.0, 1.0]

3
      OBS: [1.24, -0.02, -0.03, -0.0, 0.05, -0.14, -0.32, -2.18, -1.95, -1.43, 4.27]
HIST. OBS: [1.24, -0.03, -0.04, -0.01, 0.05, -0.19, -0.32, -2.56, -2.19, -1.82, 4.39]
      NN:  [-0.11, -0.92, 0.95]
    TRUE:  [-0.24, -1.0, 1.0]

4
      OBS: [1.24, -0.04, -0.05, -0.02, 0.09, -0.23, -0.43, -2.79, -2.0, -2.71, 5.67]
HIST. OBS: [1

In [32]:
# Embed the video file written by the RecordVideo wrapper for the first
# recorded episode.
recorded_video = Video(url='videos/rl-video-episode-0.mp4')
display(recorded_video)