In [1]:
import jax
import matplotlib.pyplot as plt
import numpy as np
import os
import seaborn as sns

from functools import partial
from scipy.interpolate import splrep, splev, RBFInterpolator
from tqdm.notebook import tqdm

from adaptive_time.utils import set_directory_in_project


# Global seaborn look (grid style and color palette) for figures drawn after this cell.
sns.set_style("darkgrid")
sns.set_palette("colorblind")

In [2]:
# Per the recorded cell output, this changes the working directory to the project
# checkout; the relative "./data" and "./smoothed_data" paths used below are
# resolved against that directory.
set_directory_in_project()

Changed working directory to /home/bryanpu1/projects/adaptive_time


'/home/bryanpu1/projects/adaptive_time'

In [3]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from joblib import Parallel, delayed

In [4]:
# Scratch preview of the warped time axis 1 - base**(-t / 10000) that
# smooth_rewards builds for each trajectory, here with base=1.2 and t = 0..49999.
1 - np.power(1.2, -np.arange(50000) / 10000)

array([0.00000000e+00, 1.82319895e-05, 3.64636465e-05, ...,
       5.98100446e-01, 5.98107774e-01, 5.98115101e-01])

In [7]:
# Root folder for the smoothed outputs; exist_ok=True makes re-running this cell harmless.
os.makedirs("./smoothed_data", exist_ok=True)

In [9]:
# Polynomial-smoothing settings, keyed by environment name.
# Each value is (timesteps, degree, base):
#   timesteps - number embedded in the reward file name
#   degree    - degree of the polynomial fitted to each trajectory
#   base      - base of the exponential time warp (1.3 for every environment)
env_names = {
    name: (timesteps, degree, 1.3)
    for name, timesteps, degree in (
        ("swimmer-v3", 40000, 7),
        ("hopper-v3", 50000, 7),
        ("ant-v3", 50000, 5),
        ("cheetah-v3", 50000, 5),
    )
}

# Seeds 0 .. num_seeds - 1 are processed for every environment.
num_seeds = 10

def smooth_rewards(
    env_name: str,
    timesteps: int,
    degree: int,
    base: float,
    seed: int,
    time_scale: float = 10000,
):
    """Fit a polynomial to every reward trajectory of one run and save the fits.

    Loads ``./data/{env_name}/Rewards_{timesteps}_1000_{seed}.npy``, transposes
    it, and treats each row of the transposed array as one trajectory. Every
    row is regressed (least squares, via ``LinearRegression``) on polynomial
    features of the warped time axis ``1 - base ** (-t / time_scale)`` with
    ``t = 0 .. len(row) - 1``. The fitted values are transposed back and written
    to ``./smoothed_data/{env_name}/Rewards_{timesteps}_1000_{seed}-smoothed.npy``.

    Args:
        env_name: Environment name; selects the input and output sub-folder.
        timesteps: Number embedded in the file names (not used numerically).
        degree: Degree passed to ``PolynomialFeatures``.
        base: Base of the exponential time warp.
        seed: Seed index embedded in the file names.
        time_scale: Divisor applied to the step index before warping. The
            default reproduces the previously hard-coded value.

    Returns:
        None. The result is only written to disk.
    """
    data = np.load("./data/{}/Rewards_{}_1000_{}.npy".format(env_name, timesteps, seed)).T

    # The warped axis depends only on the row length, which is the same for
    # every row of the array, so the feature matrix is built once instead of
    # once per trajectory.
    # Assumes the loaded array is at least 2-D (the per-row len() used
    # previously had the same requirement).
    num_steps = data.shape[1]
    x_range = 1 - np.power(base, -np.arange(num_steps) / time_scale)
    features = PolynomialFeatures(degree=degree).fit_transform(x_range[:, None])

    smoothed_data = []
    for traj_i in data:
        # A fresh model per trajectory: each row gets its own coefficients.
        model = LinearRegression()
        model.fit(features, traj_i)
        smoothed_data.append(model.predict(features))

    smoothed_data = np.array(smoothed_data)

    # Create the output folder here as well, so the function does not depend on
    # the caller having prepared it. exist_ok=True keeps this safe when several
    # workers run concurrently.
    os.makedirs("./smoothed_data/{}".format(env_name), exist_ok=True)
    np.save("./smoothed_data/{}/Rewards_{}_1000_{}-smoothed.npy".format(env_name, timesteps, seed), smoothed_data.T)

# One pass per environment; all seeds of an environment are smoothed
# concurrently, one joblib worker per seed.
for env_name, (timesteps, degree, base) in tqdm(env_names.items()):
    # Output folder for this environment (no-op when it already exists).
    os.makedirs("./smoothed_data/{}".format(env_name), exist_ok=True)

    seed_pool = Parallel(n_jobs=num_seeds)
    seed_pool(
        delayed(smooth_rewards)(
            env_name=env_name,
            timesteps=timesteps,
            degree=degree,
            base=base,
            seed=seed,
        )
        for seed in range(num_seeds)
    )

  0%|          | 0/4 [00:00<?, ?it/s]

  pid = os.fork()


In [None]:
# Deliberate stop: the always-false assert raises AssertionError, so a
# "Run All" halts here and the cell below only executes when run by hand.
# NOTE(review): presumably the cell below is an older moving-average approach
# kept for reference — confirm, or delete it.
assert 0

In [None]:
# Moving-average smoothing with a per-environment window, plus a preview
# figure of the first trajectories for every (seed, environment) pair.
#
# NOTE: this rebinds `env_names` with a different tuple layout,
# (timesteps, kernel_size), than the polynomial-smoothing cell above, which
# uses (timesteps, degree, base). Re-run that cell before calling its loop again.
env_names = {
    "swimmer-v3": (40000, 7500),
    "hopper-v3": (50000, 1000),
    "ant-v3": (50000, 1000),
    "cheetah-v3": (50000, 1000),
}
num_seeds = 10
for seed in range(num_seeds):
    for env_name, (timesteps, kernel_size) in tqdm(env_names.items()):
        data = np.load("./data/{}/Rewards_{}_1000_{}.npy".format(env_name, timesteps, seed)).T

        # Uniform (box) kernel: every output sample is a plain mean over the window.
        kernel = np.ones(kernel_size) / kernel_size
        # vmap maps the 1-D convolution over the rows of `data` while sharing the
        # kernel. mode="same" keeps each row's length, so samples near both ends
        # are averaged together with implicit zeros.
        smoothed_data = jax.vmap(partial(jax.numpy.convolve, mode="same"), in_axes=(0, None))(data, kernel)

        ncols = 4
        fig, axes = plt.subplots(nrows=5, ncols=ncols, figsize=(25, 15), layout="constrained")

        # Preview at most 20 rows (5 x 4 grid). The bound prevents an
        # out-of-range row index, which JAX would clamp rather than reject,
        # silently re-plotting the last row.
        for traj_i in range(min(20, smoothed_data.shape[0])):
            ax = axes[traj_i // ncols, traj_i % ncols]
            ax.plot(smoothed_data[traj_i])
        # Title the whole figure; plt.title would only label the current
        # (last-created) subplot.
        fig.suptitle("{} {}".format(env_name, seed))
        plt.show()
        # Release the figure explicitly; many are created inside this loop.
        plt.close(fig)

        np.save("./data/{}/Rewards_{}_1000_{}-smoothed.npy".format(env_name, timesteps, seed), smoothed_data.T)