In [None]:
from UDR.udr import train_test_ppo_with_udr
from PPO.ppo_test import train_and_test_policy

# --- Experiment configuration ---------------------------------------------
# Both training helpers below receive these as `episodes` / `timesteps`.
n_episodes = 20000
mean_timestep = 300
# Policies are trained on the source environment and tested on the target one.
target_env = "CustomHopper-target-v0"
source_env = "CustomHopper-source-v0"
# Root folder under which each run gets its own sub-folder.
base_output_folder = "./udr_output"

# optimized for PPO without UDR in source environment
optimized_clip_range = 0.19877024509129543
optimized_learning_rate = 0.0008
optimized_gamma = 0.992

# --- Baseline: PPO trained without UDR ------------------------------------
print("*** PPO without UDR ***")
train_and_test_policy(
    train_env=source_env,
    test_env=target_env,
    # base_output_folder already starts with "./"; prefixing it again
    # produced "././udr_output/no-udr/". The trailing slash is kept as-is.
    output_folder=f"{base_output_folder}/no-udr/",
    clip_range=optimized_clip_range,
    learning_rate=optimized_learning_rate,
    gamma=optimized_gamma,
    episodes=n_episodes,
    timesteps=mean_timestep,
    print_std_deviation=True
)

# UDR sweep: one PPO training/test run per delta value, reusing the
# hyperparameters that were optimized for the run without UDR.
deltas = [0.2, 0.5, 0.8]
for delta in deltas:
    print(f"\n*** PPO with UDR delta: {delta} ***")
    # Each delta writes into its own sub-folder of the shared output root.
    output_folder = f"{base_output_folder}/{delta}"

    train_test_ppo_with_udr(
        # where to train, where to test, where to store results
        train_env=source_env,
        test_env=target_env,
        output_folder=output_folder,
        # value being swept by this loop
        delta=delta,
        # PPO hyperparameters
        clip_range=optimized_clip_range,
        learning_rate=optimized_learning_rate,
        gamma=optimized_gamma,
        # run length and reporting
        episodes=n_episodes,
        timesteps=mean_timestep,
        print_std_deviation=True,
    )

*** PPO without UDR ***


In [None]:
import matplotlib.pyplot as plt
import numpy as np

def get_rewards(filename, window_size: int = 30):
    """Read per-line reward values from a text file and average them in windows.

    The file is expected to hold one float per line. Blank lines are ignored.
    Values are grouped into consecutive, non-overlapping windows of
    ``window_size`` entries; the last window may be shorter.

    Args:
        filename: Path of the text file to read.
        window_size: Number of consecutive values averaged per window.

    Returns:
        A ``(means, positions)`` tuple of equally long lists: the mean of each
        window and the index of the centre of that window within the data.

    Raises:
        ValueError: If a non-blank line cannot be parsed as a float, or if
            ``window_size`` is 0.
    """
    with open(filename, 'r') as f:
        # Skip blank lines (e.g. a stray empty line at the end of the file)
        # instead of failing on float('').
        data = [float(line) for line in f if line.strip()]

    means = []
    positions = []
    for start in range(0, len(data), window_size):
        window = data[start:start + window_size]
        means.append(np.mean(window))
        # Centre on the actual window length so that a shorter final window
        # is not positioned beyond the last value in the data.
        positions.append(start + len(window) // 2)

    return means, positions
    
# Folder holding the saved test-reward logs plotted below.
# NOTE(review): the training cell writes its results under "./udr_output",
# while this cell reads from "./trained-models/udr" — confirm the logs are
# expected to be copied there before plotting.
path_test_resources = './trained-models/udr'

# Number of episodes averaged per plotted point; used for both the call to
# get_rewards and the x-axis label so the two cannot drift apart.
reward_window_size = 30

# One curve per UDR delta value.
for delta in deltas:
    r, p = get_rewards(
        f"{path_test_resources}/{delta}/test_rewards_CustomHopper-target-v0.txt",
        window_size=reward_window_size,
    )
    plt.plot(p, r, label=f'Source->target test rewards with UDR(delta={delta})', linewidth=2)


# Reference curve: the policy trained without UDR.
s_t_test_rewards, st_positions = get_rewards(
    f'{path_test_resources}/no-udr/test_rewards_CustomHopper-target-v0.txt',
    window_size=reward_window_size,
)
plt.plot(st_positions, s_t_test_rewards, label='Source->target test rewards without UDR', linewidth=2)

plt.ylabel("Rewards")
# The label previously hard-coded 10 although the rewards were averaged over
# windows of 30 values.
plt.xlabel(f"Window ({reward_window_size}) episodes")
plt.title("PPO test performance using different UDR parameters")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()