# DDPG with reward shaping: 'PE' reward with p = 0.1 and e = 40

In [61]:
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from os import path
from scipy.integrate import solve_ivp

import gym_gyroscope_env
import spinup
import torch
from functools import partial

from custom_functions.custom_functions import env_fn 
from custom_functions.custom_functions import create_env
from custom_functions.custom_functions import load_agent
from custom_functions.custom_functions import test_agent
from custom_functions.custom_functions import plot_test
from custom_functions.custom_functions import evaluate_control
from custom_functions.custom_functions import read_progress

import pickle

## Baseline on virtual environment

In [2]:
# Baseline DDPG run on 'GyroscopeEnv-v1'; the logger writes to 'm0_005'.
env_name = 'GyroscopeEnv-v1'
simu_args = dict(dt=0.05, ep_len=100, seed=2)

# Reward function name and its weights, forwarded unchanged to env_fn.
reward_func = 'PE'
reward_args = dict(
    qx1=1,
    qx2=0.2,
    qx3=1,
    qx4=0.2,
    pu1=0.1,
    pu2=0.1,
    p=0.1,
    e=40,
)

# Zero-argument environment factory expected by spinup.
env_fn_ = partial(
    env_fn,
    env_name,
    simu_args=simu_args,
    reward_func=reward_func,
    reward_args=reward_args,
)

# NOTE(review): `save_file` is not part of upstream Spinning Up -- it is
# presumably what produces the ac.pkl / ac_targ.pkl / replay_buffer.pkl
# files loaded by later cells; confirm against the project's spinup version.
spinup.ddpg_pytorch(
    env_fn_,
    ac_kwargs={'hidden_sizes': [128, 32], 'activation': torch.nn.ReLU},
    seed=0,
    steps_per_epoch=1500,
    epochs=5000,
    replay_size=8000000,
    gamma=0.96,
    polyak=0.999,
    pi_lr=0.002,
    q_lr=0.002,
    batch_size=100,
    start_steps=15000,
    act_noise=0.1,
    max_ep_len=100,
    logger_kwargs={'output_dir': 'm0_005', 'exp_name': 'm0_005'},
    save_file=True,
)

# Apply m0_005 on real gyro

In [6]:
# Load the five pickled data batches recorded with agent m0 on the gyro.
# File names suggest episodes 1-50 in groups of ten -- TODO confirm.
# Each `with` block closes its file even if unpickling raises.
with open('gyro_data/m0_ep001-010_np.pkl', 'rb') as pkl_file:
    data1 = pickle.load(pkl_file)

with open('gyro_data/m0_ep011-020_np.pkl', 'rb') as pkl_file:
    data2 = pickle.load(pkl_file)

with open('gyro_data/m0_ep021-030_np.pkl', 'rb') as pkl_file:
    data3 = pickle.load(pkl_file)

with open('gyro_data/m0_ep031-040_np.pkl', 'rb') as pkl_file:
    data4 = pickle.load(pkl_file)

with open('gyro_data/m0_ep041-050_np.pkl', 'rb') as pkl_file:
    data5 = pickle.load(pkl_file)

In [7]:
# Join the five loaded batches end to end along the first axis (axis 0 is
# np.concatenate's default).
data = np.concatenate([data1, data2, data3, data4, data5])
# Last expression of the cell: its value is shown as the cell output.
data.shape

(750, 111, 9)

# Continue training m0_005 on virtual environment (GyroscopeRealEnv-v0 with the real gyro data) -> m1_005

In [8]:
# Continue training the agent saved in 'm0_005' on 'GyroscopeRealEnv-v0',
# which receives the `data` array built in the previous cell; the logger
# writes to 'm1_005'.  (The baseline run used 'GyroscopeEnv-v1' with
# dt / ep_len / seed simulation arguments instead of `data`.)
env_name = 'GyroscopeRealEnv-v0'

simu_args = {
    'ep_len': 100,
    'data': data
}
reward_func = 'PE'
reward_args = {
    'qx1': 1,
    'qx2': 0.2,
    'qx3': 1,
    'qx4': 0.2,
    'pu1': 0.1,
    'pu2': 0.1,
    'p': 0.1,
    'e': 40
}
env_fn_ = partial(env_fn, env_name, simu_args = simu_args, reward_func = reward_func, reward_args = reward_args)

# Restore the artifacts of the previous run.  The context managers close
# each file even if unpickling raises.
# NOTE(review): pickle can execute arbitrary code on load -- only load
# files produced by our own training runs.
with open('m0_005/ac.pkl', 'rb') as pkl_file:
    trained_ac = pickle.load(pkl_file)

with open('m0_005/ac_targ.pkl', 'rb') as pkl_file:
    trained_ac_targ = pickle.load(pkl_file)

with open('m0_005/replay_buffer.pkl', 'rb') as pkl_file:
    trained_bf = pickle.load(pkl_file)

# NOTE(review): `save_file`, `trained_ac`, `trained_ac_targ` and `trained_bf`
# are not part of upstream Spinning Up; presumably they make DDPG resume from
# the given actor-critic, target network and replay buffer -- confirm against
# the project's spinup version.  start_steps is 0 here (15000 in the baseline).
spinup.ddpg_pytorch(env_fn_,
                    ac_kwargs = dict(hidden_sizes=[128,32], activation=torch.nn.ReLU),
                    seed = 0,
                    steps_per_epoch = 1500,
                    epochs = 50,
                    replay_size = 8000000,
                    gamma = 0.96,
                    polyak = 0.999,
                    pi_lr = 0.002,
                    q_lr = 0.002,
                    batch_size = 100,
                    start_steps = 0,
                    act_noise = 0.1,
                    max_ep_len = 100,
                    logger_kwargs = dict(output_dir='m1_005', exp_name='m1_005'),
                    save_file = True,
                    trained_ac = trained_ac,
                    trained_ac_targ = trained_ac_targ,
                    trained_bf = trained_bf)

[32;1mLogging data to m1_005/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "activation":	"ReLU",
        "hidden_sizes":	[
            128,
            32
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_fn":	"functools.partial(<function env_fn at 0x7ff3dfaf5830>, 'GyroscopeRealEnv-v0', simu_args={'ep_len': 100, 'data': array([[[ 6.81725606e-01,  5.84947064e-01, -6.66017643e-01, ...,\n          1.54524916e+01,  1.52331500e-01, -8.49609000e-02],\n        [ 6.94291976e-01,  3.30520366e-01, -5.56061900e-01, ...,\n          1.53369343e+01,  9.92000000e-01, -3.07617200e-01],\n        [ 7.16283125e-01,  3.48582324e-01, -5.12079603e-01, ...,\n          1.52540596e+01,  9.92000000e-01, -9.27730000e-03],\n        ...,\n        [ 2.39232281e+00, -2.20878238e+00,  8.15243294e-01, ...,\n          1.56201115e+01,  9.92000000e-01, -1.00000000e+00],\n        [ 2.27451308e+00, -2.56167999e+00,  6.29889327e-01, ...,

In [65]:
# Read the logged progress of run 'm1_005' and average its column 1.
# NOTE(review): the column's meaning is defined by read_progress --
# presumably a per-epoch return statistic; confirm.
progress = read_progress('m1_005')
second_column = progress[:, 1]
np.mean(second_column)

-405.35034060000004

# Apply m1_005 on real gyro

In [21]:
# Load the five pickled data batches recorded with agent m1 on the gyro.
# File names suggest episodes 1-50 in groups of ten -- TODO confirm.
# Each `with` block closes its file even if unpickling raises.
with open('gyro_data/m1_ep001-010_np.pkl', 'rb') as pkl_file:
    data1 = pickle.load(pkl_file)

with open('gyro_data/m1_ep011-020_np.pkl', 'rb') as pkl_file:
    data2 = pickle.load(pkl_file)

with open('gyro_data/m1_ep021-030_np.pkl', 'rb') as pkl_file:
    data3 = pickle.load(pkl_file)

with open('gyro_data/m1_ep031-040_np.pkl', 'rb') as pkl_file:
    data4 = pickle.load(pkl_file)

with open('gyro_data/m1_ep041-050_np.pkl', 'rb') as pkl_file:
    data5 = pickle.load(pkl_file)

In [22]:
# Join the five loaded batches end to end along the first axis (axis 0 is
# np.concatenate's default).
data = np.concatenate([data1, data2, data3, data4, data5])
# Last expression of the cell: its value is shown as the cell output.
data.shape

(750, 111, 9)

# Continue training m1_005 on virtual environment (GyroscopeRealEnv-v0 with the real gyro data) -> m2_005

In [24]:
# Continue training the agent saved in 'm1_005' on 'GyroscopeRealEnv-v0',
# which receives the `data` array built in the previous cell; the logger
# writes to 'm2_005'.  (The baseline run used 'GyroscopeEnv-v1' with
# dt / ep_len / seed simulation arguments instead of `data`.)
env_name = 'GyroscopeRealEnv-v0'

simu_args = {
    'ep_len': 100,
    'data': data
}
reward_func = 'PE'
reward_args = {
    'qx1': 1,
    'qx2': 0.2,
    'qx3': 1,
    'qx4': 0.2,
    'pu1': 0.1,
    'pu2': 0.1,
    'p': 0.1,
    'e': 40
}
env_fn_ = partial(env_fn, env_name, simu_args = simu_args, reward_func = reward_func, reward_args = reward_args)

# Restore the artifacts of the previous run.  The context managers close
# each file even if unpickling raises.
# NOTE(review): pickle can execute arbitrary code on load -- only load
# files produced by our own training runs.
with open('m1_005/ac.pkl', 'rb') as pkl_file:
    trained_ac = pickle.load(pkl_file)

with open('m1_005/ac_targ.pkl', 'rb') as pkl_file:
    trained_ac_targ = pickle.load(pkl_file)

with open('m1_005/replay_buffer.pkl', 'rb') as pkl_file:
    trained_bf = pickle.load(pkl_file)

# NOTE(review): `save_file`, `trained_ac`, `trained_ac_targ` and `trained_bf`
# are not part of upstream Spinning Up; presumably they make DDPG resume from
# the given actor-critic, target network and replay buffer -- confirm against
# the project's spinup version.  start_steps is 0 here (15000 in the baseline).
spinup.ddpg_pytorch(env_fn_,
                    ac_kwargs = dict(hidden_sizes=[128,32], activation=torch.nn.ReLU),
                    seed = 0,
                    steps_per_epoch = 1500,
                    epochs = 50,
                    replay_size = 8000000,
                    gamma = 0.96,
                    polyak = 0.999,
                    pi_lr = 0.002,
                    q_lr = 0.002,
                    batch_size = 100,
                    start_steps = 0,
                    act_noise = 0.1,
                    max_ep_len = 100,
                    logger_kwargs = dict(output_dir='m2_005', exp_name='m2_005'),
                    save_file = True,
                    trained_ac = trained_ac,
                    trained_ac_targ = trained_ac_targ,
                    trained_bf = trained_bf)

[32;1mLogging data to m2_005/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "activation":	"ReLU",
        "hidden_sizes":	[
            128,
            32
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_fn":	"functools.partial(<function env_fn at 0x7ff3dfaf5830>, 'GyroscopeRealEnv-v0', simu_args={'ep_len': 100, 'data': array([[[-1.49225651e-01, -2.75454488e+00,  5.34070751e-02, ...,\n          2.63758726e+01,  1.38700000e-04,  0.00000000e+00],\n        [-1.72787596e-01, -1.12808801e+00, -2.65464579e-01, ...,\n          2.63187603e+01,  1.81252900e-01,  1.00000000e+00],\n        [-1.13097336e-01,  6.97375345e-01, -3.54999970e-01, ...,\n          2.62789337e+01,  9.92000000e-01,  1.00000000e+00],\n        ...,\n        [-6.53451272e-01, -1.90758920e+00,  1.48754412e+00, ...,\n          2.60885500e+01, -6.17641100e-01, -3.26660200e-01],\n        [-8.16814090e-01, -3.17507470e+00,  1.77499985e+00, ...,

In [66]:
# Read the logged progress of run 'm2_005' and average its column 1.
# NOTE(review): the column's meaning is defined by read_progress --
# presumably a per-epoch return statistic; confirm.
progress = read_progress('m2_005')
second_column = progress[:, 1]
np.mean(second_column)

-403.78617500000007

# Apply m2_005 on real gyro

In [38]:
# Load the five pickled data batches recorded with agent m2 on the gyro.
# File names suggest episodes 1-50 in groups of ten -- TODO confirm.
# Each `with` block closes its file even if unpickling raises.
with open('gyro_data/m2_ep001-010_np.pkl', 'rb') as pkl_file:
    data1 = pickle.load(pkl_file)

with open('gyro_data/m2_ep011-020_np.pkl', 'rb') as pkl_file:
    data2 = pickle.load(pkl_file)

with open('gyro_data/m2_ep021-030_np.pkl', 'rb') as pkl_file:
    data3 = pickle.load(pkl_file)

with open('gyro_data/m2_ep031-040_np.pkl', 'rb') as pkl_file:
    data4 = pickle.load(pkl_file)

with open('gyro_data/m2_ep041-050_np.pkl', 'rb') as pkl_file:
    data5 = pickle.load(pkl_file)

In [39]:
# Join the five loaded batches end to end along the first axis (axis 0 is
# np.concatenate's default).
data = np.concatenate([data1, data2, data3, data4, data5])
# Last expression of the cell: its value is shown as the cell output.
data.shape

(750, 111, 9)

# Continue training m2_005 on virtual environment (GyroscopeRealEnv-v0 with the real gyro data) -> m3_005

In [40]:
# Continue training the agent saved in 'm2_005' on 'GyroscopeRealEnv-v0',
# which receives the `data` array built in the previous cell; the logger
# writes to 'm3_005'.  (The baseline run used 'GyroscopeEnv-v1' with
# dt / ep_len / seed simulation arguments instead of `data`.)
env_name = 'GyroscopeRealEnv-v0'

simu_args = {
    'ep_len': 100,
    'data': data
}
reward_func = 'PE'
reward_args = {
    'qx1': 1,
    'qx2': 0.2,
    'qx3': 1,
    'qx4': 0.2,
    'pu1': 0.1,
    'pu2': 0.1,
    'p': 0.1,
    'e': 40
}
env_fn_ = partial(env_fn, env_name, simu_args = simu_args, reward_func = reward_func, reward_args = reward_args)

# Restore the artifacts of the previous run.  The context managers close
# each file even if unpickling raises.
# NOTE(review): pickle can execute arbitrary code on load -- only load
# files produced by our own training runs.
with open('m2_005/ac.pkl', 'rb') as pkl_file:
    trained_ac = pickle.load(pkl_file)

with open('m2_005/ac_targ.pkl', 'rb') as pkl_file:
    trained_ac_targ = pickle.load(pkl_file)

with open('m2_005/replay_buffer.pkl', 'rb') as pkl_file:
    trained_bf = pickle.load(pkl_file)

# NOTE(review): `save_file`, `trained_ac`, `trained_ac_targ` and `trained_bf`
# are not part of upstream Spinning Up; presumably they make DDPG resume from
# the given actor-critic, target network and replay buffer -- confirm against
# the project's spinup version.  start_steps is 0 here (15000 in the baseline).
spinup.ddpg_pytorch(env_fn_,
                    ac_kwargs = dict(hidden_sizes=[128,32], activation=torch.nn.ReLU),
                    seed = 0,
                    steps_per_epoch = 1500,
                    epochs = 50,
                    replay_size = 8000000,
                    gamma = 0.96,
                    polyak = 0.999,
                    pi_lr = 0.002,
                    q_lr = 0.002,
                    batch_size = 100,
                    start_steps = 0,
                    act_noise = 0.1,
                    max_ep_len = 100,
                    logger_kwargs = dict(output_dir='m3_005', exp_name='m3_005'),
                    save_file = True,
                    trained_ac = trained_ac,
                    trained_ac_targ = trained_ac_targ,
                    trained_bf = trained_bf)

[32;1mLogging data to m3_005/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "activation":	"ReLU",
        "hidden_sizes":	[
            128,
            32
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_fn":	"functools.partial(<function env_fn at 0x7ff3dfaf5830>, 'GyroscopeRealEnv-v0', simu_args={'ep_len': 100, 'data': array([[[-4.71238898e-03,  5.13363048e-01, -1.99176974e+00, ...,\n          2.06086650e+01,  3.07843300e-01,  4.03320300e-01],\n        [ 6.75442421e-02,  1.21217818e+00, -1.67761048e+00, ...,\n          2.09554855e+01,  4.93698700e-01,  1.00000000e+00],\n        [ 1.50796447e-01,  1.63770544e+00, -1.19851760e+00, ...,\n          2.13252274e+01, -7.25266600e-01, -6.00585900e-01],\n        ...,\n        [-1.88495559e-02, -2.03810918e+00, -1.44513262e-01, ...,\n          2.06791573e+01,  6.66278800e-01, -1.00000000e+00],\n        [-7.06858347e-02, -1.52157647e+00, -4.08407045e-01, ...,

In [67]:
# Read the logged progress of run 'm3_005' and average its column 1.
# NOTE(review): the column's meaning is defined by read_progress --
# presumably a per-epoch return statistic; confirm.
progress = read_progress('m3_005')
second_column = progress[:, 1]
np.mean(second_column)

-399.8493876

# Apply m3_005 on real gyro

In [51]:
# Load the five pickled data batches recorded with agent m3 on the gyro.
# File names suggest episodes 1-50 in groups of ten -- TODO confirm.
# Each `with` block closes its file even if unpickling raises.
with open('gyro_data/m3_ep001-010_np.pkl', 'rb') as pkl_file:
    data1 = pickle.load(pkl_file)

with open('gyro_data/m3_ep011-020_np.pkl', 'rb') as pkl_file:
    data2 = pickle.load(pkl_file)

with open('gyro_data/m3_ep021-030_np.pkl', 'rb') as pkl_file:
    data3 = pickle.load(pkl_file)

with open('gyro_data/m3_ep031-040_np.pkl', 'rb') as pkl_file:
    data4 = pickle.load(pkl_file)

with open('gyro_data/m3_ep041-050_np.pkl', 'rb') as pkl_file:
    data5 = pickle.load(pkl_file)

In [52]:
# Join the five loaded batches end to end along the first axis (axis 0 is
# np.concatenate's default).
data = np.concatenate([data1, data2, data3, data4, data5])
# Last expression of the cell: its value is shown as the cell output.
data.shape

(750, 111, 9)

# Continue training m3_005 on virtual environment (GyroscopeRealEnv-v0 with the real gyro data) -> m4_005

In [53]:
# Continue training the agent saved in 'm3_005' on 'GyroscopeRealEnv-v0',
# which receives the `data` array built in the previous cell; the logger
# writes to 'm4_005'.  (The baseline run used 'GyroscopeEnv-v1' with
# dt / ep_len / seed simulation arguments instead of `data`.)
env_name = 'GyroscopeRealEnv-v0'

simu_args = {
    'ep_len': 100,
    'data': data
}
reward_func = 'PE'
reward_args = {
    'qx1': 1,
    'qx2': 0.2,
    'qx3': 1,
    'qx4': 0.2,
    'pu1': 0.1,
    'pu2': 0.1,
    'p': 0.1,
    'e': 40
}
env_fn_ = partial(env_fn, env_name, simu_args = simu_args, reward_func = reward_func, reward_args = reward_args)

# Restore the artifacts of the previous run.  The context managers close
# each file even if unpickling raises.
# NOTE(review): pickle can execute arbitrary code on load -- only load
# files produced by our own training runs.
with open('m3_005/ac.pkl', 'rb') as pkl_file:
    trained_ac = pickle.load(pkl_file)

with open('m3_005/ac_targ.pkl', 'rb') as pkl_file:
    trained_ac_targ = pickle.load(pkl_file)

with open('m3_005/replay_buffer.pkl', 'rb') as pkl_file:
    trained_bf = pickle.load(pkl_file)

# NOTE(review): `save_file`, `trained_ac`, `trained_ac_targ` and `trained_bf`
# are not part of upstream Spinning Up; presumably they make DDPG resume from
# the given actor-critic, target network and replay buffer -- confirm against
# the project's spinup version.  start_steps is 0 here (15000 in the baseline).
spinup.ddpg_pytorch(env_fn_,
                    ac_kwargs = dict(hidden_sizes=[128,32], activation=torch.nn.ReLU),
                    seed = 0,
                    steps_per_epoch = 1500,
                    epochs = 50,
                    replay_size = 8000000,
                    gamma = 0.96,
                    polyak = 0.999,
                    pi_lr = 0.002,
                    q_lr = 0.002,
                    batch_size = 100,
                    start_steps = 0,
                    act_noise = 0.1,
                    max_ep_len = 100,
                    logger_kwargs = dict(output_dir='m4_005', exp_name='m4_005'),
                    save_file = True,
                    trained_ac = trained_ac,
                    trained_ac_targ = trained_ac_targ,
                    trained_bf = trained_bf)

[32;1mLogging data to m4_005/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "activation":	"ReLU",
        "hidden_sizes":	[
            128,
            32
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_fn":	"functools.partial(<function env_fn at 0x7ff3dfaf5830>, 'GyroscopeRealEnv-v0', simu_args={'ep_len': 100, 'data': array([[[ 1.39800873e-01,  1.60723294e+00,  1.97606178e+00, ...,\n         -1.88373755e+01, -6.42295400e-01,  1.00000000e+00],\n        [ 1.61792022e-01,  8.17166053e-01,  2.33577414e+00, ...,\n         -1.85318030e+01,  9.40320800e-01,  1.00000000e+00],\n        [ 1.22522113e-01, -3.98092254e-01,  2.86670330e+00, ...,\n         -1.81354707e+01, -1.23606900e-01,  5.05371100e-01],\n        ...,\n        [-2.91539798e+00, -1.11910651e-01, -1.32732290e+00, ...,\n         -1.86419491e+01,  3.33271000e-02, -1.00000000e+00],\n        [-2.89654843e+00,  9.97226331e-02, -1.13568574e+00, ...,

In [68]:
# Read the logged progress of run 'm4_005' and average its column 1.
# NOTE(review): the column's meaning is defined by read_progress --
# presumably a per-epoch return statistic; confirm.
progress = read_progress('m4_005')
second_column = progress[:, 1]
np.mean(second_column)

-399.66352020000005

# Apply m4_005 on real gyro

In [74]:
# Load the five pickled data batches recorded with agent m4 on the gyro.
# File names suggest episodes 1-50 in groups of ten -- TODO confirm.
# Each `with` block closes its file even if unpickling raises.
with open('gyro_data/m4_ep001-010_np.pkl', 'rb') as pkl_file:
    data1 = pickle.load(pkl_file)

with open('gyro_data/m4_ep011-020_np.pkl', 'rb') as pkl_file:
    data2 = pickle.load(pkl_file)

with open('gyro_data/m4_ep021-030_np.pkl', 'rb') as pkl_file:
    data3 = pickle.load(pkl_file)

with open('gyro_data/m4_ep031-040_np.pkl', 'rb') as pkl_file:
    data4 = pickle.load(pkl_file)

with open('gyro_data/m4_ep041-050_np.pkl', 'rb') as pkl_file:
    data5 = pickle.load(pkl_file)

In [75]:
# Join the five loaded batches end to end along the first axis (axis 0 is
# np.concatenate's default).
data = np.concatenate([data1, data2, data3, data4, data5])
# Last expression of the cell: its value is shown as the cell output.
data.shape

(750, 111, 9)

# Continue training m4_005 on virtual environment (GyroscopeRealEnv-v0 with the real gyro data) -> m5_005

In [76]:
# Continue training the agent saved in 'm4_005' on 'GyroscopeRealEnv-v0',
# which receives the `data` array built in the previous cell; the logger
# writes to 'm5_005'.  (The baseline run used 'GyroscopeEnv-v1' with
# dt / ep_len / seed simulation arguments instead of `data`.)
env_name = 'GyroscopeRealEnv-v0'

simu_args = {
    'ep_len': 100,
    'data': data
}
reward_func = 'PE'
reward_args = {
    'qx1': 1,
    'qx2': 0.2,
    'qx3': 1,
    'qx4': 0.2,
    'pu1': 0.1,
    'pu2': 0.1,
    'p': 0.1,
    'e': 40
}
env_fn_ = partial(env_fn, env_name, simu_args = simu_args, reward_func = reward_func, reward_args = reward_args)

# Restore the artifacts of the previous run.  The context managers close
# each file even if unpickling raises.
# NOTE(review): pickle can execute arbitrary code on load -- only load
# files produced by our own training runs.
with open('m4_005/ac.pkl', 'rb') as pkl_file:
    trained_ac = pickle.load(pkl_file)

with open('m4_005/ac_targ.pkl', 'rb') as pkl_file:
    trained_ac_targ = pickle.load(pkl_file)

with open('m4_005/replay_buffer.pkl', 'rb') as pkl_file:
    trained_bf = pickle.load(pkl_file)

# NOTE(review): `save_file`, `trained_ac`, `trained_ac_targ` and `trained_bf`
# are not part of upstream Spinning Up; presumably they make DDPG resume from
# the given actor-critic, target network and replay buffer -- confirm against
# the project's spinup version.  start_steps is 0 here (15000 in the baseline).
spinup.ddpg_pytorch(env_fn_,
                    ac_kwargs = dict(hidden_sizes=[128,32], activation=torch.nn.ReLU),
                    seed = 0,
                    steps_per_epoch = 1500,
                    epochs = 50,
                    replay_size = 8000000,
                    gamma = 0.96,
                    polyak = 0.999,
                    pi_lr = 0.002,
                    q_lr = 0.002,
                    batch_size = 100,
                    start_steps = 0,
                    act_noise = 0.1,
                    max_ep_len = 100,
                    logger_kwargs = dict(output_dir='m5_005', exp_name='m5_005'),
                    save_file = True,
                    trained_ac = trained_ac,
                    trained_ac_targ = trained_ac_targ,
                    trained_bf = trained_bf)

[32;1mLogging data to m5_005/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "activation":	"ReLU",
        "hidden_sizes":	[
            128,
            32
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_fn":	"functools.partial(<function env_fn at 0x7ff3dfaf5830>, 'GyroscopeRealEnv-v0', simu_args={'ep_len': 100, 'data': array([[[-1.77499985e-01, -9.38700136e-01, -2.65150420e+00, ...,\n          6.85860549e+00,  1.38700000e-04,  0.00000000e+00],\n        [-2.18340689e-01, -8.32815792e-01, -2.65464579e+00, ...,\n          6.86982621e+00, -9.50000000e-01,  1.00000000e+00],\n        [-2.76460154e-01, -1.01291450e+00, -2.48185820e+00, ...,\n          6.89572016e+00, -9.50000000e-01,  3.52050800e-01],\n        ...,\n        [ 2.52426970e+00, -6.33611067e-01, -2.70019889e+00, ...,\n          7.23305129e+00,  8.91960400e-01,  6.21582000e-01],\n        [ 2.54940244e+00,  2.16500344e-01, -2.43787590e+00, ...,

In [77]:
# Read the logged progress of run 'm5_005' and average its column 1.
# NOTE(review): the column's meaning is defined by read_progress --
# presumably a per-epoch return statistic; confirm.
progress = read_progress('m5_005')
second_column = progress[:, 1]
np.mean(second_column)

-397.1045966000001