In [1]:
from varyingsim.envs.push_box_circle import PushBoxCircle, sin_com_slow
from varyingsim.datasets.fov_dataset import SmoothFovDataset, SmoothFovDatasetGenerator
from varyingsim.util.learn import learn_every_k_single

from varyingsim.algos.BufferedModel import BufferedModel
from varyingsim.algos.BufferedOSI import BufferedOSI

from varyingsim.models.mujoco_dynamics import MuJoCoDynamics, MuJoCoDynamicsFlat
from varyingsim.models.osi import OSI


import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

import matplotlib.pyplot as plt

import pickle

In [2]:
# Rollout configuration handed to the dataset generator further down.
# NOTE(review): presumably N = number of trajectories and T = steps per
# trajectory -- confirm against SmoothFovDatasetGenerator.
N, T = 4, 2000

env = PushBoxCircle()

# Sizes derived from the simulator model:
#   d_obs = nq + nv  (presumably generalized positions + velocities -- confirm)
#   d_in  = d_obs + nu  (presumably observation concatenated with the action -- confirm)
d_obs = env.model.nq + env.model.nv
d_in = env.model.nu + d_obs

In [3]:
# Action held for the duration of the current episode; act_fn rebinds it.
cur_act = np.array([0.0, 0.0])


def act_fn(obs, i, t):
    """Return the action for step ``t``, resampling it at the start of an episode.

    When ``t == 0`` a fresh 2-element action is drawn and stored in the
    module-level ``cur_act``; for every other ``t`` the stored action is
    returned unchanged. ``obs`` and ``i`` are accepted but not used.

    The first component is uniform in [0.1, 0.8) and the second uniform in
    [0, 2*pi).
    NOTE(review): presumably (push magnitude, push direction in radians) --
    confirm against PushBoxCircle.
    """
    global cur_act  # TODO: is there a better way than module-level state?
    if t != 0:
        return cur_act

    # New episode: draw the two components in the same order as before so the
    # random stream is consumed identically.
    first = 0.1 + np.random.rand() * 0.7
    second = np.random.rand() * 2 * np.pi
    cur_act = np.array([first, second])
    return cur_act

# Pickle file the generator is pointed at; the same path is read back when the
# dataset is loaded.
# Alternative (Windows) location:
# location = "D:\\data\\varyingsim\\push_box_circle_small.pickle"
location = '/data/varyingsim/push_box_circle.pickle'

# Arguments are passed positionally because the generator's parameter names
# are not visible from this notebook.
gen = SmoothFovDatasetGenerator(
    location,
    PushBoxCircle,
    sin_com_slow,
    act_fn,
    N,
    T,
)

In [4]:
# Load the dataset stored at `location`.
# NOTE(review): the second argument (1) is presumably the context/history
# length -- the sample printed below has a single-row 'context_obs'; confirm
# against SmoothFovDataset.
dataset = SmoothFovDataset(location, 1)

In [5]:
# Display one sample to inspect the keys and arrays the dataset yields.
dataset[1]

{'context_obs': array([[0. , 0. , 0.1, 1. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
         0. , 0. , 0. , 0. ]]),
 'context_act': array([[0.10801523, 5.02092998]]),
 'obs': array([-9.80585197e-23, -1.53509041e-22,  9.99967587e-02,  1.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  2.60700971e-01,
        -1.90452613e-01, -2.89161407e-19, -4.52677547e-19, -4.90387740e-03,
         5.01305787e-18, -3.20224159e-18, -1.39973793e-34, -3.28008613e-02,
         1.02914494e-01]),
 'act': array([0.10801523, 5.02092998]),
 'fov': array([4.18879017e-05, 1.50000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00]),
 'obs_prime': array([-3.89022412e-22,  2.34378668e-11,  9.99919410e-02,  1.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  2.60668170e-01,
        -1.90349698e-01, -2.93263601e-19,  4.62093750e-08, -4.73260981e-03,
        -5.09095718e-07, -3.16005771e-18,  3.19844162e-20, -3.28008613e-02,
         1.02914494e-01]),


In [6]:
def run_buff_model(buffer_size=1, learn_freq=1, lr=1e-3, d_hidden=64):
    """Train a buffered dynamics model on ``dataset`` and return the run artefacts.

    Relies on the notebook globals ``env``, ``d_in``, ``d_obs`` and ``dataset``.

    Args:
        buffer_size: forwarded to ``BufferedModel`` as its first argument.
        learn_freq: forwarded to ``learn_every_k_single`` as its third argument.
        lr: learning rate for the SGD optimiser.
        d_hidden: width of the two hidden layers of the MLP.

    Returns:
        dict with keys
        ``'algo'`` (the ``BufferedModel`` instance),
        ``'mujoco_model'`` (the raw ``nn.Sequential`` network) and
        ``'info'`` (whatever ``learn_every_k_single`` returned; later cells
        read ``'train_losses'`` and ``'test_losses'`` from it).
    """
    # Three-layer MLP mapping a d_in-sized input to a d_obs-sized output.
    model = nn.Sequential(
        nn.Linear(d_in, d_hidden),
        nn.ReLU(),
        nn.Linear(d_hidden, d_hidden),
        nn.ReLU(),
        nn.Linear(d_hidden, d_obs)
    )

    mujoco_model = MuJoCoDynamics(env, model)
    optim = torch.optim.SGD(mujoco_model.parameters(), lr=lr)

    algo = BufferedModel(buffer_size, env, mujoco_model, optim, mujoco_model.delta_layer.mse_loss)

    info = learn_every_k_single(algo, dataset, learn_freq, print_iter=1500)

    # The original literal listed 'algo' twice; the duplicate is dropped (the
    # resulting dict is unchanged).
    # NOTE(review): 'mujoco_model' holds the bare network, not the
    # MuJoCoDynamics wrapper -- kept as-is for compatibility; confirm intent.
    return {'algo': algo, 'mujoco_model': model, 'info': info}

In [7]:
def run_buff_osi(buffer_size=1, learn_freq=1, lr=1e-3, h=32, d_hidden_dynamics=64,
                 d_latent=4, d_hidden_shared=32, d_hidden_osi=64):
    """Train a buffered OSI + dynamics model on ``dataset`` and return the run artefacts.

    Relies on the notebook globals ``env``, ``d_in``, ``d_obs`` and ``dataset``.

    Args:
        buffer_size: forwarded to ``BufferedOSI`` as its first argument.
        learn_freq: forwarded to ``learn_every_k_single`` as its third argument.
        lr: learning rate for the SGD optimiser.
        h: passed both to ``OSI`` (first argument) and to ``BufferedOSI``
            (last argument).
        d_hidden_dynamics: width of the two hidden layers of the dynamics MLP.
        d_latent, d_hidden_shared, d_hidden_osi: forwarded to ``OSI``.

    Returns:
        dict with keys
        ``'algo'`` (the ``BufferedOSI`` instance),
        ``'mujoco_model'`` (the raw ``nn.Sequential`` dynamics network) and
        ``'info'`` (whatever ``learn_every_k_single`` returned; later cells
        read ``'train_losses'`` and ``'test_losses'`` from it).

    NOTE(review): the saved output of the buffer-size sweep cell, which calls
    this function, records ``TypeError: forward() takes 2 positional arguments
    but 4 were given``. The cause is not visible from this notebook -- check
    how ``BufferedOSI`` / ``MuJoCoDynamicsFlat`` invoke the wrapped modules.
    """
    # Three-layer MLP mapping a d_in-sized input to a d_obs-sized output.
    model = nn.Sequential(
        nn.Linear(d_in, d_hidden_dynamics),
        nn.ReLU(),
        nn.Linear(d_hidden_dynamics, d_hidden_dynamics),
        nn.ReLU(),
        nn.Linear(d_hidden_dynamics, d_obs)
    )

    mujoco_model = MuJoCoDynamicsFlat(env, model)
    # NOTE(review): only the dynamics model's parameters are handed to the
    # optimiser; osi_model's parameters are not included here. Confirm whether
    # BufferedOSI optimises them separately.
    optim = torch.optim.SGD(mujoco_model.parameters(), lr=lr)

    osi_model = OSI(h, d_in, d_latent, d_hidden_shared, d_hidden_osi)

    algo = BufferedOSI(buffer_size, env, osi_model, mujoco_model, optim, h)

    info = learn_every_k_single(algo, dataset, learn_freq, print_iter=1500)

    # The original literal listed 'algo' twice; the duplicate is dropped (the
    # resulting dict is unchanged).
    # NOTE(review): 'mujoco_model' holds the bare network, not the
    # MuJoCoDynamicsFlat wrapper -- kept as-is for compatibility; confirm intent.
    return {'algo': algo, 'mujoco_model': model, 'info': info}


In [8]:
# Buffer-size sweep: run the selected experiment once per buffer size and keep
# each result dict, tagged with the buffer size that produced it.
# TODO: vary learn_freq together with the buffer size?
run = run_buff_osi
infos = []
for buffer_size in (1, 2000):
    run_result = run(buffer_size=buffer_size)
    # Tag the result so later cells can label curves by buffer size.
    run_result['buffer_size'] = buffer_size
    infos.append(run_result)

TypeError: forward() takes 2 positional arguments but 4 were given

In [None]:
# Persist the collected run results next to the notebook.
with open('infos_small.pickle', 'wb') as out_file:
    pickle.dump(infos, out_file)

In [None]:
# Pull the per-run loss curves and their buffer sizes out of `infos`,
# keeping the three lists aligned by run order.
all_trains = [run_info['info']['train_losses'] for run_info in infos]
all_tests = [run_info['info']['test_losses'] for run_info in infos]
buf_sizes = [run_info['buffer_size'] for run_info in infos]

In [None]:
%matplotlib inline
start_idx = 2050
xs = np.arange(start_idx, len(all_trains[0]))

plt.title('train loss')
for i, train_loss in enumerate(all_trains):
    plt.plot(xs, train_loss[start_idx:], label=buf_sizes[i])
    plt.legend()
    print(buf_sizes[i], 'mean',np.mean(train_loss[start_idx:]))
    print(buf_sizes[i], 'max',np.max(train_loss[start_idx:]))

plt.show()
    
plt.title('test loss')
for i, test_loss in enumerate(all_tests):
    plt.plot(xs, test_loss[start_idx:], label=buf_sizes[i])
    plt.legend()
    print(buf_sizes[i], 'mean', np.mean(test_loss[start_idx:]))
    print(buf_sizes[i], 'max',np.max(test_loss[start_idx:]))
plt.show()

