In [1]:


from varyingsim.box import BoxEnv
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from copy import deepcopy
%matplotlib widget


In [2]:
class Buffer:
    def __init__(self, buf_size, buf_shapes):
        self.buf_size = buf_size
        self.cur_buf_size = 0
        self.cur_buf_idx = 0

        self.buffers = []
        for shape in buf_shapes:
            self.buffers.append(np.zeros((buf_size, *shape)))

    def add_to_buffer(self, data):
        for datum, buffer in zip(data, self.buffers):
            buffer[self.cur_buf_idx] = datum

        if self.cur_buf_size < self.buf_size:
            self.cur_buf_size += 1
        self.cur_buf_idx = (self.cur_buf_idx + 1) % self.buf_size

    def get_buffers(self):
        return self.buffers

    def _get_batch_range(self, start, stop, batch_size):
        rand_idxs = np.random.choice(np.arange(start, stop), batch_size)
        ret = []
        for buffer in self.buffers:
            ret.append(buffer[rand_idxs])
        return ret
    
    def get_batch(self, batch_size):
        return self._get_batch_range(0, self.cur_buf_size, batch_size)
    
    def __len__(self):
        return self.cur_buf_size
    
    def reset(self):
        self.cur_buf_size = 0
        self.cur_buf_idx = 0

class ContinualSplitBuffer:
    def __init__(self, buf_size, buf_shapes, train_val_ratio):
        self.train_val_ratio = train_val_ratio
        train_size = int(buf_size * train_val_ratio)
        self.train_buf = Buffer(train_size , buf_shapes)
        self.val_buf = Buffer(buf_size - train_size, buf_shapes)
        
    @property
    def val_idx(self):
        return int(self.cur_buf_size * self.train_val_ratio)
    
    
    def add_to_buffer(self, data):
        if np.random.rand() < self.train_val_ratio:
            self.train_buf.add_to_buffer(data)
        else:
            self.val_buf.add_to_buffer(data)
    
    def get_train_batch(self, batch_size):
        return self.train_buf.get_batch(batch_size)
    
    def get_val_batch(self, batch_size):
        return self.val_buf.get_batch(batch_size)
    
    def __len__(self):
        return len(self.val_buf) + len(self.train_buf)
    
    def reset(self):
        self.val_buf.reset()
        self.train_buf.reset()
    
#     def get_train_batch(self, batch_size):
#         return self._get_batch_range(0, self.val_idx, batch_size)
    
#     def get_val_batch(self, batch_size):
#         return self._get_batch_range(self.val_idx, self.cur_buf_size, batch_size)
    
    
class Learn():

    def __init__(self, model, optim, loss_fn):
        self.model = model
        self.optim = optim
        self.loss_fn = loss_fn
        self.losses = []
        self.frictions = []
    
    def learn(self, batch, include_friction):
        xy = torch.from_numpy(batch[0]).float()
        xy_prev = torch.from_numpy(batch[1]).float()
        xy_vel = torch.from_numpy(batch[2]).float()
        xy_vel_prev = torch.from_numpy(batch[3]).float()
        act = torch.from_numpy(batch[4]).float()
        friction = torch.from_numpy(batch[5]).float()

        pos_prev = torch.cat([xy_prev, xy_vel_prev], dim=1)
        features = torch.cat([xy_prev, xy_vel_prev, act] + ([friction] if include_friction else []) , dim=1)
        labels = torch.cat([xy, xy_vel], dim=1)

        self.optim.zero_grad()
        y_hat = pos_prev + self.model(features)
        loss = self.loss_fn(y_hat, labels)
        loss.backward()
        self.optim.step()
        return loss.item()

def process(buffer, learner, env, prev_obs, obs, a, t, include_friction):
    xy_prev = prev_obs[0:2]
    xy = obs[0:2]
    xy_vel_prev = prev_obs[3:5]
    xy_vel = obs[3:5]
    friction = env.get_floor_friction()
    learner.frictions.append(friction)


    buffer.add_to_buffer([xy, xy_prev, xy_vel, xy_vel_prev, a] + ([friction] if include_friction else []))
    learn_freq = 16
    batch_size = 64

    if t % learn_freq == 0:
        batch = buffer.get_batch(batch_size)
        loss = learner.learn(batch, include_friction)
        learner.losses.append(loss)


In [3]:

def set_mass_step(env, t):
    if t // 2000 % 2 == 0:
        env.set_mass(8.0)
    else:
        env.set_mass(1.0)
        
def set_mass_step(env, t):
    env.set_mass(8.0)

def set_friction_sin(env, t, scale=213):
    env.set_floor_friction(np.sin(t / scale) * 0.15 + 1.15)
    
def set_friction_step(env, t):
    if t // 123 % 2 == 0:
        env.set_floor_friction(1.0)
    else:
        env.set_floor_friction(1.3)

def set_friction_rand(env, t):
    fric_range = [0.8, 1.4]
    fric_change_prob = 0.05
    if np.random.rand() < fric_change_prob:
        env.set_floor_friction(np.random.uniform(fric_range[0], fric_range[1]))

def set_nothing(env, t):
    pass

In [4]:
param_fn = set_friction_sin

In [5]:
shapes = [(2,)] * 5 + [(1,)]
T = 100000

In [6]:
def new_model(d_in, d_out, d_hidden, nonlin = nn.ReLU):
    return nn.Sequential(
    nn.Linear(d_in, d_hidden),
    nonlin(),
    nn.Linear(d_hidden, d_hidden),
    nonlin(),
    nn.Linear(d_hidden, d_hidden),
    nonlin(),
    nn.Linear(d_hidden, d_out))

In [7]:
def run_exp(set_param_fn, buf_size, include_friction):
    
    d_in = 6 + include_friction
    d_out = 4
    d_hidden = 256
    lr = 1e-4
    nonlin = nn.ReLU
    
    buffer = Buffer(buf_size, shapes)
    env = BoxEnv(set_param_fn=set_param_fn)

    model = new_model(d_in, d_out, d_hidden, nonlin)

    optim = torch.optim.Adam(model.parameters(), lr=lr)

    learner = Learn(model, optim, F.mse_loss)

    obs = env.reset()
    t = 0
    scale = 100
    
    qpos = []
    qvel = []
    actions = []
    frictions = []
    
    while t < T:
        a = [np.sin(t / scale), np.cos(t / scale)]
        prev_obs = obs
        qpos.append(env.sim.data.qpos.copy())
        qvel.append(env.sim.data.qvel.copy())
        actions.append(a)
        obs, rew, done, info = env.step(a)
        frictions.append(env.get_floor_friction())

        process(buffer, learner, env, prev_obs, obs, a, t, include_friction)
        t += 1
    
    return learner, buffer, {'qpos': qpos, 'qvel': qvel, 'actions': actions, 'frictions': frictions}

def generate_data(set_param_fn, T_start, H):
    env = BoxEnv(set_param_fn=set_param_fn)

    obs = env.reset()
    t = 0
    scale = 100
    
    qpos = []
    qvel = []
    actions = []
    frictions = []
    
    while t < T_start + H:
        a = [np.sin(t / scale), np.cos(t / scale)]
        
        if t >= T_start:
            prev_obs = obs
            qpos.append(env.sim.data.qpos.copy())
            qvel.append(env.sim.data.qvel.copy())
            actions.append(a)
            obs, rew, done, info = env.step(a)
            frictions.append(env.get_floor_friction())
        t += 1
    
    return {'qpos': qpos, 'qvel': qvel, 'actions': actions, 'frictions': frictions}

def fill_buffer(set_param_fn, buf_size, include_friction):
    env = BoxEnv(set_param_fn=set_param_fn)
    buffer = ContinualSplitBuffer(buf_size, shapes, 0.9)
    obs = env.reset()
    t = 0
    scale = 100
    
    while t < buf_size:
        a = [np.sin(t / scale), np.cos(t / scale)]
        
        prev_obs = obs
        obs, rew, done, info = env.step(a)

        xy_prev = prev_obs[0:2]
        xy = obs[0:2]
        xy_vel_prev = prev_obs[3:5]
        xy_vel = obs[3:5]
        friction = env.get_floor_friction()
        buffer.add_to_buffer([xy, xy_prev, xy_vel, xy_vel_prev, a] + ([friction] if include_friction else []))
            
        t += 1
    
    return buffer

In [8]:
# large_buff_losses = learner.losses
large_include_learner, learner_buffer, states = run_exp(param_fn, T, True)
large_buff_losses = large_include_learner.losses

In [9]:
# small_buff_losses = learner.losses
small_include_learner, _, _ = run_exp(param_fn, 256, True)
small_buff_losses = small_include_learner.losses

In [10]:
# large_no_include = learner.losses
large_no_include_learner, _, _ = run_exp(param_fn, T, False)
large_no_include = large_no_include_learner.losses

In [11]:
small_no_include_learner, _, _ = run_exp(param_fn, 256, False)
small_no_include = small_no_include_learner.losses

In [12]:
%matplotlib widget
start_iter = 25
xs = np.arange(len(small_buff_losses))[start_iter:] * 16

frics_scaled = np.array(small_no_include_learner.frictions[::16][start_iter:]) / 1000

plt.plot(xs, small_buff_losses[start_iter:], label='small buff')
plt.plot(xs, large_buff_losses[start_iter:], label='large buff')
plt.plot(xs, large_no_include[start_iter:], label='large buff no fric')
# plt.plot(xs, small_no_include[start_iter:], label='small buff no fric')
plt.plot(xs, frics_scaled, label='friction')

plt.legend()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Train i.i.d. model on the data we've seen

In [13]:
include_friction = True
buffer = learner_buffer

def train_iid_model(include_friction):
    d_in = 6 + include_friction
    d_out = 4
    d_hidden = 256
    lr = 1e-3
    model = new_model(d_in, d_out, d_hidden)
    lr = 1e-3
    optim = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0.0)
    learner = Learn(model, optim, F.mse_loss)
    batch_size = 64
    num_epochs = 1
    
    sl_losses = []
    for i in range(0, T * num_epochs, 16):
        batch = buffer.get_batch(batch_size)
        loss = learner.learn(batch, include_friction)
    #     print(i, loss)
        sl_losses.append(loss)
    return model, sl_losses

model_iid_no_fric, no_fric_losses = train_iid_model(False)
model_iid_fric, fric_losses = train_iid_model(True)

In [14]:
%matplotlib widget
start_iter = 500
xs = np.arange(len(small_buff_losses))[start_iter:] * 16
frics_scaled = np.array(small_no_include_learner.frictions[::16][start_iter:]) / 1000
sl_losses = fric_losses if include_friction else no_fric_losses
xs_sl = np.arange(len(sl_losses))[start_iter:] * 16

# plt.plot(xs, small_buff_losses[start_iter:], label='small buff')
plt.plot(xs_sl, sl_losses[start_iter:], label='SL')
# plt.plot(xs, large_no_include[start_iter:], label='large buff no fric')
plt.plot(xs, small_no_include[start_iter:], label='small buff no fric')
plt.plot(xs, frics_scaled, label='friction')

plt.legend()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [15]:
len(large_no_include_learner.frictions)

100000

In [16]:
def state_to_batch(qpos, qvel, act, friction=None):
    xy = qpos[0:2]
    xy_vel = qvel[0:2]
    if friction:
        return np.concatenate([xy, xy_vel, act, friction])
    else:
        return np.concatenate([xy, xy_vel, act])
    
def state_to_xy_vel(qpos, qvel):
    xy = qpos[0:2]
    xy_vel = qvel[0:2]
    return np.concatenate([xy, xy_vel])

def obs_to_xy_vel(obs):
    xy = obs[0:2]
    xy_vel = obs[3:5]
    return xy, xy_vel

def torch_to_xy_vel(output):
    xy = output[0:2]
    xy_vel = output[2:4]
    return xy, xy_vel

In [17]:
def rollout_model_single(model, include_friction, qpos, qvel, actions, frictions):
    idx = 0
    def set_param_fn(env, t):
        env.set_floor_friction(frictions[idx])
    env = BoxEnv(set_param_fn=set_param_fn)
    
    sim_states_prime = []
    model_states_prime = []
    for fric, act in zip(frictions, actions):
        env.set_state(qpos[idx], qvel[idx])
        obs, rew, done, info = env.step(actions[idx])
        sim_states_prime.append(obs_to_xy_vel(obs))
        
        model_state = state_to_batch(qpos[idx], qvel[idx], act, friction=[fric] if include_friction else None)

        model_torch = torch.from_numpy(model_state).float()
        offset = model(model_torch)
        model_pred = state_to_xy_vel(qpos[idx], qvel[idx]) + offset.detach().numpy()
        model_states_prime.append(torch_to_xy_vel(model_pred))
        
        idx += 1
    return sim_states_prime, model_states_prime

def rollout_model(model, include_friction, init_qpos, init_qvel, actions, frictions):
    idx = 0
    def set_param_fn(env, t):
        env.set_floor_friction(frictions[idx])
    env = BoxEnv(set_param_fn=set_param_fn)
    env.set_state(init_qpos, init_qvel)

    sim_states_prime = []
    
    for fric, act in zip(frictions, actions):
        obs, rew, done, info = env.step(actions[idx])
        sim_states_prime.append(obs_to_xy_vel(obs))
        idx += 1
        
    model_states_prime = []
    
    model_state = torch.cat([torch.from_numpy(init_qpos)[:2], torch.from_numpy(init_qvel)[:2]]).float()
    for fric, act in zip(frictions, actions):
        if not include_friction:
            model_input = torch.cat([model_state, torch.tensor(act).float()])
        else:
            model_input = torch.cat([model_state, torch.tensor(act).float(), torch.tensor([fric]).float()])

        offset = model(model_input)
        model_pred = model_state[:4] + offset
        xy, xy_vel = torch_to_xy_vel(model_pred)
        model_states_prime.append((xy.detach().numpy(), xy_vel.detach().numpy()))
        model_state = model_pred.detach().clone()
        
    return sim_states_prime, model_states_prime

def vis_rollouts2(rollout1, rollout2, vscale=1000, c1='r', c2='b'):
    plt.scatter(rollout1[:,0, 0], rollout1[:,0, 1], marker='.', color=c1)
    plt.scatter(rollout2[:,0, 0], rollout2[:,0, 1], marker='.', color=c2)
    
    for x, y, vx, vy in zip(rollout1[:,0, 0], rollout1[:,0, 1], rollout1[:,1, 0], rollout1[:,1, 1]):
        plt.arrow(x, y, vx / vscale, vy / vscale, width=1e-4, color=c1)
        
    for x, y, vx, vy in zip(rollout2[:,0, 0], rollout2[:,0, 1], rollout2[:,1, 0], rollout2[:,1, 1]):
        plt.arrow(x, y, vx / vscale, vy / vscale, width=1e-4, color=c2)
    
    plt.show()
    
def vis_rollouts(rollout1, rollout2, rollout3, vscale=1000, c1='r', c2='b', c3='g', labels=['traj1', 'traj2', 'traj3']):
    plt.scatter(rollout1[:,0, 0], rollout1[:,0, 1], marker='.', color=c1, label=labels[0])
    plt.scatter(rollout2[:,0, 0], rollout2[:,0, 1], marker='.', color=c2, label=labels[1])
    plt.scatter(rollout3[:,0, 0], rollout3[:,0, 1], marker='.', color=c3, label=labels[2])
    
    for x, y, vx, vy in zip(rollout1[:,0, 0], rollout1[:,0, 1], rollout1[:,1, 0], rollout1[:,1, 1]):
        plt.arrow(x, y, vx / vscale, vy / vscale, width=1e-4, color=c1)
        
    for x, y, vx, vy in zip(rollout2[:,0, 0], rollout2[:,0, 1], rollout2[:,1, 0], rollout2[:,1, 1]):
        plt.arrow(x, y, vx / vscale, vy / vscale, width=1e-4, color=c2)
    
    for x, y, vx, vy in zip(rollout3[:,0, 0], rollout3[:,0, 1], rollout3[:,1, 0], rollout3[:,1, 1]):
        plt.arrow(x, y, vx / vscale, vy / vscale, width=1e-4, color=c3)
    plt.legend()
    plt.show()

In [18]:
sim_states_prime, model_states_prime = rollout_model_single(small_no_include_learner.model, False, states['qpos'], states['qvel'], states['actions'], states['frictions'])
seq_ssp = np.array(sim_states_prime)
seq_msp = np.array(model_states_prime)

In [19]:
sim_states_prime, model_states_prime = rollout_model_single(model_iid_no_fric, False, states['qpos'], states['qvel'], states['actions'], states['frictions'])
iid_ssp = np.array(sim_states_prime)
iid_msp = np.array(model_states_prime)

In [20]:
start_idx = 800
H = 2000
%matplotlib widget
vis_rollouts(seq_ssp[start_idx:start_idx+H], seq_msp[start_idx:start_idx+H], iid_msp[start_idx:start_idx+H], labels=['sim', 'seq', 'iid'])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [21]:
test_T_start = T
test_H = 3000

# learner = small_no_include_learner
# learner = large_no_include_learner
learner = large_include_learner

test_data = generate_data(param_fn, test_T_start, test_H)
sim_states_prime, model_states_prime = rollout_model_single(learner.model, True, test_data['qpos'], test_data['qvel'], test_data['actions'], test_data['frictions'])
seq_ssp = np.array(sim_states_prime)
seq_msp = np.array(model_states_prime)
_, model_states_prime = rollout_model_single(model_iid_fric, True, test_data['qpos'], test_data['qvel'], test_data['actions'], test_data['frictions'])
iid_msp = np.array(model_states_prime)
model_iid_fric
seq_x_err = np.average(np.sqrt(np.sum((seq_ssp[:, 0] - seq_msp[:, 0]) **2)))
iid_x_err = np.average(np.sqrt(np.sum((seq_ssp[:, 0] - iid_msp[:, 0]) **2)))
print(seq_x_err, iid_x_err)
# 0.3001861633402057 0.08439425154122533

0.14304274885636736 0.10107963924621712


In [22]:
start_idx = 0
H = test_H
%matplotlib widget
vis_rollouts(seq_ssp[start_idx:start_idx+H], seq_msp[start_idx:start_idx+H], iid_msp[start_idx:start_idx+H], labels=['sim', 'seq', 'iid'])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [23]:
test_T_start = T + 23544
test_H = 2000

# learner = small_no_include_learner
# learner = large_no_include_learner
learner = large_include_learner

trunc_idx = 500

test_data = generate_data(param_fn, test_T_start, test_H)
sim_states_prime, model_states_prime = rollout_model_single(model_iid_no_fric, False, test_data['qpos'], test_data['qvel'], test_data['actions'], test_data['frictions'])
seq_ssp = np.array(sim_states_prime)[trunc_idx:]
no_fric_msp = np.array(model_states_prime)[trunc_idx:]
_, model_states_prime = rollout_model_single(model_iid_fric, True, test_data['qpos'], test_data['qvel'], test_data['actions'], test_data['frictions'])
fric_msp = np.array(model_states_prime)[trunc_idx:]

no_fric_x_err = np.average(np.sqrt(np.sum((seq_ssp[:, 0] - no_fric_msp[:, 0]) **2)))
fric_x_err = np.average(np.sqrt(np.sum((seq_ssp[:, 0] - fric_msp[:, 0]) **2)))
print(no_fric_x_err, fric_x_err)


0.06977142196086375 0.04536650428627988


In [24]:
start_idx = 0
H = test_H
%matplotlib widget
vis_rollouts(seq_ssp[start_idx:start_idx+H], no_fric_msp[start_idx:start_idx+H], fric_msp[start_idx:start_idx+H], labels=['sim', 'no fric', 'fric'])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### iid with validation

In [25]:
N = 100000
buffer = fill_buffer(param_fn, N, True)

def train_iid_model_param(buffer, include_friction, d_hidden=256, lr=1e-3, weight_decay=1e-6, batch_size=64, num_iters=10000, valid_interval=2000):
    d_in = 6 + include_friction
    d_out = 4
    model = new_model(d_in, d_out, d_hidden)
    optim = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    learner = Learn(model, optim, F.mse_loss)
    num_epochs = 1
    
    sl_losses = []
    valid_models = []
    valid_losses = []
    for i in range(0, num_iters):
        batch = buffer.get_train_batch(batch_size)
        loss = learner.learn(batch, include_friction)
        
        if i % valid_interval == 0:
            valid_batch = buffer.get_val_batch(buffer.val_buf.cur_buf_size)
            xy = torch.from_numpy(valid_batch[0]).float()
            xy_prev = torch.from_numpy(valid_batch[1]).float()
            xy_vel = torch.from_numpy(valid_batch[2]).float()
            xy_vel_prev = torch.from_numpy(valid_batch[3]).float()
            act = torch.from_numpy(valid_batch[4]).float()
            friction = torch.from_numpy(valid_batch[5]).float()

            pos_prev = torch.cat([xy_prev, xy_vel_prev], dim=1)
            features = torch.cat([xy_prev, xy_vel_prev, act] + ([friction] if include_friction else []) , dim=1)
            labels = torch.cat([xy, xy_vel], dim=1)

            y_hat = pos_prev + model(features)
            valid_loss = F.mse_loss(y_hat, labels)
            valid_losses.append(valid_loss.item())
            valid_models.append(deepcopy(model))
            print(i, loss, valid_loss.item())
            
        sl_losses.append(loss)
    return model, sl_losses, valid_losses, valid_models

def train_many_params(buffer, include_friction, params):
    best_models = []
    models = []
    vals = []
    infos = []
    for param in params:
        print(param)
        model, sl_losses, valid_losses, valid_models = train_iid_model_param(buffer, include_friction, **param)
        best_val_idx = np.argmin(valid_losses)
        best_model = valid_models[best_val_idx]
        best_models.append(best_model)
        models.append(model)
        vals.append(valid_losses[best_val_idx])
        info = deepcopy(param)
        info['stop_idx'] = best_val_idx * info['valid_interval']
        infos.append(info)
    return models, best_models, vals, infos

base_params = dict(d_hidden=256, lr=1e-4, weight_decay=0.0, batch_size=64, num_iters=60000, valid_interval=5000)

def run_cv(buffer, include_friction):
    params = [base_params]
    # lr
    lr_1 = deepcopy(base_params)
    lr_1['lr'] = 1e-2
    params.append(lr_1)
    lr_2 = deepcopy(base_params)
    lr_2['lr'] = 1e-4
    params.append(lr_2)
    # wd
    wd_1 = deepcopy(base_params)
    wd_1['weight_decay'] = 1e-6
    params.append(wd_1)
    wd_2 = deepcopy(base_params)
    wd_2['weight_decay'] = 1e-3
    params.append(wd_2)
    # batch size
    bs_1 = deepcopy(base_params)
    bs_1['batch_size'] = 32
    params.append(bs_1)
    bs_2 = deepcopy(base_params)
    bs_2['batch_size'] = 128
    params.append(bs_2)
    
    return train_many_params(buffer, include_friction, params)

In [26]:
# fric_models, fric_best_models, fric_vals, fric_infos = run_cv(buffer, True)
# no_fric_models, no_fric_best_models, no_fric_vals, no_fric_infos = run_cv(buffer, False)

In [27]:
# best_fric_idx = np.argmin(fric_vals)
# best_no_fric_idx = np.argmin(no_fric_vals)
# best_fric_model = fric_models[best_fric_idx]
# best_no_fric_model = no_fric_models[best_no_fric_idx]
# print(fric_vals[best_fric_idx], no_fric_vals[best_no_fric_idx])
# print('best fric', fric_infos[best_fric_idx])
# print('best no fric', no_fric_infos[best_no_fric_idx])

### For quick retraining of models 

In [28]:
base_params = dict(d_hidden=256, lr=1e-4, weight_decay=0.0, batch_size=64, num_iters=60000, valid_interval=5000)

model, sl_losses, fric_vals, fric_models = train_iid_model_param(buffer, True, **base_params)
best_fric_idx = np.argmin(fric_vals)
best_fric_model = fric_models[best_fric_idx]

model, sl_losses, no_fric_vals, no_fric_models = train_iid_model_param(buffer, False, **base_params)
best_no_fric_idx = np.argmin(no_fric_vals)
best_no_fric_model = no_fric_models[best_no_fric_idx]

print(fric_vals[best_fric_idx], no_fric_vals[best_no_fric_idx])

0 0.0031803834717720747 0.002321382286027074
5000 0.00019503076327964664 0.00020559843687806278
10000 0.00018478400306776166 0.00019718454859685153
15000 0.00017277103324886411 0.00016304194286931306
20000 0.0001336237764917314 0.00014334665320347995
25000 0.00014024933625478297 0.00015149022510740906
30000 0.00019689292821567506 0.0001466923567932099
35000 0.00017193678650073707 0.00014605376054532826
40000 0.00021374074276536703 0.00016108837735373527
45000 0.00014638109132647514 0.00014709810784552246
50000 0.0002198463334934786 0.00014388535055331886
55000 0.00010754319373518229 0.00014504608407150954
0 0.006560173816978931 0.005136239808052778
5000 0.0001520379155408591 0.00020807882538065314
10000 0.00018227485998068005 0.0002053323114523664
15000 0.00019080998026765883 0.00017589540220797062
20000 0.00024909223429858685 0.00017972012574318796
25000 0.00021862657740712166 0.00016616580251138657
30000 0.0002055289805866778 0.00016914571460802108
35000 0.00022138954955153167 0.0001

In [29]:
buffer.get_train_batch(1)

[array([[ 0.40416415, -0.38383759]]),
 array([[ 0.40727045, -0.38285544]]),
 array([[-1.54755801, -0.48495638]]),
 array([[-1.54224651, -0.4971875 ]]),
 array([[-0.87214636,  0.48924506]]),
 array([[1.00499471]])]

In [30]:
test_T_start = N + 12345
test_H = 15000

trunc_idx = 0

def set_fric_const(env, t):
    env.set_floor_friction(1.4)

# param_fn = set_friction_sin
# param_fn = set_fric_const

test_data = generate_data(param_fn, test_T_start, test_H)
sim_states_prime, model_states_prime = rollout_model_single(best_no_fric_model, False, test_data['qpos'], test_data['qvel'], test_data['actions'], test_data['frictions'])
seq_ssp = np.array(sim_states_prime)[trunc_idx:]
no_fric_msp = np.array(model_states_prime)[trunc_idx:]
_, model_states_prime = rollout_model_single(best_fric_model, True, test_data['qpos'], test_data['qvel'], test_data['actions'], test_data['frictions'])
fric_msp = np.array(model_states_prime)[trunc_idx:]

no_fric_x_err = np.average(np.sqrt(np.sum((seq_ssp[:, 0] - no_fric_msp[:, 0]) **2)))
fric_x_err = np.average(np.sqrt(np.sum((seq_ssp[:, 0] - fric_msp[:, 0]) **2)))
print(no_fric_x_err, fric_x_err)


0.09501904218349816 0.16255987130706637


In [31]:
start_idx = 0
H = test_H
%matplotlib widget
vis_rollouts(seq_ssp[start_idx:start_idx+H], no_fric_msp[start_idx:start_idx+H], fric_msp[start_idx:start_idx+H], labels=['sim', 'no fric', 'fric'])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Now let's look at the sequential models vs the ones trained iid

In [32]:
def evaluate_models(model, set_param_fn, include_friction, T_start, H):
    env = BoxEnv(set_param_fn=set_param_fn)
    obs = env.reset()
    t = 0
    scale = 100
    losses = []
    frictions = []
    features_all, labels_all = [], []
    y_hats = []
    xy_losses = []
    xy_vel_losses = []
    actions = []
    while t < T_start + H:
        
        a = np.array([np.sin(t / scale), np.cos(t / scale)])
        prev_obs = deepcopy(obs)
        obs, rew, done, info = env.step(a)
        actions.append(a)
        
        if t >= T_start:
            xy = torch.from_numpy(obs[0:2]).float()
            xy_prev = torch.from_numpy(prev_obs[0:2]).float()
            xy_vel = torch.from_numpy(obs[3:5]).float()
            xy_vel_prev = torch.from_numpy(prev_obs[3:5]).float()
            act = torch.from_numpy(a).float()
            friction = torch.from_numpy(np.array([env.get_floor_friction()])).float()

            frictions.append(env.get_floor_friction())
            pos_prev = torch.cat([xy_prev, xy_vel_prev])
            if include_friction:
                features = torch.cat([xy_prev, xy_vel_prev, act, friction])
            else:
                features = torch.cat([xy_prev, xy_vel_prev, act])

            labels = torch.cat([xy, xy_vel])

            features_all.append(features.numpy().copy())
            labels_all.append(labels.numpy().copy())

            y_hat = pos_prev + model(features)

            y_hats.append(y_hat.detach().numpy().copy())
            loss = F.mse_loss(y_hat, labels).item()
            losses.append(loss)
            xy_losses.append(F.mse_loss(y_hat[:2], labels[:2]).item())
            xy_vel_losses.append(F.mse_loss(y_hat[2:], labels[2:]).item())
            
        t += 1
    return losses, frictions, features_all, labels_all, y_hats, xy_losses, xy_vel_losses, actions

In [33]:
T_start  = 0
H = 10000
# param_fn = set_friction_sin
iid_fric_losses, frictions, features_all, labels_all, y_hats, iid_fric_xy_losses, iid_fric_xy_vel_losses, iid_actions = evaluate_models(best_fric_model, param_fn, True, T_start, H)
iid_no_fric_losses, _,_,_,_, iid_no_fric_xy_losses, iid_no_fric_xy_vel_losses,_ = evaluate_models(best_no_fric_model, param_fn, False, T_start, H)
seq_fric_losses, _,_,_,_, seq_fric_xy_losses, seq_fric_xy_vel_losses,_ = evaluate_models(large_include_learner.model, param_fn, True, T_start, H)
seq_no_fric_losses, _,_,_,_, seq_no_fric_xy_losses, seq_no_fric_xy_vel_losses,_ = evaluate_models(large_no_include_learner.model, param_fn, False, T_start, H)
seq_no_fric_losses_small, _,_,_,_, seq_no_fric_xy_losses_small, seq_no_fric_xy_vel_losses_small,_ = evaluate_models(small_no_include_learner.model, param_fn, False, T_start, H)

true_ys = np.array(labels_all)
pred_ys = np.array(y_hats)

In [34]:
def vis_rollouts3(rollout1, rollout2, vscale=1000, c1='r', c2='b'):
    plt.scatter(rollout1[:,0], rollout1[:,1], marker='.', color=c1)
    plt.scatter(rollout2[:,0], rollout2[:,1], marker='.', color=c2)
    
    for x, y, vx, vy in zip(rollout1[:,0], rollout1[:,1], rollout1[:,2], rollout1[:,3]):
        plt.arrow(x, y, vx / vscale, vy / vscale, width=1e-4, color=c1)
        
    for x, y, vx, vy in zip(rollout2[:,0], rollout2[:,1], rollout2[:,2], rollout2[:,3]):
        plt.arrow(x, y, vx / vscale, vy / vscale, width=1e-4, color=c2)
    
    plt.show()

In [35]:
print('iid fric', np.average(iid_fric_losses))
print('iid no fric', np.average(iid_no_fric_losses))
print('seq fric', np.average(seq_fric_losses))
print('seq no fric', np.average(seq_no_fric_losses))
print('seq no fric small buf', np.average(seq_no_fric_losses_small))

iid fric 0.0001344313377417616
iid no fric 0.00014676076289391342
seq fric 0.0001780582777878923
seq no fric 0.0001936906676717996
seq no fric small buf 0.00023260287857224853


In [43]:
%matplotlib widget
xs = np.arange(len(iid_fric_losses))
frictions = np.array(frictions)
plt.plot(xs, iid_fric_losses, label='iid fric')
plt.plot(xs, seq_fric_losses, label='seq fric')
plt.plot(xs, seq_no_fric_losses, label='seq no fric')
plt.plot(xs, seq_no_fric_losses_small, label='seq no fric small')
plt.plot(xs, frictions/ 500.0)

plt.xlabel('iterations')
plt.ylabel('prediction error')
plt.legend()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [42]:
start_T = 290
H = 30

%matplotlib widget
vis_rollouts3(true_ys[start_T:start_T+H], pred_ys[start_T:start_T+H])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [None]:
%matplotlib widget
xs = np.arange(len(iid_fric_losses))
frictions = np.array(frictions)
acts = np.array(iid_actions)
labs_np = np.array(labels_all)
y_hats_np = np.array(y_hats)
plt.plot(xs, iid_fric_losses, label='iid fric')
plt.plot(xs, iid_fric_xy_losses, label='iid fric pos')
plt.plot(xs, iid_fric_xy_vel_losses, label='iid fric vel')
plt.plot(xs, frictions/ 500.0, label='friction')
plt.plot(xs, acts[:, 0] / 250.0, label='actx')
plt.plot(xs, acts[:, 1] / 250.0, label='acty')
plt.plot(xs, labs_np[:, 0] / 250.0, label='x')
plt.plot(xs, labs_np[:, 1] / 250.0, label='y')
plt.plot(xs, labs_np[:, 2] / 250.0, label='vx')
plt.plot(xs, labs_np[:, 3] / 250.0, label='vy')
plt.plot(xs, y_hats_np[:, 2] / 250.0, label='vx_hat')
plt.plot(xs, y_hats_np[:, 3] / 250.0, label='vy_hat')
plt.xlabel('iterations')
plt.ylabel('prediction error')
plt.legend()
plt.show()

In [39]:
idx = 2712
print(labs_np[idx])
print(y_hats_np[idx])
print()
print(labs_np[idx+1])
print(y_hats_np[idx+1])
print('error shifted\t', np.mean(np.linalg.norm(labs_np[:-1] - y_hats_np[1:], axis=1)))
print('error orig\t', np.mean(np.linalg.norm(labs_np - y_hats_np, axis=1)))

[0.21586235 0.21057317 1.3629992  0.5525674 ]
[0.21565682 0.21040165 1.3917147  0.5513068 ]

[0.2186111  0.21166795 1.3857601  0.5422261 ]
[0.21841662 0.21160564 1.3839498  0.5401215 ]
error shifted	 0.019502629
error orig	 0.017273473


In [70]:
features = np.array(features_all)
labels = np.array(labels_all)

print(torch.from_numpy(features[idx][:4]) + best_fric_model(torch.from_numpy(features[idx])))
print(torch.from_numpy(features[idx+1][:4]) + best_fric_model(torch.from_numpy(features[idx+1])))
print(torch.from_numpy(features[idx+2][:4]) + best_fric_model(torch.from_numpy(features[idx+2])))
print(features[idx:idx+3, :4])

tensor([0.2157, 0.2104, 1.3917, 0.5513], grad_fn=<AddBackward0>)
tensor([0.2184, 0.2116, 1.3839, 0.5401], grad_fn=<AddBackward0>)
tensor([0.2211, 0.2126, 1.3908, 0.5304], grad_fn=<AddBackward0>)
[[0.21306922 0.20945792 1.4001273  0.5626806 ]
 [0.21586235 0.21057317 1.3629992  0.5525674 ]
 [0.2186111  0.21166795 1.3857601  0.5422261 ]]


In [71]:
batch = buffer.get_train_batch(16)
xy = torch.from_numpy(batch[0]).float()
xy_prev = torch.from_numpy(batch[1]).float()
xy_vel = torch.from_numpy(batch[2]).float()
xy_vel_prev = torch.from_numpy(batch[3]).float()
act = torch.from_numpy(batch[4]).float()
friction = torch.from_numpy(batch[5]).float()

pos_prev = torch.cat([xy_prev, xy_vel_prev], dim=1)
features = torch.cat([xy_prev, xy_vel_prev, act] + ([friction] if include_friction else []) , dim=1)
labels = torch.cat([xy, xy_vel], dim=1)

y_hat = pos_prev + best_fric_model(features)
loss = torch.mean(F.mse_loss(y_hat, labels,reduction='none'), dim=1)
worst_loss = torch.argmax(loss, dim=0)
print(loss[worst_loss])
print(xy[worst_loss])
print(xy_prev[worst_loss])
print(xy_vel[worst_loss])
print(xy_vel_prev[worst_loss])
print(act[worst_loss])

tensor(0.0005, grad_fn=<SelectBackward>)
tensor([ 0.4324, -0.4401])
tensor([ 0.4350, -0.4386])
tensor([-1.2471, -0.7176])
tensor([-1.3119, -0.7344])
tensor([-0.9949,  0.1005])


In [72]:
big_errors = np.argwhere(np.array(iid_fric_losses) > 0.003)
idx = 445 -2
print('x\t', features[idx])
print('y\t', labels[idx])
print('y_hat\t', y_hats[idx])
print(F.mse_loss(torch.tensor(labels[idx]), torch.tensor(y_hats[idx])))
print()
print('x\t', features[idx + 1])
print('y\t', labels[idx + 1])
print('y_hat\t', y_hats[idx + 1])
print(F.mse_loss(torch.tensor(labels[idx+1]), torch.tensor(y_hats[idx+1])))

IndexError: index 443 is out of bounds for dimension 0 with size 16

In [None]:
big_errors.shape

## Visualize a rollout using learned model vs sim

In [63]:
# start_idx = 60000 + 2000 + 75
# start_idx = 1000+200
start_idx = 12324
H = 50
init_qpos = states['qpos'][start_idx]
init_qvel = states['qvel'][start_idx]
actions = states['actions'][start_idx:start_idx+H]
fricts = states['frictions'][start_idx:start_idx+H]
# frictions = [1.1] * H

sim_states_prime, fric_model_states_prime = rollout_model(best_fric_model, True, init_qpos, init_qvel, actions, fricts)
_, no_fric_model_states_prime = rollout_model(best_no_fric_model, False, init_qpos, init_qvel, actions, frictions)
ssp = np.array(sim_states_prime)
fmsp = np.array(fric_model_states_prime)
nfmsp = np.array(no_fric_model_states_prime)
print(frictions)

[1.14929578 1.15       1.15070422 ... 1.17896378 1.17827249 1.17758058]


In [64]:
%matplotlib widget
vis_rollouts(ssp, fmsp, nfmsp, labels=['sim', 'fric iid', 'no fric iid'])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [69]:
_, buf = rollout_model(small_include_learner.model, True, init_qpos, init_qvel, actions, frictions)
# _, buf = rollout_model(small_no_include_learner.model, False, init_qpos, init_qvel, actions, frictions)
# _, buf = rollout_model(large_include_learner.model, True, init_qpos, init_qvel, actions, frictions)
# _, buf = rollout_model(large_no_include_learner.model, False, init_qpos, init_qvel, actions, frictions)

nfsb = np.array(buf)
%matplotlib widget
vis_rollouts(ssp, nfsb, nfmsp, labels=['sim', 'no fric seq small', 'no fric iid'])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [None]:
min_vel_idx = np.argmin(np.linalg.norm(seq_ssp[:, 1], axis=1))
print(seq_ssp[min_vel_idx, 1])
print(seq_ssp[:, 1])
print(np.linalg.norm(seq_ssp[:, 1], axis=1))
print(np.linalg.norm(seq_ssp[min_vel_idx, 1]))

In [None]:
# test_data['frictions']

In [None]:
asdf()

In [None]:


# model = new_model()
lr=1e-4

optim = torch.optim.Adam(model.parameters(), lr=lr)

learner = Learn(model, optim, F.mse_loss)

obs = env.reset()
t = 0
scale = 100
while t < T:
    a = [np.sin(t / scale), np.cos(t / scale)]
    prev_obs = obs
    obs, rew, done, info = env.step(a)

    process(buffer, learner, env, prev_obs, obs, a, t)
    t += 1

In [None]:
trains = []
vals = []

In [None]:
for i in range(1000):
    trains.append(buffer.get_train_batch(40*3))

In [None]:
for i in range(1000):
    vals.append(buffer.get_val_batch(40*3))

In [None]:
batch = buffer.get_train_batch(2)

In [None]:
batch[0]

In [None]:
all_first = []
for batch in trains:
    all_first.append(batch[0])

In [None]:
all_trains = np.concatenate(all_first)

In [None]:
unique_trains = np.unique(all_trains, axis=0)

In [None]:
all_vals = np.concatenate(all_first)

In [None]:
unique_vals = np.unique(all_vals, axis=0)

In [None]:
unique_trains.shape

In [None]:
unique_vals.shape

In [None]:
not set(list(unique_trains[:,0])).isdisjoint(set((unique_vals[:,0])))