In [27]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [28]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import time

from lib import dpf, panda_models, panda_datasets
from lib.utils import file_utils, torch_utils, misc_utils

print(torch.__version__, np.__version__)

1.3.1 1.16.4


In [33]:
# Create model & training buddy
# Experiment names used in earlier runs are kept below (commented out) for reference;
# only the last, uncommented assignment takes effect.
#experiment_name = "push_pull_no_prev"
#experiment_name = "oof"
#experiment_name = "blahblahtest"
#experiment_name = "dynamics_fixed"
#experiment_name = "dynamics_fixed2"
#experiment_name = "delta_test"
#experiment_name = "alt_model_again"
experiment_name = "bigger_model"

# Both sub-models are constructed with units=32
dynamics_model = panda_models.PandaAltDynamicsModel(units=32)
measurement_model = panda_models.PandaMeasurementModel(units=32)

# Wrap the dynamics and measurement models in a single particle filter network
pf_model = dpf.ParticleFilterNetwork(dynamics_model, measurement_model)

# The buddy is used by later cells for optimization (buddy.minimize), scalar logging
# (buddy.log) and checkpointing (buddy.save_checkpoint); they also read buddy._device
# and buddy._steps. It is created with three named optimizers.
buddy = torch_utils.TrainingBuddy(
    experiment_name,
    pf_model,
    optimizer_names=["e2e", "dynamics", "measurement"],
    log_dir="logs/pf",
    checkpoint_dir="checkpoints/pf"
)

Using device: cuda
No checkpoint found


In [37]:
# Dataset for dynamics-only training; it is wrapped in a DataLoader and passed to
# train_dynamics() in a later cell. The commented-out path is an alternative data file.
dynamics_trainset = panda_datasets.PandaSimpleDataset(
    "data/pull-test-small.hdf5",
    #"data/push-test-small.hdf5",
    use_proprioception=True,
    use_vision=True,
    vision_interval=1
)

Parsed data: 6125 active, 1595 inactive
Keeping: 1595


In [38]:
# Dynamics-only training (one pass over the dataloader)
def train_dynamics(dynamics_model, dataloader, log_interval=10):
    """Run a single epoch of supervised training on the dynamics model.

    Each batch is unpacked as (previous states, observations, controls, new
    states); the observations are ignored. The model predicts the new state
    from the previous state and the control with `noisy=False`, and the average
    of the two per-dimension mean squared errors is minimized through the
    notebook-level `buddy` using its "dynamics" optimizer.

    Args:
        dynamics_model: Callable invoked as
            `dynamics_model(states, controls, noisy=False)`.
        dataloader: Iterable yielding the 4-tuples described above.
        log_interval: Scalars are logged whenever `buddy._steps` is a
            multiple of this value.
    """
    for batch in dataloader:
        # Move the batch onto the buddy's device, then split it into its parts
        states_before, _observations, controls, states_after = torch_utils.to_device(
            batch, buddy._device
        )

        # The model is called with an extra singleton axis at position 1, which is
        # stripped from its output again (presumably the particle axis -- TODO confirm)
        predicted = dynamics_model(
            states_before[:, np.newaxis, :], controls, noisy=False
        ).squeeze(dim=1)

        # Mean squared error per state dimension; the unpacking below only works
        # when there are exactly two state dimensions
        squared_error = (predicted - states_after) ** 2
        mse_pos, mse_vel = torch.mean(squared_error, axis=0)
        loss = (mse_pos + mse_vel) / 2

        buddy.minimize(loss, optimizer_name="dynamics")

        # Nothing more to do for this batch unless it falls on a logging step
        if buddy._steps % log_interval != 0:
            continue

        buddy.log("dynamics/training loss", loss)
        buddy.log("dynamics/MSE position", mse_pos)
        buddy.log("dynamics/MSE velocity", mse_vel)

        # (tag for dimension 0, tag for dimension 1, per-dimension statistic over the batch)
        summaries = (
            ("dynamics/Label pos std", "dynamics/Label vel std", states_after.std(dim=0)),
            ("dynamics/Predicted pos std", "dynamics/Predicted vel std", predicted.std(dim=0)),
            ("dynamics/Label pos mean", "dynamics/Label vel mean", states_after.mean(dim=0)),
            ("dynamics/Predicted pos mean", "dynamics/Predicted vel mean", predicted.mean(dim=0)),
        )
        for pos_tag, vel_tag, stat in summaries:
            assert stat.shape == (2,)
            buddy.log(pos_tag, stat[0])
            buddy.log(vel_tag, stat[1])

        # One dot per logging step as a lightweight progress indicator
        print(".", end="")

In [41]:
# Shuffled mini-batches over the dynamics training set
dataloader = torch.utils.data.DataLoader(dynamics_trainset, batch_size=512, shuffle=True, num_workers=2)

# 2000 is only an upper bound on the number of epochs; the recorded run of this cell
# ends in a KeyboardInterrupt. The loop variable is named `epoch` rather than `_`
# because it is actually used, and because assigning to `_` in a notebook rebinds
# IPython's "last output" variable.
for epoch in range(2000):
    print("Training epoch", epoch)
    train_dynamics(dynamics_model, dataloader)


Training epoch 0
..Training epoch 1
.Training epoch 2
..Training epoch 3
.Saved checkpoint to path: checkpoints/pf/bigger_model-3000.ckpt
.Training epoch 4
.Training epoch 5
..Training epoch 6
.Training epoch 7
..Training epoch 8
..Training epoch 9
.Training epoch 10
..Training epoch 11
.Training epoch 12
..Training epoch 13
..Training epoch 14
.Training epoch 15
..Training epoch 16
.Training epoch 17
..Training epoch 18
..Training epoch 19
.Training epoch 20
..Training epoch 21
.Training epoch 22
..Training epoch 23
..Training epoch 24
.Training epoch 25
..Training epoch 26
.Training epoch 27
..Training epoch 28
..Training epoch 29
.Training epoch 30
..Training epoch 31
.Training epoch 32
..Training epoch 33
..Training epoch 34
.Training epoch 35
..Training epoch 36
.Training epoch 37
..Training epoch 38
..Training epoch 39
.Training epoch 40
..Training epoch 41
.Training epoch 42
..Training epoch 43
..Training epoch 44


KeyboardInterrupt: 

In [48]:
# Load dataset
# This dataset and its loader are consumed by train_e2e(), which unpacks every batch
# as (particles, states, observations, controls). The commented-out path is an
# alternative data file.
e2e_trainset = panda_datasets.PandaParticleFilterDataset(
    "data/pull-test-small.hdf5",
    #"data/push-test-small.hdf5",
    use_proprioception=True,
    use_vision=True,
    vision_interval=10
)
e2e_trainset_loader = torch.utils.data.DataLoader(e2e_trainset, batch_size=64, shuffle=True, num_workers=2)

Parsed data: 311 active, 73 inactive
Keeping: 73


In [53]:
def train_e2e(pf_model, log_interval=10):
    """Train the particle filter network for one epoch over `e2e_trainset_loader`.

    Relies on the notebook-level `e2e_trainset_loader` and `buddy`. Every batch
    is unpacked as (particles, states, observations, controls). Starting from
    uniform log-weights, the filter is stepped once per timestep without
    resampling, and an optimizer step on `dpf.gmm_loss` is taken at every
    timestep. Particles and log-weights are detached between timesteps, so
    gradients never span more than one filter step.

    Args:
        pf_model: Particle filter network; called through `pf_model.forward(...)`.
        log_interval: The loss is logged whenever `buddy._steps` is a multiple
            of this value.
    """
    # Train for 1 epoch
    for batch in e2e_trainset_loader:
        # Transfer to the buddy's device and pull out batch data
        batch_gpu = torch_utils.to_device(batch, buddy._device)
        batch_particles, batch_states, batch_obs, batch_controls = batch_gpu

        # N = batch size, M = particle count
        N, M, state_dim = batch_particles.shape
        _, timesteps, control_dim = batch_controls.shape
        # All tensors in the batch must agree on batch size and trajectory length
        assert batch_states.shape == (N, timesteps, state_dim)
        assert batch_controls.shape == (N, timesteps, control_dim)

        # Give all particles equal weights
        particles = batch_particles
        log_weights = torch.ones((N, M), device=buddy._device) * (-np.log(M))

        for t in range(1, timesteps):
            # NOTE(review): the observation is taken at t - 1 while the control and the
            # target state are taken at t; the eval() cell indexes observation and control
            # both at t. Confirm which alignment PandaParticleFilterDataset intends.
            state_estimates, new_particles, new_log_weights = pf_model.forward(
                particles,
                log_weights,
                misc_utils.DictIterator(batch_obs)[:,t - 1,:],
                batch_controls[:,t,:],
                resample=False
            )

            loss = dpf.gmm_loss(
                particles_states=new_particles,
                log_weights=new_log_weights,
                true_states=batch_states[:, t, :],
                gmm_variances=np.array([0.2, 0.05])
            )

            # Must be one of the optimizer names the buddy was created with
            # ("e2e", "dynamics", "measurement")
            buddy.minimize(loss, optimizer_name="e2e")

            # Disable backprop through time
            particles = new_particles.detach()
            log_weights = new_log_weights.detach()

            if buddy._steps % log_interval == 0:
                buddy.log("e2e/Training loss:", loss)



In [54]:
# Set the freeze flags on the particle filter network before end-to-end training:
# measurement model flag off, dynamics model flag on. How the flags are consumed is
# defined in lib.dpf and is not visible in this notebook.
pf_model.freeze_measurement_model = False
pf_model.freeze_dynamics_model = True
# Up to 100 epochs; the recorded run of this cell ends in a KeyboardInterrupt
for i in range(100):
    print("Training epoch", i)
    train_e2e(pf_model)

Training epoch 0
Training epoch 1
Training epoch 2
Training epoch 3
Training epoch 4
Training epoch 5


KeyboardInterrupt: 

In [None]:
# Explicitly write a checkpoint through the training buddy (it was created with
# checkpoint_dir="checkpoints/pf")
buddy.save_checkpoint()

In [None]:
def eval(pf_model, trajectories, start_time=0, max_timesteps=100000):
    """Roll the particle filter forward over several trajectories and collect its estimates.

    All trajectories are cut to a common window [start_time, end_time), where
    end_time is the shortest trajectory length capped at
    start_time + max_timesteps. The filter is initialised with a single particle
    per trajectory placed at the true state at `start_time`, then stepped with
    `resample=True` and `noisy_dynamics=False`, using the observation and
    control at each timestep.

    NOTE: this function shadows the `eval` builtin for the rest of the notebook;
    the name is kept because later cells call it.

    Args:
        pf_model: Particle filter network; called through `pf_model.forward(...)`.
        trajectories: Sequence of (states, observations, controls) tuples.
            Observations are indexed through `misc_utils.DictIterator`.
        start_time: Index of the first timestep of the evaluation window.
        max_timesteps: Maximum length of the evaluation window.

    Returns:
        Tuple (predicted_states, actual_states) of numpy arrays, both indexed by
        trajectory first and by timestep within the window second.
    """
    # To make things easier, we're going to cut all our trajectories to the same length :)
    end_time = np.min([len(s) for s, _, _ in trajectories] + [start_time + max_timesteps])
    predicted_states = [[states[start_time]] for states, _, _ in trajectories]
    actual_states = [states[start_time:end_time] for states, _, _ in trajectories]

    state_dim = len(actual_states[0][0])
    N = len(trajectories)
    M = 1

    # Every particle of trajectory i starts at that trajectory's initial state
    particles = np.zeros((N, M, state_dim))
    for i in range(N):
        particles[i, :] = predicted_states[i][0]
    particles = torch_utils.to_torch(particles, device=buddy._device)
    log_weights = torch.ones((N, M), device=buddy._device) * (-np.log(M))

    window_length = end_time - start_time

    # Evaluation needs no gradients. Without no_grad() the particles carried from step
    # to step would keep the autograd graph of the entire rollout alive.
    with torch.no_grad():
        for t in range(start_time + 1, end_time):
            # Gather the observation and control at time t across all trajectories
            o = {}
            c = []
            for _, observations, controls in trajectories:
                o_t = misc_utils.DictIterator(observations)[t]
                misc_utils.DictIterator(o).append(o_t)
                c.append(controls[t])

            misc_utils.DictIterator(o).convert_to_numpy()
            c = np.array(c)
            (o, c) = torch_utils.to_torch((o, c), device=buddy._device)

            state_estimates, new_particles, new_log_weights = pf_model.forward(
                particles,
                log_weights,
                o,
                c,
                resample=True,
                noisy_dynamics=False
            )

            particles = new_particles
            log_weights = new_log_weights

            for i in range(N):
                predicted_states[i].append(torch_utils.to_numpy(state_estimates[i]))

            # Progress is measured relative to the window start, so the fraction stays
            # within [0, 1] even when start_time > 0
            misc_utils.progress_bar((t - start_time) / window_length)
    misc_utils.progress_bar(1.)

    predicted_states = np.array(predicted_states)
    actual_states = np.array(actual_states)
    return predicted_states, actual_states


# Load trajectories for evaluation. Note that this is the same file the training
# datasets above were built from.
eval_trajectories = file_utils.load_trajectories(
    "data/pull-test-small.hdf5",   
    use_proprioception=True,
    use_vision=True,
    vision_interval=1
)
# Evaluate on the first five trajectories, for a window of at most 3000 timesteps
pred, actual = eval(pf_model, eval_trajectories[0:5], max_timesteps=3000)

tensor([[1.5212e-05, 1.5212e-05, 1.5212e-05, 1.5212e-05, 1.5212e-05],
        [4.4675e-05, 4.4675e-05, 4.4675e-05, 4.4675e-05, 4.4675e-05]],
       device='cuda:0')
tensor([[ 1.7446e-05,  1.7446e-05,  1.7446e-05,  1.7446e-05,  1.7446e-05],
        [-9.2158e-03, -9.0409e-03, -9.1323e-03, -9.1202e-03, -9.0406e-03]],
       device='cuda:0')
tensor([[-0.0004, -0.0004, -0.0004, -0.0004, -0.0004],
        [-0.0073, -0.0083, -0.0135, -0.0132, -0.0081]], device='cuda:0')
tensor([[-0.0008, -0.0008, -0.0011, -0.0011, -0.0008],
        [-0.0119,  0.0032, -0.0011, -0.0008,  0.0036]], device='cuda:0')
tensor([[-0.0014, -0.0007, -0.0012, -0.0011, -0.0007],
        [-0.0207,  0.0150,  0.0143,  0.0143,  0.0149]], device='cuda:0')
tensor([[-2.4407e-03,  6.5383e-05, -4.5448e-04, -4.2226e-04,  8.4161e-05],
        [-2.6011e-02,  3.8108e-02,  3.5389e-02,  3.5919e-02,  3.7429e-02]],
       device='cuda:0')
tensor([[-0.0037,  0.0020,  0.0013,  0.0014,  0.0020],
        [-0.0080,  0.0528,  0.0516,  0.0515,  

tensor([[ 0.1316,  0.1592,  0.0766,  0.2432,  0.2427],
        [-0.0189,  0.0008, -0.0136, -0.0242, -0.0850]], device='cuda:0')
tensor([[ 0.1307,  0.1593,  0.0759,  0.2420,  0.2385],
        [-0.0110, -0.0122, -0.0003, -0.0157, -0.0872]], device='cuda:0')
tensor([[ 0.1301,  0.1587,  0.0759,  0.2412,  0.2341],
        [-0.0065, -0.0293,  0.0330, -0.0383, -0.0998]], device='cuda:0')
tensor([[ 0.1298,  0.1572,  0.0775,  0.2393,  0.2291],
        [-0.0098, -0.0489, -0.0035, -0.0626, -0.1133]], device='cuda:0')
tensor([[ 0.1293,  0.1548,  0.0773,  0.2361,  0.2234],
        [-0.0234, -0.0682, -0.0198, -0.0353, -0.1341]], device='cuda:0')
tensor([[ 0.1281,  0.1514,  0.0763,  0.2344,  0.2167],
        [-0.0553, -0.0854, -0.0610, -0.1242, -0.1497]], device='cuda:0')
tensor([[ 0.1254,  0.1471,  0.0733,  0.2282,  0.2093],
        [-0.0880, -0.1011, -0.0571, -0.0850, -0.1578]], device='cuda:0')
tensor([[ 0.1210,  0.1420,  0.0704,  0.2239,  0.2014],
        [-0.0842, -0.1151, -0.0452, -0.0535, -0.1

tensor([[-0.0382, -0.1247, -0.0357,  0.1113,  0.1620],
        [ 0.0099, -0.0877, -0.0757, -0.0561, -0.1509]], device='cuda:0')
tensor([[-0.0377, -0.1291, -0.0394,  0.1085,  0.1545],
        [ 0.0092, -0.0657, -0.0796, -0.0509, -0.1477]], device='cuda:0')
tensor([[-0.0373, -0.1324, -0.0434,  0.1060,  0.1471],
        [-0.0104, -0.0365, -0.0883, -0.0463, -0.1435]], device='cuda:0')
tensor([[-0.0378, -0.1342, -0.0478,  0.1037,  0.1399],
        [-0.0345, -0.0044, -0.0885, -0.0426, -0.1388]], device='cuda:0')
tensor([[-0.0395, -0.1344, -0.0523,  0.1015,  0.1330],
        [-0.0534,  0.0209, -0.0630, -0.0395, -0.1335]], device='cuda:0')
tensor([[-0.0422, -0.1334, -0.0554,  0.0996,  0.1263],
        [-0.0633,  0.0364, -0.0677, -0.0370, -0.1263]], device='cuda:0')
tensor([[-0.0454, -0.1315, -0.0588,  0.0977,  0.1200],
        [-0.0572,  0.0545, -0.0500, -0.0349, -0.1177]], device='cuda:0')
tensor([[-0.0482, -0.1288, -0.0613,  0.0960,  0.1141],
        [-0.0604,  0.0693, -0.0550, -0.0339, -0.1

In [None]:
def _plot_state_component(predicted_states, actual_states, component, name, colors):
    """Plot one state component of every trajectory: prediction faded, ground truth solid."""
    timesteps = len(actual_states[0])

    plt.figure(figsize=(15,10))
    for i, (pred, actual) in enumerate(zip(predicted_states, actual_states)):
        # Cycle through the palette so any number of trajectories can be drawn
        color = colors[i % len(colors)]
        plt.plot(range(timesteps), pred[:,component], label="Predicted " + name + " " + str(i), c=color, alpha=0.3)
        plt.plot(range(timesteps), actual[:,component], label="Actual " + name + " " + str(i), c=color)
    plt.xlabel("Timestep")
    plt.ylabel(name)
    plt.legend()
    plt.show()


def vis_eval(predicted_states, actual_states):
    """Plot predicted vs. actual position and velocity and print the MSE of each.

    Args:
        predicted_states: Array indexed as [trajectory, timestep, state dimension];
            dimension 0 is plotted as position and dimension 1 as velocity.
        actual_states: Ground-truth array with the same layout.
    """
    # White ('w') is left out of the palette: it would not show on a white figure background
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']

    _plot_state_component(predicted_states, actual_states, 0, "Position", colors)
    print(predicted_states.shape)
    print("Position MSE: ", np.mean((predicted_states[:,:,0] - actual_states[:,:,0])**2))

    _plot_state_component(predicted_states, actual_states, 1, "Velocity", colors)
    print("Velocity MSE: ", np.mean((predicted_states[:,:,1] - actual_states[:,:,1])**2))

# Plot predictions against ground truth for the rollout returned by eval(), and print the MSEs
vis_eval(pred, actual)


In [26]:
### DEAD RECKONING TEST

# def eval(trajectories, max_timesteps=100000):
#     # To make things easier, we're going to cut all our trajectories to the same length :)
#     timesteps = np.min([len(s) for s, _, _ in trajectories] + [max_timesteps])
#     predicted_states = [[states[0]] for states, _, _ in trajectories]
#     actual_states = [states[:timesteps] for states, _, _ in trajectories]

#     for t in range(1, timesteps):
#         for i in range(len(trajectories)):
#             prev_state = predicted_states[i][t - 1]
#             new_vel = actual_states[i][t][1]
#             new_pos = prev_state[0] + new_vel / 20
            
#             predicted_states[i].append([new_pos, new_vel])
#         misc_utils.progress_bar(t / timesteps)
#     misc_utils.progress_bar(1.)

#     predicted_states = np.array(predicted_states)
#     actual_states = np.array(actual_states)
#     return predicted_states, actual_states


# eval_trajectories = file_utils.load_trajectories(
#     "data/pull-test-small.hdf5",   
#     use_proprioception=True,
#     use_vision=True,
#     vision_interval=1
# )
# pred, actual = eval(eval_trajectories[0:1], max_timesteps=200)
# vis_eval(pred, actual)
