### PLI + SBFD Test with Cosine Similarity
Script to test both the inverse model and the dynamics model.
These two models could be trained separately and then finetuned.
Steps are as follows:
1. Load both of the models
2. At each step, sample candidate actions from a Gaussian distribution with the mean and std of the actions in the dataset
3. Predict the next state using SBFD model
4. Choose the best next state using the cosine similarity between the predicted trajectory and the demo trajectory
5. Apply the action to go to that best next state (use the predicted action not the sampled action)

In [1]:
import numpy as np
import math
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import os
import torch
import torch.utils.data as data 

from copy import deepcopy
from cv2 import aruco
from omegaconf import OmegaConf
from torch.nn.parallel import DistributedDataParallel as DDP
from tqdm import tqdm

# Custom imports
from contrastive_learning.datasets.state_dataset import StateDataset
from contrastive_learning.tests.test_model import load_sbfd, load_lin_model
from contrastive_learning.tests.plotting import plot_corners, plot_rvec_tvec
from contrastive_learning.datasets.dataloaders import get_dataloaders

### Load both of the models

In [2]:
# Start the multiprocessing to load the saved models properly
# The saved models are loaded as DistributedDataParallel modules (see the printed
# models below), so a process group has to exist. A single-process group
# (rank 0, world size 1) is enough for this test.
os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = "29505"

# init_process_group raises when the default group already exists; the guard keeps
# this cell safe to re-run without restarting the kernel.
if not torch.distributed.is_initialized():
    torch.distributed.init_process_group(backend='gloo', rank=0, world_size=1)
torch.cuda.set_device(0)

In [3]:
# Device every model is loaded onto, and the frame rate
device = torch.device('cuda:0')
fps = 15

# Output directories of the two training runs: PLI (inverse model) and SBFD (dynamics model)
pli_out_dir = '/home/irmak/Workspace/DAWGE/contrastive_learning/out/2022.07.29/16-37_pli_ref_dog_lf_mse_fi_1_pt_corners_bs_64_hd_64_lr_0.001_zd_8'
sbfd_out_dir = '/home/irmak/Workspace/DAWGE/contrastive_learning/out/2022.07.27/19-45_sbfd_ue_False_lf_mse_fi_1_pt_corners_bs_64_hd_64_lr_0.001_zd_8'

# Each run keeps the config it was trained with under .hydra/
pli_cfg, sbfd_cfg = (
    OmegaConf.load(os.path.join(run_dir, '.hydra/config.yaml'))
    for run_dir in (pli_out_dir, sbfd_out_dir)
)
print(f'pli_cfg: {pli_cfg}\nsbfd_cfg: {sbfd_cfg}')

# Checkpoint files of the three networks
lin_model_path = os.path.join(pli_out_dir, 'models/lin_model.pt')
pos_encoder_path = os.path.join(sbfd_out_dir, 'models/pos_encoder.pt')
trans_path = os.path.join(sbfd_out_dir, 'models/trans.pt')

# Without an encoder the SBFD config gets z_dim overridden to twice pos_dim
# (presumably because raw positions are used in place of embeddings - TODO confirm)
if sbfd_cfg.agent.use_encoder == False:
    sbfd_cfg.z_dim = sbfd_cfg.pos_dim*2

# Dynamics side: position encoder + transition model
pos_encoder, trans = load_sbfd(sbfd_cfg, device, pos_encoder_path, trans_path)

# Inverse side: linear inverse model
lin_model = load_lin_model(pli_cfg, device, lin_model_path)

pli_cfg: {'agent': {'_target_': 'contrastive_learning.models.agents.pli.PLI', 'loss_fn': 'mse', 'use_encoder': False, 'model': '???', 'optimizer': '???'}, 'optimizer': {'_target_': 'torch.optim.Adam', 'params': '???', 'lr': '???', 'weight_decay': '???'}, 'model': {'_target_': 'contrastive_learning.models.custom_models.LinearInverse', 'input_dim': '???', 'action_dim': '???', 'hidden_dim': '???'}, 'pos_encoder': {'_target_': 'contrastive_learning.models.custom_models.PosToEmbedding', 'input_dim': '???', 'hidden_dim': '???', 'out_dim': '???'}, 'seed': 42, 'device': 'cuda', 'agent_type': 'pli', 'dataset_type': 'state', 'pos_type': 'corners', 'pos_ref': 'dog', 'train_epochs': 1000, 'save_frequency': 10, 'train_dset_split': 0.8, 'batch_size': 64, 'lr': 0.001, 'weight_decay': 1e-05, 'z_dim': 8, 'pos_dim': 8, 'hidden_dim': 64, 'action_dim': 2, 'distributed': True, 'num_workers': 4, 'world_size': 1, 'num_gpus': 4, 'fps': 15, 'frame_interval': 1, 'video_type': 'color', 'experiment': '${agent_typ

In [4]:
# Show the architecture of the three loaded networks, one after the other
print(lin_model, pos_encoder, trans, sep='\n')

DistributedDataParallel(
  (module): LinearInverse(
    (model): Sequential(
      (0): Linear(in_features=32, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=16, bias=True)
      (3): ReLU()
      (4): Linear(in_features=16, out_features=2, bias=True)
    )
  )
)
DistributedDataParallel(
  (module): PosToEmbedding(
    (model): Sequential(
      (0): Linear(in_features=16, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=16, bias=True)
    )
  )
)
DistributedDataParallel(
  (module): Transition(
    (model): Sequential(
      (0): Linear(in_features=20, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): ReLU()
      (4): Linear(in_features=64, out_features=16, bias=True)
    )
  )
)


Get an example trajectory and plot the trajectory in a matplotlib plot with
10 frames difference in each step

In [5]:
# Directory of the single demonstration that is replayed in this notebook
test_traj_path = '/home/irmak/Workspace/DAWGE/src/dawge_planner/data/test_demos/box_marker_test_1'
# Create a dataset with this dir only - sbfd_cfg and pli_cfg differences shouldn't matter in StateDataset

# Work on a copy so pli_cfg keeps its own pos_ref; positions are requested with the 'global' reference here
global_cfg = deepcopy(pli_cfg)
global_cfg.pos_ref = 'global'
demo_dataset = StateDataset(global_cfg, single_dir=True, single_dir_root=test_traj_path)
# batch_size=1 and shuffle=False: one sample per step, in dataset order
demo_loader = data.DataLoader(demo_dataset, batch_size=1, shuffle=False, num_workers=4)
# NOTE: the rollout cell further down creates a fresh iterator before it starts
demo_loader_iter = iter(demo_loader)
# Only the third value returned by get_dataloaders is kept; its normalize/denormalize
# helpers are used by the functions defined in the following cells
_, _, all_dset = get_dataloaders(global_cfg)

DATASET POS_REF: global
len(dataset): 64
self.action_min: [-0.15000001 -0.30000001], self.action_max: [0.15000001 0.30000001]
DATASET POS_REF: global
len(dataset): 4910
self.action_min: [-0.15000001 -0.30000001], self.action_max: [0.15000001 0.30000001]


### Get Current Position
Will be actual robot position in the end

In [6]:
# For now this method will be the next position in the trajectory with a little stochasticity
# in actual experiments this will get the position of the robot
# TODO: Add a frame interval to this so that it will give the predicted next for some frame interval and the actual next pos once a while
def get_curr_state(cfg, data_loader_iter):
    """Fetch the next sample of the demo and return it in denormalized form.

    Args:
        cfg: config whose ``pos_type`` ('corners' or 'rvec_tvec') selects the
            denormalization applied to the positions.
        data_loader_iter: iterator yielding (curr_pos, next_pos, action) batches;
            only the first element of every batched tensor is used.

    Returns:
        Tuple (curr_pos, next_pos, action). The action is always denormalized.
        For any other ``pos_type`` the positions are returned exactly as they
        came out of the iterator.
    """
    curr_pos, next_pos, action = next(data_loader_iter)
    action = all_dset.denormalize_action(action[0].detach().numpy())

    # Pick the denormalization that matches the position representation
    if cfg.pos_type == 'corners':
        denormalize = all_dset.denormalize_corner
    elif cfg.pos_type == 'rvec_tvec':
        denormalize = all_dset.denormalize_pos_rvec_tvec
    else:
        return curr_pos, next_pos, action

    curr_pos = denormalize(curr_pos[0].detach().numpy())
    next_pos = denormalize(next_pos[0].detach().numpy())
    return curr_pos, next_pos, action
        

### Sample 100 Actions

In [7]:
# TODO: Check if these sampled actions are same with inverse model's actions
# If that is the case it means both of the models work nicely
# Plot some graphs to show the similarity of these actions
def sample_actions(num_samples=100):
    """Draw ``num_samples`` random actions from a normal distribution.

    The first two values returned by ``all_dset.calculate_corners_means_stds()``
    are used as the mean and the standard deviation of the distribution.

    Returns:
        NumPy array with one sampled action per row.
    """
    action_mean, action_stds, _, _ = all_dset.calculate_corners_means_stds()
    draws = [np.random.normal(action_mean, action_stds) for _ in range(num_samples)]
    return np.array(draws)

# sampled_actions = sample_actions()
# print('sampled_actions: {}'.format(sampled_actions))

### Predict Next Position with sample actions

In [8]:
# Both of these values are one dimensional - curr_pos.shape: (16/12), action.shape: (2)
# And they are denormalized so you should normalize them, pass it through the model and denormalize them again
# TODO: Draw the predicted next positions at each step as well
def predict_next_pos(cfg, curr_pos, action):
    """Predict the next position reached by applying ``action`` at ``curr_pos``.

    Both inputs are denormalized values. They are normalized with ``all_dset``,
    passed through the transition model ``trans`` (which outputs a position
    delta), and the resulting next position is denormalized again.

    Args:
        cfg: config whose ``pos_type`` ('corners' or 'rvec_tvec') selects the
            (de)normalization helpers.
        curr_pos: denormalized current position.
        action: denormalized action.

    Returns:
        Denormalized predicted next position as returned by the matching
        ``all_dset`` denormalization helper.
    """
    # Normalize position and action
    # NOTE(review): the rvec_tvec branch normalizes with `normalize_rvec_tvec` but
    # denormalizes with `denormalize_pos_rvec_tvec` - confirm both names exist.
    if cfg.pos_type == 'corners':
        curr_pos = torch.flatten(torch.FloatTensor(all_dset.normalize_corner(curr_pos))).to(device)
    elif cfg.pos_type == 'rvec_tvec':
        curr_pos = torch.FloatTensor(all_dset.normalize_rvec_tvec(curr_pos)).to(device)

    action = torch.FloatTensor(all_dset.normalize_action(action)).to(device)

    # Add a leading batch dimension of size 1 to both inputs
    curr_pos = torch.unsqueeze(curr_pos, 0)
    action = torch.unsqueeze(action, 0)

    # Pure inference: this is called once per sampled action, so skip building
    # an autograd graph for every forward pass.
    with torch.no_grad():
        # trans predicts the change in position; add it to the current position
        pos_delta = trans(curr_pos, action)
        next_pos_predict = curr_pos + pos_delta

    # Denormalize the predicted next position
    if cfg.pos_type == 'corners':
        next_pos_predict = all_dset.denormalize_corner(next_pos_predict[0].cpu().numpy())
    elif cfg.pos_type == 'rvec_tvec':
        next_pos_predict = all_dset.denormalize_pos_rvec_tvec(next_pos_predict[0].cpu().numpy())

    return next_pos_predict

### Create Trajectories and Apply the action that will give the most similar trajectory as the demo

In [9]:
# Both of their shapes are: (N, 16/12) - N is the number of steps in trajectory
# traj_a and traj_b can be NumPy arrays - they are converted to float tensors inside the function
# NOTE: This sum can be between -N and +N
def calc_traj_sim(traj_a, traj_b):
    """Return the summed step-wise cosine similarity of two trajectories.

    Both inputs are converted with ``torch.FloatTensor``. The cosine similarity
    is computed along dim 1 (one value per row) and the per-row values are summed.

    Returns:
        Scalar tensor holding the summed similarity.
    """
    steps_a = torch.FloatTensor(traj_a)
    steps_b = torch.FloatTensor(traj_b)
    per_step_sim = torch.nn.functional.cosine_similarity(steps_a, steps_b, dim=1, eps=1e-8)
    return per_step_sim.sum()

In [10]:
def trans_pos_to_model(cfg, curr_pos): # Returns flattened positions
    """Convert a denormalized position into the tensor form fed to the models.

    Args:
        cfg: config whose ``pos_type`` selects the normalization helper.
        curr_pos: denormalized position.

    Returns:
        For 'corners' the normalized position flattened to one dimension, for
        'rvec_tvec' the normalized position as is, otherwise ``None``.
    """
    pos_type = cfg.pos_type
    if pos_type == 'corners':
        return torch.FloatTensor(all_dset.normalize_corner(curr_pos)).flatten()
    if pos_type == 'rvec_tvec':
        return torch.FloatTensor(all_dset.normalize_rvec_tvec(curr_pos))
    return None

In [11]:
def get_reference(cfg, curr_pos): # Gets flattened positions, shape: (16)
    """Build the reference tensor for ``curr_pos`` according to ``cfg.pos_ref``.

    Args:
        cfg: config with a ``pos_ref`` attribute.
        curr_pos: flattened 1-D position tensor. It is split in the middle, so
            the function does not need to know the position type.

    Returns:
        * pos_ref == 'dog': the second half of ``curr_pos`` tiled twice,
          shape ``(1, len(curr_pos))``.
        * pos_ref == 'box': the first half of ``curr_pos`` tiled twice,
          shape ``(1, len(curr_pos))``.
        * anything else: zeros with the shape of ``curr_pos``. For tensor input
          the zeros share its dtype and device, so it can be combined with
          ``curr_pos`` even when that lives on the GPU.
    """
    half_idx = curr_pos.shape[0] // 2
    if cfg.pos_ref == 'dog':
        return curr_pos[half_idx:].repeat(1, 2)
    if cfg.pos_ref == 'box':
        return curr_pos[:half_idx].repeat(1, 2)

    # No reference object: a zero reference only needs to be allocated on this path
    if torch.is_tensor(curr_pos):
        return torch.zeros_like(curr_pos)
    return torch.zeros(curr_pos.shape)  # non-tensor input keeps the previous behaviour

In [12]:

# Roll out the demo: at every step keep the real state/action and choose, among
# randomly sampled actions, the one whose predicted next position makes the
# predicted trajectory most similar (cosine similarity) to the real one.
N = len(demo_dataset)
real_traj = np.zeros((N, pli_cfg.pos_dim,2))
pred_traj = np.zeros((N, pli_cfg.pos_dim,2))
real_actions = np.zeros((N, pli_cfg.action_dim))
applied_actions = np.zeros((N, pli_cfg.action_dim))

# Fresh iterator so that re-running this cell starts from the first frame again
demo_loader_iter = iter(demo_loader)
frame_interval = N # We will choose the predicted pos as the next pos for frame_interval frames and then we will get the actual position once
num_samples = 100 # Number of candidate actions sampled at every step

# The samples themselves are pulled by get_curr_state from demo_loader_iter, so
# the loop only counts steps (the loader yields one sample per step: batch_size=1)
for i in range(N):
    print('i: {}'.format(i))
    curr_pos, next_pos, action = get_curr_state(pli_cfg, demo_loader_iter)
    real_actions[i,:] = action
    real_traj[i,:] = curr_pos

    # Every frame_interval-th frame keeps the actual position; all other frames are predicted
    if i % frame_interval != 0:
        sampled_actions = sample_actions(num_samples)

        # Start below every reachable similarity so that the first candidate is always accepted
        best_cos_sim = -float('inf')
        for sampled_act in sampled_actions:
            # Candidate next position, predicted from the previously chosen position
            curr_pos_pred = predict_next_pos(pli_cfg, pred_traj[i-1], sampled_act)
            # Written into pred_traj temporarily so the whole trajectory can be scored
            pred_traj[i,:] = curr_pos_pred

            # Calculate the trajectory similarity between predicted and actual trajectory
            cos_sim = calc_traj_sim(real_traj[:i+1,:].reshape((i+1,-1)), pred_traj[:i+1,:].reshape((i+1,-1)))
            # Get the best prediction as the next step
            if cos_sim > best_cos_sim:
                best_cos_sim = cos_sim
                curr_pos = curr_pos_pred # Best curr_pos is kept here
                applied_actions[i,:] = sampled_act

        # Disabled alternative: apply the action predicted by the inverse model (lin_model)
#         # Predict the action applied
#         curr_pos, next_pos = trans_pos_to_model(pli_cfg, curr_pos).to(device), trans_pos_to_model(pli_cfg, next_pos).to(device)

#         # Get the referenced state according to the given dataset config
#         ref_tensor = get_reference(pli_cfg, curr_pos)
#         pred_action = lin_model(curr_pos-ref_tensor, next_pos-ref_tensor) # This ref_tensor should only be used in linear model
#         pred_action = all_dset.denormalize_action(pred_action[0].cpu().detach().numpy())
#         applied_actions[i,:] = pred_action

#         # Predict the next state with the action applied
#         curr_pos = predict_next_pos(pli_cfg, pred_traj[i-1], pred_action)

    # Final value for this step: the best prediction, or the actual position on non-predicted frames
    pred_traj[i, :] = curr_pos
    
        
    


i: 0
i: 1
i: 2
i: 3
i: 4
i: 5
i: 6
i: 7
i: 8
i: 9
i: 10
i: 11
i: 12
i: 13
i: 14
i: 15
i: 16
i: 17
i: 18
i: 19
i: 20
i: 21
i: 22
i: 23
i: 24
i: 25
i: 26
i: 27
i: 28
i: 29
i: 30
i: 31
i: 32
i: 33
i: 34
i: 35
i: 36
i: 37
i: 38
i: 39
i: 40
i: 41
i: 42
i: 43
i: 44
i: 45
i: 46
i: 47
i: 48
i: 49
i: 50
i: 51
i: 52
i: 53
i: 54
i: 55
i: 56
i: 57
i: 58
i: 59
i: 60
i: 61
i: 62
i: 63


In [13]:
# Draw one subplot per frame with the real and the predicted position/action,
# then save the whole grid as a single image.
def _exp_name(out_dir):
    """Return '<parent dir>_<first "_"-separated token of the run dir>' for a run directory."""
    parts = out_dir.split('/')
    return '{}_{}'.format(parts[-2], parts[-1].split('_')[0])

nrows = 4
ncols = max(1, math.ceil(N / nrows)) # At least one column so the grid can always be created
print(N, ncols)
# squeeze=False keeps axs two-dimensional even when there is a single column
# (N <= nrows), so the axs[row, col] indexing below always works
fig, axs = plt.subplots(figsize=(ncols*10,nrows*10), nrows=nrows, ncols=ncols, squeeze=False) # Draw the predicted action
print(axs.shape)
fig.suptitle("Frame-by-frame Predictions")

for i in range(N):
    # Fill the grid row by row
    axs_row, axs_col = divmod(i, ncols)
    ax = axs[axs_row, axs_col]

    real_action = real_actions[i]
    pred_action = applied_actions[i]

    curr_pos = real_traj[i]
    pred_pos = pred_traj[i]

    ax.set_title("Frame: {}".format(i))
    if pli_cfg.pos_type == 'corners':
        # Draw the current pos
        _, frame_axis = plot_corners(ax, curr_pos, plot_action=True, actions=[real_action, pred_action], color_scheme=1)
        # Draw the predicted pos on the frame axis returned by the first call
        plot_corners(ax, pred_pos, plot_action=False, use_frame_axis=True, frame_axis=frame_axis, color_scheme=2)
    elif pli_cfg.pos_type == 'rvec_tvec':
        # Draw the current pos
        _, frame_axis = plot_rvec_tvec(ax, curr_pos, plot_action=True, actions=[real_action], color_scheme=1)
        # Draw the predicted pos on the frame axis returned by the first call
        plot_rvec_tvec(ax, pred_pos, plot_action=True, actions=[pred_action], use_frame_axis=True, frame_axis=frame_axis, color_scheme=2)

pli_exp_name = _exp_name(pli_out_dir)
sbfd_exp_name = _exp_name(sbfd_out_dir)
plt_file_name = f'/home/irmak/Workspace/DAWGE/contrastive_learning/tests/plots/pli_{pli_exp_name}_sbfd_{sbfd_exp_name}_fi_{frame_interval}_test.jpg'
# Make sure the target directory exists before saving
os.makedirs(os.path.dirname(plt_file_name), exist_ok=True)
fig.savefig(plt_file_name)

print('Plot saved to: {}'.format(plt_file_name))
    

64 16
(4, 16)
Plot saved to: /home/irmak/Workspace/DAWGE/contrastive_learning/tests/plots/pli_2022.07.29_16-37_sbfd_2022.07.27_19-45_fi_64_test.jpg


In [20]:
# Print the actual and the predicted position of every step, one pair after the other
for real_step, pred_step in zip(real_traj, pred_traj):
    print(f'Actual Traj\n{real_step} \nPred Traj\n{pred_step}')

Actual Traj
[[ 56.61068836 343.37161228]
 [ 45.60305272 309.04924139]
 [119.51145422 294.84689131]
 [133.66412401 327.98571527]
 [163.54197836 640.43763012]
 [251.60304505 630.96938252]
 [253.17557824 688.96235108]
 [150.96183255 700.79764992]] 
Pred Traj
[[ 56.61068836 343.37161228]
 [ 45.60305272 309.04924139]
 [119.51145422 294.84689131]
 [133.66412401 327.98571527]
 [163.54197836 640.43763012]
 [251.60304505 630.96938252]
 [253.17557824 688.96235108]
 [150.96183255 700.79764992]]
Actual Traj
[[ 56.61068836 343.37161228]
 [ 45.60305272 309.04924139]
 [119.51145422 294.84689131]
 [133.66412401 327.98571527]
 [100.6412217  658.19055706]
 [194.9923659  634.51995939]
 [207.57252091 690.14587671]
 [110.07633796 715.        ]] 
Pred Traj
[[ 60.55025871 341.32791996]
 [ 49.44784029 306.89310327]
 [123.48733994 292.67359972]
 [137.81402144 325.9460929 ]
 [164.02375209 618.1568259 ]
 [251.66522366 608.60223562]
 [253.80963063 665.38576722]
 [151.89369214 677.19997048]]
Actual Traj
[[ 56.6106