In [1]:

# Notebook to check representation distances
import glob
import os
import hydra
import numpy as np
import torch
import torch.nn as nn
from torchvision.utils import save_image
from torchvision import transforms as T
import torch.nn.functional as F

# from agent.encoder import Encoder

from tactile_learning.models import *
from tactile_learning.utils import *
from tactile_learning.tactile_data import *


In [2]:
# Get a random episode
fn = '/home/irmak/Workspace/tactile-learning/buffer/20230507T204720_14_76.npz'
with open(fn, 'rb') as f:
    episode = np.load(f)
    episode = {k: episode[k] for k in episode.keys()}

In [3]:
print('pixels.shape: {}'.format(episode['pixels'].shape))
print('tactile.shape: {}'.format(episode['tactile'].shape))

pixels.shape: (77, 3, 224, 224)
tactile.shape: (77, 1024)


In [4]:
data_path = '/home/irmak/Workspace/Holo-Bot/extracted_data/bowl_picking/after_rss' 
expert_demo_num = 24
roots = sorted(glob.glob(f'{data_path}/demonstration_*'))
data = load_data(roots, demos_to_use=[expert_demo_num])

In [5]:
# Class to analyze data
class RepresentationAnalyzer:
    def __init__(
        self,
        data,
        data_path = '/home/irmak/Workspace/Holo-Bot/extracted_data/bowl_picking/after_rss',
        tactile_out_dir = '/home/irmak/Workspace/tactile-learning/tactile_learning/out/2023.01.28/12-32_tactile_byol_bs_512_tactile_play_data_alexnet_pretrained_duration_120',
        image_out_dir = '/home/irmak/Workspace/tactile-learning/tactile_learning/out/2023.05.06/10-50_image_byol_bs_32_epochs_500_lr_1e-05_bowl_picking_after_rss',
        device = 'cuda',
    ):
        # Set expert demo
        # roots = sorted(glob.glob(f'{data_path}/demonstration_*'))
        # self.data = load_data(roots, demos_to_use=[expert_demo_num])
        self.data = data
        self.data_path = data_path
        self.device = torch.device(device)

        image_cfg, self.image_encoder, self.image_transform = init_encoder_info(self.device, image_out_dir, 'image')
        self.inv_image_transform = get_inverse_image_norm() 
        self.image_normalize = T.Normalize(VISION_IMAGE_MEANS, VISION_IMAGE_STDS)

        tactile_cfg, self.tactile_encoder, _ = init_encoder_info(self.device, tactile_out_dir, 'tactile', model_type='byol')
        tactile_img = TactileImage(
            tactile_image_size = tactile_cfg.tactile_image_size, 
            shuffle_type = None
        )
        self.tactile_repr = TactileRepresentation( # This will be used when calculating the reward - not getting the observations
            encoder_out_dim = tactile_cfg.encoder.out_dim,
            tactile_encoder = self.tactile_encoder,
            tactile_image = tactile_img,
            representation_type = 'tdex'
        )

        self.rewards = 'sinkhorn_cosine'
        self.sinkhorn_rew_scale = 200

        self.reward_representations = ['image', 'tactile']
        self.policy_representations = ['image', 'tactile', 'features']

        self._set_expert_demo()

    def _load_dataset_image(self, demo_id, image_id):
        dset_img = load_dataset_image(self.data_path, demo_id, image_id, self.view_num)
        img = self.image_transform(dset_img)
        return torch.FloatTensor(img) 

    def _set_expert_demo(self):
        # We'll stack the tactile repr and the image observations
        self.expert_demo = dict(
            image_obs = [], 
            tactile_repr = []
        )
        for step_id in range(len(self.data['image']['indices'])): 
            demo_id, tactile_id = self.data['tactile']['indices'][step_id]

            tactile_value = self.data['tactile']['values'][demo_id][tactile_id]
            tactile_repr = self.tactile_repr.get(tactile_value, detach=False)

            _, image_id = self.data['image']['indices'][step_id]
            image = load_dataset_image(
                data_path = self.data_path, 
                demo_id = demo_id, 
                image_id = image_id,
                view_num = 1,
                transform = self.image_transform
            )

            # if step_id == 0:
            #     tactile_reprs = tactile_repr.unsqueeze(0)
            #     image_obs = image.unsqueeze(0)
            # else:
            #     image_obs = torch.concat([image_obs, image.unsqueeze(0)], dim=0)
            #
            #      tactile_reprs = torch.concat([tactile_reprs, tactile_repr.unsqueeze(0)], dim=0)
            self.expert_demo['image_obs'].append(image)
            self.expert_demo['tactile_repr'].append(tactile_repr)


        # self.expert_demo = dict(
        #     image_obs = image_obs, 
        #     tactile_repr = tactile_reprs
        # )
        for obs_type in self.expert_demo.keys():
            self.expert_demo[obs_type] = torch.stack(self.expert_demo[obs_type], 0)

    def _get_representation_distances(self, episode_obs, mock=False):
        curr_reprs, exp_reprs = [], []
        if 'image' in self.reward_representations: # We will not be using features for reward for sure
            if mock:
                image_reprs = self.image_encoder(episode_obs['image_obs'].to(self.device))
            else: 
                image_obs = self.image_normalize(episode_obs['image_obs']).to(self.device) # This will give all the image observations of one episode
                image_reprs = self.image_encoder(image_obs)
            expert_image_reprs = self.image_encoder(self.expert_demo['image_obs'].to(self.device))
            curr_reprs.append(image_reprs)
            exp_reprs.append(expert_image_reprs)
    
            del image_reprs
            del expert_image_reprs
            torch.cuda.empty_cache()

        if 'tactile' in self.reward_representations:
            tactile_reprs = episode_obs['tactile_repr'].to(self.device) # This will give all the representations of one episode
            expert_tactile_reprs = self.expert_demo['tactile_repr'].to(self.device)
            curr_reprs.append(tactile_reprs)
            exp_reprs.append(expert_tactile_reprs)

            del tactile_reprs
            del expert_tactile_reprs
            torch.cuda.empty_cache()

        # Concatenate everything now
        obs = torch.concat(curr_reprs, dim=-1).detach()
        exp = torch.concat(exp_reprs, dim=-1).detach()

        # Get the rewards
        if self.rewards == 'sinkhorn_cosine':
            cost_matrix = cosine_distance(
                obs, exp)  # Get cost matrix for samples using critic network.
            print('cost_matrix.shape: {}'.format(cost_matrix.shape))
            transport_plan = optimal_transport_plan(
                obs, exp, cost_matrix, method='sinkhorn',
                niter=100).float()  # Getting optimal coupling
            print('ot plan: {}'.format(transport_plan.shape))
            ot_rewards = -self.sinkhorn_rew_scale * torch.diag(
                torch.mm(transport_plan,
                            cost_matrix.T)).detach().cpu().numpy()

        episode_obs = {episode_obs[k].detach().cpu() for k in episode_obs.keys()}
        print('ot_rewards: {}'.format(ot_rewards))

        del obs
        del exp
        torch.cuda.empty_cache()

        return ot_rewards

    def ot_rewarder(self, episode_obs, mock=False): # TODO: Delete the mock option
        
        # NOTE: In this code we're not using target encoder since the encoders are already frozen
        curr_reprs, exp_reprs = [], []
        if 'image' in self.reward_representations: # We will not be using features for reward for sure
            if mock:
                image_reprs = self.image_encoder(episode_obs['image_obs'].to(self.device))
            else:
                image_obs = self.image_normalize(episode_obs['image_obs']).to(self.device) # This will give all the image observations of one episode
                image_reprs = self.image_encoder(image_obs)
            expert_image_reprs = self.image_encoder(self.expert_demo['image_obs'].to(self.device))
            curr_reprs.append(image_reprs)
            exp_reprs.append(expert_image_reprs)

            del image_reprs
            del expert_image_reprs
            torch.cuda.empty_cache()
    
        if 'tactile' in self.reward_representations:
            tactile_reprs = episode_obs['tactile_repr'].to(self.device) # This will give all the representations of one episode
            expert_tactile_reprs = self.expert_demo['tactile_repr'].to(self.device)
            curr_reprs.append(tactile_reprs)
            exp_reprs.append(expert_tactile_reprs)

            del tactile_reprs
            del expert_tactile_reprs
            torch.cuda.empty_cache()

        # Concatenate everything now
        obs = torch.concat(curr_reprs, dim=-1).detach()
        exp = torch.concat(exp_reprs, dim=-1).detach()

        if self.rewards == 'sinkhorn_cosine':
            cost_matrix = cosine_distance(
                obs, exp)  # Get cost matrix for samples using critic network.
            transport_plan = optimal_transport_plan(
                obs, exp, cost_matrix, method='sinkhorn',
                niter=100).float()  # Getting optimal coupling
            ot_rewards = -self.sinkhorn_rew_scale * torch.diag(
                torch.mm(transport_plan,
                            cost_matrix.T)).detach().cpu().numpy()
            
        elif self.rewards == 'sinkhorn_euclidean':
            cost_matrix = euclidean_distance(
                obs, exp)  # Get cost matrix for samples using critic network.
            transport_plan = optimal_transport_plan(
                obs, exp, cost_matrix, method='sinkhorn',
                niter=100).float()  # Getting optimal coupling
            ot_rewards = -self.sinkhorn_rew_scale * torch.diag(
                torch.mm(transport_plan,
                            cost_matrix.T)).detach().cpu().numpy()
            
        elif self.rewards == 'cosine':
            exp = torch.cat((exp, exp[-1].unsqueeze(0)))
            ot_rewards = -(1. - F.cosine_similarity(obs, exp))
            ot_rewards *= self.sinkhorn_rew_scale
            ot_rewards = ot_rewards.detach().cpu().numpy()
            
        elif self.rewards == 'euclidean':
            exp = torch.cat((exp, exp[-1].unsqueeze(0)))
            ot_rewards = -(obs - exp).norm(dim=1)
            ot_rewards *= self.sinkhorn_rew_scale
            ot_rewards = ot_rewards.detach().cpu().numpy()
            
        else:
            raise NotImplementedError()
        
        del obs
        del exp 
        torch.cuda.empty_cache()

        return ot_rewards

In [6]:

repr_analyzer = RepresentationAnalyzer(data=data)





mod_name: collections, name: OrderedDict
mod_name: torch._utils, name: _rebuild_parameter
mod_name: torch._utils, name: _rebuild_tensor_v2


Using cache found in /home/irmak/.cache/torch/hub/pytorch_vision_v0.10.0


mod_name: collections, name: OrderedDict
mod_name: torch._utils, name: _rebuild_parameter
mod_name: torch._utils, name: _rebuild_tensor_v2


In [7]:
episode_obs = dict(
    image_obs = torch.FloatTensor(episode['pixels']),
    tactile_repr = torch.FloatTensor(episode['tactile'])
)
ot_rewards = repr_analyzer._get_representation_distances(episode_obs, mock=False)
torch.cuda.empty_cache()

cost_matrix.shape: torch.Size([77, 69])
ot plan: torch.Size([77, 69])
ot_rewards: [-1.5194867 -1.5211235 -1.5284127 -1.5192441 -1.5157703 -1.5184269
 -1.5145341 -1.5080873 -1.5011551 -1.5019797 -1.4989655 -1.5020616
 -1.5034844 -1.5049654 -1.5041409 -1.5033431 -1.5006042 -1.4998842
 -1.4976026 -1.4977747 -1.507434  -1.5014273 -1.5012411 -1.5047222
 -1.501378  -1.5022033 -1.5066106 -1.5053742 -1.5074621 -1.508056
 -1.5081058 -1.505208  -1.5077295 -1.5188035 -1.510505  -1.5181931
 -1.508751  -1.5083652 -1.5090806 -1.5085036 -1.5107883 -1.5097964
 -1.5140144 -1.5099934 -1.5187448 -1.5117332 -1.5104812 -1.5064644
 -1.5051708 -1.50601   -1.5093814 -1.5079747 -1.5083027 -1.5099336
 -1.5108486 -1.5089498 -1.5089706 -1.5071304 -1.5086879 -1.506789
 -1.5062562 -1.5037047 -1.4930328 -1.490211  -1.4953387 -1.494063
 -1.4947919 -1.498822  -1.4954224 -1.4972954 -1.496622  -1.4972457
 -1.4970027 -1.4968832 -1.4960258 -1.496317  -1.4962063]


In [8]:
ot_reward_sum = np.sum(ot_rewards)
repr_analyzer.sinkhorn_rew_scale = repr_analyzer.sinkhorn_rew_scale * 10 / float(
    np.abs(ot_reward_sum))
new_rewards = repr_analyzer._get_representation_distances(
    episode_obs = episode_obs
)
torch.cuda.empty_cache()
new_rewards_sum = np.sum(new_rewards)
print(f'new_rewards_sum: {new_rewards_sum}')

cost_matrix.shape: torch.Size([77, 69])
ot plan: torch.Size([77, 69])
ot_rewards: [-0.13105169 -0.13119285 -0.13182153 -0.13103075 -0.13073117 -0.13096027
 -0.13062453 -0.13006851 -0.12947063 -0.12954175 -0.12928179 -0.12954882
 -0.12967153 -0.12979926 -0.12972815 -0.12965934 -0.12942313 -0.12936103
 -0.12916423 -0.12917908 -0.13001218 -0.12949412 -0.12947805 -0.1297783
 -0.12948987 -0.12956105 -0.12994115 -0.12983452 -0.1300146  -0.13006581
 -0.1300701  -0.12982018 -0.13003765 -0.13099276 -0.13027702 -0.13094011
 -0.13012576 -0.13009249 -0.13015419 -0.13010442 -0.13030148 -0.13021591
 -0.13057971 -0.13023292 -0.1309877  -0.13038296 -0.130275   -0.12992854
 -0.12981698 -0.12988935 -0.13018014 -0.13005881 -0.13008709 -0.13022776
 -0.13030668 -0.1301429  -0.1301447  -0.12998599 -0.1301203  -0.12995654
 -0.12991059 -0.12969053 -0.12877011 -0.12852673 -0.12896898 -0.12885897
 -0.12892182 -0.1292694  -0.12897621 -0.12913774 -0.12907967 -0.12913346
 -0.1291125  -0.12910219 -0.12902825 -0.129

In [9]:
episode_obs = dict()

In [10]:
# Get the rewards from an actual demo
mock_data = load_data(roots, demos_to_use=[29])
# We'll stack the tactile repr and the image observations
mock_episode_obs = dict(
    image_obs = [],
    tactile_repr = []
)
for step_id in range(len(mock_data['image']['indices'])): 
    demo_id, tactile_id = mock_data['tactile']['indices'][step_id]

    tactile_value = mock_data['tactile']['values'][demo_id][tactile_id]
    tactile_repr = repr_analyzer.tactile_repr.get(tactile_value, detach=False)

    _, image_id = mock_data['image']['indices'][step_id]
    image = load_dataset_image(
        data_path = repr_analyzer.data_path, 
        demo_id = demo_id, 
        image_id = image_id,
        view_num = 1,
        transform = repr_analyzer.image_transform
    )

    # if step_id == 0:
    #     tactile_reprs = tactile_repr.unsqueeze(0)
    #     image_obs = image.unsqueeze(0)
    # else:
    #     image_obs = torch.concat([image_obs, image.unsqueeze(0)], dim=0)
    #     tactile_reprs = torch.concat([tactile_reprs, tactile_repr.unsqueeze(0)], dim=0)
    mock_episode_obs['image_obs'].append(image)
    mock_episode_obs['tactile_repr'].append(tactile_repr)

for obs_type in mock_episode_obs.keys():
    mock_episode_obs[obs_type] = torch.stack(mock_episode_obs[obs_type], 0)


In [11]:
mock_episode_obs['image_obs'].shape

torch.Size([80, 3, 480, 480])

In [13]:
repr_analyzer._get_representation_distances(episode_obs=mock_episode_obs, mock=True)
torch.cuda.empty_cache()

RuntimeError: CUDA out of memory. Tried to allocate 62.00 MiB (GPU 0; 15.74 GiB total capacity; 14.06 GiB already allocated; 9.06 MiB free; 14.23 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF