In [1]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
import torch
import hydra
from omegaconf import OmegaConf
from torchrl.data import CompositeSpec, UnboundedContinuousTensorSpec, BoundedTensorSpec, DiscreteTensorSpec


def create_observation_spec(device):
    return CompositeSpec(
        agents=CompositeSpec(
            observation=UnboundedContinuousTensorSpec(shape=torch.Size([2, 1, 30]), device=device, dtype=torch.float32),
            intrinsics=UnboundedContinuousTensorSpec(shape=torch.Size([2, 1, 24]), device=device, dtype=torch.float32),
            shape=torch.Size([2]),
            device=device
        ),
        is_init=DiscreteTensorSpec(shape=torch.Size([2, 1]), n=2, device=device, dtype=torch.bool),
        shape=torch.Size([2]),
        device=device
    )

def create_action_spec(device):
    return UnboundedContinuousTensorSpec(
        shape=torch.Size([2, 1, 4]),
        device=device,
        dtype=torch.float32
    )

def create_reward_spec(device):
    return UnboundedContinuousTensorSpec(
        shape=torch.Size([2, 1, 1]),
        device=device,
        dtype=torch.float32
    )

  return torch._C._cuda_getDeviceCount() > 0


In [45]:
# load .pt model

from omegaconf import OmegaConf
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import os
import sys
from tensordict.tensordict import TensorDict, TensorDictBase
from torchrl.data import (
    BinaryDiscreteTensorSpec,
    CompositeSpec,
    UnboundedContinuousTensorSpec,
    BoundedTensorSpec,
)

from ppo.ppo import PPOPolicy

class inference():
    def __init__(self, model_path):
        self.model_path = model_path
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model_dict = torch.load(self.model_path, map_location=self.device)
        #self.policy = self.create_model()
        print(self.model_dict.keys())

        self.cfg = self.model_dict['algorithm_config']
        self.cfg = OmegaConf.create(self.cfg)
        

        #print(self.cfg)
        # conver to omegaconf


        self.observation_spec =  self.model_dict['observation_spec']
        self.observation_spec._device = self.device

        #print(self.observation_spec)

        self.action_spec = self.model_dict['action_spec']
        self.action_spec.device = self.device

        self.reward_spec = self.model_dict['reward_spec']
        self.reward_spec.device = self.device

        self.state_dict = self.model_dict['model_state_dict']
        #print(self.state_dict.keys())
        self.policy = self.create_model()

    def create_model(self):
        return PPOPolicy(
            cfg=self.cfg,
            observation_spec=self.observation_spec,
            action_spec=self.action_spec,
            reward_spec=self.reward_spec,
            device=self.device,
            state_dict = self.state_dict
        )


model_path = "model/full_checkpoint_full_hover_humming.pt"

inf = inference(model_path)

#print(inf.policy)

# evaluate the model create a random observation
obs = TensorDict({'agents': 
                  {'observation': torch.randn(1, 1, 30).to(inf.device), 
                   'intrinsics': torch.randn(1, 1, 30).to(inf.device)}, 
                   'is_init': torch.tensor([[1], [0]]).to(inf.device)})


prediction = inf.policy(obs)

print(prediction['agents']['action'])


dict_keys(['model_state_dict', 'algorithm', 'algorithm_config', 'frames', 'observation_spec', 'action_spec', 'reward_spec'])
tensor([[[-0.5389, -0.3602, -0.3442,  0.0682]]], device='cuda:0',
       grad_fn=<ViewBackward0>)


In [11]:
print(prediction.keys())    

_StringKeys({'agents': TensorDict(
    fields={
        action: Tensor(shape=torch.Size([1, 1, 4]), device=cuda:0, dtype=torch.float32, is_shared=True),
        intrinsics: Tensor(shape=torch.Size([1, 1, 30]), device=cuda:0, dtype=torch.float32, is_shared=True),
        observation: Tensor(shape=torch.Size([1, 1, 30]), device=cuda:0, dtype=torch.float32, is_shared=True)},
    batch_size=torch.Size([]),
    device=None,
    is_shared=False), 'is_init': tensor([[1],
        [0]], device='cuda:0'), 'sample_log_prob': tensor([[8.5385]], device='cuda:0', grad_fn=<SumBackward1>), 'state_value': tensor([[[-1.6326]]], device='cuda:0', grad_fn=<ViewBackward0>)})


In [8]:
print(inf.policy)

PPOPolicy(
  (critic_loss_fn): HuberLoss()
  (gae): GAE()
  (actor): ProbabilisticActor(
      module=ModuleList(
        (0): TensorDictModule(
            module=Sequential(
              (0): Sequential(
                (0): Linear(in_features=30, out_features=256, bias=True)
                (1): LeakyReLU(negative_slope=0.01)
                (2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
                (3): Linear(in_features=256, out_features=256, bias=True)
                (4): LeakyReLU(negative_slope=0.01)
                (5): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
                (6): Linear(in_features=256, out_features=256, bias=True)
                (7): LeakyReLU(negative_slope=0.01)
                (8): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
              )
              (1): Actor(
                (actor_mean): Linear(in_features=256, out_features=4, bias=True)
              )
            ),
            device=cuda:0,
          