In [None]:
import torch
from torch import nn
import gymnasium as gym
import numpy as np

In [None]:
class PolicyModel(nn.Module):
    """Gaussian policy head: an MLP for the action mean plus a learned log-std.

    The mean network is a stack of Linear/ReLU layers
    (obs_size -> 512 -> 256 -> 64 -> action_size) finished with Tanh, so every
    mean component lies in [-1, 1]. The log standard deviation is a single
    trainable vector of length ``action_size`` that does not depend on the input.
    """

    def __init__(self, obs_size, action_size):
        """Build the mean network and the zero-initialised log-std parameter.

        Args:
            obs_size: number of features in one observation vector.
            action_size: number of action dimensions produced by the policy.
        """
        super().__init__()

        # Hidden widths in order; consecutive pairs become Linear + ReLU stages.
        widths = (obs_size, 512, 256, 64)
        stages = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stages.append(nn.Linear(fan_in, fan_out))
            stages.append(nn.ReLU())

        # Output stage: project to the action dimension and squash with Tanh.
        stages.append(nn.Linear(widths[-1], action_size))
        stages.append(nn.Tanh())

        # Positional indices inside the Sequential (0, 2, 4, 6 for the Linear
        # layers) determine the state-dict keys, so the layout must stay fixed
        # for saved checkpoints to keep loading.
        self.mean = nn.Sequential(*stages)

        # Input-independent log standard deviation, one entry per action dim.
        self.logstd_layer = nn.Parameter(torch.zeros(action_size))

    def forward(self, x):
        """Return ``(mean, logstd)`` for the observation tensor ``x``.

        ``mean`` is the Tanh-squashed network output; ``logstd`` is the shared
        parameter itself (not a copy), returned unchanged for every input.
        """
        action_mean = self.mean(x)
        return action_mean, self.logstd_layer

In [None]:
# Recreate the actor; obs_size/action_size must match the architecture the
# checkpoint was saved from, otherwise load_state_dict raises a shape error.
policy_model = PolicyModel(obs_size=24, action_size = 4)

# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only machine
# (the rollout feeds CPU tensors and converts the action with .numpy()).
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code on load.
policy_model.load_state_dict(
    torch.load('actor_model_weightsv1.2.pth', map_location='cpu', weights_only=True)
)

# The model is used for evaluation only; put it in inference mode explicitly.
policy_model.eval()

In [None]:
# Roll out a single rendered episode, always acting with the policy's mean
# action (the log-std output is ignored, so no exploration noise is added).
env = gym.make("BipedalWalker-v3", render_mode = 'human')
epi_reward = 0
step = 0
try:
    # reset() returns (observation, info); only the observation is needed here.
    obs, _ = env.reset()
    while True:
        step += 1
        with torch.inference_mode():
            # Pin the dtype to float32 so the observation matches the network's
            # parameters whatever dtype the environment hands back.
            action, logstd = policy_model(torch.as_tensor(obs, dtype=torch.float32))
            action = action.numpy()
        # Gymnasium's step() returns (obs, reward, terminated, truncated, info).
        next_state, reward, terminated, truncated, info = env.step(action)
        epi_reward += reward
        # The episode is over on a real terminal state OR on truncation (e.g. a
        # step limit). Checking only `terminated` would keep stepping a finished
        # environment and could loop forever.
        done = terminated or truncated
        if done:
            break
        obs = next_state
    print(f'Episode reward: {epi_reward} | step count: {step}')
finally:
    # Release the render window even if the rollout raised or was interrupted.
    env.close()