In [2]:
import math
import random

import gym
import numpy as np
        
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions import Normal,Beta

from IPython.display import clear_output
from PIL import Image

# Load Expert Model

In [3]:
def init_weights(m):
    """Initialise an ``nn.Linear`` layer in place; ignore every other module.

    Weights are drawn from a normal distribution with mean 0 and standard
    deviation 0.1. The bias, when the layer has one, is filled with the
    constant 0.1. The function has the one-argument signature expected by
    ``nn.Module.apply``, so it can be applied recursively to a whole model.

    Args:
        m: Any module. Only ``nn.Linear`` instances are modified.
    """
    if not isinstance(m, nn.Linear):
        return

    nn.init.normal_(m.weight, mean=0., std=0.1)
    # nn.Linear(..., bias=False) stores ``bias`` as None, which
    # nn.init.constant_ cannot fill; skip it instead of crashing.
    if m.bias is not None:
        nn.init.constant_(m.bias, 0.1)
        

class ActorCritic(nn.Module):
    """Actor-critic network with two independent one-hidden-layer MLP heads.

    The actor head outputs the mean of a Gaussian over actions; the spread of
    that Gaussian comes from a learnable ``log_std`` parameter that does not
    depend on the input. The critic head outputs one value per input row.

    Args:
        num_inputs: Size of each input feature vector.
        num_outputs: Number of action dimensions produced by the actor.
        hidden_size: Width of the hidden layer in both heads.
        std: Initial value of every entry of ``log_std``. Despite the name it
            is stored as the *log* of the standard deviation (the forward
            pass exponentiates it), so the default 0.0 means a std of 1.
    """

    def __init__(self, num_inputs, num_outputs, hidden_size, std=0.0):
        super().__init__()

        # Keep the critic registered before the actor so that parameter
        # ordering and state-dict layout stay exactly as they were.
        self.critic = self._two_layer_mlp(num_inputs, hidden_size, 1)
        self.actor = self._two_layer_mlp(num_inputs, hidden_size, num_outputs)

        # One learnable log-std per action dimension, shared across inputs.
        self.log_std = nn.Parameter(torch.ones(1, num_outputs).mul(std))

        # Re-initialise every linear layer with the file-level initialiser.
        self.apply(init_weights)

    @staticmethod
    def _two_layer_mlp(in_features, hidden_features, out_features):
        """Build Linear -> ReLU -> Linear with the given layer widths."""
        return nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, out_features),
        )

    def forward(self, x):
        """Return ``(dist, value)`` for input ``x``.

        ``dist`` is a ``Normal`` whose mean is the actor output and whose
        standard deviation is ``exp(log_std)`` broadcast to the mean's shape;
        ``value`` is the critic output.
        """
        state_value = self.critic(x)
        mean = self.actor(x)
        spread = torch.exp(self.log_std).expand_as(mean)
        return Normal(mean, spread), state_value

In [4]:
# Build the environment and read the network dimensions from its spaces.
env = gym.make('Pendulum-v0')
num_inputs  = env.observation_space.shape[0]
num_outputs = env.action_space.shape[0]
hidden_size = 256
device = 'cpu'

# NOTE(review): torch.load unpickles the file and can therefore execute
# arbitrary code; only load checkpoints from a trusted source.
checkpoint = torch.load('model_hindsight_ppo', map_location=device)

if isinstance(checkpoint, nn.Module):
    # The file holds a fully pickled model: use it directly. Unpickling only
    # needs the ActorCritic class to be defined, not a pre-built instance.
    load_model = checkpoint.to(device)
else:
    # Otherwise treat the file as a state dict. The network input is the
    # observation concatenated with a goal of the same size, hence
    # 2 * num_inputs.
    load_model = ActorCritic(2*num_inputs, num_outputs, hidden_size).to(device)
    load_model.load_state_dict(checkpoint)

# The model is only used for inference from here on.
load_model.eval()

# Save Environment RGB Frames as a GIF

In [6]:
# Roll out one episode with the loaded policy, capture every rendered frame,
# and write the frames to an animated GIF.
frames = []
gif_file = 'Pendulum_HPPOModel9.gif'

done = False
total_reward = 0

state = env.reset()
# Goal vector appended to every observation before it is fed to the policy.
# NOTE(review): presumably this must match the goal encoding used when the
# model was trained -- confirm against the training notebook.
goal = np.asarray([0, 0, 0])
while not done:
    frames.append(Image.fromarray(env.render(mode='rgb_array')))
    state_goal = np.concatenate((state, goal), 0)
    state_goal = torch.FloatTensor(state_goal).unsqueeze(0).to(device)
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        dist, _ = load_model(state_goal)
        action = dist.sample().cpu().numpy()[0]
    next_state, reward, done, _ = env.step(action)
    state = next_state
    total_reward += reward

print ('total_reward: ', total_reward)

# Save the first real frame and append the rest. Starting from a blank
# Image.new(...) canvas would put a spurious black frame at the start of
# the animation. Change gif_file above if a different path is needed.
frames[0].save(gif_file, save_all=True, append_images=frames[1:])

total_reward:  -256.9926050612457


# Display GIF File

In [5]:
# Optional: display a saved GIF inline in the notebook (left disabled).
# NOTE(review): if this is re-enabled, importing `Image` from IPython.display
# will shadow the `PIL.Image` name used by the frame-recording cell; import it
# under an alias instead. The URL below also points at
# 'Pendulum_RandomAgent.gif', not the 'Pendulum_HPPOModel9.gif' file written
# by the recording cell -- confirm which file is meant.
# from IPython.display import Image, display
# X = Image(url='Pendulum_RandomAgent.gif')
# display(X)