# [07] Play Pong with Deep Q-Learning

### Imports & Constants

In [3]:
import time, os, collections
import numpy as np

import gym
import torch

from lib import wrappers, dqn_model


# Gym environment id handed to wrappers.make_env in the Main cell
DEFAULT_ENV_NAME = "PongNoFrameskip-v4"
FPS = 25 # Target frames per second for playback: each loop iteration is padded to last about 1/FPS seconds

<br> 

### Main

In [5]:
# Directories
model_dir = 'models/'
rec_dir = 'recordings/'
model_file = os.path.join(model_dir, "PongNoFrameskip-v4_19.dat")

# Environment, Monitor
# Monitor records the episode into `rec_dir`; `force=True` allows it to reuse
# a directory that already holds recordings from an earlier run.
env = wrappers.make_env(DEFAULT_ENV_NAME)
env = gym.wrappers.Monitor(env, rec_dir, force=True)

total_reward = 0.0
c = collections.Counter()  # Action counter: how often each action index was chosen

try:
    # Create network & load weights.
    # `map_location="cpu"` keeps the loaded tensors on the CPU even if the
    # checkpoint was saved from a GPU.
    net = dqn_model.DQN(env.observation_space.shape, env.action_space.n)
    weights = torch.load(model_file, map_location="cpu")
    # Only the state dict is passed: the second positional parameter of
    # `load_state_dict` is `strict`, not a path.
    net.load_state_dict(weights)

    state = env.reset()
    frame_time = 1 / FPS  # Minimum wall-clock duration of one loop iteration

    while True:
        start_ts = time.time()
        env.render()

        # Pass the observation to the agent and select the action with the
        # maximum Q-value. `np.asarray([state])` adds the batch dimension
        # (`np.array(..., copy=False)` raises on NumPy >= 2.0 here).
        state_v = torch.tensor(np.asarray([state]))
        with torch.no_grad():  # Inference only, no autograd graph needed
            q_vals = net(state_v).numpy()[0]
        action = int(np.argmax(q_vals))
        c[action] += 1

        # Take step & accumulate total reward
        state, reward, done, _ = env.step(action)
        total_reward += reward

        if done:
            break

        # Frame pacing: sleep for whatever is left of this frame's time budget
        # so playback runs at roughly FPS frames per second.
        delta = frame_time - (time.time() - start_ts)
        if delta > 0:
            time.sleep(delta)
finally:
    # Always release the environment (and let Monitor finish its recording),
    # even if the episode is interrupted or an error occurs.
    env.close()

print("Total reward: %.2f" % total_reward)
print("Action counts:", c)

Total reward: 21.00
Action counts: Counter({3: 913, 1: 206, 4: 203, 0: 182, 2: 69, 5: 57})


<br>

### Play Video

In [8]:
from IPython.display import Video

# The recorder puts a run-specific number in the video file name (it appears
# to be the process id), so a hard-coded name stops working after a re-run.
# Pick the most recently written .mp4 in `rec_dir` instead.
video_files = sorted(
    (os.path.join(rec_dir, name) for name in os.listdir(rec_dir) if name.endswith(".mp4")),
    key=os.path.getmtime,
)
if not video_files:
    raise FileNotFoundError("No .mp4 recording found in %r - run the Main cell first" % rec_dir)

Video(video_files[-1], width=300)

<br>