In [8]:
import gym
from gym.wrappers import Monitor
from stable_baselines3 import A2C, PPO, DQN, DDPG
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
import os

In [9]:
!python -m atari_py.import_roms .\Roms\ROMS

# CartPole

In [10]:
# Create the CartPole environment; `env` is the object the saved agent is
# loaded against and replayed in by the following cells.
env_name = 'CartPole-v0'
env = gym.make(env_name)

In [11]:
# Restore the saved PPO agent from disk and attach it to the current `env`.
cartpole_checkpoint = './Training/Saved Models/ppo_cartpole'
cartpole = PPO.load(cartpole_checkpoint, env=env)

In [12]:
# Replay the loaded CartPole agent for `episodes` rendered episodes and print
# the total reward collected in each one.
episodes = 10
try:
    for episode in range(1, episodes+1):
        obs = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            # predict() returns a pair; only the action is needed here.
            action, _ = cartpole.predict(obs)
            obs, reward, done, info = env.step(action)
            score += reward
        print('Episode:{} Score:{}'.format(episode, score))
finally:
    # Always release the environment (and its render window), even when the
    # cell is interrupted or an exception is raised mid-episode.
    env.close()

Episode:1 Score:200.0
Episode:2 Score:200.0
Episode:3 Score:200.0
Episode:4 Score:200.0
Episode:5 Score:200.0
Episode:6 Score:200.0
Episode:7 Score:200.0
Episode:8 Score:200.0
Episode:9 Score:200.0
Episode:10 Score:200.0


In [12]:
# Manual cleanup cell: closes the current environment. Presumably kept to
# dismiss the render window after an interrupted run -- confirm it is still needed.
env.close()

# Pendulum

In [13]:
# Create the Pendulum environment; this rebinds `env_name` and `env`, which
# previously referred to the CartPole setup.
env_name = 'Pendulum-v0'
env = gym.make(env_name)

In [14]:
# Restore the saved DDPG agent from disk and attach it to the current `env`.
pendulum_checkpoint = './Training/Saved Models/ddpg_pendulum500k'
pendulum = DDPG.load(pendulum_checkpoint, env=env)

In [15]:
# Manual cleanup cell: closes the current environment.
# NOTE(review): in document order this runs before the evaluation loop below,
# which calls env.reset()/env.render() on the same object -- confirm this
# cell is still needed.
env.close()

In [17]:
# Replay the loaded Pendulum agent for `episodes` rendered episodes and print
# the total reward collected in each one.
episodes = 10
try:
    for episode in range(1, episodes+1):
        obs = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            # predict() returns a pair; only the action is needed here.
            action, _ = pendulum.predict(obs)
            obs, reward, done, info = env.step(action)
            score += reward
        print('Episode:{} Score:{}'.format(episode, score))
finally:
    # Always release the environment (and its render window), even when the
    # cell is interrupted or an exception is raised mid-episode.
    env.close()

Episode:1 Score:-122.60450684357922
Episode:2 Score:-119.8476825488763
Episode:3 Score:-228.0823483365638
Episode:4 Score:-119.05780847059735
Episode:5 Score:-125.21328083180082
Episode:6 Score:-225.30105851550263
Episode:7 Score:-235.1252615725433
Episode:8 Score:-126.22938297358381
Episode:9 Score:-127.08378075430355
Episode:10 Score:-134.42548577648523


In [14]:
# Manual cleanup cell: closes the current environment. Presumably kept to
# dismiss the render window after an interrupted run -- confirm it is still needed.
env.close()

# Breakout

In [18]:
# Build a vectorised Breakout environment (4 copies, seed 0) and wrap it so
# that 4 consecutive frames are stacked per observation.
breakout_vec_env = make_atari_env('Breakout-v0', n_envs=4, seed=0)
env = VecFrameStack(breakout_vec_env, n_stack=4)

In [19]:
# Restore the saved A2C agent from disk and attach it to the current `env`.
breakout_checkpoint = './Training/Saved Models/a2c_breakout1mil'
breakout = A2C.load(breakout_checkpoint, env=env)

In [22]:
# Rebuild the Breakout environment with a single copy for replay. With
# n_envs = 1 the vectorised `reward`/`done` values are one-element arrays, so
# `while not done` is well-defined and `score` prints like `[3.]`.
env = make_atari_env('Breakout-v0', n_envs = 1, seed = 0)
env = VecFrameStack(env, n_stack = 4)

# NOTE(review): make_atari_env applies SB3's Atari preprocessing; if that
# includes episodic-life termination, each "episode" below may be a single
# life rather than a full game -- confirm before comparing scores.
episodes = 20
try:
    for episode in range(1, episodes+1):
        obs = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            # predict() returns a pair; only the action is needed here.
            action, _ = breakout.predict(obs)
            obs, reward, done, info = env.step(action)
            score += reward
        print('Episode:{} Score:{}'.format(episode, score))
finally:
    # Always release the environment (and its render window), even when the
    # cell is interrupted or an exception is raised mid-episode.
    env.close()

Episode:1 Score:[3.]
Episode:2 Score:[0.]
Episode:3 Score:[4.]
Episode:4 Score:[4.]
Episode:5 Score:[1.]
Episode:6 Score:[4.]
Episode:7 Score:[0.]
Episode:8 Score:[0.]
Episode:9 Score:[0.]
Episode:10 Score:[6.]
Episode:11 Score:[0.]
Episode:12 Score:[2.]
Episode:13 Score:[8.]
Episode:14 Score:[2.]
Episode:15 Score:[0.]
Episode:16 Score:[0.]
Episode:17 Score:[0.]
Episode:18 Score:[0.]
Episode:19 Score:[5.]
Episode:20 Score:[0.]


In [21]:
# Manual cleanup cell: closes the current environment. Presumably kept to
# dismiss the render window after an interrupted run -- confirm it is still needed.
env.close()

# Car Racing

In [23]:
# Create the CarRacing environment wrapped in a single-env DummyVecEnv.
env_name = 'CarRacing-v0'
# The factory builds the environment itself instead of closing over the name
# `env`, which this very statement rebinds to the DummyVecEnv; the factory
# therefore no longer depends on when it is called.
env = DummyVecEnv([lambda: gym.make(env_name)])

In [24]:
# Restore the saved PPO agent (500k checkpoint) and attach it to the current `env`.
car_checkpoint = './Training/Saved Models/ppo_car_500k'
car = PPO.load(car_checkpoint, env=env)

In [33]:
# Manual cleanup cell: closes the current environment.
# NOTE(review): the execution count of this cell (33) is higher than that of
# the cells that follow, so it was run out of document order; in a top-to-bottom
# run it closes `env` right before the evaluation loop below uses it -- confirm
# this cell is still needed.
env.close()

In [25]:
# Replay the `car` agent for `episodes` rendered episodes and print the total
# reward of each one. `env` is a vectorised wrapper here, so `reward`/`done`
# are one-element arrays and `score` prints like `[-60.3956]`.
episodes = 5
try:
    for episode in range(1, episodes+1):
        obs = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            # predict() returns a pair; only the action is needed here.
            action, _ = car.predict(obs)
            obs, reward, done, info = env.step(action)
            score += reward
        print('Episode:{} Score:{}'.format(episode, score))
finally:
    # Always release the environment (and its render window), even when the
    # cell is interrupted or an exception is raised mid-episode.
    env.close()

Track generation: 1200..1504 -> 304-tiles track
Track generation: 1077..1356 -> 279-tiles track
Episode:1 Score:[-60.3956]
Track generation: 1167..1471 -> 304-tiles track
Track generation: 1198..1502 -> 304-tiles track
Episode:2 Score:[-60.39546]
Track generation: 1263..1583 -> 320-tiles track
Track generation: 1090..1372 -> 282-tiles track
Episode:3 Score:[-81.19055]
Track generation: 1187..1488 -> 301-tiles track
Track generation: 1158..1461 -> 303-tiles track
Episode:4 Score:[-79.999344]
Track generation: 1240..1554 -> 314-tiles track
Track generation: 1104..1384 -> 280-tiles track
Episode:5 Score:[-80.83]


In [4]:
# Restore the saved PPO agent (800k checkpoint) and attach it to the current `env`.
car2_checkpoint = './Training/Saved Models/ppo_car_800k'
car2 = PPO.load(car2_checkpoint, env=env)

In [5]:
# Replay the `car2` agent for `episodes` rendered episodes and print the total
# reward of each one. `env` is a vectorised wrapper here, so `reward`/`done`
# are one-element arrays and `score` prints as a bracketed value.
episodes = 5
try:
    for episode in range(1, episodes+1):
        obs = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            # predict() returns a pair; only the action is needed here.
            action, _ = car2.predict(obs)
            obs, reward, done, info = env.step(action)
            score += reward
        print('Episode:{} Score:{}'.format(episode, score))
finally:
    # Always release the environment (and its render window), even when the
    # cell is interrupted or an exception is raised mid-episode.
    env.close()

Track generation: 1156..1449 -> 293-tiles track




Track generation: 1159..1453 -> 294-tiles track
Episode:1 Score:[-82.87601]
Track generation: 963..1208 -> 245-tiles track
Track generation: 1095..1381 -> 286-tiles track
Episode:2 Score:[-79.507545]
Track generation: 1186..1487 -> 301-tiles track
Track generation: 1068..1339 -> 271-tiles track
Episode:3 Score:[-83.33263]
Track generation: 1201..1505 -> 304-tiles track
Track generation: 1147..1438 -> 291-tiles track
Episode:4 Score:[-83.49764]
Track generation: 1127..1413 -> 286-tiles track
Track generation: 1314..1646 -> 332-tiles track
Episode:5 Score:[-82.455444]
Track generation: 1055..1323 -> 268-tiles track
Track generation: 1139..1428 -> 289-tiles track
Episode:6 Score:[-81.27273]
Track generation: 1061..1334 -> 273-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1313..1645 -> 332-tiles track
Track generation: 1136..1424 -> 288-tiles track
Episode:7 Score:[-84.89353]
Track generation: 1171..1468 -> 297-tiles track
Tra

In [26]:
# Restore the saved PPO agent (1M checkpoint) and attach it to the current `env`.
car3_checkpoint = './Training/Saved Models/ppo_car_1mil'
car3 = PPO.load(car3_checkpoint, env=env)

In [27]:
# Replay the `car3` agent for `episodes` rendered episodes and print the total
# reward of each one. `env` is a vectorised wrapper here, so `reward`/`done`
# are one-element arrays and `score` prints as a bracketed value.
episodes = 3
try:
    for episode in range(1, episodes+1):
        obs = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            # predict() returns a pair; only the action is needed here.
            action, _ = car3.predict(obs)
            obs, reward, done, info = env.step(action)
            score += reward
        print('Episode:{} Score:{}'.format(episode, score))
finally:
    # Always release the environment (and its render window), even when the
    # cell is interrupted or an exception is raised mid-episode.
    env.close()

Track generation: 1052..1319 -> 267-tiles track
Track generation: 1083..1362 -> 279-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1147..1444 -> 297-tiles track
Episode:1 Score:[-77.44299]
Track generation: 1140..1429 -> 289-tiles track
Track generation: 1136..1423 -> 287-tiles track
Episode:2 Score:[-79.16597]
Track generation: 1162..1457 -> 295-tiles track
Track generation: 1076..1349 -> 273-tiles track
Episode:3 Score:[-79.59113]
