In [None]:
# Run this cell with the `tensorforceEnv` environment activated
# (presumably a conda/virtualenv environment -- confirm with the project setup).
from ale_py import ALEInterface

# Create an Arcade Learning Environment (ALE) interface instance.
ale = ALEInterface()

from ale_py.roms import Breakout

# Load the Breakout ROM into the ALE interface.
# NOTE(review): `ale` is not referenced by main(), which trains on
# CartPole-v1 -- confirm whether this ROM setup is still needed.
ale.loadROM(Breakout)


from tensorforce import Agent, Environment

import gym
from gym import wrappers

def main(num_train_episodes=1500, num_eval_episodes=100):
    """Train a double-DQN agent on CartPole-v1, then evaluate it greedily.

    The gym environment is wrapped in ``wrappers.Monitor`` (recording into
    ``tmp``) and handed to Tensorforce. Each training episode prints its
    return; after training, the mean return over the evaluation episodes is
    printed.

    Args:
        num_train_episodes: Number of training episodes to run.
        num_eval_episodes: Number of evaluation episodes to run with
            independent, deterministic actions. If 0, evaluation is skipped.
    """
    # Number of updates used as the horizon of the exploration schedule.
    # Kept separate from the episode count: the schedule is expressed in
    # 'updates', not in episodes.
    exploration_decay_steps = 1500

    env = gym.make('CartPole-v1')
    env = wrappers.Monitor(env, 'tmp', force=True)
    environment = Environment.create(environment=env, max_episode_timesteps=500)

    agent = None
    try:
        # Double-DQN agent specification.
        # NOTE(review): with an exponential schedule the exploration value is
        # presumably initial_value * decay_rate ** (updates / num_steps), which
        # would keep exploration near 0.95 for the whole run and may explain
        # flat training returns -- confirm against the Tensorforce docs before
        # tuning decay_rate / num_steps.
        agent = Agent.create(
            agent='ddqn',
            environment=environment,
            memory=50000,
            batch_size=64,
            update_frequency=0.25,
            start_updating=100,
            learning_rate=0.001,
            target_sync_frequency=100,
            exploration=dict(
                type='exponential',
                unit='updates',
                num_steps=exploration_decay_steps,
                initial_value=0.95,
                decay_rate=0.99,
            ),
        )

        print(agent.get_architecture())

        # Training: standard act/observe loop, one printed return per episode.
        for episode in range(num_train_episodes):
            states = environment.reset()
            terminal = False
            sum_rewards = 0.0
            while not terminal:
                actions = agent.act(states=states)
                states, terminal, reward = environment.execute(actions=actions)
                agent.observe(terminal=terminal, reward=reward)
                sum_rewards += reward
            print('Episode {}: {}'.format(episode, sum_rewards))

        # Evaluation: independent, deterministic acting, so nothing is
        # observed by the agent and internals are threaded through manually.
        sum_rewards = 0.0
        for _ in range(num_eval_episodes):
            states = environment.reset()
            internals = agent.initial_internals()
            terminal = False
            while not terminal:
                actions, internals = agent.act(
                    states=states, internals=internals, independent=True, deterministic=True
                )
                states, terminal, reward = environment.execute(actions=actions)
                env.render()
                sum_rewards += reward
        if num_eval_episodes > 0:
            print('Mean evaluation return:', sum_rewards / float(num_eval_episodes))
    finally:
        # Always release the agent and environment (and with it the Monitor
        # recorder), even if training is interrupted or raises.
        if agent is not None:
            agent.close()
        environment.close()
    
    
    
    

# Run the training/evaluation routine only when executed as a script or
# notebook cell, not when this module is imported.
if __name__ == '__main__':
    main()



Policy:
    Network:  
        Dense(name=dense0, size=64, bias=True, activation=tanh)
        Dense(name=dense1, size=64, bias=True, activation=tanh)Action-value:  Linear(name=action_value, size=2, bias=True)
Baseline:
    Network:  
        Dense(name=dense0, size=64, bias=True, activation=tanh)
        Dense(name=dense1, size=64, bias=True, activation=tanh)Action-value:  Linear(name=action_value, size=2, bias=True)
Episode 0: 18.0
Episode 1: 30.0
Episode 2: 45.0
Episode 3: 14.0
Episode 4: 18.0
Episode 5: 14.0
Episode 6: 14.0
Episode 7: 16.0
Episode 8: 38.0
Episode 9: 27.0
Episode 10: 16.0
Episode 11: 21.0
Episode 12: 12.0
Episode 13: 16.0
Episode 14: 19.0
Episode 15: 11.0
Episode 16: 23.0
Episode 17: 21.0
Episode 18: 12.0
Episode 19: 20.0
Episode 20: 16.0
Episode 21: 16.0
Episode 22: 31.0
Episode 23: 19.0
Episode 24: 25.0
Episode 25: 17.0
Episode 26: 16.0
Episode 27: 17.0
Episode 28: 10.0
Episode 29: 40.0
Episode 30: 12.0
Episode 31: 24.0
Episode 32: 11.0
Episode 33: 15.0
Episode 34

Episode 439: 27.0
Episode 440: 27.0
Episode 441: 11.0
Episode 442: 13.0
Episode 443: 28.0
Episode 444: 11.0
Episode 445: 17.0
Episode 446: 30.0
Episode 447: 35.0
Episode 448: 31.0
Episode 449: 24.0
Episode 450: 12.0
Episode 451: 50.0
Episode 452: 14.0
Episode 453: 13.0
Episode 454: 18.0
Episode 455: 25.0
Episode 456: 20.0
Episode 457: 12.0
Episode 458: 25.0
Episode 459: 50.0
Episode 460: 29.0
Episode 461: 19.0
Episode 462: 36.0
Episode 463: 34.0
Episode 464: 58.0
Episode 465: 20.0
Episode 466: 16.0
Episode 467: 51.0
Episode 468: 20.0
Episode 469: 26.0
Episode 470: 15.0
Episode 471: 40.0
Episode 472: 25.0
Episode 473: 15.0
Episode 474: 36.0
Episode 475: 15.0
Episode 476: 13.0
Episode 477: 56.0
Episode 478: 16.0
Episode 479: 27.0
Episode 480: 34.0
Episode 481: 22.0
Episode 482: 16.0
Episode 483: 13.0
Episode 484: 14.0
Episode 485: 14.0
Episode 486: 19.0
Episode 487: 18.0
Episode 488: 20.0
Episode 489: 11.0
Episode 490: 31.0
Episode 491: 11.0
Episode 492: 23.0
Episode 493: 41.0
Episode 49

Episode 895: 33.0
Episode 896: 11.0
Episode 897: 24.0
Episode 898: 15.0
Episode 899: 12.0
Episode 900: 25.0
Episode 901: 16.0
Episode 902: 35.0
Episode 903: 29.0
Episode 904: 13.0
Episode 905: 12.0
Episode 906: 13.0
Episode 907: 12.0
Episode 908: 31.0
Episode 909: 22.0
Episode 910: 22.0
Episode 911: 15.0
Episode 912: 10.0
Episode 913: 11.0
Episode 914: 22.0
Episode 915: 25.0
Episode 916: 37.0
Episode 917: 12.0
Episode 918: 33.0
Episode 919: 11.0
Episode 920: 17.0
Episode 921: 32.0
Episode 922: 57.0
Episode 923: 42.0
Episode 924: 36.0
Episode 925: 49.0
Episode 926: 18.0
Episode 927: 16.0
Episode 928: 28.0
Episode 929: 44.0
Episode 930: 63.0
Episode 931: 73.0
Episode 932: 17.0
Episode 933: 21.0
Episode 934: 23.0
Episode 935: 31.0
Episode 936: 87.0
Episode 937: 38.0
Episode 938: 31.0
Episode 939: 13.0
Episode 940: 16.0
Episode 941: 22.0
Episode 942: 19.0
Episode 943: 79.0
Episode 944: 39.0
Episode 945: 56.0
Episode 946: 13.0
Episode 947: 15.0
Episode 948: 30.0
Episode 949: 40.0
Episode 95

Episode 1331: 27.0
Episode 1332: 13.0
Episode 1333: 18.0
Episode 1334: 21.0
Episode 1335: 20.0
Episode 1336: 18.0
Episode 1337: 10.0
Episode 1338: 20.0
Episode 1339: 16.0
Episode 1340: 18.0
Episode 1341: 14.0
Episode 1342: 46.0
Episode 1343: 31.0
Episode 1344: 19.0
Episode 1345: 11.0
Episode 1346: 30.0
Episode 1347: 18.0
Episode 1348: 14.0
Episode 1349: 11.0
Episode 1350: 17.0
Episode 1351: 25.0
Episode 1352: 12.0
Episode 1353: 44.0
Episode 1354: 11.0
Episode 1355: 15.0
Episode 1356: 16.0
Episode 1357: 23.0
Episode 1358: 17.0
Episode 1359: 22.0
Episode 1360: 16.0
Episode 1361: 27.0
Episode 1362: 54.0
Episode 1363: 17.0
Episode 1364: 12.0
Episode 1365: 27.0
Episode 1366: 14.0
Episode 1367: 17.0
Episode 1368: 81.0
Episode 1369: 33.0
Episode 1370: 9.0
Episode 1371: 74.0
Episode 1372: 19.0
Episode 1373: 16.0
Episode 1374: 12.0
Episode 1375: 9.0
Episode 1376: 22.0
Episode 1377: 22.0
Episode 1378: 35.0
Episode 1379: 27.0
Episode 1380: 17.0
Episode 1381: 23.0
Episode 1382: 11.0
Episode 1383: 