In [1]:
import gym
from gym import wrappers
import qlearning
import numpy
import matplotlib.pyplot as plt

NUM_EPISODES = 2000
N_BINS = [8, 8, 8, 8]
MAX_STEPS = 200
FAIL_PENALTY = -100
EPSILON = 0.5
EPSILON_DECAY = 0.99
LEARNING_RATE = 0.05
DISCOUNT_FACTOR = 0.9


RECORD = False

MIN_VALUES = [-0.5, -2.0, -0.5, -3.0]
MAX_VALUES = [0.5, 2.0, 0.5, 3.0]
BINS = [numpy.linspace(MIN_VALUES[i], MAX_VALUES[i], N_BINS[i])
        for i in range(4)]

In [2]:
def discretize(obs):
  return tuple([int(numpy.digitize(obs[i], BINS[i])) for i in range(4)])


def train(agent, env, history, num_episodes=NUM_EPISODES):
  for i in range(NUM_EPISODES):
    if i % 100:
      print ("Episode {}".format(i + 1))
    obs = env.reset()
    cur_state = discretize(obs)
    
    for t in range(MAX_STEPS):
      action = agent.get_action(cur_state)
      observation, reward, done, info = env.step(action)
      next_state = discretize(observation)
      if done:
        reward = FAIL_PENALTY
        agent.learn(cur_state, action, next_state, reward, done)
        print("Episode finished after {} timesteps".format(t + 1))
        history.append(t + 1)
        break
      agent.learn(cur_state, action, next_state, reward, done)
      cur_state = next_state
      if t == MAX_STEPS - 1:
        history.append(t + 1)
        print("Episode finished after {} timesteps".format(t + 1))
  return agent, history

In [3]:
env = gym.make('CartPole-v0')
if RECORD:
  env = wrappers.Monitor(env, '/tmp/cartpole-experiment-1', force=True)
def get_actions(state):
  return [0, 1]


agent = qlearning.QLearningAgent(get_actions,
                                     epsilon=EPSILON,
                                     alpha=LEARNING_RATE,
                                     gamma=DISCOUNT_FACTOR,
                                     epsilon_decay=EPSILON_DECAY)

history = []

agent, history = train(agent, env, history)

if RECORD:
  env.monitor.close()

avg_reward = [numpy.mean(history[i*100:(i+1)*100]) for i in range(int(len(history)/100))]
f_reward = plt.figure(1)
plt.plot(numpy.linspace(0, len(history), len(avg_reward)), avg_reward)
plt.ylabel('Rewards')
f_reward.show()
print ('press enter to continue')
input()
plt.close()


# Display:
print ('press ctrl-c to stop')
while True:
  obs = env.reset()
  cur_state = discretize(obs)
  done = False

  t = 0
  while not done:
    env.render()
    t = t+1
    action = agent.get_action(cur_state)
    observation, reward, done, info = env.step(action)
    next_state = discretize(observation)
    if done:
      reward = FAIL_PENALTY
      agent.learn(cur_state, action, next_state, reward, done)
      print("Episode finished after {} timesteps".format(t+1))
      history.append(t+1)
      break
    agent.learn(cur_state, action, next_state, reward, done)
    cur_state = next_state

[2019-05-20 22:59:36,929] Making new env: CartPole-v0


Episode finished after 10 timesteps
Episode 2
Episode finished after 9 timesteps
Episode 3
Episode finished after 11 timesteps
Episode 4
Episode finished after 12 timesteps
Episode 5
Episode finished after 13 timesteps
Episode 6
Episode finished after 12 timesteps
Episode 7
Episode finished after 10 timesteps
Episode 8
Episode finished after 10 timesteps
Episode 9
Episode finished after 11 timesteps
Episode 10
Episode finished after 10 timesteps
Episode 11
Episode finished after 11 timesteps
Episode 12
Episode finished after 11 timesteps
Episode 13
Episode finished after 11 timesteps
Episode 14
Episode finished after 13 timesteps
Episode 15
Episode finished after 9 timesteps
Episode 16
Episode finished after 11 timesteps
Episode 17
Episode finished after 11 timesteps
Episode 18
Episode finished after 11 timesteps
Episode 19
Episode finished after 11 timesteps
Episode 20
Episode finished after 8 timesteps
Episode 21
Episode finished after 9 timesteps
Episode 22
Episode finished after 10

Episode finished after 104 timesteps
Episode 197
Episode finished after 97 timesteps
Episode 198
Episode finished after 90 timesteps
Episode 199
Episode finished after 159 timesteps
Episode 200
Episode finished after 79 timesteps
Episode finished after 81 timesteps
Episode 202
Episode finished after 121 timesteps
Episode 203
Episode finished after 124 timesteps
Episode 204
Episode finished after 121 timesteps
Episode 205
Episode finished after 49 timesteps
Episode 206
Episode finished after 63 timesteps
Episode 207
Episode finished after 69 timesteps
Episode 208
Episode finished after 130 timesteps
Episode 209
Episode finished after 113 timesteps
Episode 210
Episode finished after 111 timesteps
Episode 211
Episode finished after 114 timesteps
Episode 212
Episode finished after 112 timesteps
Episode 213
Episode finished after 127 timesteps
Episode 214
Episode finished after 125 timesteps
Episode 215
Episode finished after 121 timesteps
Episode 216
Episode finished after 101 timesteps
Ep

Episode finished after 112 timesteps
Episode 394
Episode finished after 88 timesteps
Episode 395
Episode finished after 89 timesteps
Episode 396
Episode finished after 85 timesteps
Episode 397
Episode finished after 144 timesteps
Episode 398
Episode finished after 122 timesteps
Episode 399
Episode finished after 82 timesteps
Episode 400
Episode finished after 95 timesteps
Episode finished after 102 timesteps
Episode 402
Episode finished after 86 timesteps
Episode 403
Episode finished after 113 timesteps
Episode 404
Episode finished after 84 timesteps
Episode 405
Episode finished after 93 timesteps
Episode 406
Episode finished after 116 timesteps
Episode 407
Episode finished after 134 timesteps
Episode 408
Episode finished after 59 timesteps
Episode 409
Episode finished after 83 timesteps
Episode 410
Episode finished after 107 timesteps
Episode 411
Episode finished after 95 timesteps
Episode 412
Episode finished after 95 timesteps
Episode 413
Episode finished after 132 timesteps
Episode

Episode finished after 129 timesteps
Episode 576
Episode finished after 138 timesteps
Episode 577
Episode finished after 78 timesteps
Episode 578
Episode finished after 134 timesteps
Episode 579
Episode finished after 137 timesteps
Episode 580
Episode finished after 133 timesteps
Episode 581
Episode finished after 153 timesteps
Episode 582
Episode finished after 140 timesteps
Episode 583
Episode finished after 134 timesteps
Episode 584
Episode finished after 156 timesteps
Episode 585
Episode finished after 125 timesteps
Episode 586
Episode finished after 158 timesteps
Episode 587
Episode finished after 155 timesteps
Episode 588
Episode finished after 93 timesteps
Episode 589
Episode finished after 90 timesteps
Episode 590
Episode finished after 24 timesteps
Episode 591
Episode finished after 187 timesteps
Episode 592
Episode finished after 179 timesteps
Episode 593
Episode finished after 161 timesteps
Episode 594
Episode finished after 163 timesteps
Episode 595
Episode finished after 1

Episode finished after 136 timesteps
Episode 755
Episode finished after 140 timesteps
Episode 756
Episode finished after 144 timesteps
Episode 757
Episode finished after 146 timesteps
Episode 758
Episode finished after 150 timesteps
Episode 759
Episode finished after 146 timesteps
Episode 760
Episode finished after 165 timesteps
Episode 761
Episode finished after 24 timesteps
Episode 762
Episode finished after 150 timesteps
Episode 763
Episode finished after 137 timesteps
Episode 764
Episode finished after 159 timesteps
Episode 765
Episode finished after 145 timesteps
Episode 766
Episode finished after 147 timesteps
Episode 767
Episode finished after 150 timesteps
Episode 768
Episode finished after 145 timesteps
Episode 769
Episode finished after 143 timesteps
Episode 770
Episode finished after 136 timesteps
Episode 771
Episode finished after 45 timesteps
Episode 772
Episode finished after 144 timesteps
Episode 773
Episode finished after 149 timesteps
Episode 774
Episode finished after

Episode finished after 119 timesteps
Episode 936
Episode finished after 118 timesteps
Episode 937
Episode finished after 111 timesteps
Episode 938
Episode finished after 118 timesteps
Episode 939
Episode finished after 119 timesteps
Episode 940
Episode finished after 56 timesteps
Episode 941
Episode finished after 58 timesteps
Episode 942
Episode finished after 124 timesteps
Episode 943
Episode finished after 116 timesteps
Episode 944
Episode finished after 125 timesteps
Episode 945
Episode finished after 118 timesteps
Episode 946
Episode finished after 123 timesteps
Episode 947
Episode finished after 117 timesteps
Episode 948
Episode finished after 110 timesteps
Episode 949
Episode finished after 133 timesteps
Episode 950
Episode finished after 115 timesteps
Episode 951
Episode finished after 117 timesteps
Episode 952
Episode finished after 116 timesteps
Episode 953
Episode finished after 124 timesteps
Episode 954
Episode finished after 116 timesteps
Episode 955
Episode finished after

Episode finished after 64 timesteps
Episode 1120
Episode finished after 109 timesteps
Episode 1121
Episode finished after 122 timesteps
Episode 1122
Episode finished after 116 timesteps
Episode 1123
Episode finished after 133 timesteps
Episode 1124
Episode finished after 142 timesteps
Episode 1125
Episode finished after 128 timesteps
Episode 1126
Episode finished after 155 timesteps
Episode 1127
Episode finished after 132 timesteps
Episode 1128
Episode finished after 139 timesteps
Episode 1129
Episode finished after 144 timesteps
Episode 1130
Episode finished after 134 timesteps
Episode 1131
Episode finished after 137 timesteps
Episode 1132
Episode finished after 141 timesteps
Episode 1133
Episode finished after 130 timesteps
Episode 1134
Episode finished after 140 timesteps
Episode 1135
Episode finished after 125 timesteps
Episode 1136
Episode finished after 132 timesteps
Episode 1137
Episode finished after 134 timesteps
Episode 1138
Episode finished after 133 timesteps
Episode 1139
E

Episode finished after 131 timesteps
Episode 1296
Episode finished after 134 timesteps
Episode 1297
Episode finished after 135 timesteps
Episode 1298
Episode finished after 131 timesteps
Episode 1299
Episode finished after 137 timesteps
Episode 1300
Episode finished after 129 timesteps
Episode finished after 151 timesteps
Episode 1302
Episode finished after 60 timesteps
Episode 1303
Episode finished after 131 timesteps
Episode 1304
Episode finished after 138 timesteps
Episode 1305
Episode finished after 127 timesteps
Episode 1306
Episode finished after 137 timesteps
Episode 1307
Episode finished after 137 timesteps
Episode 1308
Episode finished after 131 timesteps
Episode 1309
Episode finished after 133 timesteps
Episode 1310
Episode finished after 132 timesteps
Episode 1311
Episode finished after 155 timesteps
Episode 1312
Episode finished after 137 timesteps
Episode 1313
Episode finished after 130 timesteps
Episode 1314
Episode finished after 137 timesteps
Episode 1315
Episode finish

Episode finished after 136 timesteps
Episode 1469
Episode finished after 149 timesteps
Episode 1470
Episode finished after 128 timesteps
Episode 1471
Episode finished after 60 timesteps
Episode 1472
Episode finished after 137 timesteps
Episode 1473
Episode finished after 131 timesteps
Episode 1474
Episode finished after 142 timesteps
Episode 1475
Episode finished after 131 timesteps
Episode 1476
Episode finished after 133 timesteps
Episode 1477
Episode finished after 137 timesteps
Episode 1478
Episode finished after 132 timesteps
Episode 1479
Episode finished after 143 timesteps
Episode 1480
Episode finished after 134 timesteps
Episode 1481
Episode finished after 126 timesteps
Episode 1482
Episode finished after 135 timesteps
Episode 1483
Episode finished after 151 timesteps
Episode 1484
Episode finished after 130 timesteps
Episode 1485
Episode finished after 132 timesteps
Episode 1486
Episode finished after 142 timesteps
Episode 1487
Episode finished after 151 timesteps
Episode 1488
E

Episode finished after 137 timesteps
Episode 1637
Episode finished after 127 timesteps
Episode 1638
Episode finished after 150 timesteps
Episode 1639
Episode finished after 133 timesteps
Episode 1640
Episode finished after 160 timesteps
Episode 1641
Episode finished after 142 timesteps
Episode 1642
Episode finished after 126 timesteps
Episode 1643
Episode finished after 132 timesteps
Episode 1644
Episode finished after 137 timesteps
Episode 1645
Episode finished after 140 timesteps
Episode 1646
Episode finished after 135 timesteps
Episode 1647
Episode finished after 137 timesteps
Episode 1648
Episode finished after 136 timesteps
Episode 1649
Episode finished after 157 timesteps
Episode 1650
Episode finished after 140 timesteps
Episode 1651
Episode finished after 138 timesteps
Episode 1652
Episode finished after 136 timesteps
Episode 1653
Episode finished after 131 timesteps
Episode 1654
Episode finished after 142 timesteps
Episode 1655
Episode finished after 141 timesteps
Episode 1656


Episode finished after 144 timesteps
Episode 1819
Episode finished after 133 timesteps
Episode 1820
Episode finished after 145 timesteps
Episode 1821
Episode finished after 141 timesteps
Episode 1822
Episode finished after 133 timesteps
Episode 1823
Episode finished after 133 timesteps
Episode 1824
Episode finished after 133 timesteps
Episode 1825
Episode finished after 139 timesteps
Episode 1826
Episode finished after 140 timesteps
Episode 1827
Episode finished after 130 timesteps
Episode 1828
Episode finished after 136 timesteps
Episode 1829
Episode finished after 134 timesteps
Episode 1830
Episode finished after 16 timesteps
Episode 1831
Episode finished after 130 timesteps
Episode 1832
Episode finished after 154 timesteps
Episode 1833
Episode finished after 140 timesteps
Episode 1834
Episode finished after 135 timesteps
Episode 1835
Episode finished after 128 timesteps
Episode 1836
Episode finished after 133 timesteps
Episode 1837
Episode finished after 138 timesteps
Episode 1838
E

  "matplotlib is currently using a non-GUI backend, "


Episode finished after 128 timesteps
Episode 1985
Episode finished after 134 timesteps
Episode 1986
Episode finished after 133 timesteps
Episode 1987
Episode finished after 137 timesteps
Episode 1988
Episode finished after 134 timesteps
Episode 1989
Episode finished after 137 timesteps
Episode 1990
Episode finished after 133 timesteps
Episode 1991
Episode finished after 135 timesteps
Episode 1992
Episode finished after 143 timesteps
Episode 1993
Episode finished after 140 timesteps
Episode 1994
Episode finished after 153 timesteps
Episode 1995
Episode finished after 130 timesteps
Episode 1996
Episode finished after 134 timesteps
Episode 1997
Episode finished after 137 timesteps
Episode 1998
Episode finished after 129 timesteps
Episode 1999
Episode finished after 144 timesteps
Episode 2000
Episode finished after 131 timesteps
press enter to continue
1
press ctrl-c to stop
Episode finished after 138 timesteps
Episode finished after 135 timesteps
Episode finished after 148 timesteps
Episo

KeyboardInterrupt: 