In [2]:
from collections import deque
import gym
import numpy as np
import torch
class PolicyNetwork(torch.nn.Module):
    """Linear softmax policy mapping a 4-feature input to 2 probabilities."""

    def __init__(self):
        super().__init__()
        # A single bias-free linear layer: one logit per output.
        self.fc = torch.nn.Linear(in_features=4, out_features=2, bias=False)

    def forward(self, x):
        """Return probabilities normalized over the last dimension of ``x``."""
        logits = self.fc(x)
        return logits.softmax(dim=-1)
# REINFORCE-style training script: roll out one episode with the current
# policy, then take a single SGD step on the sum of -G_t * log pi(a_t | s_t),
# where G_t is the discounted return from step t onwards.
pi = PolicyNetwork()
optimizer = torch.optim.SGD(pi.parameters(), lr=0.001)
env = gym.make('CartPole-v0')
max_steps = 200
discount = 0.995  # discount factor gamma
reward_history = deque(maxlen=100)  # total rewards of the last 100 episodes


# try/finally guarantees the environment is closed even when training is
# interrupted (e.g. KeyboardInterrupt) or an exception is raised mid-run.
try:
    for episode in range(1, 10000 + 1):
        # This code uses the gym API in which reset() returns the observation
        # and step() returns a 4-tuple.
        state = env.reset()
        state = torch.tensor(state, dtype=torch.float32)
        history = []  # one [reward, probability of the sampled action] per step
        rewards = 0   # undiscounted total reward of this episode
        for t in range(1, max_steps + 1):
            probs = pi(state)
            # Sample an action index from the policy's output distribution.
            action = torch.multinomial(probs, 1).item()
            state_next, reward, done, _ = env.step(action)
            rewards += reward
            # probs[action] stays attached to the autograd graph so the loss
            # below can backpropagate into the policy weights.
            history.append([reward, probs[action]])
            if done:
                break
            state = torch.tensor(state_next, dtype=torch.float32)
        # True when the rollout ran through step max_steps.
        # NOTE(review): this is also true if the episode genuinely ended on
        # exactly that step - confirm whether that case should bootstrap.
        timeout = (t == max_steps)
        # compute average reward over 100 episodes
        reward_history.append(rewards)
        avg = np.mean(reward_history)
        print('episode: {}, reward: {}, avg: {}'.format(episode, rewards, avg))
        # update policy
        loss = 0
        # Start the return from 200 instead of 0 when the rollout hit the step
        # limit, as a stand-in for the reward that would still have followed.
        # NOTE(review): 200 equals both max_steps and 1 / (1 - discount);
        # which one was intended is not clear from here - confirm before
        # changing either constant.
        G = 200 if timeout else 0
        # Walk the episode backwards so each return is built from the next
        # one: G_t = r_t + discount * G_{t+1}.
        for r, prob in reversed(history):
            G = r + discount*G
            loss -= G * prob.log()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
finally:
    env.close()


episode: 1, reward: 31.0, avg: 31.0
episode: 2, reward: 13.0, avg: 22.0
episode: 3, reward: 9.0, avg: 17.666666666666668
episode: 4, reward: 29.0, avg: 20.5
episode: 5, reward: 10.0, avg: 18.4
episode: 6, reward: 14.0, avg: 17.666666666666668
episode: 7, reward: 16.0, avg: 17.428571428571427
episode: 8, reward: 40.0, avg: 20.25
episode: 9, reward: 17.0, avg: 19.88888888888889
episode: 10, reward: 22.0, avg: 20.1
episode: 11, reward: 28.0, avg: 20.818181818181817
episode: 12, reward: 18.0, avg: 20.583333333333332
episode: 13, reward: 19.0, avg: 20.46153846153846
episode: 14, reward: 14.0, avg: 20.0
episode: 15, reward: 36.0, avg: 21.066666666666666
episode: 16, reward: 16.0, avg: 20.75
episode: 17, reward: 18.0, avg: 20.58823529411765
episode: 18, reward: 21.0, avg: 20.61111111111111
episode: 19, reward: 19.0, avg: 20.526315789473685
episode: 20, reward: 17.0, avg: 20.35
episode: 21, reward: 17.0, avg: 20.19047619047619
episode: 22, reward: 17.0, avg: 20.045454545454547
episode: 23, rew

episode: 196, reward: 200.0, avg: 106.15
episode: 197, reward: 85.0, avg: 106.54
episode: 198, reward: 122.0, avg: 107.04
episode: 199, reward: 92.0, avg: 107.44
episode: 200, reward: 108.0, avg: 108.28
episode: 201, reward: 77.0, avg: 108.63
episode: 202, reward: 142.0, avg: 109.62
episode: 203, reward: 161.0, avg: 110.26
episode: 204, reward: 155.0, avg: 111.25
episode: 205, reward: 103.0, avg: 111.85
episode: 206, reward: 99.0, avg: 112.07
episode: 207, reward: 106.0, avg: 112.84
episode: 208, reward: 89.0, avg: 113.52
episode: 209, reward: 146.0, avg: 114.73
episode: 210, reward: 122.0, avg: 115.03
episode: 211, reward: 113.0, avg: 115.88
episode: 212, reward: 115.0, avg: 116.2
episode: 213, reward: 165.0, avg: 117.17
episode: 214, reward: 168.0, avg: 118.36
episode: 215, reward: 200.0, avg: 119.85
episode: 216, reward: 99.0, avg: 119.59
episode: 217, reward: 129.0, avg: 120.49
episode: 218, reward: 114.0, avg: 120.41
episode: 219, reward: 55.0, avg: 120.55
episode: 220, reward: 82

episode: 397, reward: 200.0, avg: 196.92
episode: 398, reward: 200.0, avg: 197.98
episode: 399, reward: 186.0, avg: 197.84
episode: 400, reward: 200.0, avg: 197.84
episode: 401, reward: 200.0, avg: 197.84
episode: 402, reward: 200.0, avg: 197.84
episode: 403, reward: 200.0, avg: 197.84
episode: 404, reward: 200.0, avg: 197.84
episode: 405, reward: 200.0, avg: 197.84
episode: 406, reward: 200.0, avg: 197.84
episode: 407, reward: 200.0, avg: 197.84
episode: 408, reward: 200.0, avg: 197.84
episode: 409, reward: 200.0, avg: 197.84
episode: 410, reward: 169.0, avg: 197.53
episode: 411, reward: 200.0, avg: 197.53
episode: 412, reward: 190.0, avg: 197.43
episode: 413, reward: 200.0, avg: 197.43
episode: 414, reward: 200.0, avg: 197.43
episode: 415, reward: 199.0, avg: 197.42
episode: 416, reward: 200.0, avg: 197.42
episode: 417, reward: 200.0, avg: 197.42
episode: 418, reward: 200.0, avg: 197.42
episode: 419, reward: 200.0, avg: 197.42
episode: 420, reward: 196.0, avg: 197.38
episode: 421, re

episode: 603, reward: 200.0, avg: 194.65
episode: 604, reward: 184.0, avg: 194.49
episode: 605, reward: 200.0, avg: 194.49
episode: 606, reward: 200.0, avg: 194.49
episode: 607, reward: 200.0, avg: 194.77
episode: 608, reward: 164.0, avg: 194.41
episode: 609, reward: 200.0, avg: 194.59
episode: 610, reward: 172.0, avg: 194.31
episode: 611, reward: 200.0, avg: 194.46
episode: 612, reward: 169.0, avg: 194.15
episode: 613, reward: 165.0, avg: 193.8
episode: 614, reward: 200.0, avg: 193.8
episode: 615, reward: 200.0, avg: 193.8
episode: 616, reward: 200.0, avg: 194.19
episode: 617, reward: 200.0, avg: 194.19
episode: 618, reward: 200.0, avg: 194.48
episode: 619, reward: 200.0, avg: 194.52
episode: 620, reward: 200.0, avg: 194.52
episode: 621, reward: 200.0, avg: 194.52
episode: 622, reward: 200.0, avg: 194.52
episode: 623, reward: 200.0, avg: 194.52
episode: 624, reward: 200.0, avg: 195.35
episode: 625, reward: 200.0, avg: 196.24
episode: 626, reward: 200.0, avg: 196.24
episode: 627, rewar

episode: 806, reward: 200.0, avg: 199.94
episode: 807, reward: 200.0, avg: 199.94
episode: 808, reward: 200.0, avg: 199.94
episode: 809, reward: 200.0, avg: 199.94
episode: 810, reward: 200.0, avg: 199.94
episode: 811, reward: 200.0, avg: 199.94
episode: 812, reward: 200.0, avg: 199.94
episode: 813, reward: 200.0, avg: 199.94
episode: 814, reward: 200.0, avg: 199.94
episode: 815, reward: 200.0, avg: 199.94
episode: 816, reward: 200.0, avg: 199.94
episode: 817, reward: 200.0, avg: 199.94
episode: 818, reward: 200.0, avg: 199.94
episode: 819, reward: 200.0, avg: 199.94
episode: 820, reward: 200.0, avg: 199.94
episode: 821, reward: 200.0, avg: 199.94
episode: 822, reward: 200.0, avg: 199.94
episode: 823, reward: 200.0, avg: 199.94
episode: 824, reward: 200.0, avg: 199.94
episode: 825, reward: 200.0, avg: 199.94
episode: 826, reward: 200.0, avg: 199.94
episode: 827, reward: 200.0, avg: 199.94
episode: 828, reward: 200.0, avg: 199.94
episode: 829, reward: 200.0, avg: 199.94
episode: 830, re

episode: 1015, reward: 200.0, avg: 200.0
episode: 1016, reward: 200.0, avg: 200.0
episode: 1017, reward: 200.0, avg: 200.0
episode: 1018, reward: 200.0, avg: 200.0
episode: 1019, reward: 200.0, avg: 200.0
episode: 1020, reward: 200.0, avg: 200.0
episode: 1021, reward: 200.0, avg: 200.0
episode: 1022, reward: 200.0, avg: 200.0
episode: 1023, reward: 200.0, avg: 200.0
episode: 1024, reward: 200.0, avg: 200.0
episode: 1025, reward: 200.0, avg: 200.0
episode: 1026, reward: 200.0, avg: 200.0
episode: 1027, reward: 200.0, avg: 200.0
episode: 1028, reward: 200.0, avg: 200.0
episode: 1029, reward: 200.0, avg: 200.0
episode: 1030, reward: 200.0, avg: 200.0
episode: 1031, reward: 200.0, avg: 200.0
episode: 1032, reward: 200.0, avg: 200.0
episode: 1033, reward: 200.0, avg: 200.0
episode: 1034, reward: 200.0, avg: 200.0
episode: 1035, reward: 200.0, avg: 200.0
episode: 1036, reward: 200.0, avg: 200.0
episode: 1037, reward: 200.0, avg: 200.0
episode: 1038, reward: 200.0, avg: 200.0
episode: 1039, r

episode: 1217, reward: 200.0, avg: 200.0
episode: 1218, reward: 200.0, avg: 200.0
episode: 1219, reward: 200.0, avg: 200.0
episode: 1220, reward: 200.0, avg: 200.0
episode: 1221, reward: 200.0, avg: 200.0
episode: 1222, reward: 200.0, avg: 200.0
episode: 1223, reward: 200.0, avg: 200.0
episode: 1224, reward: 200.0, avg: 200.0
episode: 1225, reward: 200.0, avg: 200.0
episode: 1226, reward: 200.0, avg: 200.0
episode: 1227, reward: 200.0, avg: 200.0
episode: 1228, reward: 200.0, avg: 200.0
episode: 1229, reward: 200.0, avg: 200.0
episode: 1230, reward: 200.0, avg: 200.0
episode: 1231, reward: 200.0, avg: 200.0
episode: 1232, reward: 200.0, avg: 200.0
episode: 1233, reward: 200.0, avg: 200.0
episode: 1234, reward: 200.0, avg: 200.0
episode: 1235, reward: 200.0, avg: 200.0
episode: 1236, reward: 200.0, avg: 200.0
episode: 1237, reward: 200.0, avg: 200.0
episode: 1238, reward: 200.0, avg: 200.0
episode: 1239, reward: 200.0, avg: 200.0
episode: 1240, reward: 200.0, avg: 200.0
episode: 1241, r

episode: 1420, reward: 200.0, avg: 200.0
episode: 1421, reward: 200.0, avg: 200.0
episode: 1422, reward: 200.0, avg: 200.0
episode: 1423, reward: 200.0, avg: 200.0
episode: 1424, reward: 200.0, avg: 200.0
episode: 1425, reward: 200.0, avg: 200.0
episode: 1426, reward: 200.0, avg: 200.0
episode: 1427, reward: 200.0, avg: 200.0
episode: 1428, reward: 200.0, avg: 200.0
episode: 1429, reward: 200.0, avg: 200.0
episode: 1430, reward: 200.0, avg: 200.0
episode: 1431, reward: 200.0, avg: 200.0
episode: 1432, reward: 200.0, avg: 200.0
episode: 1433, reward: 200.0, avg: 200.0
episode: 1434, reward: 200.0, avg: 200.0
episode: 1435, reward: 200.0, avg: 200.0
episode: 1436, reward: 200.0, avg: 200.0
episode: 1437, reward: 200.0, avg: 200.0
episode: 1438, reward: 200.0, avg: 200.0
episode: 1439, reward: 200.0, avg: 200.0
episode: 1440, reward: 200.0, avg: 200.0
episode: 1441, reward: 200.0, avg: 200.0
episode: 1442, reward: 200.0, avg: 200.0
episode: 1443, reward: 200.0, avg: 200.0
episode: 1444, r

episode: 1621, reward: 200.0, avg: 144.97
episode: 1622, reward: 200.0, avg: 145.79
episode: 1623, reward: 200.0, avg: 145.79
episode: 1624, reward: 200.0, avg: 147.14
episode: 1625, reward: 200.0, avg: 147.14
episode: 1626, reward: 200.0, avg: 147.34
episode: 1627, reward: 200.0, avg: 147.34
episode: 1628, reward: 200.0, avg: 147.34
episode: 1629, reward: 200.0, avg: 147.43
episode: 1630, reward: 200.0, avg: 147.43
episode: 1631, reward: 200.0, avg: 147.43
episode: 1632, reward: 124.0, avg: 146.95
episode: 1633, reward: 200.0, avg: 147.19
episode: 1634, reward: 200.0, avg: 148.19
episode: 1635, reward: 200.0, avg: 148.19
episode: 1636, reward: 200.0, avg: 149.55
episode: 1637, reward: 200.0, avg: 150.9
episode: 1638, reward: 200.0, avg: 152.37
episode: 1639, reward: 182.0, avg: 153.62
episode: 1640, reward: 200.0, avg: 155.32
episode: 1641, reward: 200.0, avg: 156.78
episode: 1642, reward: 200.0, avg: 158.3
episode: 1643, reward: 200.0, avg: 159.88
episode: 1644, reward: 200.0, avg: 1

episode: 1820, reward: 200.0, avg: 197.39
episode: 1821, reward: 140.0, avg: 196.79
episode: 1822, reward: 200.0, avg: 196.79
episode: 1823, reward: 198.0, avg: 196.77
episode: 1824, reward: 200.0, avg: 196.77
episode: 1825, reward: 200.0, avg: 196.77
episode: 1826, reward: 200.0, avg: 196.77
episode: 1827, reward: 200.0, avg: 196.77
episode: 1828, reward: 180.0, avg: 196.57
episode: 1829, reward: 189.0, avg: 196.46
episode: 1830, reward: 174.0, avg: 196.2
episode: 1831, reward: 184.0, avg: 196.04
episode: 1832, reward: 200.0, avg: 196.04
episode: 1833, reward: 173.0, avg: 195.77
episode: 1834, reward: 191.0, avg: 195.68
episode: 1835, reward: 181.0, avg: 195.49
episode: 1836, reward: 175.0, avg: 195.24
episode: 1837, reward: 200.0, avg: 195.24
episode: 1838, reward: 200.0, avg: 195.24
episode: 1839, reward: 172.0, avg: 195.14
episode: 1840, reward: 198.0, avg: 195.12
episode: 1841, reward: 200.0, avg: 195.12
episode: 1842, reward: 200.0, avg: 195.12
episode: 1843, reward: 200.0, avg: 

episode: 2016, reward: 200.0, avg: 192.82
episode: 2017, reward: 200.0, avg: 192.82
episode: 2018, reward: 200.0, avg: 192.83
episode: 2019, reward: 200.0, avg: 193.35
episode: 2020, reward: 200.0, avg: 193.35
episode: 2021, reward: 200.0, avg: 193.35
episode: 2022, reward: 200.0, avg: 193.35
episode: 2023, reward: 200.0, avg: 193.35
episode: 2024, reward: 197.0, avg: 193.65
episode: 2025, reward: 200.0, avg: 193.65
episode: 2026, reward: 184.0, avg: 193.49
episode: 2027, reward: 200.0, avg: 193.49
episode: 2028, reward: 200.0, avg: 193.49
episode: 2029, reward: 200.0, avg: 193.49
episode: 2030, reward: 191.0, avg: 193.5
episode: 2031, reward: 200.0, avg: 193.5
episode: 2032, reward: 200.0, avg: 193.5
episode: 2033, reward: 200.0, avg: 193.5
episode: 2034, reward: 200.0, avg: 193.6
episode: 2035, reward: 200.0, avg: 194.09
episode: 2036, reward: 200.0, avg: 194.31
episode: 2037, reward: 200.0, avg: 194.31
episode: 2038, reward: 200.0, avg: 194.31
episode: 2039, reward: 200.0, avg: 194.

episode: 2212, reward: 200.0, avg: 199.88
episode: 2213, reward: 200.0, avg: 199.88
episode: 2214, reward: 200.0, avg: 199.88
episode: 2215, reward: 200.0, avg: 199.88
episode: 2216, reward: 200.0, avg: 199.88
episode: 2217, reward: 200.0, avg: 199.88
episode: 2218, reward: 200.0, avg: 199.88
episode: 2219, reward: 200.0, avg: 199.88
episode: 2220, reward: 200.0, avg: 199.88
episode: 2221, reward: 200.0, avg: 199.88
episode: 2222, reward: 200.0, avg: 199.88
episode: 2223, reward: 200.0, avg: 199.88
episode: 2224, reward: 200.0, avg: 199.88
episode: 2225, reward: 200.0, avg: 199.88
episode: 2226, reward: 200.0, avg: 199.88
episode: 2227, reward: 200.0, avg: 199.88
episode: 2228, reward: 200.0, avg: 199.88
episode: 2229, reward: 200.0, avg: 199.88
episode: 2230, reward: 200.0, avg: 199.88
episode: 2231, reward: 200.0, avg: 199.88
episode: 2232, reward: 200.0, avg: 199.88
episode: 2233, reward: 200.0, avg: 199.88
episode: 2234, reward: 200.0, avg: 199.88
episode: 2235, reward: 200.0, avg:

episode: 2416, reward: 200.0, avg: 200.0
episode: 2417, reward: 200.0, avg: 200.0
episode: 2418, reward: 200.0, avg: 200.0
episode: 2419, reward: 200.0, avg: 200.0
episode: 2420, reward: 200.0, avg: 200.0
episode: 2421, reward: 200.0, avg: 200.0
episode: 2422, reward: 200.0, avg: 200.0
episode: 2423, reward: 200.0, avg: 200.0
episode: 2424, reward: 200.0, avg: 200.0
episode: 2425, reward: 200.0, avg: 200.0
episode: 2426, reward: 200.0, avg: 200.0
episode: 2427, reward: 200.0, avg: 200.0
episode: 2428, reward: 200.0, avg: 200.0
episode: 2429, reward: 200.0, avg: 200.0
episode: 2430, reward: 200.0, avg: 200.0
episode: 2431, reward: 200.0, avg: 200.0
episode: 2432, reward: 200.0, avg: 200.0
episode: 2433, reward: 200.0, avg: 200.0
episode: 2434, reward: 200.0, avg: 200.0
episode: 2435, reward: 200.0, avg: 200.0
episode: 2436, reward: 200.0, avg: 200.0
episode: 2437, reward: 200.0, avg: 200.0
episode: 2438, reward: 200.0, avg: 200.0
episode: 2439, reward: 164.0, avg: 199.64
episode: 2440, 

episode: 2616, reward: 200.0, avg: 193.51
episode: 2617, reward: 200.0, avg: 193.55
episode: 2618, reward: 200.0, avg: 193.67
episode: 2619, reward: 200.0, avg: 193.93
episode: 2620, reward: 200.0, avg: 194.0
episode: 2621, reward: 200.0, avg: 194.29
episode: 2622, reward: 200.0, avg: 195.12
episode: 2623, reward: 200.0, avg: 195.24
episode: 2624, reward: 200.0, avg: 195.24
episode: 2625, reward: 200.0, avg: 195.24
episode: 2626, reward: 200.0, avg: 195.24
episode: 2627, reward: 200.0, avg: 195.24
episode: 2628, reward: 200.0, avg: 195.24
episode: 2629, reward: 200.0, avg: 195.24
episode: 2630, reward: 183.0, avg: 195.07
episode: 2631, reward: 200.0, avg: 195.07
episode: 2632, reward: 200.0, avg: 195.07
episode: 2633, reward: 200.0, avg: 195.07
episode: 2634, reward: 200.0, avg: 195.17
episode: 2635, reward: 200.0, avg: 195.17
episode: 2636, reward: 200.0, avg: 195.32
episode: 2637, reward: 183.0, avg: 195.15
episode: 2638, reward: 200.0, avg: 195.15
episode: 2639, reward: 188.0, avg: 

episode: 2813, reward: 200.0, avg: 199.07
episode: 2814, reward: 200.0, avg: 199.07
episode: 2815, reward: 200.0, avg: 199.07
episode: 2816, reward: 200.0, avg: 199.26
episode: 2817, reward: 200.0, avg: 199.26
episode: 2818, reward: 200.0, avg: 199.49
episode: 2819, reward: 200.0, avg: 199.49
episode: 2820, reward: 200.0, avg: 199.49
episode: 2821, reward: 200.0, avg: 199.49
episode: 2822, reward: 200.0, avg: 199.69
episode: 2823, reward: 200.0, avg: 199.84
episode: 2824, reward: 200.0, avg: 199.84
episode: 2825, reward: 200.0, avg: 199.84
episode: 2826, reward: 200.0, avg: 199.84
episode: 2827, reward: 200.0, avg: 199.84
episode: 2828, reward: 200.0, avg: 199.84
episode: 2829, reward: 200.0, avg: 199.84
episode: 2830, reward: 200.0, avg: 200.0
episode: 2831, reward: 200.0, avg: 200.0
episode: 2832, reward: 200.0, avg: 200.0
episode: 2833, reward: 200.0, avg: 200.0
episode: 2834, reward: 200.0, avg: 200.0
episode: 2835, reward: 200.0, avg: 200.0
episode: 2836, reward: 200.0, avg: 200.0

episode: 3016, reward: 200.0, avg: 197.55
episode: 3017, reward: 200.0, avg: 197.55
episode: 3018, reward: 200.0, avg: 197.55
episode: 3019, reward: 200.0, avg: 197.55
episode: 3020, reward: 200.0, avg: 197.55
episode: 3021, reward: 200.0, avg: 197.55
episode: 3022, reward: 200.0, avg: 197.55
episode: 3023, reward: 200.0, avg: 197.55
episode: 3024, reward: 200.0, avg: 197.55
episode: 3025, reward: 200.0, avg: 197.55
episode: 3026, reward: 200.0, avg: 197.55
episode: 3027, reward: 200.0, avg: 197.55
episode: 3028, reward: 200.0, avg: 197.55
episode: 3029, reward: 200.0, avg: 197.55
episode: 3030, reward: 200.0, avg: 197.55
episode: 3031, reward: 200.0, avg: 197.55
episode: 3032, reward: 200.0, avg: 197.55
episode: 3033, reward: 200.0, avg: 197.55
episode: 3034, reward: 200.0, avg: 197.55
episode: 3035, reward: 200.0, avg: 197.55
episode: 3036, reward: 200.0, avg: 197.55
episode: 3037, reward: 200.0, avg: 197.55
episode: 3038, reward: 200.0, avg: 197.55
episode: 3039, reward: 190.0, avg:

episode: 3217, reward: 200.0, avg: 198.09
episode: 3218, reward: 200.0, avg: 198.09
episode: 3219, reward: 195.0, avg: 198.04
episode: 3220, reward: 200.0, avg: 198.04
episode: 3221, reward: 200.0, avg: 198.04
episode: 3222, reward: 200.0, avg: 198.04
episode: 3223, reward: 126.0, avg: 197.3
episode: 3224, reward: 197.0, avg: 197.27
episode: 3225, reward: 127.0, avg: 196.54
episode: 3226, reward: 200.0, avg: 196.54
episode: 3227, reward: 200.0, avg: 196.54
episode: 3228, reward: 200.0, avg: 196.54
episode: 3229, reward: 164.0, avg: 196.18
episode: 3230, reward: 200.0, avg: 196.18
episode: 3231, reward: 200.0, avg: 196.18
episode: 3232, reward: 200.0, avg: 196.18
episode: 3233, reward: 200.0, avg: 196.18
episode: 3234, reward: 200.0, avg: 196.18
episode: 3235, reward: 200.0, avg: 196.18
episode: 3236, reward: 200.0, avg: 196.18
episode: 3237, reward: 200.0, avg: 196.18
episode: 3238, reward: 131.0, avg: 195.49
episode: 3239, reward: 178.0, avg: 195.27
episode: 3240, reward: 200.0, avg: 

episode: 3417, reward: 200.0, avg: 168.86
episode: 3418, reward: 200.0, avg: 170.51
episode: 3419, reward: 115.0, avg: 171.1
episode: 3420, reward: 200.0, avg: 172.25
episode: 3421, reward: 200.0, avg: 173.29
episode: 3422, reward: 200.0, avg: 174.59
episode: 3423, reward: 200.0, avg: 175.82
episode: 3424, reward: 200.0, avg: 177.34
episode: 3425, reward: 200.0, avg: 178.52
episode: 3426, reward: 200.0, avg: 179.99
episode: 3427, reward: 200.0, avg: 181.35
episode: 3428, reward: 200.0, avg: 182.81
episode: 3429, reward: 200.0, avg: 184.35
episode: 3430, reward: 200.0, avg: 185.0
episode: 3431, reward: 200.0, avg: 186.45
episode: 3432, reward: 200.0, avg: 187.17
episode: 3433, reward: 200.0, avg: 187.17
episode: 3434, reward: 195.0, avg: 188.33
episode: 3435, reward: 200.0, avg: 189.11
episode: 3436, reward: 200.0, avg: 189.53
episode: 3437, reward: 200.0, avg: 190.34
episode: 3438, reward: 200.0, avg: 191.11
episode: 3439, reward: 200.0, avg: 192.41
episode: 3440, reward: 200.0, avg: 1

episode: 3619, reward: 200.0, avg: 165.57
episode: 3620, reward: 200.0, avg: 167.13
episode: 3621, reward: 200.0, avg: 168.6
episode: 3622, reward: 200.0, avg: 170.27
episode: 3623, reward: 200.0, avg: 171.93
episode: 3624, reward: 200.0, avg: 173.46
episode: 3625, reward: 200.0, avg: 174.63
episode: 3626, reward: 200.0, avg: 176.18
episode: 3627, reward: 200.0, avg: 177.44
episode: 3628, reward: 200.0, avg: 178.57
episode: 3629, reward: 200.0, avg: 179.91
episode: 3630, reward: 200.0, avg: 180.94
episode: 3631, reward: 200.0, avg: 181.04
episode: 3632, reward: 200.0, avg: 181.04
episode: 3633, reward: 200.0, avg: 181.04
episode: 3634, reward: 200.0, avg: 182.66
episode: 3635, reward: 200.0, avg: 183.32
episode: 3636, reward: 200.0, avg: 184.67
episode: 3637, reward: 200.0, avg: 186.21
episode: 3638, reward: 200.0, avg: 187.65
episode: 3639, reward: 200.0, avg: 189.2
episode: 3640, reward: 200.0, avg: 190.64
episode: 3641, reward: 200.0, avg: 191.32
episode: 3642, reward: 200.0, avg: 1

episode: 3821, reward: 200.0, avg: 87.19
episode: 3822, reward: 44.0, avg: 87.33
episode: 3823, reward: 97.0, avg: 88.09
episode: 3824, reward: 126.0, avg: 88.9
episode: 3825, reward: 200.0, avg: 90.66
episode: 3826, reward: 200.0, avg: 92.34
episode: 3827, reward: 91.0, avg: 93.01
episode: 3828, reward: 200.0, avg: 94.57
episode: 3829, reward: 98.0, avg: 95.25
episode: 3830, reward: 167.0, avg: 96.61
episode: 3831, reward: 107.0, avg: 97.46
episode: 3832, reward: 200.0, avg: 99.08
episode: 3833, reward: 120.0, avg: 99.95
episode: 3834, reward: 112.0, avg: 100.92
episode: 3835, reward: 67.0, avg: 101.33
episode: 3836, reward: 130.0, avg: 102.11
episode: 3837, reward: 200.0, avg: 103.7
episode: 3838, reward: 200.0, avg: 105.27
episode: 3839, reward: 200.0, avg: 106.72
episode: 3840, reward: 157.0, avg: 107.82
episode: 3841, reward: 99.0, avg: 108.18
episode: 3842, reward: 46.0, avg: 107.92
episode: 3843, reward: 200.0, avg: 109.22
episode: 3844, reward: 200.0, avg: 110.57
episode: 3845,

episode: 4023, reward: 200.0, avg: 200.0
episode: 4024, reward: 200.0, avg: 200.0
episode: 4025, reward: 200.0, avg: 200.0
episode: 4026, reward: 200.0, avg: 200.0
episode: 4027, reward: 200.0, avg: 200.0
episode: 4028, reward: 200.0, avg: 200.0
episode: 4029, reward: 200.0, avg: 200.0
episode: 4030, reward: 200.0, avg: 200.0
episode: 4031, reward: 200.0, avg: 200.0
episode: 4032, reward: 200.0, avg: 200.0
episode: 4033, reward: 200.0, avg: 200.0
episode: 4034, reward: 200.0, avg: 200.0
episode: 4035, reward: 200.0, avg: 200.0
episode: 4036, reward: 200.0, avg: 200.0
episode: 4037, reward: 200.0, avg: 200.0
episode: 4038, reward: 200.0, avg: 200.0
episode: 4039, reward: 200.0, avg: 200.0
episode: 4040, reward: 200.0, avg: 200.0
episode: 4041, reward: 200.0, avg: 200.0
episode: 4042, reward: 200.0, avg: 200.0
episode: 4043, reward: 200.0, avg: 200.0
episode: 4044, reward: 200.0, avg: 200.0
episode: 4045, reward: 200.0, avg: 200.0
episode: 4046, reward: 200.0, avg: 200.0
episode: 4047, r

episode: 4223, reward: 200.0, avg: 200.0
episode: 4224, reward: 200.0, avg: 200.0
episode: 4225, reward: 200.0, avg: 200.0
episode: 4226, reward: 200.0, avg: 200.0
episode: 4227, reward: 200.0, avg: 200.0
episode: 4228, reward: 200.0, avg: 200.0
episode: 4229, reward: 200.0, avg: 200.0
episode: 4230, reward: 200.0, avg: 200.0
episode: 4231, reward: 200.0, avg: 200.0
episode: 4232, reward: 200.0, avg: 200.0
episode: 4233, reward: 200.0, avg: 200.0
episode: 4234, reward: 200.0, avg: 200.0
episode: 4235, reward: 200.0, avg: 200.0
episode: 4236, reward: 200.0, avg: 200.0
episode: 4237, reward: 200.0, avg: 200.0
episode: 4238, reward: 200.0, avg: 200.0
episode: 4239, reward: 200.0, avg: 200.0
episode: 4240, reward: 200.0, avg: 200.0
episode: 4241, reward: 200.0, avg: 200.0
episode: 4242, reward: 200.0, avg: 200.0
episode: 4243, reward: 200.0, avg: 200.0
episode: 4244, reward: 200.0, avg: 200.0
episode: 4245, reward: 200.0, avg: 200.0
episode: 4246, reward: 200.0, avg: 200.0
episode: 4247, r

episode: 4426, reward: 200.0, avg: 200.0
episode: 4427, reward: 200.0, avg: 200.0
episode: 4428, reward: 200.0, avg: 200.0
episode: 4429, reward: 200.0, avg: 200.0
episode: 4430, reward: 200.0, avg: 200.0
episode: 4431, reward: 200.0, avg: 200.0
episode: 4432, reward: 200.0, avg: 200.0
episode: 4433, reward: 200.0, avg: 200.0
episode: 4434, reward: 200.0, avg: 200.0
episode: 4435, reward: 200.0, avg: 200.0
episode: 4436, reward: 200.0, avg: 200.0
episode: 4437, reward: 200.0, avg: 200.0
episode: 4438, reward: 200.0, avg: 200.0
episode: 4439, reward: 200.0, avg: 200.0
episode: 4440, reward: 200.0, avg: 200.0
episode: 4441, reward: 200.0, avg: 200.0
episode: 4442, reward: 200.0, avg: 200.0
episode: 4443, reward: 200.0, avg: 200.0
episode: 4444, reward: 200.0, avg: 200.0
episode: 4445, reward: 200.0, avg: 200.0
episode: 4446, reward: 200.0, avg: 200.0
episode: 4447, reward: 200.0, avg: 200.0
episode: 4448, reward: 200.0, avg: 200.0
episode: 4449, reward: 200.0, avg: 200.0
episode: 4450, r

episode: 4631, reward: 200.0, avg: 200.0
episode: 4632, reward: 200.0, avg: 200.0
episode: 4633, reward: 200.0, avg: 200.0
episode: 4634, reward: 200.0, avg: 200.0
episode: 4635, reward: 200.0, avg: 200.0
episode: 4636, reward: 200.0, avg: 200.0
episode: 4637, reward: 200.0, avg: 200.0
episode: 4638, reward: 200.0, avg: 200.0
episode: 4639, reward: 200.0, avg: 200.0
episode: 4640, reward: 200.0, avg: 200.0
episode: 4641, reward: 200.0, avg: 200.0
episode: 4642, reward: 200.0, avg: 200.0
episode: 4643, reward: 200.0, avg: 200.0
episode: 4644, reward: 200.0, avg: 200.0
episode: 4645, reward: 200.0, avg: 200.0
episode: 4646, reward: 200.0, avg: 200.0
episode: 4647, reward: 200.0, avg: 200.0
episode: 4648, reward: 200.0, avg: 200.0
episode: 4649, reward: 200.0, avg: 200.0
episode: 4650, reward: 200.0, avg: 200.0
episode: 4651, reward: 200.0, avg: 200.0
episode: 4652, reward: 200.0, avg: 200.0
episode: 4653, reward: 200.0, avg: 200.0
episode: 4654, reward: 200.0, avg: 200.0
episode: 4655, r

episode: 4837, reward: 200.0, avg: 200.0
episode: 4838, reward: 200.0, avg: 200.0
episode: 4839, reward: 200.0, avg: 200.0
episode: 4840, reward: 200.0, avg: 200.0
episode: 4841, reward: 200.0, avg: 200.0
episode: 4842, reward: 200.0, avg: 200.0
episode: 4843, reward: 200.0, avg: 200.0
episode: 4844, reward: 200.0, avg: 200.0
episode: 4845, reward: 200.0, avg: 200.0
episode: 4846, reward: 200.0, avg: 200.0
episode: 4847, reward: 200.0, avg: 200.0
episode: 4848, reward: 200.0, avg: 200.0
episode: 4849, reward: 200.0, avg: 200.0
episode: 4850, reward: 200.0, avg: 200.0
episode: 4851, reward: 200.0, avg: 200.0
episode: 4852, reward: 200.0, avg: 200.0
episode: 4853, reward: 200.0, avg: 200.0
episode: 4854, reward: 200.0, avg: 200.0
episode: 4855, reward: 200.0, avg: 200.0
episode: 4856, reward: 200.0, avg: 200.0
episode: 4857, reward: 200.0, avg: 200.0
episode: 4858, reward: 200.0, avg: 200.0
episode: 4859, reward: 200.0, avg: 200.0
episode: 4860, reward: 200.0, avg: 200.0
episode: 4861, r

episode: 5041, reward: 200.0, avg: 200.0
episode: 5042, reward: 200.0, avg: 200.0
episode: 5043, reward: 200.0, avg: 200.0
episode: 5044, reward: 200.0, avg: 200.0
episode: 5045, reward: 200.0, avg: 200.0
episode: 5046, reward: 200.0, avg: 200.0
episode: 5047, reward: 200.0, avg: 200.0
episode: 5048, reward: 200.0, avg: 200.0
episode: 5049, reward: 200.0, avg: 200.0
episode: 5050, reward: 200.0, avg: 200.0
episode: 5051, reward: 200.0, avg: 200.0
episode: 5052, reward: 200.0, avg: 200.0
episode: 5053, reward: 200.0, avg: 200.0
episode: 5054, reward: 200.0, avg: 200.0
episode: 5055, reward: 200.0, avg: 200.0
episode: 5056, reward: 198.0, avg: 199.98
episode: 5057, reward: 200.0, avg: 199.98
episode: 5058, reward: 200.0, avg: 199.98
episode: 5059, reward: 155.0, avg: 199.53
episode: 5060, reward: 200.0, avg: 199.53
episode: 5061, reward: 181.0, avg: 199.34
episode: 5062, reward: 200.0, avg: 199.34
episode: 5063, reward: 172.0, avg: 199.06
episode: 5064, reward: 200.0, avg: 199.06
episode

episode: 5239, reward: 172.0, avg: 179.75
episode: 5240, reward: 183.0, avg: 179.58
episode: 5241, reward: 200.0, avg: 179.58
episode: 5242, reward: 162.0, avg: 179.8
episode: 5243, reward: 130.0, avg: 179.1
episode: 5244, reward: 127.0, avg: 178.71
episode: 5245, reward: 193.0, avg: 178.64
episode: 5246, reward: 143.0, avg: 178.58
episode: 5247, reward: 200.0, avg: 178.58
episode: 5248, reward: 200.0, avg: 178.58
episode: 5249, reward: 156.0, avg: 178.14
episode: 5250, reward: 177.0, avg: 178.13
episode: 5251, reward: 143.0, avg: 177.56
episode: 5252, reward: 200.0, avg: 177.56
episode: 5253, reward: 135.0, avg: 176.91
episode: 5254, reward: 156.0, avg: 176.47
episode: 5255, reward: 200.0, avg: 176.47
episode: 5256, reward: 168.0, avg: 176.15
episode: 5257, reward: 200.0, avg: 176.15
episode: 5258, reward: 148.0, avg: 175.63
episode: 5259, reward: 186.0, avg: 175.49
episode: 5260, reward: 159.0, avg: 175.08
episode: 5261, reward: 200.0, avg: 175.08
episode: 5262, reward: 193.0, avg: 1

episode: 5437, reward: 200.0, avg: 182.76
episode: 5438, reward: 200.0, avg: 183.12
episode: 5439, reward: 200.0, avg: 183.82
episode: 5440, reward: 200.0, avg: 183.82
episode: 5441, reward: 200.0, avg: 183.82
episode: 5442, reward: 200.0, avg: 184.39
episode: 5443, reward: 200.0, avg: 185.17
episode: 5444, reward: 200.0, avg: 185.17
episode: 5445, reward: 200.0, avg: 185.17
episode: 5446, reward: 200.0, avg: 185.35
episode: 5447, reward: 200.0, avg: 185.35
episode: 5448, reward: 200.0, avg: 185.35
episode: 5449, reward: 200.0, avg: 185.65
episode: 5450, reward: 200.0, avg: 186.08
episode: 5451, reward: 186.0, avg: 185.94
episode: 5452, reward: 186.0, avg: 185.8
episode: 5453, reward: 200.0, avg: 185.8
episode: 5454, reward: 200.0, avg: 185.8
episode: 5455, reward: 190.0, avg: 186.19
episode: 5456, reward: 200.0, avg: 186.81
episode: 5457, reward: 200.0, avg: 186.81
episode: 5458, reward: 189.0, avg: 187.1
episode: 5459, reward: 200.0, avg: 187.73
episode: 5460, reward: 200.0, avg: 187

episode: 5636, reward: 200.0, avg: 191.88
episode: 5637, reward: 200.0, avg: 191.88
episode: 5638, reward: 200.0, avg: 191.88
episode: 5639, reward: 200.0, avg: 191.88
episode: 5640, reward: 200.0, avg: 192.21
episode: 5641, reward: 200.0, avg: 192.21
episode: 5642, reward: 200.0, avg: 192.38
episode: 5643, reward: 200.0, avg: 192.38
episode: 5644, reward: 200.0, avg: 192.46
episode: 5645, reward: 200.0, avg: 192.46
episode: 5646, reward: 200.0, avg: 192.46
episode: 5647, reward: 200.0, avg: 192.46
episode: 5648, reward: 200.0, avg: 192.46
episode: 5649, reward: 200.0, avg: 192.46
episode: 5650, reward: 200.0, avg: 192.46
episode: 5651, reward: 200.0, avg: 192.46
episode: 5652, reward: 200.0, avg: 192.46
episode: 5653, reward: 200.0, avg: 192.46
episode: 5654, reward: 200.0, avg: 192.46
episode: 5655, reward: 200.0, avg: 192.46
episode: 5656, reward: 200.0, avg: 192.51
episode: 5657, reward: 200.0, avg: 192.65
episode: 5658, reward: 200.0, avg: 193.18
episode: 5659, reward: 200.0, avg:

episode: 5838, reward: 200.0, avg: 200.0
episode: 5839, reward: 200.0, avg: 200.0
episode: 5840, reward: 200.0, avg: 200.0
episode: 5841, reward: 200.0, avg: 200.0
episode: 5842, reward: 200.0, avg: 200.0
episode: 5843, reward: 200.0, avg: 200.0
episode: 5844, reward: 200.0, avg: 200.0
episode: 5845, reward: 200.0, avg: 200.0
episode: 5846, reward: 200.0, avg: 200.0
episode: 5847, reward: 200.0, avg: 200.0
episode: 5848, reward: 200.0, avg: 200.0
episode: 5849, reward: 200.0, avg: 200.0
episode: 5850, reward: 200.0, avg: 200.0
episode: 5851, reward: 200.0, avg: 200.0
episode: 5852, reward: 200.0, avg: 200.0
episode: 5853, reward: 200.0, avg: 200.0
episode: 5854, reward: 200.0, avg: 200.0
episode: 5855, reward: 200.0, avg: 200.0
episode: 5856, reward: 200.0, avg: 200.0
episode: 5857, reward: 200.0, avg: 200.0
episode: 5858, reward: 200.0, avg: 200.0
episode: 5859, reward: 200.0, avg: 200.0
episode: 5860, reward: 200.0, avg: 200.0
episode: 5861, reward: 200.0, avg: 200.0
episode: 5862, r

episode: 6044, reward: 200.0, avg: 200.0
episode: 6045, reward: 200.0, avg: 200.0
episode: 6046, reward: 200.0, avg: 200.0
episode: 6047, reward: 200.0, avg: 200.0
episode: 6048, reward: 200.0, avg: 200.0
episode: 6049, reward: 200.0, avg: 200.0
episode: 6050, reward: 200.0, avg: 200.0
episode: 6051, reward: 200.0, avg: 200.0
episode: 6052, reward: 200.0, avg: 200.0
episode: 6053, reward: 200.0, avg: 200.0
episode: 6054, reward: 200.0, avg: 200.0
episode: 6055, reward: 200.0, avg: 200.0
episode: 6056, reward: 200.0, avg: 200.0
episode: 6057, reward: 200.0, avg: 200.0
episode: 6058, reward: 200.0, avg: 200.0
episode: 6059, reward: 200.0, avg: 200.0
episode: 6060, reward: 200.0, avg: 200.0
episode: 6061, reward: 200.0, avg: 200.0
episode: 6062, reward: 200.0, avg: 200.0
episode: 6063, reward: 200.0, avg: 200.0
episode: 6064, reward: 200.0, avg: 200.0
episode: 6065, reward: 200.0, avg: 200.0
episode: 6066, reward: 200.0, avg: 200.0
episode: 6067, reward: 200.0, avg: 200.0
episode: 6068, r

episode: 6248, reward: 200.0, avg: 200.0
episode: 6249, reward: 200.0, avg: 200.0
episode: 6250, reward: 200.0, avg: 200.0
episode: 6251, reward: 200.0, avg: 200.0
episode: 6252, reward: 200.0, avg: 200.0
episode: 6253, reward: 200.0, avg: 200.0
episode: 6254, reward: 200.0, avg: 200.0
episode: 6255, reward: 200.0, avg: 200.0
episode: 6256, reward: 200.0, avg: 200.0
episode: 6257, reward: 200.0, avg: 200.0
episode: 6258, reward: 200.0, avg: 200.0
episode: 6259, reward: 200.0, avg: 200.0
episode: 6260, reward: 200.0, avg: 200.0
episode: 6261, reward: 200.0, avg: 200.0
episode: 6262, reward: 200.0, avg: 200.0
episode: 6263, reward: 200.0, avg: 200.0
episode: 6264, reward: 200.0, avg: 200.0
episode: 6265, reward: 200.0, avg: 200.0
episode: 6266, reward: 200.0, avg: 200.0
episode: 6267, reward: 200.0, avg: 200.0
episode: 6268, reward: 200.0, avg: 200.0
episode: 6269, reward: 200.0, avg: 200.0
episode: 6270, reward: 200.0, avg: 200.0
episode: 6271, reward: 200.0, avg: 200.0
episode: 6272, r

episode: 6448, reward: 200.0, avg: 200.0
episode: 6449, reward: 200.0, avg: 200.0
episode: 6450, reward: 200.0, avg: 200.0
episode: 6451, reward: 200.0, avg: 200.0
episode: 6452, reward: 200.0, avg: 200.0
episode: 6453, reward: 200.0, avg: 200.0
episode: 6454, reward: 200.0, avg: 200.0
episode: 6455, reward: 200.0, avg: 200.0
episode: 6456, reward: 200.0, avg: 200.0
episode: 6457, reward: 200.0, avg: 200.0
episode: 6458, reward: 200.0, avg: 200.0
episode: 6459, reward: 200.0, avg: 200.0
episode: 6460, reward: 200.0, avg: 200.0
episode: 6461, reward: 200.0, avg: 200.0
episode: 6462, reward: 200.0, avg: 200.0
episode: 6463, reward: 200.0, avg: 200.0
episode: 6464, reward: 200.0, avg: 200.0
episode: 6465, reward: 200.0, avg: 200.0
episode: 6466, reward: 200.0, avg: 200.0
episode: 6467, reward: 200.0, avg: 200.0
episode: 6468, reward: 200.0, avg: 200.0
episode: 6469, reward: 200.0, avg: 200.0
episode: 6470, reward: 200.0, avg: 200.0
episode: 6471, reward: 200.0, avg: 200.0
episode: 6472, r

episode: 6651, reward: 200.0, avg: 199.98
episode: 6652, reward: 200.0, avg: 199.98
episode: 6653, reward: 200.0, avg: 199.98
episode: 6654, reward: 200.0, avg: 199.98
episode: 6655, reward: 200.0, avg: 199.98
episode: 6656, reward: 200.0, avg: 199.98
episode: 6657, reward: 200.0, avg: 199.98
episode: 6658, reward: 200.0, avg: 199.98
episode: 6659, reward: 200.0, avg: 199.98
episode: 6660, reward: 200.0, avg: 199.98
episode: 6661, reward: 200.0, avg: 199.98
episode: 6662, reward: 200.0, avg: 199.98
episode: 6663, reward: 200.0, avg: 199.98
episode: 6664, reward: 200.0, avg: 199.98
episode: 6665, reward: 200.0, avg: 199.98
episode: 6666, reward: 200.0, avg: 199.98
episode: 6667, reward: 200.0, avg: 199.98
episode: 6668, reward: 200.0, avg: 199.98
episode: 6669, reward: 200.0, avg: 199.98
episode: 6670, reward: 200.0, avg: 199.98
episode: 6671, reward: 200.0, avg: 199.98
episode: 6672, reward: 200.0, avg: 199.98
episode: 6673, reward: 200.0, avg: 199.98
episode: 6674, reward: 200.0, avg:

episode: 6850, reward: 200.0, avg: 199.76
episode: 6851, reward: 200.0, avg: 199.77
episode: 6852, reward: 200.0, avg: 199.77
episode: 6853, reward: 200.0, avg: 199.77
episode: 6854, reward: 200.0, avg: 199.77
episode: 6855, reward: 200.0, avg: 199.77
episode: 6856, reward: 200.0, avg: 199.9
episode: 6857, reward: 200.0, avg: 199.9
episode: 6858, reward: 200.0, avg: 199.9
episode: 6859, reward: 200.0, avg: 199.9
episode: 6860, reward: 200.0, avg: 199.9
episode: 6861, reward: 200.0, avg: 199.9
episode: 6862, reward: 200.0, avg: 199.9
episode: 6863, reward: 200.0, avg: 199.9
episode: 6864, reward: 200.0, avg: 199.9
episode: 6865, reward: 200.0, avg: 199.9
episode: 6866, reward: 200.0, avg: 199.9
episode: 6867, reward: 200.0, avg: 199.9
episode: 6868, reward: 200.0, avg: 199.9
episode: 6869, reward: 200.0, avg: 199.9
episode: 6870, reward: 200.0, avg: 199.9
episode: 6871, reward: 200.0, avg: 199.9
episode: 6872, reward: 200.0, avg: 199.9
episode: 6873, reward: 200.0, avg: 199.9
episode: 6

episode: 7052, reward: 200.0, avg: 200.0
episode: 7053, reward: 200.0, avg: 200.0
episode: 7054, reward: 200.0, avg: 200.0
episode: 7055, reward: 200.0, avg: 200.0
episode: 7056, reward: 200.0, avg: 200.0
episode: 7057, reward: 200.0, avg: 200.0
episode: 7058, reward: 200.0, avg: 200.0
episode: 7059, reward: 200.0, avg: 200.0
episode: 7060, reward: 200.0, avg: 200.0
episode: 7061, reward: 200.0, avg: 200.0
episode: 7062, reward: 200.0, avg: 200.0
episode: 7063, reward: 200.0, avg: 200.0
episode: 7064, reward: 200.0, avg: 200.0
episode: 7065, reward: 200.0, avg: 200.0
episode: 7066, reward: 200.0, avg: 200.0
episode: 7067, reward: 200.0, avg: 200.0
episode: 7068, reward: 200.0, avg: 200.0
episode: 7069, reward: 200.0, avg: 200.0
episode: 7070, reward: 200.0, avg: 200.0
episode: 7071, reward: 200.0, avg: 200.0
episode: 7072, reward: 200.0, avg: 200.0
episode: 7073, reward: 200.0, avg: 200.0
episode: 7074, reward: 200.0, avg: 200.0
episode: 7075, reward: 200.0, avg: 200.0
episode: 7076, r

episode: 7255, reward: 200.0, avg: 200.0
episode: 7256, reward: 200.0, avg: 200.0
episode: 7257, reward: 200.0, avg: 200.0
episode: 7258, reward: 200.0, avg: 200.0
episode: 7259, reward: 200.0, avg: 200.0
episode: 7260, reward: 200.0, avg: 200.0
episode: 7261, reward: 200.0, avg: 200.0
episode: 7262, reward: 200.0, avg: 200.0
episode: 7263, reward: 200.0, avg: 200.0
episode: 7264, reward: 200.0, avg: 200.0
episode: 7265, reward: 200.0, avg: 200.0
episode: 7266, reward: 200.0, avg: 200.0
episode: 7267, reward: 200.0, avg: 200.0
episode: 7268, reward: 200.0, avg: 200.0
episode: 7269, reward: 200.0, avg: 200.0
episode: 7270, reward: 200.0, avg: 200.0
episode: 7271, reward: 200.0, avg: 200.0
episode: 7272, reward: 200.0, avg: 200.0
episode: 7273, reward: 200.0, avg: 200.0
episode: 7274, reward: 200.0, avg: 200.0
episode: 7275, reward: 200.0, avg: 200.0
episode: 7276, reward: 200.0, avg: 200.0
episode: 7277, reward: 200.0, avg: 200.0
episode: 7278, reward: 200.0, avg: 200.0
episode: 7279, r

episode: 7458, reward: 200.0, avg: 200.0
episode: 7459, reward: 200.0, avg: 200.0
episode: 7460, reward: 200.0, avg: 200.0
episode: 7461, reward: 200.0, avg: 200.0
episode: 7462, reward: 200.0, avg: 200.0
episode: 7463, reward: 200.0, avg: 200.0
episode: 7464, reward: 200.0, avg: 200.0
episode: 7465, reward: 200.0, avg: 200.0
episode: 7466, reward: 200.0, avg: 200.0
episode: 7467, reward: 200.0, avg: 200.0
episode: 7468, reward: 200.0, avg: 200.0
episode: 7469, reward: 200.0, avg: 200.0
episode: 7470, reward: 200.0, avg: 200.0
episode: 7471, reward: 200.0, avg: 200.0
episode: 7472, reward: 200.0, avg: 200.0
episode: 7473, reward: 200.0, avg: 200.0
episode: 7474, reward: 200.0, avg: 200.0
episode: 7475, reward: 200.0, avg: 200.0
episode: 7476, reward: 200.0, avg: 200.0
episode: 7477, reward: 200.0, avg: 200.0
episode: 7478, reward: 200.0, avg: 200.0
episode: 7479, reward: 200.0, avg: 200.0
episode: 7480, reward: 200.0, avg: 200.0
episode: 7481, reward: 200.0, avg: 200.0
episode: 7482, r

episode: 7662, reward: 200.0, avg: 200.0
episode: 7663, reward: 200.0, avg: 200.0
episode: 7664, reward: 200.0, avg: 200.0
episode: 7665, reward: 200.0, avg: 200.0
episode: 7666, reward: 200.0, avg: 200.0
episode: 7667, reward: 200.0, avg: 200.0
episode: 7668, reward: 200.0, avg: 200.0
episode: 7669, reward: 200.0, avg: 200.0
episode: 7670, reward: 200.0, avg: 200.0
episode: 7671, reward: 200.0, avg: 200.0
episode: 7672, reward: 200.0, avg: 200.0
episode: 7673, reward: 200.0, avg: 200.0
episode: 7674, reward: 200.0, avg: 200.0
episode: 7675, reward: 200.0, avg: 200.0
episode: 7676, reward: 200.0, avg: 200.0
episode: 7677, reward: 200.0, avg: 200.0
episode: 7678, reward: 200.0, avg: 200.0
episode: 7679, reward: 200.0, avg: 200.0
episode: 7680, reward: 200.0, avg: 200.0
episode: 7681, reward: 200.0, avg: 200.0
episode: 7682, reward: 200.0, avg: 200.0
episode: 7683, reward: 200.0, avg: 200.0
episode: 7684, reward: 200.0, avg: 200.0
episode: 7685, reward: 200.0, avg: 200.0
episode: 7686, r

episode: 7862, reward: 200.0, avg: 200.0
episode: 7863, reward: 200.0, avg: 200.0
episode: 7864, reward: 200.0, avg: 200.0
episode: 7865, reward: 200.0, avg: 200.0
episode: 7866, reward: 200.0, avg: 200.0
episode: 7867, reward: 200.0, avg: 200.0
episode: 7868, reward: 200.0, avg: 200.0
episode: 7869, reward: 200.0, avg: 200.0
episode: 7870, reward: 200.0, avg: 200.0
episode: 7871, reward: 200.0, avg: 200.0
episode: 7872, reward: 200.0, avg: 200.0
episode: 7873, reward: 200.0, avg: 200.0
episode: 7874, reward: 200.0, avg: 200.0
episode: 7875, reward: 200.0, avg: 200.0
episode: 7876, reward: 200.0, avg: 200.0
episode: 7877, reward: 200.0, avg: 200.0
episode: 7878, reward: 200.0, avg: 200.0
episode: 7879, reward: 200.0, avg: 200.0
episode: 7880, reward: 200.0, avg: 200.0
episode: 7881, reward: 200.0, avg: 200.0
episode: 7882, reward: 200.0, avg: 200.0
episode: 7883, reward: 200.0, avg: 200.0
episode: 7884, reward: 200.0, avg: 200.0
episode: 7885, reward: 200.0, avg: 200.0
episode: 7886, r

episode: 8062, reward: 200.0, avg: 200.0
episode: 8063, reward: 200.0, avg: 200.0
episode: 8064, reward: 200.0, avg: 200.0
episode: 8065, reward: 200.0, avg: 200.0
episode: 8066, reward: 200.0, avg: 200.0
episode: 8067, reward: 200.0, avg: 200.0
episode: 8068, reward: 200.0, avg: 200.0
episode: 8069, reward: 200.0, avg: 200.0
episode: 8070, reward: 200.0, avg: 200.0
episode: 8071, reward: 200.0, avg: 200.0
episode: 8072, reward: 200.0, avg: 200.0
episode: 8073, reward: 200.0, avg: 200.0
episode: 8074, reward: 200.0, avg: 200.0
episode: 8075, reward: 200.0, avg: 200.0
episode: 8076, reward: 200.0, avg: 200.0
episode: 8077, reward: 200.0, avg: 200.0
episode: 8078, reward: 200.0, avg: 200.0
episode: 8079, reward: 200.0, avg: 200.0
episode: 8080, reward: 200.0, avg: 200.0
episode: 8081, reward: 200.0, avg: 200.0
episode: 8082, reward: 200.0, avg: 200.0
episode: 8083, reward: 200.0, avg: 200.0
episode: 8084, reward: 200.0, avg: 200.0
episode: 8085, reward: 200.0, avg: 200.0
episode: 8086, r

episode: 8264, reward: 200.0, avg: 200.0
episode: 8265, reward: 200.0, avg: 200.0
episode: 8266, reward: 200.0, avg: 200.0
episode: 8267, reward: 200.0, avg: 200.0
episode: 8268, reward: 200.0, avg: 200.0
episode: 8269, reward: 200.0, avg: 200.0
episode: 8270, reward: 200.0, avg: 200.0
episode: 8271, reward: 200.0, avg: 200.0
episode: 8272, reward: 200.0, avg: 200.0
episode: 8273, reward: 200.0, avg: 200.0
episode: 8274, reward: 200.0, avg: 200.0
episode: 8275, reward: 200.0, avg: 200.0
episode: 8276, reward: 200.0, avg: 200.0
episode: 8277, reward: 200.0, avg: 200.0
episode: 8278, reward: 200.0, avg: 200.0
episode: 8279, reward: 200.0, avg: 200.0
episode: 8280, reward: 200.0, avg: 200.0
episode: 8281, reward: 200.0, avg: 200.0
episode: 8282, reward: 200.0, avg: 200.0
episode: 8283, reward: 200.0, avg: 200.0
episode: 8284, reward: 200.0, avg: 200.0
episode: 8285, reward: 200.0, avg: 200.0
episode: 8286, reward: 200.0, avg: 200.0
episode: 8287, reward: 200.0, avg: 200.0
episode: 8288, r

episode: 8464, reward: 200.0, avg: 200.0
episode: 8465, reward: 200.0, avg: 200.0
episode: 8466, reward: 200.0, avg: 200.0
episode: 8467, reward: 200.0, avg: 200.0
episode: 8468, reward: 200.0, avg: 200.0
episode: 8469, reward: 200.0, avg: 200.0
episode: 8470, reward: 200.0, avg: 200.0
episode: 8471, reward: 200.0, avg: 200.0
episode: 8472, reward: 200.0, avg: 200.0
episode: 8473, reward: 200.0, avg: 200.0
episode: 8474, reward: 200.0, avg: 200.0
episode: 8475, reward: 200.0, avg: 200.0
episode: 8476, reward: 200.0, avg: 200.0
episode: 8477, reward: 200.0, avg: 200.0
episode: 8478, reward: 200.0, avg: 200.0
episode: 8479, reward: 200.0, avg: 200.0
episode: 8480, reward: 200.0, avg: 200.0
episode: 8481, reward: 200.0, avg: 200.0
episode: 8482, reward: 200.0, avg: 200.0
episode: 8483, reward: 200.0, avg: 200.0
episode: 8484, reward: 200.0, avg: 200.0
episode: 8485, reward: 200.0, avg: 200.0
episode: 8486, reward: 200.0, avg: 200.0
episode: 8487, reward: 200.0, avg: 200.0
episode: 8488, r

episode: 8669, reward: 200.0, avg: 200.0
episode: 8670, reward: 200.0, avg: 200.0
episode: 8671, reward: 200.0, avg: 200.0
episode: 8672, reward: 200.0, avg: 200.0
episode: 8673, reward: 200.0, avg: 200.0
episode: 8674, reward: 200.0, avg: 200.0
episode: 8675, reward: 200.0, avg: 200.0
episode: 8676, reward: 200.0, avg: 200.0
episode: 8677, reward: 200.0, avg: 200.0
episode: 8678, reward: 200.0, avg: 200.0
episode: 8679, reward: 200.0, avg: 200.0
episode: 8680, reward: 200.0, avg: 200.0
episode: 8681, reward: 200.0, avg: 200.0
episode: 8682, reward: 200.0, avg: 200.0
episode: 8683, reward: 200.0, avg: 200.0
episode: 8684, reward: 200.0, avg: 200.0
episode: 8685, reward: 200.0, avg: 200.0
episode: 8686, reward: 200.0, avg: 200.0
episode: 8687, reward: 200.0, avg: 200.0
episode: 8688, reward: 200.0, avg: 200.0
episode: 8689, reward: 200.0, avg: 200.0
episode: 8690, reward: 200.0, avg: 200.0
episode: 8691, reward: 200.0, avg: 200.0
episode: 8692, reward: 200.0, avg: 200.0
episode: 8693, r

episode: 8871, reward: 200.0, avg: 200.0
episode: 8872, reward: 200.0, avg: 200.0
episode: 8873, reward: 200.0, avg: 200.0
episode: 8874, reward: 200.0, avg: 200.0
episode: 8875, reward: 200.0, avg: 200.0
episode: 8876, reward: 200.0, avg: 200.0
episode: 8877, reward: 200.0, avg: 200.0
episode: 8878, reward: 200.0, avg: 200.0
episode: 8879, reward: 200.0, avg: 200.0
episode: 8880, reward: 200.0, avg: 200.0
episode: 8881, reward: 200.0, avg: 200.0
episode: 8882, reward: 200.0, avg: 200.0
episode: 8883, reward: 200.0, avg: 200.0
episode: 8884, reward: 200.0, avg: 200.0
episode: 8885, reward: 200.0, avg: 200.0
episode: 8886, reward: 200.0, avg: 200.0
episode: 8887, reward: 200.0, avg: 200.0
episode: 8888, reward: 200.0, avg: 200.0
episode: 8889, reward: 200.0, avg: 200.0
episode: 8890, reward: 200.0, avg: 200.0
episode: 8891, reward: 200.0, avg: 200.0
episode: 8892, reward: 200.0, avg: 200.0
episode: 8893, reward: 200.0, avg: 200.0
episode: 8894, reward: 200.0, avg: 200.0
episode: 8895, r

episode: 9076, reward: 200.0, avg: 200.0
episode: 9077, reward: 200.0, avg: 200.0
episode: 9078, reward: 200.0, avg: 200.0
episode: 9079, reward: 200.0, avg: 200.0
episode: 9080, reward: 200.0, avg: 200.0
episode: 9081, reward: 200.0, avg: 200.0
episode: 9082, reward: 200.0, avg: 200.0
episode: 9083, reward: 200.0, avg: 200.0
episode: 9084, reward: 200.0, avg: 200.0
episode: 9085, reward: 200.0, avg: 200.0
episode: 9086, reward: 200.0, avg: 200.0
episode: 9087, reward: 200.0, avg: 200.0
episode: 9088, reward: 200.0, avg: 200.0
episode: 9089, reward: 200.0, avg: 200.0
episode: 9090, reward: 200.0, avg: 200.0
episode: 9091, reward: 200.0, avg: 200.0
episode: 9092, reward: 200.0, avg: 200.0
episode: 9093, reward: 200.0, avg: 200.0
episode: 9094, reward: 200.0, avg: 200.0
episode: 9095, reward: 200.0, avg: 200.0
episode: 9096, reward: 200.0, avg: 200.0
episode: 9097, reward: 200.0, avg: 200.0
episode: 9098, reward: 200.0, avg: 200.0
episode: 9099, reward: 200.0, avg: 200.0
episode: 9100, r

episode: 9279, reward: 200.0, avg: 200.0
episode: 9280, reward: 200.0, avg: 200.0
episode: 9281, reward: 200.0, avg: 200.0
episode: 9282, reward: 200.0, avg: 200.0
episode: 9283, reward: 200.0, avg: 200.0
episode: 9284, reward: 200.0, avg: 200.0
episode: 9285, reward: 200.0, avg: 200.0
episode: 9286, reward: 200.0, avg: 200.0
episode: 9287, reward: 200.0, avg: 200.0
episode: 9288, reward: 200.0, avg: 200.0
episode: 9289, reward: 200.0, avg: 200.0
episode: 9290, reward: 200.0, avg: 200.0
episode: 9291, reward: 200.0, avg: 200.0
episode: 9292, reward: 200.0, avg: 200.0
episode: 9293, reward: 200.0, avg: 200.0
episode: 9294, reward: 200.0, avg: 200.0
episode: 9295, reward: 200.0, avg: 200.0
episode: 9296, reward: 200.0, avg: 200.0
episode: 9297, reward: 200.0, avg: 200.0
episode: 9298, reward: 200.0, avg: 200.0
episode: 9299, reward: 200.0, avg: 200.0
episode: 9300, reward: 200.0, avg: 200.0
episode: 9301, reward: 200.0, avg: 200.0
episode: 9302, reward: 200.0, avg: 200.0
episode: 9303, r

episode: 9479, reward: 200.0, avg: 200.0
episode: 9480, reward: 200.0, avg: 200.0
episode: 9481, reward: 200.0, avg: 200.0
episode: 9482, reward: 200.0, avg: 200.0
episode: 9483, reward: 200.0, avg: 200.0
episode: 9484, reward: 200.0, avg: 200.0
episode: 9485, reward: 200.0, avg: 200.0
episode: 9486, reward: 200.0, avg: 200.0
episode: 9487, reward: 200.0, avg: 200.0
episode: 9488, reward: 200.0, avg: 200.0
episode: 9489, reward: 200.0, avg: 200.0
episode: 9490, reward: 200.0, avg: 200.0
episode: 9491, reward: 200.0, avg: 200.0
episode: 9492, reward: 200.0, avg: 200.0
episode: 9493, reward: 58.0, avg: 198.58
episode: 9494, reward: 42.0, avg: 197.0
episode: 9495, reward: 89.0, avg: 195.89
episode: 9496, reward: 90.0, avg: 194.79
episode: 9497, reward: 134.0, avg: 194.13
episode: 9498, reward: 72.0, avg: 192.85
episode: 9499, reward: 89.0, avg: 191.74
episode: 9500, reward: 70.0, avg: 190.44
episode: 9501, reward: 67.0, avg: 189.11
episode: 9502, reward: 178.0, avg: 188.89
episode: 9503, 

episode: 9682, reward: 200.0, avg: 200.0
episode: 9683, reward: 200.0, avg: 200.0
episode: 9684, reward: 200.0, avg: 200.0
episode: 9685, reward: 200.0, avg: 200.0
episode: 9686, reward: 200.0, avg: 200.0
episode: 9687, reward: 200.0, avg: 200.0
episode: 9688, reward: 200.0, avg: 200.0
episode: 9689, reward: 200.0, avg: 200.0
episode: 9690, reward: 200.0, avg: 200.0
episode: 9691, reward: 200.0, avg: 200.0
episode: 9692, reward: 200.0, avg: 200.0
episode: 9693, reward: 200.0, avg: 200.0
episode: 9694, reward: 200.0, avg: 200.0
episode: 9695, reward: 200.0, avg: 200.0
episode: 9696, reward: 200.0, avg: 200.0
episode: 9697, reward: 200.0, avg: 200.0
episode: 9698, reward: 200.0, avg: 200.0
episode: 9699, reward: 200.0, avg: 200.0
episode: 9700, reward: 200.0, avg: 200.0
episode: 9701, reward: 200.0, avg: 200.0
episode: 9702, reward: 200.0, avg: 200.0
episode: 9703, reward: 200.0, avg: 200.0
episode: 9704, reward: 200.0, avg: 200.0
episode: 9705, reward: 200.0, avg: 200.0
episode: 9706, r

episode: 9886, reward: 200.0, avg: 200.0
episode: 9887, reward: 200.0, avg: 200.0
episode: 9888, reward: 200.0, avg: 200.0
episode: 9889, reward: 200.0, avg: 200.0
episode: 9890, reward: 200.0, avg: 200.0
episode: 9891, reward: 200.0, avg: 200.0
episode: 9892, reward: 200.0, avg: 200.0
episode: 9893, reward: 200.0, avg: 200.0
episode: 9894, reward: 200.0, avg: 200.0
episode: 9895, reward: 200.0, avg: 200.0
episode: 9896, reward: 200.0, avg: 200.0
episode: 9897, reward: 200.0, avg: 200.0
episode: 9898, reward: 200.0, avg: 200.0
episode: 9899, reward: 200.0, avg: 200.0
episode: 9900, reward: 200.0, avg: 200.0
episode: 9901, reward: 200.0, avg: 200.0
episode: 9902, reward: 200.0, avg: 200.0
episode: 9903, reward: 200.0, avg: 200.0
episode: 9904, reward: 200.0, avg: 200.0
episode: 9905, reward: 200.0, avg: 200.0
episode: 9906, reward: 200.0, avg: 200.0
episode: 9907, reward: 200.0, avg: 200.0
episode: 9908, reward: 200.0, avg: 200.0
episode: 9909, reward: 200.0, avg: 200.0
episode: 9910, r