In [5]:
import random
import gym
import numpy as np
from IPython.display import clear_output
from time import sleep
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

alpha = 0.001
gamma = 0.99

    
class PolicyNetwork(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fcA1 = torch.nn.Linear(4, 10)
        self.fcA2 = torch.nn.Linear(10, 2)
        
    def forward(self, x):
        x = self.fcA1(x)
        x = torch.nn.functional.relu(x)
        x = self.fcA2(x)  
        x = torch.nn.functional.softmax(x, dim=-1)
        return x
    
# network and optimizer
pi = PolicyNetwork().to(device)
pi_optimizer = torch.optim.Adam(pi.parameters(), lr=alpha)



def gen_episode():
    states = []
    actions = []
    rewards = []
    state = env.reset() 
    done = False
    score = 0
    while not done:
        probs = pi(torch.FloatTensor(state).to(device))
        action = torch.multinomial(probs, 1).item()
        next_state, reward, done, info = env.step(action)  # take a random action
        if done:
            reward = -10
            env.close()
        score = score + reward
        
        states.append(state)
        actions.append(action)
        rewards.append(reward)    
        state = next_state
    return states, actions, rewards, score

def G(t):
    G = 0
    for tau in range(t, len(rewards)):
        G = G + gamma**(tau - t) * rewards[tau]
    return G
        
env = gym.make('CartPole-v1')

episode = 0
MAX_EPISODES = 5000
while episode < MAX_EPISODES:  # episode loop
    states, actions, rewards, score = gen_episode()
    loss = 0
    for t in range(0, len(rewards)-1):
        s = states[t+1]
        a = actions[t+1]
        s=torch.FloatTensor(s).to(device)
        a=torch.tensor(a, dtype=torch.int8).to(device)
        loss = loss - G(t) * (gamma**t)* pi(s)[a].log()
   
    pi_optimizer.zero_grad()
    loss.backward()
    pi_optimizer.step()   
    clear_output(wait=True)
    print('episode: {}, reward: {:.1f}'.format(episode, score))
    episode = episode + 1
    



episode: 0, reward: 11.0
episode: 1, reward: 3.0
episode: 2, reward: 4.0
episode: 3, reward: 10.0
episode: 4, reward: 10.0
episode: 5, reward: 1.0
episode: 6, reward: 32.0
episode: 7, reward: 70.0
episode: 8, reward: 5.0
episode: 9, reward: 23.0
episode: 10, reward: 3.0
episode: 11, reward: 9.0
episode: 12, reward: 2.0
episode: 13, reward: 7.0
episode: 14, reward: 7.0
episode: 15, reward: 6.0
episode: 16, reward: 22.0
episode: 17, reward: 14.0
episode: 18, reward: 21.0
episode: 19, reward: 8.0
episode: 20, reward: 21.0
episode: 21, reward: 4.0
episode: 22, reward: 6.0
episode: 23, reward: 32.0
episode: 24, reward: 40.0
episode: 25, reward: 3.0
episode: 26, reward: 0.0
episode: 27, reward: 3.0
episode: 28, reward: 7.0
episode: 29, reward: 3.0
episode: 30, reward: 3.0
episode: 31, reward: 1.0
episode: 32, reward: -1.0
episode: 33, reward: 14.0
episode: 34, reward: 8.0
episode: 35, reward: 37.0
episode: 36, reward: 3.0
episode: 37, reward: 24.0
episode: 38, reward: 41.0
episode: 39, rewar

episode: 316, reward: 51.0
episode: 317, reward: 2.0
episode: 318, reward: 8.0
episode: 319, reward: 33.0
episode: 320, reward: 9.0
episode: 321, reward: 16.0
episode: 322, reward: 4.0
episode: 323, reward: 9.0
episode: 324, reward: 42.0
episode: 325, reward: 16.0
episode: 326, reward: 16.0
episode: 327, reward: 48.0
episode: 328, reward: 26.0
episode: 329, reward: 7.0
episode: 330, reward: 47.0
episode: 331, reward: 17.0
episode: 332, reward: 41.0
episode: 333, reward: 60.0
episode: 334, reward: 3.0
episode: 335, reward: 10.0
episode: 336, reward: 9.0
episode: 337, reward: 19.0
episode: 338, reward: 16.0
episode: 339, reward: 35.0
episode: 340, reward: 111.0
episode: 341, reward: 39.0
episode: 342, reward: 3.0
episode: 343, reward: 65.0
episode: 344, reward: 43.0
episode: 345, reward: 27.0
episode: 346, reward: 4.0
episode: 347, reward: 51.0
episode: 348, reward: 24.0
episode: 349, reward: 6.0
episode: 350, reward: 11.0
episode: 351, reward: 40.0
episode: 352, reward: 14.0
episode: 35

episode: 624, reward: 43.0
episode: 625, reward: 44.0
episode: 626, reward: 13.0
episode: 627, reward: 62.0
episode: 628, reward: 12.0
episode: 629, reward: 31.0
episode: 630, reward: 36.0
episode: 631, reward: 12.0
episode: 632, reward: 41.0
episode: 633, reward: 44.0
episode: 634, reward: 21.0
episode: 635, reward: 118.0
episode: 636, reward: 20.0
episode: 637, reward: 10.0
episode: 638, reward: 22.0
episode: 639, reward: 43.0
episode: 640, reward: 44.0
episode: 641, reward: 11.0
episode: 642, reward: 42.0
episode: 643, reward: 36.0
episode: 644, reward: 21.0
episode: 645, reward: 8.0
episode: 646, reward: 37.0
episode: 647, reward: 29.0
episode: 648, reward: 13.0
episode: 649, reward: 40.0
episode: 650, reward: 21.0
episode: 651, reward: 14.0
episode: 652, reward: 19.0
episode: 653, reward: 96.0
episode: 654, reward: 99.0
episode: 655, reward: 31.0
episode: 656, reward: 26.0
episode: 657, reward: 21.0
episode: 658, reward: 22.0
episode: 659, reward: 48.0
episode: 660, reward: 23.0
e

episode: 929, reward: 25.0
episode: 930, reward: 17.0
episode: 931, reward: 29.0
episode: 932, reward: 18.0
episode: 933, reward: 39.0
episode: 934, reward: 47.0
episode: 935, reward: 62.0
episode: 936, reward: 63.0
episode: 937, reward: 21.0
episode: 938, reward: 19.0
episode: 939, reward: 22.0
episode: 940, reward: 41.0
episode: 941, reward: 36.0
episode: 942, reward: 53.0
episode: 943, reward: 37.0
episode: 944, reward: 38.0
episode: 945, reward: 16.0
episode: 946, reward: 33.0
episode: 947, reward: 35.0
episode: 948, reward: 54.0
episode: 949, reward: 64.0
episode: 950, reward: 20.0
episode: 951, reward: 36.0
episode: 952, reward: 57.0
episode: 953, reward: 49.0
episode: 954, reward: 18.0
episode: 955, reward: 14.0
episode: 956, reward: -1.0
episode: 957, reward: 11.0
episode: 958, reward: 13.0
episode: 959, reward: 21.0
episode: 960, reward: 18.0
episode: 961, reward: 32.0
episode: 962, reward: 43.0
episode: 963, reward: 28.0
episode: 964, reward: 119.0
episode: 965, reward: 33.0


episode: 1224, reward: 79.0
episode: 1225, reward: 66.0
episode: 1226, reward: 66.0
episode: 1227, reward: 35.0
episode: 1228, reward: 51.0
episode: 1229, reward: 130.0
episode: 1230, reward: 35.0
episode: 1231, reward: 51.0
episode: 1232, reward: 70.0
episode: 1233, reward: 126.0
episode: 1234, reward: 62.0
episode: 1235, reward: 54.0
episode: 1236, reward: 50.0
episode: 1237, reward: 58.0
episode: 1238, reward: 40.0
episode: 1239, reward: 54.0
episode: 1240, reward: 78.0
episode: 1241, reward: 9.0
episode: 1242, reward: 60.0
episode: 1243, reward: 89.0
episode: 1244, reward: 56.0
episode: 1245, reward: 33.0
episode: 1246, reward: 105.0
episode: 1247, reward: 44.0
episode: 1248, reward: 107.0
episode: 1249, reward: 95.0
episode: 1250, reward: 17.0
episode: 1251, reward: 77.0
episode: 1252, reward: 36.0
episode: 1253, reward: 80.0
episode: 1254, reward: 65.0
episode: 1255, reward: 3.0
episode: 1256, reward: -1.0
episode: 1257, reward: 37.0
episode: 1258, reward: 87.0
episode: 1259, rew

episode: 1514, reward: 193.0
episode: 1515, reward: 134.0
episode: 1516, reward: 29.0
episode: 1517, reward: 156.0
episode: 1518, reward: 144.0
episode: 1519, reward: 49.0
episode: 1520, reward: 161.0
episode: 1521, reward: 96.0
episode: 1522, reward: 114.0
episode: 1523, reward: 225.0
episode: 1524, reward: 185.0
episode: 1525, reward: 180.0
episode: 1526, reward: 107.0
episode: 1527, reward: 113.0
episode: 1528, reward: 149.0
episode: 1529, reward: 145.0
episode: 1530, reward: 158.0
episode: 1531, reward: 43.0
episode: 1532, reward: 204.0
episode: 1533, reward: 145.0
episode: 1534, reward: 77.0
episode: 1535, reward: 89.0
episode: 1536, reward: 103.0
episode: 1537, reward: 151.0
episode: 1538, reward: 29.0
episode: 1539, reward: 42.0
episode: 1540, reward: 74.0
episode: 1541, reward: 110.0
episode: 1542, reward: 180.0
episode: 1543, reward: 142.0
episode: 1544, reward: 61.0
episode: 1545, reward: 145.0
episode: 1546, reward: 109.0
episode: 1547, reward: 148.0
episode: 1548, reward: 4

episode: 1800, reward: 279.0
episode: 1801, reward: 226.0
episode: 1802, reward: 152.0
episode: 1803, reward: 104.0
episode: 1804, reward: 333.0
episode: 1805, reward: 421.0
episode: 1806, reward: 281.0
episode: 1807, reward: 308.0
episode: 1808, reward: 251.0
episode: 1809, reward: 395.0
episode: 1810, reward: 278.0
episode: 1811, reward: 138.0
episode: 1812, reward: 159.0
episode: 1813, reward: 76.0
episode: 1814, reward: 489.0
episode: 1815, reward: 150.0
episode: 1816, reward: 202.0
episode: 1817, reward: 489.0
episode: 1818, reward: 81.0
episode: 1819, reward: 139.0
episode: 1820, reward: 117.0
episode: 1821, reward: 124.0
episode: 1822, reward: 111.0
episode: 1823, reward: 489.0
episode: 1824, reward: 357.0
episode: 1825, reward: 336.0
episode: 1826, reward: 248.0
episode: 1827, reward: 317.0
episode: 1828, reward: 131.0
episode: 1829, reward: 300.0
episode: 1830, reward: 489.0
episode: 1831, reward: 258.0
episode: 1832, reward: 90.0
episode: 1833, reward: 412.0
episode: 1834, re

episode: 2084, reward: 467.0
episode: 2085, reward: 292.0
episode: 2086, reward: 318.0
episode: 2087, reward: 176.0
episode: 2088, reward: 162.0
episode: 2089, reward: 131.0
episode: 2090, reward: 134.0
episode: 2091, reward: 489.0
episode: 2092, reward: 61.0
episode: 2093, reward: 226.0
episode: 2094, reward: 247.0
episode: 2095, reward: 248.0
episode: 2096, reward: 124.0
episode: 2097, reward: 313.0
episode: 2098, reward: 154.0
episode: 2099, reward: 434.0
episode: 2100, reward: 323.0
episode: 2101, reward: 209.0
episode: 2102, reward: 183.0
episode: 2103, reward: 176.0
episode: 2104, reward: 217.0
episode: 2105, reward: 489.0
episode: 2106, reward: 258.0
episode: 2107, reward: 224.0
episode: 2108, reward: 260.0
episode: 2109, reward: 353.0
episode: 2110, reward: 227.0
episode: 2111, reward: 171.0
episode: 2112, reward: 308.0
episode: 2113, reward: 195.0
episode: 2114, reward: 232.0
episode: 2115, reward: 227.0
episode: 2116, reward: 166.0
episode: 2117, reward: 304.0
episode: 2118, 

episode: 2368, reward: 489.0
episode: 2369, reward: 454.0
episode: 2370, reward: 489.0
episode: 2371, reward: 489.0
episode: 2372, reward: 489.0
episode: 2373, reward: 489.0
episode: 2374, reward: 489.0
episode: 2375, reward: 211.0
episode: 2376, reward: 6.0
episode: 2377, reward: 489.0
episode: 2378, reward: 360.0
episode: 2379, reward: 426.0
episode: 2380, reward: 376.0
episode: 2381, reward: 489.0
episode: 2382, reward: 489.0
episode: 2383, reward: 145.0
episode: 2384, reward: 330.0
episode: 2385, reward: 298.0
episode: 2386, reward: 474.0
episode: 2387, reward: 489.0
episode: 2388, reward: 28.0
episode: 2389, reward: 232.0
episode: 2390, reward: 381.0
episode: 2391, reward: 489.0
episode: 2392, reward: 19.0
episode: 2393, reward: 489.0
episode: 2394, reward: 489.0
episode: 2395, reward: 319.0
episode: 2396, reward: 489.0
episode: 2397, reward: 82.0
episode: 2398, reward: 196.0
episode: 2399, reward: 489.0
episode: 2400, reward: 379.0
episode: 2401, reward: 331.0
episode: 2402, rewa

episode: 2651, reward: 489.0
episode: 2652, reward: 489.0
episode: 2653, reward: 489.0
episode: 2654, reward: 489.0
episode: 2655, reward: 489.0
episode: 2656, reward: 385.0
episode: 2657, reward: 489.0
episode: 2658, reward: 489.0
episode: 2659, reward: 387.0
episode: 2660, reward: 391.0
episode: 2661, reward: 230.0
episode: 2662, reward: 489.0
episode: 2663, reward: 489.0
episode: 2664, reward: 489.0
episode: 2665, reward: 489.0
episode: 2666, reward: 489.0
episode: 2667, reward: 489.0
episode: 2668, reward: 489.0
episode: 2669, reward: 489.0
episode: 2670, reward: 142.0
episode: 2671, reward: 489.0
episode: 2672, reward: 175.0
episode: 2673, reward: 405.0
episode: 2674, reward: 330.0
episode: 2675, reward: 489.0
episode: 2676, reward: 489.0
episode: 2677, reward: 464.0
episode: 2678, reward: 489.0
episode: 2679, reward: 419.0
episode: 2680, reward: 89.0
episode: 2681, reward: 489.0
episode: 2682, reward: 489.0
episode: 2683, reward: 210.0
episode: 2684, reward: 322.0
episode: 2685, 

episode: 2936, reward: 489.0
episode: 2937, reward: 425.0
episode: 2938, reward: 333.0
episode: 2939, reward: 489.0
episode: 2940, reward: 489.0
episode: 2941, reward: 489.0
episode: 2942, reward: 489.0
episode: 2943, reward: 489.0
episode: 2944, reward: 307.0
episode: 2945, reward: 489.0
episode: 2946, reward: 489.0
episode: 2947, reward: 489.0
episode: 2948, reward: 102.0
episode: 2949, reward: 489.0
episode: 2950, reward: 233.0
episode: 2951, reward: 489.0
episode: 2952, reward: 489.0
episode: 2953, reward: 489.0
episode: 2954, reward: 489.0
episode: 2955, reward: 403.0
episode: 2956, reward: 489.0
episode: 2957, reward: 256.0
episode: 2958, reward: 450.0
episode: 2959, reward: 211.0
episode: 2960, reward: 400.0
episode: 2961, reward: 489.0
episode: 2962, reward: 434.0
episode: 2963, reward: 489.0
episode: 2964, reward: 489.0
episode: 2965, reward: 489.0
episode: 2966, reward: 259.0
episode: 2967, reward: 489.0
episode: 2968, reward: 489.0
episode: 2969, reward: 335.0
episode: 2970,

episode: 3219, reward: 225.0
episode: 3220, reward: 489.0
episode: 3221, reward: 265.0
episode: 3222, reward: 454.0
episode: 3223, reward: 489.0
episode: 3224, reward: 489.0
episode: 3225, reward: 489.0
episode: 3226, reward: 124.0
episode: 3227, reward: 489.0
episode: 3228, reward: 102.0
episode: 3229, reward: 322.0
episode: 3230, reward: 336.0
episode: 3231, reward: 489.0
episode: 3232, reward: 455.0
episode: 3233, reward: 404.0
episode: 3234, reward: 489.0
episode: 3235, reward: 183.0
episode: 3236, reward: 393.0
episode: 3237, reward: 396.0
episode: 3238, reward: 489.0
episode: 3239, reward: 489.0
episode: 3240, reward: 489.0
episode: 3241, reward: 489.0
episode: 3242, reward: 489.0
episode: 3243, reward: 254.0
episode: 3244, reward: 168.0
episode: 3245, reward: 73.0
episode: 3246, reward: 297.0
episode: 3247, reward: 489.0
episode: 3248, reward: 489.0
episode: 3249, reward: 141.0
episode: 3250, reward: 489.0
episode: 3251, reward: 489.0
episode: 3252, reward: 489.0
episode: 3253, 

episode: 3502, reward: 489.0
episode: 3503, reward: 489.0
episode: 3504, reward: 295.0
episode: 3505, reward: 296.0
episode: 3506, reward: 489.0
episode: 3507, reward: 376.0
episode: 3508, reward: 489.0
episode: 3509, reward: 489.0
episode: 3510, reward: 489.0
episode: 3511, reward: 489.0
episode: 3512, reward: 489.0
episode: 3513, reward: 489.0
episode: 3514, reward: 489.0
episode: 3515, reward: 489.0
episode: 3516, reward: 437.0
episode: 3517, reward: 489.0
episode: 3518, reward: 489.0
episode: 3519, reward: 489.0
episode: 3520, reward: 489.0
episode: 3521, reward: 489.0
episode: 3522, reward: 489.0
episode: 3523, reward: 489.0
episode: 3524, reward: 489.0
episode: 3525, reward: 489.0
episode: 3526, reward: 489.0
episode: 3527, reward: 489.0
episode: 3528, reward: 489.0
episode: 3529, reward: 489.0
episode: 3530, reward: 266.0
episode: 3531, reward: 334.0
episode: 3532, reward: 489.0
episode: 3533, reward: 489.0
episode: 3534, reward: 52.0
episode: 3535, reward: 182.0
episode: 3536, 

episode: 3785, reward: 489.0
episode: 3786, reward: 489.0
episode: 3787, reward: 489.0
episode: 3788, reward: 489.0
episode: 3789, reward: 489.0
episode: 3790, reward: 489.0
episode: 3791, reward: 489.0
episode: 3792, reward: 489.0
episode: 3793, reward: 489.0
episode: 3794, reward: 489.0
episode: 3795, reward: 489.0
episode: 3796, reward: 489.0
episode: 3797, reward: 489.0
episode: 3798, reward: 489.0
episode: 3799, reward: 489.0
episode: 3800, reward: 489.0
episode: 3801, reward: 489.0
episode: 3802, reward: 489.0
episode: 3803, reward: 489.0
episode: 3804, reward: 404.0
episode: 3805, reward: 489.0
episode: 3806, reward: 241.0
episode: 3807, reward: 489.0
episode: 3808, reward: 489.0
episode: 3809, reward: 489.0
episode: 3810, reward: 466.0
episode: 3811, reward: 489.0
episode: 3812, reward: 489.0
episode: 3813, reward: 489.0
episode: 3814, reward: 489.0
episode: 3815, reward: 489.0
episode: 3816, reward: 489.0
episode: 3817, reward: 489.0
episode: 3818, reward: 489.0
episode: 3819,

episode: 4068, reward: 489.0
episode: 4069, reward: 489.0
episode: 4070, reward: 489.0
episode: 4071, reward: 397.0
episode: 4072, reward: 489.0
episode: 4073, reward: 414.0
episode: 4074, reward: 457.0
episode: 4075, reward: 215.0
episode: 4076, reward: 489.0
episode: 4077, reward: 467.0
episode: 4078, reward: 489.0
episode: 4079, reward: 489.0
episode: 4080, reward: 489.0
episode: 4081, reward: 263.0
episode: 4082, reward: 489.0
episode: 4083, reward: 413.0
episode: 4084, reward: 447.0
episode: 4085, reward: 216.0
episode: 4086, reward: 300.0
episode: 4087, reward: 489.0
episode: 4088, reward: 489.0
episode: 4089, reward: 489.0
episode: 4090, reward: 489.0
episode: 4091, reward: 370.0
episode: 4092, reward: 489.0
episode: 4093, reward: 488.0
episode: 4094, reward: 489.0
episode: 4095, reward: 489.0
episode: 4096, reward: 465.0
episode: 4097, reward: 296.0
episode: 4098, reward: 257.0
episode: 4099, reward: 489.0
episode: 4100, reward: 489.0
episode: 4101, reward: 478.0
episode: 4102,

episode: 4351, reward: 489.0
episode: 4352, reward: 489.0
episode: 4353, reward: 489.0
episode: 4354, reward: 489.0
episode: 4355, reward: 489.0
episode: 4356, reward: 322.0
episode: 4357, reward: 489.0
episode: 4358, reward: 47.0
episode: 4359, reward: 489.0
episode: 4360, reward: 489.0
episode: 4361, reward: 489.0
episode: 4362, reward: 489.0
episode: 4363, reward: 489.0
episode: 4364, reward: 489.0
episode: 4365, reward: 324.0
episode: 4366, reward: 489.0
episode: 4367, reward: 489.0
episode: 4368, reward: 360.0
episode: 4369, reward: 489.0
episode: 4370, reward: 489.0
episode: 4371, reward: 489.0
episode: 4372, reward: 489.0
episode: 4373, reward: 489.0
episode: 4374, reward: 489.0
episode: 4375, reward: 489.0
episode: 4376, reward: 489.0
episode: 4377, reward: 489.0
episode: 4378, reward: 489.0
episode: 4379, reward: 458.0
episode: 4380, reward: 489.0
episode: 4381, reward: 489.0
episode: 4382, reward: 489.0
episode: 4383, reward: 489.0
episode: 4384, reward: 489.0
episode: 4385, 

episode: 4634, reward: 333.0
episode: 4635, reward: 489.0
episode: 4636, reward: 489.0
episode: 4637, reward: 489.0
episode: 4638, reward: 489.0
episode: 4639, reward: 489.0
episode: 4640, reward: 489.0
episode: 4641, reward: 489.0
episode: 4642, reward: 489.0
episode: 4643, reward: 489.0
episode: 4644, reward: 406.0
episode: 4645, reward: 489.0
episode: 4646, reward: 489.0
episode: 4647, reward: 489.0
episode: 4648, reward: 489.0
episode: 4649, reward: 489.0
episode: 4650, reward: 489.0
episode: 4651, reward: 489.0
episode: 4652, reward: 456.0
episode: 4653, reward: 489.0
episode: 4654, reward: 489.0
episode: 4655, reward: 489.0
episode: 4656, reward: 489.0
episode: 4657, reward: 489.0
episode: 4658, reward: 489.0
episode: 4659, reward: 489.0
episode: 4660, reward: 489.0
episode: 4661, reward: 489.0
episode: 4662, reward: 489.0
episode: 4663, reward: 489.0
episode: 4664, reward: 489.0
episode: 4665, reward: 489.0
episode: 4666, reward: 489.0
episode: 4667, reward: 489.0
episode: 4668,

episode: 4917, reward: 489.0
episode: 4918, reward: 489.0
episode: 4919, reward: 489.0
episode: 4920, reward: 270.0
episode: 4921, reward: 62.0
episode: 4922, reward: 489.0
episode: 4923, reward: 489.0
episode: 4924, reward: 489.0
episode: 4925, reward: 489.0
episode: 4926, reward: 489.0
episode: 4927, reward: 489.0
episode: 4928, reward: 489.0
episode: 4929, reward: 489.0
episode: 4930, reward: 489.0
episode: 4931, reward: 489.0
episode: 4932, reward: 489.0
episode: 4933, reward: 489.0
episode: 4934, reward: 489.0
episode: 4935, reward: 319.0
episode: 4936, reward: 489.0
episode: 4937, reward: 489.0
episode: 4938, reward: 489.0
episode: 4939, reward: 489.0
episode: 4940, reward: 489.0
episode: 4941, reward: 489.0
episode: 4942, reward: 489.0
episode: 4943, reward: 489.0
episode: 4944, reward: 489.0
episode: 4945, reward: 489.0
episode: 4946, reward: 489.0
episode: 4947, reward: 489.0
episode: 4948, reward: 489.0
episode: 4949, reward: 489.0
episode: 4950, reward: 264.0
episode: 4951, 

In [6]:
# TEST   
episode = 0
state = env.reset()    
while episode < 5:  # episode loop
    env.render()
    probs = pi(torch.FloatTensor(state).to(device))     
    action = torch.multinomial(probs, 1).item()
    next_state, reward, done, info = env.step(action)  # take a random action
    state = next_state
    sleep(0.01)
    if done:
        reward = -10
        env.close()
    score = score + reward
    if done:
        episode = episode + 1
        print('Episode: {} Score: {}'.format(episode, score))
        state = env.reset()
env.close()       


Episode: 1 Score: 500.0
Episode: 2 Score: 585.0
Episode: 3 Score: 1074.0
Episode: 4 Score: 1563.0
Episode: 5 Score: 2036.0
