In [1]:
%pip install moviepy
import gym
import random
import math
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
from moviepy.editor import ImageSequenceClip

# hyper parameters
EPISODES = 2000  # number of episodes
EPS_START = 0.9  # e-greedy threshold start value
EPS_END = 0.05  # e-greedy threshold end value
EPS_DECAY = 200  # e-greedy threshold decay
GAMMA = 0.75  # Q-learning discount factor
LR = 0.001  # NN optimizer learning rate
HIDDEN_LAYER = 164  # NN hidden layer size
BATCH_SIZE = 64  # Q-learning batch size

# if gpu is to be used
use_cuda = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if use_cuda else torch.ByteTensor
Tensor = FloatTensor


class ReplayMemory:
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []

    def push(self, transition):
        self.memory.append(transition)
        if len(self.memory) > self.capacity:
            del self.memory[0]

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

class Network(nn.Module):
    def __init__(self):
        nn.Module.__init__(self)
        self.l1 = nn.Linear(4, HIDDEN_LAYER)
        self.l2 = nn.Linear(HIDDEN_LAYER, 2)

    def forward(self, x):
        x = F.relu(self.l1(x))
        x = self.l2(x)
        return x

env = gym.make('CartPole-v0').unwrapped

model = Network()
if use_cuda:
    model.cuda()
memory = ReplayMemory(10000)
optimizer = optim.Adam(model.parameters(), LR)
steps_done = 0
ed = []

# def plot_durations(d):
#     plt.figure(2)
#     plt.clf()
#     plt.title('Training...')
#     plt.xlabel('Episode')
#     plt.ylabel('Duration')
#     plt.plot(d)
#
#     plt.savefig('test2.png')

def select_action(state, train=True):
    global steps_done
    sample = random.random()
    eps_threshold = EPS_END + (EPS_START - EPS_END) * math.exp(-1. * steps_done / EPS_DECAY)
    steps_done += 1
    if train:
        if sample > eps_threshold:
            return model(Variable(state, volatile=True).type(FloatTensor)).data.max(1)[1].view(1, 1)
        else:
            return LongTensor([[random.randrange(2)]])
    else:
        return model(Variable(state, volatile=True).type(FloatTensor)).data.max(1)[1].view(1, 1)

def run_episode(episode, env):
    state = env.reset()
    steps = 0
    while True:
        # env.render()
        action = select_action(FloatTensor([state]))
        next_state, reward, done, _ = env.step(action[0, 0].item())

        # negative reward when attempt ends
        if done:
            if steps < 30:
                reward -= 10
            else:
                reward = -1
        if steps > 100:
            reward += 1
        if steps > 200:
            reward += 1
        if steps > 300:
            reward += 1

        memory.push((FloatTensor([state]),
                     action,  # action is already a tensor
                     FloatTensor([next_state]),
                     FloatTensor([reward])))

        learn()

        state = next_state
        steps += 1

        if done or steps >= 1000:
            ed.append(steps)
            print("[Episode {:>5}]  steps: {:>5}".format(episode, steps))
            if sum(ed[-10:])/10 > 800:
                return True
            break
    return False

def learn():
    if len(memory) < BATCH_SIZE:
        return

    # random transition batch is taken from experience replay memory
    transitions = memory.sample(BATCH_SIZE)
    batch_state, batch_action, batch_next_state, batch_reward = zip(*transitions)

    batch_state = Variable(torch.cat(batch_state))
    batch_action = Variable(torch.cat(batch_action))
    batch_reward = Variable(torch.cat(batch_reward))
    batch_next_state = Variable(torch.cat(batch_next_state))

    # current Q values are estimated by NN for all actions
    current_q_values = model(batch_state).gather(1, batch_action)
    # expected Q values are estimated from actions which gives maximum Q value
    max_next_q_values = model(batch_next_state).detach().max(1)[0]
    expected_q_values = batch_reward + (GAMMA * max_next_q_values)

    # loss is measured from error between current and newly expected Q values
    loss = F.smooth_l1_loss(current_q_values, expected_q_values)

    # backpropagation of loss to NN
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

def botPlay():
    state = env.reset()
    steps = 0
    frames = []
    while True:
        frame = env.render(mode='rgb_array')
        frames.append(frame)
        action = select_action(FloatTensor([state]))
        next_state, reward, done, _ = env.step(action[0, 0])

        state = next_state
        steps += 1

        if done or steps >= 1000:
            break

    clip = ImageSequenceClip(frames, fps=20)
    clip.write_gif('test2.gif', fps=20)

for e in range(EPISODES):
    complete = run_episode(e, env)

    if complete:
        print('complete...!')
        break


Looking in indexes: https://pypi.douban.com/simple/
You should consider upgrading via the '/home/dell/myProjects/dqn/dqn_pytorch_cartpole/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.
[Episode     0]  steps:    14
[Episode     1]  steps:    11
[Episode     2]  steps:    11
[Episode     3]  steps:    19
[Episode     4]  steps:    11
[Episode     5]  steps:    22
[Episode     6]  steps:    17
[Episode     7]  steps:    12
[Episode     8]  steps:    16
[Episode     9]  steps:    11
[Episode    10]  steps:    18
[Episode    11]  steps:    16
[Episode    12]  steps:    14
[Episode    13]  steps:     8
[Episode    14]  steps:    12
[Episode    15]  steps:    12
[Episode    16]  steps:    11
[Episode    17]  steps:     9
[Episode    18]  steps:     9
[Episode    19]  steps:     9
[Episode    20]  steps:    10
[Episode    21]  steps:    11
[Episode    22]  steps:     9
[Episode    23]  steps:     9
[Episode    24]  steps

[Episode   204]  steps:    10
[Episode   205]  steps:    27
[Episode   206]  steps:    14
[Episode   207]  steps:    11
[Episode   208]  steps:    12
[Episode   209]  steps:    15
[Episode   210]  steps:    25
[Episode   211]  steps:     8
[Episode   212]  steps:    14
[Episode   213]  steps:     9
[Episode   214]  steps:    25
[Episode   215]  steps:    10
[Episode   216]  steps:    17
[Episode   217]  steps:    13
[Episode   218]  steps:    15
[Episode   219]  steps:    10
[Episode   220]  steps:    29
[Episode   221]  steps:    14
[Episode   222]  steps:    12
[Episode   223]  steps:    16
[Episode   224]  steps:    17
[Episode   225]  steps:    18
[Episode   226]  steps:    10
[Episode   227]  steps:    31
[Episode   228]  steps:    12
[Episode   229]  steps:    11
[Episode   230]  steps:    17
[Episode   231]  steps:    24
[Episode   232]  steps:    11
[Episode   233]  steps:    13
[Episode   234]  steps:    16
[Episode   235]  steps:     8
[Episode   236]  steps:    13
[Episode  

[Episode   483]  steps:     9
[Episode   484]  steps:    10
[Episode   485]  steps:    10
[Episode   486]  steps:     8
[Episode   487]  steps:    27
[Episode   488]  steps:     9
[Episode   489]  steps:    12
[Episode   490]  steps:    16
[Episode   491]  steps:    23
[Episode   492]  steps:     9
[Episode   493]  steps:    25
[Episode   494]  steps:    12
[Episode   495]  steps:     9
[Episode   496]  steps:    10
[Episode   497]  steps:    14
[Episode   498]  steps:    34
[Episode   499]  steps:    10
[Episode   500]  steps:    12
[Episode   501]  steps:    47
[Episode   502]  steps:     9
[Episode   503]  steps:     9
[Episode   504]  steps:    15
[Episode   505]  steps:    12
[Episode   506]  steps:    10
[Episode   507]  steps:    39
[Episode   508]  steps:     8
[Episode   509]  steps:    13
[Episode   510]  steps:     8
[Episode   511]  steps:    27
[Episode   512]  steps:    16
[Episode   513]  steps:    16
[Episode   514]  steps:    10
[Episode   515]  steps:    28
[Episode  

[Episode   760]  steps:    18
[Episode   761]  steps:    10
[Episode   762]  steps:    18
[Episode   763]  steps:     9
[Episode   764]  steps:    33
[Episode   765]  steps:     8
[Episode   766]  steps:    13
[Episode   767]  steps:     9
[Episode   768]  steps:     9
[Episode   769]  steps:     9
[Episode   770]  steps:     9
[Episode   771]  steps:     9
[Episode   772]  steps:     9
[Episode   773]  steps:    28
[Episode   774]  steps:    11
[Episode   775]  steps:     9
[Episode   776]  steps:    14
[Episode   777]  steps:    20
[Episode   778]  steps:    10
[Episode   779]  steps:    13
[Episode   780]  steps:    26
[Episode   781]  steps:     9
[Episode   782]  steps:     9
[Episode   783]  steps:    10
[Episode   784]  steps:    21
[Episode   785]  steps:    35
[Episode   786]  steps:    38
[Episode   787]  steps:    14
[Episode   788]  steps:    18
[Episode   789]  steps:    13
[Episode   790]  steps:    10
[Episode   791]  steps:    11
[Episode   792]  steps:    16
[Episode  

[Episode  1037]  steps:     8
[Episode  1038]  steps:     9
[Episode  1039]  steps:    20
[Episode  1040]  steps:    18
[Episode  1041]  steps:    22
[Episode  1042]  steps:    15
[Episode  1043]  steps:    13
[Episode  1044]  steps:    18
[Episode  1045]  steps:    10
[Episode  1046]  steps:     9
[Episode  1047]  steps:    10
[Episode  1048]  steps:    26
[Episode  1049]  steps:    28
[Episode  1050]  steps:     9
[Episode  1051]  steps:    20
[Episode  1052]  steps:    14
[Episode  1053]  steps:    15
[Episode  1054]  steps:    10
[Episode  1055]  steps:    31
[Episode  1056]  steps:    38
[Episode  1057]  steps:    10
[Episode  1058]  steps:     9
[Episode  1059]  steps:    18
[Episode  1060]  steps:    15
[Episode  1061]  steps:    10
[Episode  1062]  steps:     9
[Episode  1063]  steps:    29
[Episode  1064]  steps:    32
[Episode  1065]  steps:    10
[Episode  1066]  steps:    18
[Episode  1067]  steps:    10
[Episode  1068]  steps:    18
[Episode  1069]  steps:     8
[Episode  

[Episode  1314]  steps:    11
[Episode  1315]  steps:    13
[Episode  1316]  steps:    30
[Episode  1317]  steps:     9
[Episode  1318]  steps:    13
[Episode  1319]  steps:    17
[Episode  1320]  steps:    10
[Episode  1321]  steps:    11
[Episode  1322]  steps:     8
[Episode  1323]  steps:    14
[Episode  1324]  steps:     9
[Episode  1325]  steps:    10
[Episode  1326]  steps:    24
[Episode  1327]  steps:    12
[Episode  1328]  steps:    10
[Episode  1329]  steps:     8
[Episode  1330]  steps:    15
[Episode  1331]  steps:    17
[Episode  1332]  steps:    15
[Episode  1333]  steps:    11
[Episode  1334]  steps:    27
[Episode  1335]  steps:    30
[Episode  1336]  steps:     8
[Episode  1337]  steps:    11
[Episode  1338]  steps:    33
[Episode  1339]  steps:    16
[Episode  1340]  steps:    19
[Episode  1341]  steps:     9
[Episode  1342]  steps:    12
[Episode  1343]  steps:    23
[Episode  1344]  steps:    41
[Episode  1345]  steps:    16
[Episode  1346]  steps:     9
[Episode  

[Episode  1595]  steps:    15
[Episode  1596]  steps:    15
[Episode  1597]  steps:    10
[Episode  1598]  steps:     9
[Episode  1599]  steps:    18
[Episode  1600]  steps:     9
[Episode  1601]  steps:    11
[Episode  1602]  steps:    17
[Episode  1603]  steps:    20
[Episode  1604]  steps:     9
[Episode  1605]  steps:     9
[Episode  1606]  steps:    20
[Episode  1607]  steps:     8
[Episode  1608]  steps:     9
[Episode  1609]  steps:    29
[Episode  1610]  steps:    10
[Episode  1611]  steps:    15
[Episode  1612]  steps:    13
[Episode  1613]  steps:    11
[Episode  1614]  steps:    27
[Episode  1615]  steps:    25
[Episode  1616]  steps:    10
[Episode  1617]  steps:    11
[Episode  1618]  steps:    10
[Episode  1619]  steps:     8
[Episode  1620]  steps:    10
[Episode  1621]  steps:    18
[Episode  1622]  steps:    11
[Episode  1623]  steps:    10
[Episode  1624]  steps:    17
[Episode  1625]  steps:    27
[Episode  1626]  steps:    11
[Episode  1627]  steps:    10
[Episode  

[Episode  1869]  steps:    20
[Episode  1870]  steps:    23
[Episode  1871]  steps:    10
[Episode  1872]  steps:    18
[Episode  1873]  steps:    16
[Episode  1874]  steps:    11
[Episode  1875]  steps:     9
[Episode  1876]  steps:     9
[Episode  1877]  steps:     9
[Episode  1878]  steps:    24
[Episode  1879]  steps:    19
[Episode  1880]  steps:     9
[Episode  1881]  steps:    10
[Episode  1882]  steps:    44
[Episode  1883]  steps:    10
[Episode  1884]  steps:    20
[Episode  1885]  steps:     9
[Episode  1886]  steps:    10
[Episode  1887]  steps:    26
[Episode  1888]  steps:    11
[Episode  1889]  steps:    17
[Episode  1890]  steps:    16
[Episode  1891]  steps:    12
[Episode  1892]  steps:    22
[Episode  1893]  steps:     9
[Episode  1894]  steps:    11
[Episode  1895]  steps:    12
[Episode  1896]  steps:     9
[Episode  1897]  steps:     9
[Episode  1898]  steps:    10
[Episode  1899]  steps:    18
[Episode  1900]  steps:     9
[Episode  1901]  steps:    27
[Episode  