In [3]:
import numpy as np
import random
from collections import deque
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Dropout, Flatten
from keras.optimizers import Adam


class Agent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95   # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01  # exploration will not decay futher
        self.epsilon_decay = 0.990
        self.learning_rate = 0.0005
        self.model = self._build_model()
        self.weight_backup = 'model_weights.h5'
    '''
    def _build_model(self):
        model = Sequential(),
        model.add(Conv2D(32, kernel_size=8, subsample=(4, 4), activation='relu', padding='same', input_shape=4))  #80*80*4
        model.add(Conv2D(64, kernel_size=4, subsample=(2, 2), activation='relu', padding='same'))
        model.add(Conv2D(64, kernel_size=3, subsample=(1, 1), activation='relu', padding='same'))
        model.add(Flatten())
        model.add(Dense(512, activation='relu'))
        model.add(Dense(2, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        return model
    '''

    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        return model

    def save_model(self):
            self.model.save(self.weight_backup)

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def remember(self, state, action, reward, new_state, done):
        self.memory.append((state, action, reward, new_state, done))

    def memory_replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        Sample = random.sample(self.memory, batch_size)
        for state, action, reward, new_state, done in Sample:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(new_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


In [4]:
import sys
import gym
from gym import wrappers

batch_size = 32
#episodes = sys.argv[1] if len(sys.argv) > 1 else 5000
#env_name = sys.argv[2] if len(sys.argv) > 2 else "Pong-v0"

episodes = 1500
env_name = "CartPole-v0"

env = gym.make(env_name)

env = wrappers.Monitor(env, env_name, force=True)

agent = Agent(env.observation_space.shape[0], env.action_space.n)

for i_episodes in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, env.observation_space.shape[0]])
    index = 0
    done = False
    while not done:
        env.render()
        action = agent.act(state)
        new_state, reward, done, info = env.step(action)
        new_state = np.reshape(new_state, [1, env.observation_space.shape[0]])
        agent.remember(state, action, reward, new_state, done)
        state = new_state
        index += 1
    agent.memory_replay(batch_size)
    if done:
        print("{} episode, score = {} ".format(i_episodes + 1, index + 1))
        agent.save_model()

env.close()
gym.upload(env_name, api_key='sk_WRCITkqmTJKYB9hvBk5tPA')

[2017-08-12 20:18:34,617] Making new env: CartPole-v0
[2017-08-12 20:18:34,630] Clearing 24 monitor files from previous run (because force=True was provided)
[2017-08-12 20:18:34,727] Starting new video recorder writing to /home/z0m6ie/Documents/GitHub/Machine_Learning_Projects/deep-Q-learning/Untitled Folder/CartPole-v0/openaigym.video.1.9886.video000000.mp4
[2017-08-12 20:18:35,480] Starting new video recorder writing to /home/z0m6ie/Documents/GitHub/Machine_Learning_Projects/deep-Q-learning/Untitled Folder/CartPole-v0/openaigym.video.1.9886.video000001.mp4


1 episode, score = 25 
2 episode, score = 37 
3 episode, score = 22 
4 episode, score = 29 
5 episode, score = 15 
6 episode, score = 13 
7 episode, score = 13 
8 episode, score = 26 


[2017-08-12 20:18:37,906] Starting new video recorder writing to /home/z0m6ie/Documents/GitHub/Machine_Learning_Projects/deep-Q-learning/Untitled Folder/CartPole-v0/openaigym.video.1.9886.video000008.mp4


9 episode, score = 14 
10 episode, score = 30 
11 episode, score = 16 
12 episode, score = 13 
13 episode, score = 14 
14 episode, score = 26 
15 episode, score = 14 
16 episode, score = 14 
17 episode, score = 11 
18 episode, score = 16 
19 episode, score = 19 
20 episode, score = 19 
21 episode, score = 16 
22 episode, score = 27 
23 episode, score = 21 
24 episode, score = 13 
25 episode, score = 26 
26 episode, score = 16 
27 episode, score = 26 


[2017-08-12 20:18:41,729] Starting new video recorder writing to /home/z0m6ie/Documents/GitHub/Machine_Learning_Projects/deep-Q-learning/Untitled Folder/CartPole-v0/openaigym.video.1.9886.video000027.mp4


28 episode, score = 16 
29 episode, score = 22 
30 episode, score = 15 
31 episode, score = 18 
32 episode, score = 27 
33 episode, score = 32 
34 episode, score = 21 
35 episode, score = 17 
36 episode, score = 13 
37 episode, score = 10 
38 episode, score = 20 
39 episode, score = 12 
40 episode, score = 21 
41 episode, score = 13 
42 episode, score = 28 
43 episode, score = 26 
44 episode, score = 14 
45 episode, score = 12 
46 episode, score = 11 
47 episode, score = 12 
48 episode, score = 12 
49 episode, score = 15 
50 episode, score = 32 
51 episode, score = 12 
52 episode, score = 11 
53 episode, score = 16 
54 episode, score = 14 
55 episode, score = 12 
56 episode, score = 17 
57 episode, score = 23 
58 episode, score = 20 
59 episode, score = 10 
60 episode, score = 11 
61 episode, score = 15 
62 episode, score = 13 
63 episode, score = 11 


[2017-08-12 20:18:48,854] Starting new video recorder writing to /home/z0m6ie/Documents/GitHub/Machine_Learning_Projects/deep-Q-learning/Untitled Folder/CartPole-v0/openaigym.video.1.9886.video000064.mp4


64 episode, score = 14 
65 episode, score = 11 
66 episode, score = 18 
67 episode, score = 11 
68 episode, score = 12 
69 episode, score = 11 
70 episode, score = 20 
71 episode, score = 11 
72 episode, score = 15 
73 episode, score = 16 
74 episode, score = 11 
75 episode, score = 12 
76 episode, score = 12 
77 episode, score = 11 
78 episode, score = 13 
79 episode, score = 12 
80 episode, score = 14 
81 episode, score = 12 
82 episode, score = 11 
83 episode, score = 11 
84 episode, score = 12 
85 episode, score = 12 
86 episode, score = 11 
87 episode, score = 11 
88 episode, score = 12 
89 episode, score = 14 
90 episode, score = 12 
91 episode, score = 12 
92 episode, score = 12 
93 episode, score = 12 
94 episode, score = 10 
95 episode, score = 19 
96 episode, score = 16 
97 episode, score = 14 
98 episode, score = 13 
99 episode, score = 10 
100 episode, score = 10 
101 episode, score = 12 
102 episode, score = 11 
103 episode, score = 9 
104 episode, score = 10 
105 episode,

[2017-08-12 20:18:59,777] Starting new video recorder writing to /home/z0m6ie/Documents/GitHub/Machine_Learning_Projects/deep-Q-learning/Untitled Folder/CartPole-v0/openaigym.video.1.9886.video000125.mp4


125 episode, score = 13 
126 episode, score = 11 
127 episode, score = 9 
128 episode, score = 11 
129 episode, score = 14 
130 episode, score = 11 
131 episode, score = 10 
132 episode, score = 13 
133 episode, score = 13 
134 episode, score = 12 
135 episode, score = 10 
136 episode, score = 10 
137 episode, score = 11 
138 episode, score = 13 
139 episode, score = 15 
140 episode, score = 13 
141 episode, score = 11 
142 episode, score = 12 
143 episode, score = 10 
144 episode, score = 11 
145 episode, score = 11 
146 episode, score = 11 
147 episode, score = 11 
148 episode, score = 11 
149 episode, score = 10 
150 episode, score = 9 
151 episode, score = 38 
152 episode, score = 27 
153 episode, score = 44 
154 episode, score = 25 
155 episode, score = 53 
156 episode, score = 32 
157 episode, score = 28 
158 episode, score = 25 
159 episode, score = 24 
160 episode, score = 15 
161 episode, score = 18 
162 episode, score = 12 
163 episode, score = 11 
164 episode, score = 14 
16

[2017-08-12 20:19:20,525] Starting new video recorder writing to /home/z0m6ie/Documents/GitHub/Machine_Learning_Projects/deep-Q-learning/Untitled Folder/CartPole-v0/openaigym.video.1.9886.video000216.mp4


216 episode, score = 17 
217 episode, score = 21 
218 episode, score = 20 
219 episode, score = 18 
220 episode, score = 14 
221 episode, score = 14 
222 episode, score = 12 
223 episode, score = 10 
224 episode, score = 10 
225 episode, score = 10 
226 episode, score = 12 
227 episode, score = 12 
228 episode, score = 17 
229 episode, score = 14 
230 episode, score = 12 
231 episode, score = 13 
232 episode, score = 12 
233 episode, score = 12 
234 episode, score = 15 
235 episode, score = 13 
236 episode, score = 18 
237 episode, score = 21 
238 episode, score = 17 
239 episode, score = 19 
240 episode, score = 17 
241 episode, score = 19 
242 episode, score = 18 
243 episode, score = 20 
244 episode, score = 15 
245 episode, score = 32 
246 episode, score = 60 
247 episode, score = 33 
248 episode, score = 19 
249 episode, score = 29 
250 episode, score = 34 
251 episode, score = 30 
252 episode, score = 35 
253 episode, score = 34 
254 episode, score = 24 
255 episode, score = 17 


[2017-08-12 20:20:02,438] Starting new video recorder writing to /home/z0m6ie/Documents/GitHub/Machine_Learning_Projects/deep-Q-learning/Untitled Folder/CartPole-v0/openaigym.video.1.9886.video000343.mp4


343 episode, score = 31 
344 episode, score = 42 
345 episode, score = 44 
346 episode, score = 107 
347 episode, score = 16 
348 episode, score = 70 
349 episode, score = 46 
350 episode, score = 47 
351 episode, score = 31 
352 episode, score = 26 
353 episode, score = 26 
354 episode, score = 15 
355 episode, score = 14 
356 episode, score = 25 
357 episode, score = 22 
358 episode, score = 37 
359 episode, score = 44 
360 episode, score = 40 
361 episode, score = 29 
362 episode, score = 55 
363 episode, score = 65 
364 episode, score = 32 
365 episode, score = 33 
366 episode, score = 46 
367 episode, score = 22 
368 episode, score = 19 
369 episode, score = 18 
370 episode, score = 36 
371 episode, score = 35 
372 episode, score = 44 
373 episode, score = 114 
374 episode, score = 124 
375 episode, score = 137 
376 episode, score = 61 
377 episode, score = 96 
378 episode, score = 41 
379 episode, score = 27 
380 episode, score = 75 
381 episode, score = 73 
382 episode, score = 

[2017-08-12 20:21:29,927] Starting new video recorder writing to /home/z0m6ie/Documents/GitHub/Machine_Learning_Projects/deep-Q-learning/Untitled Folder/CartPole-v0/openaigym.video.1.9886.video000512.mp4


512 episode, score = 16 
513 episode, score = 15 
514 episode, score = 13 
515 episode, score = 13 
516 episode, score = 19 
517 episode, score = 19 
518 episode, score = 20 
519 episode, score = 24 
520 episode, score = 17 
521 episode, score = 18 
522 episode, score = 34 
523 episode, score = 38 
524 episode, score = 43 
525 episode, score = 62 
526 episode, score = 62 
527 episode, score = 46 
528 episode, score = 43 
529 episode, score = 49 
530 episode, score = 31 
531 episode, score = 38 
532 episode, score = 60 
533 episode, score = 90 
534 episode, score = 144 
535 episode, score = 166 
536 episode, score = 87 
537 episode, score = 118 
538 episode, score = 75 
539 episode, score = 201 
540 episode, score = 78 
541 episode, score = 201 
542 episode, score = 83 
543 episode, score = 40 
544 episode, score = 37 
545 episode, score = 94 
546 episode, score = 68 
547 episode, score = 101 
548 episode, score = 85 
549 episode, score = 130 
550 episode, score = 153 
551 episode, scor

[2017-08-12 20:23:37,465] Starting new video recorder writing to /home/z0m6ie/Documents/GitHub/Machine_Learning_Projects/deep-Q-learning/Untitled Folder/CartPole-v0/openaigym.video.1.9886.video000729.mp4


729 episode, score = 18 
730 episode, score = 14 
731 episode, score = 17 
732 episode, score = 15 
733 episode, score = 22 
734 episode, score = 34 
735 episode, score = 39 
736 episode, score = 23 
737 episode, score = 20 
738 episode, score = 28 
739 episode, score = 35 
740 episode, score = 47 
741 episode, score = 47 
742 episode, score = 53 
743 episode, score = 70 
744 episode, score = 55 
745 episode, score = 68 
746 episode, score = 64 
747 episode, score = 51 
748 episode, score = 81 
749 episode, score = 196 
750 episode, score = 71 
751 episode, score = 96 
752 episode, score = 201 
753 episode, score = 81 
754 episode, score = 120 
755 episode, score = 92 
756 episode, score = 123 
757 episode, score = 104 
758 episode, score = 94 
759 episode, score = 86 
760 episode, score = 82 
761 episode, score = 201 
762 episode, score = 94 
763 episode, score = 201 
764 episode, score = 90 
765 episode, score = 108 
766 episode, score = 115 
767 episode, score = 75 
768 episode, sco

[2017-08-12 20:27:08,232] Starting new video recorder writing to /home/z0m6ie/Documents/GitHub/Machine_Learning_Projects/deep-Q-learning/Untitled Folder/CartPole-v0/openaigym.video.1.9886.video001000.mp4


1000 episode, score = 86 
1001 episode, score = 41 
1002 episode, score = 45 
1003 episode, score = 40 
1004 episode, score = 23 
1005 episode, score = 33 
1006 episode, score = 73 
1007 episode, score = 66 
1008 episode, score = 26 
1009 episode, score = 43 
1010 episode, score = 70 
1011 episode, score = 33 
1012 episode, score = 33 
1013 episode, score = 34 
1014 episode, score = 31 
1015 episode, score = 25 
1016 episode, score = 70 
1017 episode, score = 39 
1018 episode, score = 38 
1019 episode, score = 50 
1020 episode, score = 46 
1021 episode, score = 42 
1022 episode, score = 57 
1023 episode, score = 37 
1024 episode, score = 29 
1025 episode, score = 40 
1026 episode, score = 32 
1027 episode, score = 38 
1028 episode, score = 35 
1029 episode, score = 45 
1030 episode, score = 36 
1031 episode, score = 25 
1032 episode, score = 20 
1033 episode, score = 16 
1034 episode, score = 15 
1035 episode, score = 17 
1036 episode, score = 14 
1037 episode, score = 13 
1038 episode

1309 episode, score = 190 
1310 episode, score = 197 
1311 episode, score = 195 
1312 episode, score = 201 
1313 episode, score = 200 
1314 episode, score = 201 
1315 episode, score = 201 
1316 episode, score = 201 
1317 episode, score = 201 
1318 episode, score = 201 
1319 episode, score = 182 
1320 episode, score = 189 
1321 episode, score = 201 
1322 episode, score = 201 
1323 episode, score = 198 
1324 episode, score = 185 
1325 episode, score = 200 
1326 episode, score = 201 
1327 episode, score = 201 
1328 episode, score = 201 
1329 episode, score = 183 
1330 episode, score = 201 
1331 episode, score = 201 
1332 episode, score = 201 
1333 episode, score = 201 
1334 episode, score = 201 
1335 episode, score = 201 
1336 episode, score = 201 
1337 episode, score = 201 
1338 episode, score = 201 
1339 episode, score = 201 
1340 episode, score = 201 
1341 episode, score = 194 
1342 episode, score = 201 
1343 episode, score = 201 
1344 episode, score = 178 
1345 episode, score = 201 
1

[2017-08-12 20:35:33,384] Finished writing results. You can upload them to the scoreboard via gym.upload('/home/z0m6ie/Documents/GitHub/Machine_Learning_Projects/deep-Q-learning/Untitled Folder/CartPole-v0')
[2017-08-12 20:35:33,394] [CartPole-v0] Uploading 1500 episodes of training data


1500 episode, score = 201 


[2017-08-12 20:35:34,742] [CartPole-v0] Uploading videos of 11 training episodes (30115 bytes)
[2017-08-12 20:35:35,126] [CartPole-v0] Creating evaluation object from CartPole-v0 with learning curve and training video
[2017-08-12 20:35:35,542] 
****************************************************
You successfully uploaded your evaluation on CartPole-v0 to
OpenAI Gym! You can find it at:

    https://gym.openai.com/evaluations/eval_lbEN48CuTTSozMLoSgIlLQ

****************************************************


In [None]:
new_state.shape

In [7]:
env.reset()
env.close()

In [None]:
agent.memory