In [16]:
class Agent():
    def __init__(self,state_size, action_size):
        self.weight_backup = "cartpole_weight.h5"
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.learning_rate = 0.001
        self.gamma = 0.95
        self.exploration_rate = 1.0
        self.exploration_min = 0.01
        self.exploration_decay = 0.005
        self.brain = self._build_model()
    
    def _build_model(self):
        model = Sequential()
        model.add(Dense(24,input_dim = self.state_size,activation = 'relu'))
        model.add(Dense(self.action_size,activation = 'linear'))
        model.compile(loss = 'mse',optimizer = Adam(lr = self.learning_rate))
        if os.path.isfile(self.weight_backup):
            model.load_weights(self.weight_backup)
            self.exploration_rate = self.exploration_min
        return model
    
    def save_model(self):
        self.brain.save(self.weight_backup)
        
    def act(self,state):
        if np.random.rand() <= self.exploration_rate:
            return random.randrange(self.action_size)
        act_values = self.brain.predict(state)
        return np.argmax(act_values[0])
    
    def remember(self,state, action, reward, next_state, done):
        self.memory.append((state,action,reward,next_state,done))
        
    def replay(self,sample_batch_size):
        if len(self.memory) < sample_batch_size:
            return
        sample_batch = random.sample(self.memory,sample_batch_size)
        for state, action, reward, next_state, done in sample_batch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.brain.predict(next_state)[0])
                target_f = self.brain.predict(state)
                target_f[0][action] = target
                self.brain.fit(state,target_f,epochs=1,verbose = 0)
            if self.exploration_rate > self.exploration_min:
                self.exploration_rate *= self.exploration_decay

In [None]:
import gym
import os
import numpy as np
import random
from keras.models import Sequential
from collections import deque
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import Adam

class CartPole:
    def __init__(self):
        self.sample_batch_size = 32
        self.episodes          = 10000
        self.env               = gym.make('CartPole-v1')
        self.state_size        = self.env.observation_space.shape[0]
        self.action_size       = self.env.action_space.n
        self.agent             = Agent(self.state_size, self.action_size)

    def run(self):
        try:
            for index_episode in range(self.episodes):
                state = self.env.reset()
                state = np.reshape(state, [1, self.state_size])
                done = False
                index = 0
                while not done:
                    self.env.render()
                    action = self.agent.act(state)
                    next_state , reward , done, _ = self.env.step(action)
                    next_state = np.reshape(next_state, [1,self.state_size])
                    self.agent.remember(state,action,reward,next_state,done)
                    self.agent.replay(self.sample_batch_size)
                    state = next_state
                    index += 1
                    print("Episode {}# Score: {}".format(index_episode, index + 1))
                    self.agent.replay(self.sample_batch_size)
        finally:
                self.agent.save_model()
                
if __name__ == "__main__":
    cartpole = CartPole()
    cartpole.run()

Episode 0# Score: 2
Episode 0# Score: 3
Episode 0# Score: 4
Episode 0# Score: 5
Episode 0# Score: 6
Episode 0# Score: 7
Episode 0# Score: 8
Episode 0# Score: 9
Episode 0# Score: 10
Episode 0# Score: 11
Episode 0# Score: 12
Episode 0# Score: 13
Episode 0# Score: 14
Episode 0# Score: 15
Episode 0# Score: 16
Episode 1# Score: 2
Episode 1# Score: 3
Episode 1# Score: 4
Episode 1# Score: 5
Episode 1# Score: 6
Episode 1# Score: 7
Episode 1# Score: 8
Episode 1# Score: 9
Episode 1# Score: 10
Episode 2# Score: 2
Episode 2# Score: 3
Episode 2# Score: 4
Episode 2# Score: 5
Episode 2# Score: 6
Episode 2# Score: 7
Episode 2# Score: 8
Episode 2# Score: 9
Episode 2# Score: 10
Episode 2# Score: 11
Episode 2# Score: 12
Episode 2# Score: 13
Episode 3# Score: 2
Episode 3# Score: 3
Episode 3# Score: 4
Episode 3# Score: 5
Episode 3# Score: 6
Episode 3# Score: 7
Episode 3# Score: 8
Episode 3# Score: 9
Episode 3# Score: 10
Episode 3# Score: 11
Episode 4# Score: 2
Episode 4# Score: 3
Episode 4# Score: 4
Episod

Episode 40# Score: 11
Episode 41# Score: 2
Episode 41# Score: 3
Episode 41# Score: 4
Episode 41# Score: 5
Episode 41# Score: 6
Episode 41# Score: 7
Episode 41# Score: 8
Episode 41# Score: 9
Episode 41# Score: 10
Episode 41# Score: 11
Episode 42# Score: 2
Episode 42# Score: 3
Episode 42# Score: 4
Episode 42# Score: 5
Episode 42# Score: 6
Episode 42# Score: 7
Episode 42# Score: 8
Episode 42# Score: 9
Episode 42# Score: 10
Episode 42# Score: 11
Episode 43# Score: 2
Episode 43# Score: 3
Episode 43# Score: 4
Episode 43# Score: 5
Episode 43# Score: 6
Episode 43# Score: 7
Episode 43# Score: 8
Episode 43# Score: 9
Episode 43# Score: 10
Episode 44# Score: 2
Episode 44# Score: 3
Episode 44# Score: 4
Episode 44# Score: 5
Episode 44# Score: 6
Episode 44# Score: 7
Episode 44# Score: 8
Episode 44# Score: 9
Episode 44# Score: 10
Episode 44# Score: 11
Episode 44# Score: 12
Episode 45# Score: 2
Episode 45# Score: 3
Episode 45# Score: 4
Episode 45# Score: 5
Episode 45# Score: 6
Episode 45# Score: 7
Epis