[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/barhilleli/Deep-Learning-Boot-Camp/blob/master/keras_cartpole_colab.ipynb)
(needs to be opened in a new tab)

In [None]:
import gym
import random
import os
import numpy as np
from collections      import deque
from keras.models     import Sequential
from keras.layers     import Dense
from keras.optimizers import Adam

class Agent():
    """Deep Q-learning agent backed by a small fully connected Keras network.

    The agent keeps a bounded replay memory of transitions, picks actions
    epsilon-greedily and trains its network on randomly sampled transitions.
    """

    def __init__(self, state_size, action_size):
        """Store the hyper-parameters, create the replay memory and build the network.

        state_size:  number of network inputs (passed to Keras as ``input_dim``).
        action_size: number of network outputs, one per selectable action.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.weight_backup = "cartpole_weight.h5"
        self.memory = deque(maxlen=2000)  # oldest transitions drop out first
        self.learning_rate = 0.001
        self.gamma = 0.95
        self.exploration_rate = 1.0
        self.exploration_min = 0.01
        self.exploration_decay = 0.995
        # Built last: _build_model reads the sizes and learning rate set above
        # and may overwrite exploration_rate when a backup file is found.
        self.brain = self._build_model()

    def _build_model(self):
        """Create and compile the Q-network, restoring weights if a backup file exists.

        When the backup file is present its weights are loaded and the
        exploration rate is set straight to its minimum.
        """
        layers = [
            Dense(24, input_dim=self.state_size, activation='relu'),
            Dense(24, activation='relu'),
            Dense(self.action_size, activation='linear'),
        ]
        network = Sequential(layers)
        network.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))

        if not os.path.isfile(self.weight_backup):
            return network

        network.load_weights(self.weight_backup)
        self.exploration_rate = self.exploration_min
        return network

    def save_model(self):
        """Save the network to the ``weight_backup`` path."""
        backup_path = self.weight_backup
        self.brain.save(backup_path)

    def act(self, state):
        """Return an action index for ``state`` using an epsilon-greedy rule.

        With probability ``exploration_rate`` a uniformly random action is
        returned; otherwise the index of the largest predicted value.
        """
        explore = np.random.rand() <= self.exploration_rate
        if not explore:
            q_values = self.brain.predict(state)
            return np.argmax(q_values[0])
        return random.randrange(self.action_size)

    def remember(self, state, action, reward, next_state, done):
        """Append one ``(state, action, reward, next_state, done)`` transition to memory."""
        transition = (state, action, reward, next_state, done)
        self.memory.append(transition)

    def replay(self, sample_batch_size):
        """Fit the network on ``sample_batch_size`` randomly sampled transitions.

        Does nothing while the memory holds fewer transitions than requested.
        After fitting, the exploration rate is multiplied by
        ``exploration_decay`` as long as it is still above ``exploration_min``.
        """
        if sample_batch_size > len(self.memory):
            return

        for state, action, reward, next_state, done in random.sample(self.memory, sample_batch_size):
            if done:
                # Terminal transition: nothing to bootstrap from.
                q_target = reward
            else:
                best_next = np.amax(self.brain.predict(next_state)[0])
                q_target = reward + self.gamma * best_next
            # Only the entry of the action actually taken is moved to the target.
            q_values = self.brain.predict(state)
            q_values[0][action] = q_target
            self.brain.fit(state, q_values, epochs=1, verbose=0)

        if self.exploration_rate > self.exploration_min:
            self.exploration_rate = self.exploration_rate * self.exploration_decay

            
            
class CartPole:
    """Train an ``Agent`` on gym's ``CartPole-v1`` environment and record episode lengths."""

    def __init__(self):
        """Create the environment, size the agent from its spaces and reset the history."""
        self.sample_batch_size = 32
        self.episodes          = 10000
        self.env               = gym.make('CartPole-v1')

        self.state_size        = self.env.observation_space.shape[0]
        self.action_size       = self.env.action_space.n
        self.agent             = Agent(self.state_size, self.action_size)
        self.episode_duration  = []  # number of steps taken in each finished episode

    def run(self):
        """Play ``self.episodes`` episodes, training the agent after each one.

        Every step renders the environment, asks the agent for an action and
        stores the resulting transition in the agent's memory. After each
        episode the step count is printed and appended to
        ``episode_duration``, then one replay pass is run. The model is saved
        in a ``finally`` clause, so it is also written when the loop is
        interrupted or raises.
        """
        try:
            for index_episode in range(self.episodes):
                state = self.env.reset()
                # Reshaped to a single-row batch before being handed to the agent.
                state = np.reshape(state, [1, self.state_size])

                done = False
                steps = 0
                while not done:
                    self.env.render()

                    action = self.agent.act(state)

                    next_state, reward, done, _ = self.env.step(action)
                    next_state = np.reshape(next_state, [1, self.state_size])
                    self.agent.remember(state, action, reward, next_state, done)
                    state = next_state
                    steps += 1
                # `steps` already counts every env.step() call of the episode;
                # adding one more (as before) over-reported each score by 1.
                print("Episode {}# Score: {}".format(index_episode, steps))
                self.episode_duration.append(steps)
                self.agent.replay(self.sample_batch_size)
        finally:
            self.agent.save_model()

    @staticmethod
    def _running_mean(durations, window=100):
        """Return the trailing ``window``-element mean of ``durations``, zero-padded in front.

        The result has the same length as ``durations``: the first
        ``window - 1`` entries are 0 and entry ``i`` (for ``i >= window - 1``)
        is the mean of ``durations[i - window + 1 : i + 1]``. Inputs shorter
        than ``window`` yield all zeros.
        """
        values = np.asarray(durations, dtype=float)
        if values.size < window:
            return np.zeros(values.size)
        kernel = np.ones(window) / window
        means = np.convolve(values, kernel, mode='valid')
        return np.concatenate((np.zeros(window - 1), means))

    def plot_durations(self):
        """Plot the recorded episode durations and, once available, their 100-episode mean.

        NOTE(review): ``plt``, ``is_ipython`` and ``display`` are not imported
        in the visible code; presumably another notebook cell defines them
        (matplotlib.pyplot / IPython.display) - confirm before calling.
        """
        plt.figure(2)
        plt.clf()
        plt.title('Training...')
        plt.xlabel('Episode')
        plt.ylabel('Duration')
        plt.plot(self.episode_duration)
        # Take 100 episode averages and plot them too. Computed with numpy:
        # the previous code used `durations_t` and `torch`, neither of which
        # is defined here, so it raised NameError after 100 episodes.
        if len(self.episode_duration) >= 100:
            means = self._running_mean(self.episode_duration, 100)
            plt.plot(means)

        if is_ipython:
            display.clear_output(wait=True)
            display.display(plt.gcf())

            
# Entry point: build the CartPole trainer and start the training loop.
# The instance is bound to the module-level name `cartpole`, so it stays
# available (e.g. for cartpole.plot_durations()) after run() returns.
if __name__ == "__main__":
    cartpole = CartPole()
    cartpole.run()


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Instructions for updating:
keep_dims is deprecated, use keepdims instead
Episode 0# Score: 13
Episode 1# Score: 26
Episode 2# Score: 22
Episode 3# Score: 34
Episode 4# Score: 22
Episode 5# Score: 38
Episode 6# Score: 24
Episode 7# Score: 15
Episode 8# Score: 34
Episode 9# Score: 34
Episode 10# Score: 25
Episode 11# Score: 28
Episode 12# Score: 18
Episode 13# Score: 25
Episode 14# Score: 14
Episode 15# Score: 24
Episode 16# Score: 20
Episode 17# Score: 26
Episode 18# Score: 24
Episode 19# Score: 14
Episode 20# Score: 20
Episode 21# Score: 20
Episode 22# Score: 22
Episode 23# Score: 19
Episode 24# Score: 13
Episode 25# Score: 15
Episode 26# Score: 13
Episode 27# Score: 16
Episode 28# Score: 14
Episode 29# Score: 11
Episode 30# Score: 15
Episode 31# Score: 41
Episode 32# Score: 16
Episode 33# Score: 15
Episode 34# Score: 36
Episode 35# Score: 15
Episode 36# Score: 18
Episode 37# Score: 12
Episode 38# Score: 15
Episode 39# Score: 19
Episode 40# Score: 13
Episode 41# Score: 27
Episode 42# S