## Cart-Pole game using Deep Q-learning

**1. Importing necessary packages**

In [15]:
import gym
import random
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam


**2. Creating the environment**

In [16]:
# Create the CartPole-v1 environment; `env` is shared by all the cells below.
env = gym.make('CartPole-v1')

**3. Hyperparameters and settings**

In [17]:
# Replay buffer of (state, action, reward, next_state, done) tuples; once 5000
# entries are stored, appending a new one drops the oldest.
memory = deque(maxlen=5000)
# Discount factor applied to the predicted value of the next state.
gamma  = 0.85
# Probability of taking a random action instead of the model's best action.
exploration_rate = 1.0
# exploration_rate stops being decayed once it is no longer above this value.
exploration_min = 0.001
# Factor exploration_rate is multiplied by after each replay-training call.
exploration_decay = 0.995

# Length of the observation vector and number of discrete actions.
state_size = env.observation_space.shape[0]
action_size = env.action_space.n

# Set to True to load pre-trained weights (if the file can be loaded) and
# start with exploration_rate set to exploration_min.
test_cart = True

**4. Neural Network Model**

In [18]:
# Q-network: takes a state vector of length `state_size` and outputs one
# value per action. Two hidden ReLU layers of 24 units, linear output layer.
model = Sequential([
    Dense(24, input_dim=state_size, activation='relu'),
    Dense(24, activation='relu'),
    Dense(action_size, activation='linear'),
])
# Mean-squared error between predicted and target values, optimised with Adam.
model.compile(optimizer=Adam(lr=0.002), loss='mse')

# Optionally warm-start from weights saved by an earlier run.
if test_cart:
    try:
        model.load_weights("cart_pole_weights.h5")
    except Exception:
        # Best effort: if the file cannot be loaded, keep the freshly
        # initialised network and the current exploration rate. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate.
        print("No weights present")
    else:
        # Weights loaded successfully: drop exploration to its minimum so the
        # loaded network chooses almost every action.
        exploration_rate = exploration_min


**5. Some utility functions**

In [19]:
def get_action(state):
    '''
    Pick an action for `state` using an epsilon-greedy policy.

    A uniform random number is drawn; if it does not exceed the global
    `exploration_rate`, a random action index in range(action_size) is
    returned (explore). Otherwise the index of the largest value the model
    predicts for `state` is returned (exploit).
    '''
    explore = np.random.rand() <= exploration_rate
    if not explore:
        # model.predict returns one row per input; only the first row is used.
        predicted_values = model.predict(state)[0]
        return np.argmax(predicted_values)
    return random.randrange(action_size)

def add_to_memory(mem):
    '''
    Append one entry to the global replay buffer `memory`.

    `mem` is stored as-is; the training loop passes a
    (state, action, reward, next_state, done) tuple.
    '''
    memory.append(mem)
    

def train_using_replay(batch_size):
    '''
    Train the model on a random minibatch from `memory`, then decay epsilon.

    `batch_size` transitions are sampled without replacement from the global
    replay buffer. For each one the model is fitted for a single epoch on its
    own prediction for `state`, with the entry for the taken `action` replaced
    by the target:
        reward                                      if the episode ended
        reward + gamma * max(model.predict(next))   otherwise

    Afterwards the global `exploration_rate` is multiplied by
    `exploration_decay`, but never pushed below `exploration_min`.

    Raises ValueError (from random.sample) if `batch_size` is larger than the
    number of stored transitions.
    '''
    global exploration_rate
    minibatch = random.sample(memory, batch_size)
    for state, action, reward, next_state, done in minibatch:
        target = reward
        if not done:
            # Bootstrap from the best predicted value of the next state.
            target = reward + gamma * np.amax(model.predict(next_state)[0])
        # Keep the model's own predictions for the actions that were not
        # taken, so only the taken action contributes to the loss.
        target_f = model.predict(state)
        target_f[0][action] = target
        model.fit(state, target_f, epochs=1, verbose=0)
    if exploration_rate > exploration_min:
        # Clamp so the final decay step cannot undershoot the floor.
        exploration_rate = max(exploration_min,
                               exploration_rate * exploration_decay)


**6. Final run**

In [None]:
# Number of episodes to play, minibatch size for each replay-training call,
# and whether to render the environment at every step.
num_episodes = 600
batch_size = 32
show_game = True

try:
    for episode in range(num_episodes):
        done = False
        # The model expects a batch dimension, so states are kept as
        # arrays of shape (1, state_size).
        state = np.reshape(env.reset(), [1, state_size])
        # Step counter for this episode; it is what gets reported as the
        # "total reward" (presumably equal because CartPole gives +1 per
        # step -- confirm).
        i = 1
        while not done:
            if show_game:
                env.render()
            action = get_action(state)
            state_next, reward, done, _ = env.step(action)
            state_next = np.reshape(state_next, [1, state_size])
            add_to_memory((state, action, reward, state_next, done))
            if done:
                # Report progress on every 10th episode only.
                if episode % 10 == 0:
                    print("{}/{} total reward: {}, epsilon: {:.2}".format(episode, num_episodes, i, exploration_rate))
                break
            state = state_next
            i = i+1

        # Train once per episode, as soon as the buffer holds more than one
        # minibatch worth of transitions.
        if len(memory) > batch_size:
            train_using_replay(batch_size)
finally:
    # Release the environment (and its render window) even when the run is
    # interrupted or fails part-way through.
    env.close()

In [21]:
# Save the current network weights to the file that the model-setup cell
# tries to load when test_cart is True.
model.save_weights("cart_pole_weights.h5")