### Game training

In [3]:
import gym
## Sanity check: drive MountainCar-v0 with random actions and render each frame.
env = gym.make('MountainCar-v0')
env.reset()
try:
    for step in range(500):
        ## env.step returns (observation, reward, done, info); only `done` is needed here
        _obs,_reward,done,_info = env.step(env.action_space.sample())
        env.render()
        if done:
            ## An episode ends at the goal or at the step limit; stepping a
            ## finished episode is undefined, so start a new one instead.
            env.reset()
finally:
    ## Always release the render window, even if step/render raises
    env.close()

In [17]:
## Action space of the environment; the output below shows Discrete(3),
## i.e. three possible actions (this is the agent's action_size)
env.action_space

Discrete(3)

In [11]:
## Length of the observation returned by reset(); the output below shows 2
## (this is the agent's state_size). Note: this call also resets the environment.
len(env.reset())

2

### Model

In [9]:
###Importing the libraries
import gym
import numpy as np
import pandas as pd
from keras.optimizers import Adam
from keras.layers import Dense
from keras.models import Sequential
from collections import deque
import random 

In [20]:
class Agent:
    """Deep Q-learning agent with an experience-replay buffer.

    The agent keeps a bounded memory of transitions, picks actions with an
    epsilon-greedy policy and fits a small dense network to one-step
    Q-learning targets.

    Attribute names keep the original ``epislon`` spelling because code
    outside the class reads ``agent.epislon``.
    """
    def __init__(self,state_size,action_size):
        """Store the hyper-parameters and build the Q-network.

        Args:
            state_size: number of values in one observation.
            action_size: number of discrete actions available.
        """
        self.state_size = state_size ##Number of values describing a state, e.g. position and velocity
        self.action_size = action_size ##Number of actions that can be taken
        self.gamma = 0.5 ##Discount factor applied to the estimated future reward
        self.memory = deque(maxlen=2000) ##Replay buffer; the oldest transitions are dropped first
        self.epislon = 1.0 ##Exploration rate: probability of taking a random action
        self.epislon_decay = 0.995 ##Multiplicative decay applied after every train() call
        self.epislon_min = 0.01 ##Decay stops once the exploration rate is at or below this value
        self.learning_rate = 0.001
        self.model = self._create_model()
    def _create_model(self): ##Neural network approximating the Q-value of each action
        """Build and compile the Q-network (state in, one value per action out)."""
        model = Sequential()
        model.add(Dense(24,input_dim = self.state_size,activation='relu'))
        model.add(Dense(24,activation='relu'))
        model.add(Dense(self.action_size,activation='linear'))
        ##Use the configured learning rate instead of a second hard-coded copy
        model.compile(loss='mse',optimizer=Adam(lr=self.learning_rate))
        return model
    def remember(self,state,action,reward,next_state,done): ##Remember the past
        """Append one transition to the replay buffer."""
        self.memory.append((state,action,reward,next_state,done))
    def act(self,state):
        """Choose an action with an epsilon-greedy policy.

        With probability ``self.epislon`` a random action is returned,
        otherwise the action with the highest predicted value.

        Args:
            state: observation passed unchanged to ``self.model.predict``.

        Returns:
            Index of the chosen action.
        """
        ##The draw must be uniform on [0,1): a normal draw (randn) is <= a small
        ##epsilon about half of the time, so the agent would never stop exploring.
        if np.random.rand() < self.epislon:
            return random.randrange(self.action_size)
        return np.argmax(self.model.predict(state)[0])
    def train(self,batch_size):
        """Fit the network on a random minibatch of remembered transitions.

        For each sampled transition the target of the taken action is
        ``reward`` when the transition is terminal, otherwise
        ``reward + gamma * max(Q(next_state))``. After the batch the
        exploration rate is decayed once.

        Args:
            batch_size: number of transitions to sample. If the buffer holds
                fewer than this, the call returns without training.
        """
        ##random.sample raises ValueError when asked for more items than stored
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory,batch_size)
        for state,action,reward,next_state,done in minibatch:
            target = reward
            if not done:
                ##Bootstrap from the best predicted value of the next state
                target = reward + self.gamma*np.amax(self.model.predict(next_state)[0])
            ##Only the taken action's value is replaced; the other outputs keep
            ##their current predictions and so contribute no error.
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state,target_f,epochs=1,verbose=0)
        if self.epislon > self.epislon_min:
            self.epislon *= self.epislon_decay

    def load(self,name):
        """Load network weights from the file ``name``."""
        self.model.load_weights(name)
    def save(self,name):
        """Save network weights to the file ``name``."""
        self.model.save_weights(name)

In [23]:
## Problem dimensions: 2 values per observation, 3 discrete actions
state_size,action_size = 2,3
## Training settings: replay minibatch size and number of episodes to play
batch_size,n_episodes = 32,50
## Presumably the folder for saved model weights; not used in the cells shown here
output_dir = 'Mountain-cart/'
## Build the agent from the dimensions defined above
agent = Agent(state_size=state_size,action_size=action_size)

In [24]:
## Train the agent: play n_episodes episodes, storing every transition, and run
## one experience-replay update after each episode.
## A fresh environment is created here so this cell does not depend on the
## environment that the random-play cell has already closed.
env = gym.make('MountainCar-v0')
try:
    for e in range(n_episodes):
        state = env.reset()
        state = np.reshape(state,(1,state_size))
        for t in range(500):
            action = agent.act(state)
            next_state,reward,done,other_info = env.step(action)
            next_state = np.reshape(next_state,[1,state_size])
            ## In MountainCar `done` means the goal was reached or the step limit
            ## was hit, so the environment reward is kept as is (no extra penalty
            ## for finishing). A time-limit cut-off is not a real terminal state:
            ## store it as non-terminal so the Q-target still bootstraps from it.
            ## Gym versions that do not report the key fall back to `done`.
            truncated = other_info.get('TimeLimit.truncated',False)
            terminal = done and not truncated
            agent.remember(state,action,reward,next_state,terminal)
            state = next_state
            if done:
                ## t is the index of the last step played in this episode
                print("Game Episode :{}/{}, High Score:{},Exploration Rate:{:.2}".format(e,n_episodes,t,agent.epislon))
                break

        ## Start learning only once the buffer holds more than one minibatch
        if len(agent.memory)>batch_size:
            agent.train(batch_size)
finally:
    ## Release the environment even if training is interrupted
    env.close()

Game Episode :0/50, High Score:199,Exploration Rate:1.0
Game Episode :1/50, High Score:199,Exploration Rate:0.99
Game Episode :2/50, High Score:199,Exploration Rate:0.99
Game Episode :3/50, High Score:199,Exploration Rate:0.99
Game Episode :4/50, High Score:199,Exploration Rate:0.98
Game Episode :5/50, High Score:199,Exploration Rate:0.98
Game Episode :6/50, High Score:199,Exploration Rate:0.97
Game Episode :7/50, High Score:199,Exploration Rate:0.97
Game Episode :8/50, High Score:199,Exploration Rate:0.96
Game Episode :9/50, High Score:199,Exploration Rate:0.96
Game Episode :10/50, High Score:199,Exploration Rate:0.95
Game Episode :11/50, High Score:199,Exploration Rate:0.95
Game Episode :12/50, High Score:199,Exploration Rate:0.94
Game Episode :13/50, High Score:199,Exploration Rate:0.94
Game Episode :14/50, High Score:199,Exploration Rate:0.93
Game Episode :15/50, High Score:199,Exploration Rate:0.93
Game Episode :16/50, High Score:199,Exploration Rate:0.92
Game Episode :17/50, High