<a href="https://colab.research.google.com/github/amfei/Reinforcement_Learning/blob/main/Q_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### The Mountain Car environment simulates a simple physics problem in which an underpowered car must drive up a steep hill. The car’s engine is not strong enough to climb the hill directly, so the car must learn to leverage potential energy by driving back and forth to build up enough momentum.

### The environment's dynamics are defined by the equations of motion for the car's position and velocity.

In [None]:
import numpy as np
import math
import matplotlib.pyplot as plt


class MountainCarEnv:
    """One-dimensional mountain-car environment.

    The state is the pair ``(position, velocity)``. The car's velocity is
    changed each step by the chosen action and by a position-dependent
    gravity term; the episode is finished once the position reaches
    ``goal_position``.
    """

    def __init__(self):
        self.min_position = -1.2
        self.max_position = 0.6
        self.goal_position = 0.5
        self.max_speed = 0.07
        # power: velocity change contributed by the engine per step
        #        (scaled by action - 1, i.e. -1, 0 or +1).
        # gravity: coefficient multiplying cos(3 * position) in the
        #          velocity update.
        self.power = 0.0015
        self.gravity = -0.0025
        # Lower / upper bounds of the (position, velocity) state vector.
        self.low = np.array([self.min_position, -self.max_speed])
        self.high = np.array([self.max_position, self.max_speed])
        # Deterministic default state so that step() and render() can be
        # called before reset() without raising AttributeError. reset()
        # replaces it with a randomised start position.
        self.position = -0.5
        self.velocity = 0.0

    def reset(self):
        """Start a new episode and return the initial state.

        The position is drawn uniformly from [-0.6, -0.4) and the velocity
        is set to zero.

        Returns:
            np.ndarray: ``[position, velocity]``.
        """
        self.position = np.random.uniform(low=-0.6, high=-0.4)
        self.velocity = 0.0
        return np.array([self.position, self.velocity])

    def step(self, action):
        """Advance the simulation by one time step.

        Args:
            action: 0 (accelerate left), 1 (do nothing) or
                2 (accelerate right).

        Returns:
            tuple: ``(next_state, reward, done, info)`` where ``next_state``
            is ``np.array([position, velocity])``, ``reward`` is -1.0 on
            every step except the one that reaches the goal (0.0), ``done``
            is True once ``position >= goal_position`` and ``info`` is an
            empty dict.
        """
        position, velocity = self.position, self.velocity

        # (action - 1) * power pushes left (-1), not at all (0) or right (+1);
        # cos(3 * position) * gravity is the slope-dependent gravity term.
        velocity += (action - 1) * self.power + math.cos(3 * position) * self.gravity
        velocity = np.clip(velocity, -self.max_speed, self.max_speed)

        # Move the car, keeping it inside the track limits.
        position += velocity
        position = np.clip(position, self.min_position, self.max_position)

        # Hitting the left wall while still moving left stops the car.
        if position == self.min_position and velocity < 0:
            velocity = 0.0

        # The episode ends when the car reaches or passes the goal.
        done = bool(position >= self.goal_position)
        reward = 0.0 if done else -1.0

        self.position, self.velocity = position, velocity
        return np.array([self.position, self.velocity]), reward, done, {}

    def render(self):
        """Plot the hill profile sin(3x), the car and the goal height."""
        # Sample the track once and reuse it for both axes.
        track_x = np.linspace(self.min_position, self.max_position, 100)

        plt.figure(figsize=(10, 5))
        plt.plot(track_x, np.sin(3 * track_x))
        plt.plot(self.position, np.sin(3 * self.position), 'o', markersize=12, label='Car')
        plt.axhline(y=np.sin(3 * self.goal_position), color='r', linestyle='--', label='Goal')
        plt.title('Mountain Car')
        plt.xlabel('Position')
        plt.ylabel('Height')
        plt.legend()
        plt.grid()
        plt.show()


In [None]:
# Q-Learning setup: one table entry per (position bin, velocity bin, action).
q_table_size = [20, 20]  # number of bins for position and for velocity
action_space = 3  # Actions: 0 (left), 1 (no action), 2 (right)
# Start from random Q-values drawn uniformly from [-1, 1).
q_table = np.random.uniform(-1, 1, tuple(q_table_size) + (action_space,))
q_table.shape

In [None]:


def discretize_state(state, env, q_table_size):
    """Map a continuous state onto Q-table bin indices.

    Each state dimension is split into equally wide bins between
    ``env.low`` and ``env.high``; ``q_table_size`` gives the number of bins
    per dimension.

    Args:
        state: array-like continuous state (here: position, velocity).
        env: object exposing ``low`` and ``high`` bound arrays.
        q_table_size: number of bins for each state dimension.

    Returns:
        tuple: one integer bin index per dimension, each guaranteed to lie
        in ``[0, n_bins - 1]`` so it can index the Q-table directly.
    """
    n_bins = np.asarray(q_table_size)
    bin_width = (env.high - env.low) / n_bins
    indices = ((state - env.low) / bin_width).astype(int)
    # A value sitting exactly on the upper bound (or pushed past a bound by
    # rounding) would otherwise yield index n_bins, which is out of range.
    indices = np.clip(indices, 0, n_bins - 1)
    return tuple(indices)

def calculate_q_value(current_state_qvalue, reward, next_state_qvalue, learning_rate=0.1, discount_factor=0.9):
    """Blend the old Q-value with the one-step target.

    The target is ``reward + discount_factor * next_state_qvalue``; the
    result weights the old value by ``1 - learning_rate`` and the target by
    ``learning_rate``.
    """
    td_target = reward + discount_factor * next_state_qvalue
    keep_weight = 1 - learning_rate
    return keep_weight * current_state_qvalue + learning_rate * td_target

def select_action(q_table, state, epsilon=0):
    """Choose an action with an epsilon-greedy policy.

    Args:
        q_table: array whose last axis indexes actions.
        state: index (tuple of bin indices) selecting the state's row of
            action values in ``q_table``.
        epsilon: probability of picking a uniformly random action instead
            of the greedy one. The default of 0 is purely greedy.

    Returns:
        The chosen action index.
    """
    if np.random.random() >= epsilon:
        # Exploit: take the action with the highest Q-value for this state.
        return np.argmax(q_table[state])
    # Explore: pick uniformly among the actions this table actually has,
    # rather than relying on a module-level action count.
    n_actions = np.shape(q_table)[-1]
    return np.random.choice(range(n_actions))

def update_q_table(q_table, state, action, next_state, reward, learning_rate=0.1, discount_factor=0.9):
    """Apply one Q-learning update for (state, action) in place.

    The entry for ``(state, action)`` is replaced by the value that
    ``calculate_q_value`` computes from its old value, the reward and the
    largest Q-value available in ``next_state``. The (mutated) table is
    also returned.
    """
    action_values = q_table[state]
    best_next_value = np.max(q_table[next_state])
    action_values[action] = calculate_q_value(
        action_values[action],
        reward,
        best_next_value,
        learning_rate,
        discount_factor,
    )
    return q_table

# Training: run epsilon-greedy Q-learning for a fixed number of episodes.
env = MountainCarEnv()
episodes = 500
epsilon = 1.0
epsilon_decay = 0.9995
min_epsilon = 0.01
# Safety cap: the environment only terminates at the goal, so an unlucky
# (mostly random) episode could otherwise run for an unbounded time.
max_steps_per_episode = 10000

for e in range(episodes):
    current_state_raw = env.reset()
    current_state = discretize_state(current_state_raw, env, q_table_size)
    done = False

    for t in range(max_steps_per_episode):
        action = select_action(q_table, current_state, epsilon)
        next_state_raw, reward, done, _ = env.step(action)
        next_state = discretize_state(next_state_raw, env, q_table_size)
        q_table = update_q_table(q_table, current_state, action, next_state, reward)
        current_state = next_state
        if done:
            break

    # Decay exploration once per episode, never going below min_epsilon.
    epsilon = max(min_epsilon, epsilon * epsilon_decay)

# Test the agent: follow the greedy policy (epsilon=0) from a fresh start.
# The rollout is capped because a greedy policy that never reaches the goal
# would otherwise loop (and open figures) forever.
max_test_steps = 1000
state_raw = env.reset()
state = discretize_state(state_raw, env, q_table_size)
done = False

for t in range(max_test_steps):
    action = select_action(q_table, state, epsilon=0)
    next_state_raw, reward, done, _ = env.step(action)
    next_state = discretize_state(next_state_raw, env, q_table_size)
    env.render()  # Draws one figure per step showing the car on the hill
    state = next_state
    if done:
        break
else:
    print("Goal not reached within", max_test_steps, "steps")
