# Mountain Car RL-Practice

### Install required libraries
!pip install gym[classic_control] numpy

### Import libs

In [14]:
import gymnasium as gym
import numpy as np

### Hyper Parameters

In [15]:
# --- Q-learning update rule ---
learning_rate = 0.1       # step size applied to each temporal-difference correction
discount_factor = 0.95    # weight given to the bootstrapped future value

# --- Epsilon-greedy exploration schedule ---
epsilon = 0.5             # starting probability of choosing a random action
min_epsilon = 0.01        # lower bound that epsilon is clamped to
epsilon_decay = 0.995     # multiplicative factor applied to epsilon after each episode

# --- Training budget ---
episodes = 5_000          # number of training episodes
max_steps = 200           # cap on environment steps within one episode

### Creating Environment & State Space

In [16]:
# Build the environment and read the number of discrete actions it exposes.
env = gym.make("MountainCar-v0")
action_space = env.action_space.n

# Width of each observation dimension, scaled by the per-dimension factors
# [10, 100] (the same factors discretize_state applies to single observations).
obs_low, obs_high = env.observation_space.low, env.observation_space.high
state_space = tuple((obs_high - obs_low) * np.array([10, 100], dtype=int))

# Truncate each scaled width to an int and add one cell, so the index produced
# for the upper bound of the range still falls inside the table.
state_space_size = tuple(int(extent) + 1 for extent in state_space)

In [17]:
# One Q-value per (discretized state, action) pair, initialised uniformly at
# random between -2 and 0.
q_table = np.random.uniform(low=-2, high=0, size=(*state_space_size, action_space))

### Convert continuous state to discrete state

In [18]:
def discretize_state(state):
    """Map a continuous observation to a tuple of integer grid indices.

    The observation is shifted by the lower bound of ``env.observation_space``
    (``env`` is read from module scope), multiplied by the per-dimension
    factors ``[10, 100]``, and each component is truncated with ``int``.

    Args:
        state: Observation array as returned by ``env.reset()`` / ``env.step()``.

    Returns:
        Tuple of Python ints, one per observation dimension, usable as the
        leading index into ``q_table``.
    """
    scale = np.array([10, 100], dtype=int)
    offset_from_low = state - env.observation_space.low
    return tuple(int(component) for component in offset_from_low * scale)

### Q-Learning Algorithm

In [19]:
# Tabular Q-learning with an epsilon-greedy behaviour policy.
# Each episode: reset the environment, act for at most `max_steps` steps while
# updating `q_table` in place, then decay `epsilon` towards `min_epsilon`.
for episode in range(episodes):
    state, _ = env.reset()
    discrete_state = discretize_state(state)

    for step in range(max_steps):
        # Explore with probability epsilon, otherwise act greedily on the Q-table.
        if np.random.random() < epsilon:
            action = np.random.randint(0, action_space)
        else:
            action = int(np.argmax(q_table[discrete_state]))

        new_state, reward, terminated, truncated, _ = env.step(action)
        new_discrete_state = discretize_state(new_state)

        # A terminal state has no future return, so the target must not
        # bootstrap from it (its Q-values are never updated and would stay at
        # their random initial values). Truncation by a time limit is not a
        # true terminal state, so it still bootstraps.
        max_future_q = 0.0 if terminated else np.max(q_table[new_discrete_state])
        current_q = q_table[discrete_state + (action,)]
        new_q = current_q + learning_rate * (reward + discount_factor * max_future_q - current_q)
        q_table[discrete_state + (action,)] = new_q

        discrete_state = new_discrete_state

        # Stop on either signal: the environment must be reset before it is
        # stepped again once an episode has terminated or been truncated.
        if terminated or truncated:
            break

    # Anneal exploration once per episode, never dropping below the floor.
    epsilon = max(min_epsilon, epsilon * epsilon_decay)

    if episode % 100 == 0:
        print(f"Episode: {episode}, Epsilon: {epsilon:.3f}")

env.close()

Episode: 0, Epsilon: 0.497
Episode: 100, Epsilon: 0.301
Episode: 200, Epsilon: 0.183
Episode: 300, Epsilon: 0.111
Episode: 400, Epsilon: 0.067
Episode: 500, Epsilon: 0.041
Episode: 600, Epsilon: 0.025
Episode: 700, Epsilon: 0.015
Episode: 800, Epsilon: 0.010
Episode: 900, Epsilon: 0.010
Episode: 1000, Epsilon: 0.010
Episode: 1100, Epsilon: 0.010
Episode: 1200, Epsilon: 0.010
Episode: 1300, Epsilon: 0.010
Episode: 1400, Epsilon: 0.010
Episode: 1500, Epsilon: 0.010
Episode: 1600, Epsilon: 0.010
Episode: 1700, Epsilon: 0.010
Episode: 1800, Epsilon: 0.010
Episode: 1900, Epsilon: 0.010
Episode: 2000, Epsilon: 0.010
Episode: 2100, Epsilon: 0.010
Episode: 2200, Epsilon: 0.010
Episode: 2300, Epsilon: 0.010
Episode: 2400, Epsilon: 0.010
Episode: 2500, Epsilon: 0.010
Episode: 2600, Epsilon: 0.010
Episode: 2700, Epsilon: 0.010
Episode: 2800, Epsilon: 0.010
Episode: 2900, Epsilon: 0.010
Episode: 3000, Epsilon: 0.010
Episode: 3100, Epsilon: 0.010
Episode: 3200, Epsilon: 0.010
Episode: 3300, Epsilon

### Result after learning
I'm using render_mode="human" to display a GUI showing the behaviour the agent has learned.

In [20]:
# Roll out the greedy policy from the learned Q-table in a rendered window.
env = gym.make("MountainCar-v0", render_mode="human")
try:
    state, _ = env.reset()
    done = False

    while not done:
        # With render_mode="human" the environment draws each frame itself
        # during reset()/step(), so no explicit env.render() call is needed.
        action = int(np.argmax(q_table[discretize_state(state)]))
        state, _, terminated, truncated, _ = env.step(action)
        # End the rollout on truncation as well as termination; otherwise a
        # policy that never reaches the goal would keep stepping forever.
        done = terminated or truncated
finally:
    # Close the render window even if the rollout is interrupted.
    env.close()
