<a href="https://colab.research.google.com/github/aayu-7/MountainCarGame-Using-ReinforcementLearning/blob/main/MountainCarGame.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the required library
!pip install gym

# Import required libraries
import gym
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import HTML
import glob
import io
from base64 import b64encode
from gym.wrappers import RecordVideo

# Function to discretize state space
def discretize_state(state, bins):
    """Map a continuous (position, velocity) observation to Q-table indices.

    Each dimension is bucketed with ``np.digitize`` against ``bins`` evenly
    spaced edges: position over [-1.2, 0.6] and velocity over [-0.07, 0.07].

    Args:
        state: Indexable whose element 0 is the position and element 1 is
            the velocity.
        bins: Number of edges per dimension; returned indices always lie in
            ``[0, bins - 1]``.

    Returns:
        Tuple ``(position_idx, velocity_idx)`` of plain ints.
    """
    position_bins = np.linspace(-1.2, 0.6, bins)  # Discretize position
    velocity_bins = np.linspace(-0.07, 0.07, bins)  # Discretize velocity
    last_idx = bins - 1

    # np.digitize returns 0 for values below the first edge, so "- 1" would
    # yield -1. This happens in practice: float32 observations such as
    # float32(-1.2) are slightly smaller than the float64 edge. An index of
    # -1 silently aliases the last bin of the Q-table, so clamp to the
    # valid range.
    position_idx = int(np.digitize(state[0], position_bins)) - 1
    velocity_idx = int(np.digitize(state[1], velocity_bins)) - 1
    position_idx = min(max(position_idx, 0), last_idx)
    velocity_idx = min(max(velocity_idx, 0), last_idx)
    return (position_idx, velocity_idx)

# Function to display video
def show_video():
    """Return an inline HTML5 player for a recorded episode video.

    Picks the first ``./video/*.mp4`` path reported by ``glob`` and embeds
    its bytes as a base64 data URI, so the player works without serving
    the file separately.

    Returns:
        An ``IPython.display.HTML`` object containing a ``<video>`` element.

    Raises:
        FileNotFoundError: If no ``.mp4`` file exists under ``./video``.
    """
    video_paths = glob.glob('./video/*.mp4')
    if not video_paths:
        raise FileNotFoundError(
            "No .mp4 file found in ./video; was an episode recorded?"
        )

    # Read-only binary mode is sufficient, and the context manager
    # guarantees the file handle is closed.
    with open(video_paths[0], 'rb') as video_file:
        encoded = b64encode(video_file.read()).decode('ascii')

    return HTML(data=f'''
        <video width="640" height="480" controls>
            <source src="data:video/mp4;base64,{encoded}" type="video/mp4">
        </video>''')

# Create the MountainCar environment used during training
env = gym.make("MountainCar-v0")

# Q-learning hyperparameters
n_episodes = 5000      # Number of training episodes
n_bins = 20            # Bins per state dimension for discretization
alpha = 0.1            # Learning rate
gamma = 0.99           # Discount factor
epsilon = 1.0          # Initial exploration rate
epsilon_min = 0.01     # Threshold below which epsilon stops decaying
epsilon_decay = 0.995  # Multiplicative decay applied after each episode

# Q-table indexed by (position bin, velocity bin, action), initialised to zero
n_actions = env.action_space.n
q_table_shape = (n_bins, n_bins, n_actions)
q_table = np.zeros(q_table_shape)

# Training loop: tabular Q-learning with an epsilon-greedy behaviour policy
for episode in range(n_episodes):
    # reset() returns (observation, info) in the gym >= 0.26 API
    state_raw, _ = env.reset()
    state = discretize_state(state_raw, n_bins)
    done = False
    total_reward = 0

    while not done:
        # Choose action: exploration vs exploitation
        if np.random.random() < epsilon:
            action = env.action_space.sample()
        else:
            action = np.argmax(q_table[state])

        # step() returns (obs, reward, terminated, truncated, info).
        # The episode is over when EITHER flag is set; ignoring `truncated`
        # keeps stepping an environment that has already been cut off.
        next_state_raw, reward, terminated, truncated, _ = env.step(action)
        next_state = discretize_state(next_state_raw, n_bins)
        done = terminated or truncated

        # Update Q-value. A terminal state has no future return, so only
        # bootstrap from the next state when the episode did not terminate.
        # Truncation is not a true terminal state, so bootstrapping is kept.
        if terminated:
            next_value = 0.0
        else:
            best_next_action = np.argmax(q_table[next_state])
            next_value = q_table[next_state][best_next_action]
        td_target = reward + gamma * next_value
        q_table[state][action] += alpha * (td_target - q_table[state][action])

        state = next_state
        total_reward += reward

    # Decay epsilon, clamped so it never drops below epsilon_min
    epsilon = max(epsilon_min, epsilon * epsilon_decay)

    if episode % 100 == 0:
        print(f"Episode {episode}, Total Reward: {total_reward}")

print("Training Complete!")

# Close the training env before creating the one used for recording
env.close()

# Wrap environment for recording. RecordVideo captures frames through the
# environment's renderer, so the env must be created with
# render_mode="rgb_array"; without it no video file is produced.
env = gym.make("MountainCar-v0", render_mode="rgb_array")
env = RecordVideo(env, "./video", episode_trigger=lambda x: x == 0)

# Test the trained agent with a purely greedy policy
state_raw, _ = env.reset()  # reset() returns (observation, info)
state = discretize_state(state_raw, n_bins)
done = False
while not done:
    action = np.argmax(q_table[state])
    # Stop on termination OR truncation; otherwise a policy that never
    # reaches the goal would loop forever.
    next_state_raw, _, terminated, truncated, _ = env.step(action)
    done = terminated or truncated
    state = discretize_state(next_state_raw, n_bins)

# Closing the wrapped env finalises the recording
env.close()

# Display the video
show_video()


