<a href="https://colab.research.google.com/github/Wyatt-Kugler/Cartpole-RL-Model/blob/main/Cartpole.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gymnasium
!pip install imageio
!pip install IPython
!pip install numpy
!pip install tqdm




In [None]:
import os

from google.colab import drive

# Mount Google Drive at /content/drive so that `save_folder` below is reachable.
drive.mount('/content/drive')

# Folder that receives the GIFs of recorded episodes. The trailing slash is
# required: the training loop builds each file path by plain string
# concatenation (`save_folder + gif_path`).
save_folder = "/content/drive/MyDrive/Cartpole/Cartpole_GIFs/"

# Create the folder up front so that saving the first GIF cannot fail on a
# Drive where it does not exist yet; a no-op when it is already there.
os.makedirs(save_folder, exist_ok=True)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import gymnasium as gym
import imageio
import numpy as np

from IPython.display import Image
from collections import defaultdict

In [None]:
import numpy as np
from collections import defaultdict

# Default per-component limits used to normalise an observation before binning
# (labelled "CartPole-v1 env limits" by the original author).
_DEFAULT_LOWER_BOUNDS = (-4.8, -5, -0.418, -5)
_DEFAULT_UPPER_BOUNDS = (4.8, 5, 0.418, 5)


def discretize(obs, bins, lower_bounds=_DEFAULT_LOWER_BOUNDS, upper_bounds=_DEFAULT_UPPER_BOUNDS):
    """Convert a continuous observation into a tuple of discrete bin indices.

    Each component ``obs[i]`` is rescaled to a ratio of its position between
    ``lower_bounds[i]`` and ``upper_bounds[i]``, multiplied by
    ``bins[i] - 1``, clipped to ``[0, bins[i] - 1]`` and truncated to an int.

    Args:
        obs: Indexable observation; components ``0 .. len(lower_bounds) - 1``
            are read.
        bins: Indexable of bin counts, one per component.
        lower_bounds: Lower limit per component. Defaults to the limits the
            function originally hard-coded.
        upper_bounds: Upper limit per component, same length as
            ``lower_bounds``.

    Returns:
        A tuple of Python ints, one per bounded component, each in
        ``[0, bins[i] - 1]``. The tuple is hashable, so it can be used
        directly as a dictionary key.
    """
    indices = []
    for i, (low, high) in enumerate(zip(lower_bounds, upper_bounds)):
        # Fraction of the way from `low` to `high`; outside [0, 1] when the
        # observation lies beyond the bounds (handled by the clip below).
        ratio = (obs[i] - low) / (high - low)
        top = bins[i] - 1
        # NOTE(review): int() truncates, so index `top` is only produced when
        # the observation is at or above `high`; in-range values land in
        # 0 .. top - 1. Left unchanged here to keep the state mapping stable.
        indices.append(int(np.clip(ratio * top, 0, top)))
    return tuple(indices)

class CartAgent:
    """Tabular Q-learning agent with an epsilon-greedy policy.

    Continuous observations are mapped to discrete states with the module-level
    ``discretize`` function; Q-values are stored per discrete state in a
    ``defaultdict`` whose missing entries start as all zeros.
    """

    def __init__(
        self,
        env,
        learning_rate,
        initial_epsilon,
        epsilon_decay,
        final_epsilon,
        discount_factor=0.95,
        bins=(10, 10, 10, 10),
    ):
        """Initializes the Q-learning agent.

        Args:
            env: Environment whose ``action_space`` provides ``n`` (number of
                actions) and ``sample()`` (random action).
            learning_rate: Factor applied to the TD error in each update.
            initial_epsilon: Starting probability of taking a random action.
            epsilon_decay: Amount subtracted from epsilon by each call to
                ``decay_epsilon``.
            final_epsilon: Lower limit that epsilon is never decayed below.
            discount_factor: Weight of the best next-state Q-value in the
                update target.
            bins: Number of bins per observation component, forwarded to
                ``discretize``. Defaults to the value that was previously
                hard-coded in ``get_action`` and ``update``.
        """
        self.env = env
        # One zero-initialised row of action values per state, created lazily
        # the first time a state is looked up.
        self.q_values = defaultdict(lambda: np.zeros(env.action_space.n))
        self.lr = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = initial_epsilon
        self.epsilon_decay = epsilon_decay
        self.final_epsilon = final_epsilon
        self.bins = tuple(bins)
        # TD error of every update, in call order.
        self.training_error = []

    def _state(self, obs):
        """Returns the discrete state key for ``obs`` using this agent's bins."""
        return discretize(obs, bins=self.bins)

    def get_action(self, obs):
        """Chooses an action with an epsilon-greedy policy.

        With probability ``self.epsilon`` a random action is sampled from the
        environment's action space; otherwise the action with the highest
        Q-value for the discretized observation is returned.
        """
        if np.random.random() < self.epsilon:
            # Exploring: the observation is irrelevant, so it is not discretized.
            return self.env.action_space.sample()
        return np.argmax(self.q_values[self._state(obs)])

    def update(self, obs, action, reward, terminated, next_obs):
        """Applies one Q-learning update for a single transition.

        Args:
            obs: Observation before the action.
            action: Index of the action that was taken.
            reward: Reward received for the transition.
            terminated: True when ``next_obs`` is terminal; the future value is
                then left out of the target.
            next_obs: Observation after the action.
        """
        obs_index = self._state(obs)
        next_index = self._state(next_obs)

        # Only include the future value if the episode is not terminated
        # (multiplying by False zeroes it).
        future_q_value = np.max(self.q_values[next_index]) * (not terminated)
        target = reward + self.discount_factor * future_q_value

        td_error = target - self.q_values[obs_index][action]
        self.q_values[obs_index][action] += self.lr * td_error

        self.training_error.append(td_error)

    def decay_epsilon(self):
        """Lowers epsilon by ``epsilon_decay``, never below ``final_epsilon``."""
        self.epsilon = max(self.final_epsilon, self.epsilon - self.epsilon_decay)


In [None]:
# --- Training hyperparameters ---
learning_rate = 0.01
n_episodes = 5000
start_epsilon = 1.0
# Linear decay step: epsilon is reduced by this amount once per episode, a rate
# that would reach zero halfway through training if it were not clamped at
# `final_epsilon` by the agent.
epsilon_decay = start_epsilon / (n_episodes / 2)
final_epsilon = 0.1

# Fixed seed so that a fresh "Restart & Run All" repeats the same run.
SEED = 42
# The agent draws its explore/exploit decision from NumPy's global RNG.
np.random.seed(SEED)

env = gym.make("CartPole-v1", render_mode="rgb_array", max_episode_steps=1000)
# Seed the environment's RNG once, right after creation; later `env.reset()`
# calls without a seed continue from this stream.
env.reset(seed=SEED)
# Random exploratory actions come from `env.action_space.sample()`.
env.action_space.seed(SEED)

agent = CartAgent(
    env=env,
    learning_rate=learning_rate,
    initial_epsilon=start_epsilon,
    epsilon_decay=epsilon_decay,
    final_epsilon=final_epsilon,
)


In [None]:
from tqdm import tqdm
import imageio

# Number of episodes recorded so far (used to number the GIF files) and the
# full path of every GIF that has been written.
Recording_Number = 0
gif_files = []

for episode in tqdm(range(n_episodes)):
    # Episodes 0, 100, 200, ... are captured frame by frame and saved as a GIF.
    recording = (episode % 100 == 0)
    if recording:
        Recording_Number += 1
        frames = []

    obs, info = env.reset()
    if recording:
        # First frame: the freshly reset environment.
        frames.append(env.render())

    done = False
    while not done:
        action = agent.get_action(obs)
        next_obs, reward, terminated, truncated, info = env.step(action)

        # Only `terminated` is handed to the agent; `truncated` ends the
        # episode below but does not affect the update.
        agent.update(obs, action, reward, terminated, next_obs)

        if recording:
            frames.append(env.render())

        obs = next_obs
        done = terminated or truncated

    if recording:
        gif_path = f"cartpole_loop_{Recording_Number}.gif"
        full_path = save_folder + gif_path
        imageio.mimsave(full_path, frames, fps=30)
        gif_files.append(full_path)

    # Exploration rate is lowered once per episode.
    agent.decay_epsilon()

  0%|          | 0/5000 [00:00<?, ?it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_1.gif


  2%|▏         | 95/5000 [00:03<01:12, 67.39it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_2.gif


  4%|▍         | 200/5000 [00:09<00:57, 83.76it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_3.gif


  6%|▌         | 286/5000 [00:10<00:42, 109.65it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_4.gif


  8%|▊         | 377/5000 [00:10<00:34, 134.44it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_5.gif


 10%|▉         | 493/5000 [00:12<00:36, 123.27it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_6.gif


 12%|█▏        | 578/5000 [00:13<00:41, 107.13it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_7.gif


 14%|█▎        | 685/5000 [00:15<00:53, 80.15it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_8.gif


 16%|█▌        | 785/5000 [00:18<00:51, 82.34it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_9.gif


 18%|█▊        | 889/5000 [00:19<00:39, 102.80it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_10.gif


 20%|█▉        | 979/5000 [00:22<01:13, 54.96it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_11.gif


 22%|██▏       | 1094/5000 [00:23<00:31, 123.38it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_12.gif


 24%|██▎       | 1180/5000 [00:24<00:29, 130.63it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_13.gif


 26%|██▌       | 1295/5000 [00:26<00:33, 109.02it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_14.gif


 28%|██▊       | 1386/5000 [00:27<00:32, 111.88it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_15.gif


 30%|██▉       | 1482/5000 [00:29<00:37, 94.17it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_16.gif


 31%|███▏      | 1572/5000 [00:30<00:33, 101.93it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_17.gif


 34%|███▍      | 1697/5000 [00:31<00:26, 126.93it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_18.gif


 36%|███▌      | 1778/5000 [00:32<00:34, 92.70it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_19.gif


 38%|███▊      | 1889/5000 [00:34<00:32, 94.48it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_20.gif


 40%|███▉      | 1993/5000 [00:36<00:32, 91.80it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_21.gif


 42%|████▏     | 2078/5000 [00:37<00:27, 107.36it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_22.gif


 44%|████▎     | 2179/5000 [00:39<00:23, 118.12it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_23.gif


 45%|████▌     | 2267/5000 [00:40<00:26, 101.41it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_24.gif


 48%|████▊     | 2383/5000 [00:41<00:20, 125.03it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_25.gif


 50%|████▉     | 2484/5000 [00:42<00:24, 104.55it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_26.gif


 52%|█████▏    | 2586/5000 [00:44<00:21, 114.89it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_27.gif


 54%|█████▍    | 2691/5000 [00:45<00:17, 130.02it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_28.gif


 56%|█████▌    | 2780/5000 [00:46<00:20, 110.41it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_29.gif


 58%|█████▊    | 2890/5000 [00:47<00:16, 127.39it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_30.gif


 60%|█████▉    | 2979/5000 [00:49<00:23, 84.57it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_31.gif


 62%|██████▏   | 3099/5000 [00:51<00:16, 115.23it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_32.gif


 64%|██████▎   | 3184/5000 [00:52<00:16, 108.55it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_33.gif


 66%|██████▌   | 3278/5000 [00:53<00:18, 91.47it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_34.gif


 68%|██████▊   | 3397/5000 [00:55<00:12, 128.98it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_35.gif


 70%|██████▉   | 3490/5000 [00:56<00:13, 116.02it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_36.gif


 72%|███████▏  | 3597/5000 [00:57<00:12, 113.49it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_37.gif


 74%|███████▎  | 3682/5000 [00:58<00:11, 110.62it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_38.gif


 76%|███████▌  | 3783/5000 [00:59<00:11, 106.63it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_39.gif


 78%|███████▊  | 3895/5000 [01:02<00:11, 97.57it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_40.gif


 80%|███████▉  | 3992/5000 [01:05<00:13, 76.82it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_41.gif


 82%|████████▏ | 4092/5000 [01:06<00:07, 119.29it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_42.gif


 84%|████████▍ | 4188/5000 [01:08<00:07, 103.74it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_43.gif


 86%|████████▌ | 4300/5000 [01:10<00:07, 93.63it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_44.gif


 88%|████████▊ | 4384/5000 [01:11<00:05, 104.23it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_45.gif


 90%|████████▉ | 4484/5000 [01:14<00:05, 89.11it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_46.gif


 92%|█████████▏| 4593/5000 [01:17<00:05, 79.48it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_47.gif


 94%|█████████▍| 4696/5000 [01:20<00:04, 74.82it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_48.gif


 96%|█████████▌| 4792/5000 [01:22<00:02, 85.24it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_49.gif


 98%|█████████▊| 4895/5000 [01:24<00:01, 74.97it/s]

Saving GIF to: /content/drive/MyDrive/Cartpole/Cartpole_GIFs/cartpole_loop_50.gif


100%|██████████| 5000/5000 [01:26<00:00, 57.77it/s]
