<a href="https://colab.research.google.com/github/aiAintEasy-HolyCalamity/CartPoleProblem/blob/main/Cart_Pole.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Cart Pole Problem**
# Reinforcement Learning Concepts


*   Agent, Environment, State, Action, Reward
*   Exploration vs Exploitation
*   PPO Algorithm
*   Model evaluation with video replay
*   Video documentation for storytelling

Install Dependencies

In [1]:
!pip install gymnasium[classic_control] stable-baselines3 matplotlib moviepy

Collecting stable-baselines3
  Downloading stable_baselines3-2.6.0-py3-none-any.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (

Training and Recording Setup

In [2]:
import gymnasium as gym
from stable_baselines3 import PPO
from gymnasium.wrappers import RecordVideo
import os

# Folder that receives the recorded evaluation clips.
video_folder = './cartpole_videos'
# exist_ok=True keeps this cell safe to re-run when the folder already exists.
os.makedirs(name=video_folder, exist_ok=True)

Train Models and Record Videos

In [4]:
from IPython.display import HTML
from base64 import b64encode
def train_and_record(timesteps, video_name, max_frames=800):
    """Train a fresh PPO agent on CartPole-v1, then record one evaluation episode.

    A new ``PPO('MlpPolicy', ...)`` model is trained from scratch on every call,
    so successive calls are independent of each other.

    Args:
        timesteps: Training budget forwarded to ``model.learn(total_timesteps=...)``.
            NOTE(review): Stable-Baselines3 PPO collects experience in whole
            rollouts (``n_steps``, 2048 by default), so very small budgets are
            likely rounded up to one full rollout -- confirm against the SB3 docs.
        video_name: Filename prefix handed to ``RecordVideo(name_prefix=...)``;
            clips are written into the module-level ``video_folder``.
        max_frames: Upper bound on the number of evaluation steps. CartPole-v1
            itself truncates an episode after 500 steps, so the default of 800
            only acts as a safety cap and is never reached with the stock env.

    Returns:
        None. The only results are the video file(s) written to ``video_folder``.
    """
    # Phase 1: train without the video wrapper so no frames are encoded.
    env_train = gym.make('CartPole-v1', render_mode="rgb_array")
    try:
        model = PPO('MlpPolicy', env_train, verbose=0)
        model.learn(total_timesteps=timesteps)
    finally:
        # Release the environment even if training raises.
        env_train.close()

    # Phase 2: roll out a single deterministic episode with recording enabled.
    env = gym.make('CartPole-v1', render_mode="rgb_array")
    try:
        env = RecordVideo(env, video_folder=video_folder, name_prefix=video_name)
        obs, _info = env.reset()
        for _ in range(max_frames):
            action, _state = model.predict(obs, deterministic=True)
            obs, _reward, terminated, truncated, _info = env.step(action)
            if terminated or truncated:
                break
    finally:
        # Always close the environment (wrapper or bare env), even on error.
        env.close()

# Train three agents with growing budgets (weak -> improved -> best) and
# record one evaluation clip for each, in that order.
for video_name, timesteps in (
    ('weak_model', 500),
    ('improved_model', 5000),
    ('best_model', 50000),
):
    train_and_record(timesteps=timesteps, video_name=video_name)

  """
  logger.warn(
  from pkg_resources import resource_stream, resource_exists
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
  logger.warn(


Display Weak Video Inline (Colab/Jupyter)

In [5]:
def display_video(video_file):
    """Build an inline HTML5 player for an MP4 file.

    The file is read fully into memory and embedded as a base64 ``data:`` URL,
    so the resulting player does not reference the file on disk.

    Args:
        video_file: Path to the MP4 file to embed.

    Returns:
        An ``IPython.display.HTML`` object; leave it as the last expression of
        a cell (or pass it to ``display``) to render the player.

    Raises:
        OSError: If ``video_file`` cannot be opened (e.g. it does not exist).
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(video_file, 'rb') as video_handle:
        mp4 = video_handle.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML(f'<video width=640 controls><source src="{data_url}" type="video/mp4"></video>')

# Play back the first recorded episode of the weak model.
video_path = video_folder + "/weak_model-episode-0.mp4"
display_video(video_path)

Display Improved Video Inline (Colab/Jupyter)

In [6]:
# Play back the first recorded episode of the improved model.
video_path = video_folder + "/improved_model-episode-0.mp4"
display_video(video_path)

Display Best Video Inline (Colab/Jupyter)

In [7]:
# Play back the first recorded episode of the best model.
video_path = video_folder + "/best_model-episode-0.mp4"
display_video(video_path)