In [1]:
# @title Atari Pong AI - Installation Script (More Robust)

# This script aims to provide the most robust installation for Atari Pong
# using Stable Baselines3 and Gymnasium in Google Colab.

# IMPORTANT:
# 1. Start with a FRESH COLAB NOTEBOOK (Runtime -> Restart runtime).
# 2. Run this cell FIRST and wait for it to complete.
# 3. Carefully observe ALL output.
# 4. If the "Quick Environment Test" at the end FAILS, go to "Runtime -> Restart runtime"
#    and run *this entire cell again from scratch*. This is often necessary.
# 5. If it still fails after a couple of restarts and reruns, please share the FULL output.

print("--- Starting Robust Installation for Atari Pong ---")

# 0. Ensure pip is up-to-date
print("\n0. Upgrading pip...")
!pip install --upgrade pip

# 1. Install/Upgrade core Gymnasium and specific ale-py.
#    `gymnasium` is the successor to `gym`.
#    We explicitly install `ale-py` and then `gymnasium[atari]` to ensure order.
print("\n1. Installing/Upgrading ale-py and gymnasium[atari]...")
# Install ale-py first, explicitly, to ensure it's present for gymnasium[atari]
!pip install --upgrade ale-py
# Then install gymnasium with atari extras, which should now find ale-py
!pip install --upgrade gymnasium[atari]

# 2. Install Stable Baselines3 (SB3).
print("\n2. Installing/Upgrading stable-baselines3...")
!pip install --upgrade stable-baselines3

# 3. Install AutoROM.
#    This is CRUCIAL for Atari ROM management.
print("\n3. Installing AutoROM...")
!pip install autorom[accept-rom-license]

# 4. Run AutoROM.build() to download Atari ROMs.
#    This command needs to be run explicitly. This is the most common point of failure.
print("\n4. Running AutoROM.build() to download Atari ROMs. This may take a moment...")
print("Look for messages indicating ROMs are being downloaded/accepted.")
!python -m autorom.accept-rom-license

# 5. Install OpenCV Python (cv2).
print("\n5. Installing opencv-python...")
!pip install --upgrade opencv-python

print("\n--- Installation Steps Completed ---")

# --- Robust Environment Test ---
# This test attempts to create the Pong environment to verify installation.
print("\n--- Running Robust Environment Test for 'ALE/Pong-v5' ---")
try:
    import gymnasium as gym
    # Try importing ale_py directly to check if it's found
    try:
        import ale_py
        print(f"Successfully imported ale_py version: {ale_py.__version__}")
    except ImportError:
        print("ERROR: Could not import 'ale_py'. This indicates a fundamental installation issue.")
        raise

    # Attempt to make the environment
    env_test = gym.make("ALE/Pong-v5")
    env_test.reset()
    env_test.close()
    print(f"Successfully created and reset 'ALE/Pong-v5' environment.")
    print("This indicates that the Atari ROMs and dependencies are likely set up correctly.")
    print("\nSUCCESS: You can now proceed to the training script in a new cell.")
except Exception as e:
    print(f"\nFATAL ERROR: Failed to create 'ALE/Pong-v5' environment during test: {e}")
    print("This error means the Atari ROMs or the 'ale-py' library are NOT correctly set up.")
    print("\n--- TROUBLESHOOTING STEPS ---")
    print("1. Go to 'Runtime -> Restart runtime' in the Colab menu.")
    print("2. Run *this entire installation cell* again from scratch.")
    print("3. Carefully verify the output of `!python -m autorom.accept-rom-license` for ROM downloads.")
    print("4. If the error persists after 2-3 attempts, consider trying a different Colab instance or reporting the full error output.")

--- Starting Robust Installation for Atari Pong ---

0. Upgrading pip...

1. Installing/Upgrading ale-py and gymnasium[atari]...
Collecting gymnasium[atari]
  Using cached gymnasium-1.2.0-py3-none-any.whl.metadata (9.9 kB)
Using cached gymnasium-1.2.0-py3-none-any.whl (944 kB)
Installing collected packages: gymnasium
  Attempting uninstall: gymnasium
    Found existing installation: gymnasium 1.1.1
    Uninstalling gymnasium-1.1.1:
      Successfully uninstalled gymnasium-1.1.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
stable-baselines3 2.6.0 requires gymnasium<1.2.0,>=0.29.1, but you have gymnasium 1.2.0 which is incompatible.[0m[31m
[0mSuccessfully installed gymnasium-1.2.0

2. Installing/Upgrading stable-baselines3...
Collecting gymnasium<1.2.0,>=0.29.1 (from stable-baselines3)
  Using cached gymnasium-1.1.1-py3-none-any.whl.metadata (9.4 kB)
U

A.L.E: Arcade Learning Environment (version 0.11.2+ecc1138)
[Powered by Stella]


In [2]:
from stable_baselines3.common.torch_layers import NatureCNN
from torch import nn
from stable_baselines3.common.vec_env import VecFrameStack

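# @title Atari Pong AI - Training Script (Saving to a local log directory)

# This script trains an AI agent to play Atari Pong using Stable Baselines3 (PPO algorithm)
# and Gymnasium. Checkpoints and the final model are written to LOG_DIR, a path relative
# to the working directory; the Google Drive mount below is currently commented out.

# IMPORTANT:
# 1. Run the installation script first and ensure it completes successfully.
# 2. To keep models in Google Drive instead, re-enable the mount block below, point
#    LOG_DIR at a folder under the mounted drive, and authorize access when prompted.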

import gymnasium as gym
# import gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import CheckpointCallback
import os

# # --- 0. Mount Google Drive ---
# print("--- Mounting Google Drive ---")
# from google.colab import drive
# drive.mount('/content/drive')
# # print("Google Drive mounted successfully.")

# --- Configuration ---
ENV_ID = "ALE/Pong-v5"  # Gymnasium environment id for Atari Pong

# Output folder (relative to the working directory) that receives the TensorBoard
# logs, the periodic checkpoints and the final model.
LOG_DIR = "data/pong_ppo_logs/"
TOTAL_TIMESTEPS = 100_000  # training budget handed to model.learn()
SAVE_FREQ = 100_000  # checkpoint interval, counted in total timesteps
N_ENVS = 4  # number of environment copies stepped in parallel

# Make sure the output folder exists before anything tries to write into it.
os.makedirs(LOG_DIR, exist_ok=True)

# Echo the run settings so they are captured in the cell output.
for banner_line in (
    f"--- Starting Training for {ENV_ID} ---",
    f"Logs and models will be saved in: {LOG_DIR}",
    f"Total timesteps: {TOTAL_TIMESTEPS}",
    f"Number of parallel environments: {N_ENVS}",
):
    print(banner_line)

# --- Environment Setup ---
# Create a vectorized environment: N_ENVS copies of the game are stepped together,
# which speeds up rollout collection.
# NOTE: make_vec_env() does NOT add Atari preprocessing (grayscale, resize, frame skip
# handling); the policy therefore receives the environment's raw frames, stacked
# FRAME_STACK deep by VecFrameStack below.
# NOTE(review): SB3's make_atari_env() applies the standard Atari wrappers - consider
# switching, but every later evaluation/recording env must then be built the same way.
FRAME_STACK = 10  # consecutive frames concatenated into a single observation
try:
    # Importing ale_py registers the "ALE/..." environment ids with Gymnasium, so this
    # cell does not depend on the installation cell having imported it earlier.
    import ale_py  # noqa: F401

    vec_env = make_vec_env(ENV_ID, n_envs=N_ENVS, seed=0)
    vec_env = VecFrameStack(vec_env, n_stack=FRAME_STACK)
    print(f"Successfully created vectorized environment for {ENV_ID}")
except Exception as e:
    print(f"ERROR: Failed to create environment '{ENV_ID}': {e}")
    print("This error almost always means the Atari ROMs are not correctly installed or recognized.")
    print("Please go back to the installation cell, ensure it runs without errors,")
    print("and specifically check the output of `!python -m autorom.accept-rom-license`.")
    print("You might need to restart the Colab runtime (Runtime -> Restart runtime) and run the installation cell again, then this training cell.")
    # Re-raise instead of calling exit(): in a notebook exit() asks the kernel to shut
    # down (discarding all state) rather than simply stopping this cell with an error.
    raise

# --- Model Definition ---
# PPO (Proximal Policy Optimization) is a robust and widely used algorithm
# for continuous and discrete action spaces.
# `CnnPolicy` is used for environments with image observations (like Atari).
# `verbose=1` prints training progress.
# `tensorboard_log` enables logging to TensorBoard for visualization.
class SmallerCNN(NatureCNN):
    """NatureCNN feature extractor with a smaller default output size.

    The only difference from the parent class is the default ``features_dim``
    (128, i.e. half of 256); everything else is inherited unchanged.

    Args:
        observation_space: observation space forwarded to ``NatureCNN``.
        features_dim: size of the feature vector produced by the extractor.
    """

    def __init__(self, observation_space, features_dim: int = 128):
        super().__init__(observation_space, features_dim=features_dim)

# Use the smaller CNN as the policy's feature extractor.
policy_kwargs = dict(
    features_extractor_class=SmallerCNN
)

# Build the PPO agent on the vectorized, frame-stacked environment.
model = PPO(
    "CnnPolicy",                 # image-based policy
    vec_env,
    policy_kwargs=policy_kwargs,
    gamma=0.90,                  # discount factor
    n_steps=2048,                # rollout length per environment between updates
    learning_rate=2.5e-4,
    ent_coef=0.01,               # entropy bonus coefficient
    clip_range=0.2,              # PPO clipping parameter
    gae_lambda=0.95,
    n_epochs=5,                  # optimisation passes over each rollout
    verbose=1,                   # print training progress
    tensorboard_log=LOG_DIR,     # TensorBoard event files are written here
    device="auto"
)
# Report success only after construction has actually succeeded; printing it first
# would claim the model exists even when PPO() raises.
print("PPO model initialized with CnnPolicy and reduced CNN features.")

# print("PPO model initialized with CnnPolicy.")
# model = PPO(
#     "CnnPolicy",
#     vec_env,
#     gamma=0.90,
#     n_steps=2048,
#     learning_rate=2.5e-4,
#     ent_coef=0.01,
#     clip_range=0.2,
#     gae_lambda=0.95,
#     n_epochs=5,
#     verbose=1,
#     tensorboard_log=LOG_DIR,
#     # device="auto" # Automatically uses GPU if available, otherwise CPU
# )
# --- Callbacks ---
# Periodically snapshot the model during training so a run can be resumed or
# intermediate versions can be evaluated later.
# SAVE_FREQ is expressed in total timesteps, while the callback's save_freq is in
# terms of environment steps; divide by the number of parallel environments and
# never go below 1.
checkpoint_every = max(SAVE_FREQ // N_ENVS, 1)
checkpoint_callback = CheckpointCallback(
    save_freq=checkpoint_every,
    save_path=LOG_DIR,
    name_prefix="pong_ppo_model",
)
print(f"Checkpoint callback set to save every {SAVE_FREQ} total timesteps.")

# --- Training ---
import traceback  # used to show the full stack trace if training fails

print("\n--- Starting Training Process ---")
try:
    model.learn(
        total_timesteps=TOTAL_TIMESTEPS,
        callback=checkpoint_callback,
        progress_bar=True # Shows a progress bar in Colab
    )
    print("\nTraining completed!")
except KeyboardInterrupt:
    # Manual stop: fall through so whatever was learned so far still gets saved.
    print("\nTraining interrupted by user.")
except Exception as e:
    # Best effort: keep going so the (possibly partial) model is still saved, but show
    # the full traceback - the one-line message alone rarely identifies the cause.
    print(f"\nAn unexpected error occurred during training: {e}")
    traceback.print_exc()

# --- Save Final Model ---
# model.save() is given the path without extension; the printed name adds ".zip".
final_model_path = os.path.join(LOG_DIR, "pong_ppo_final_model")
model.save(final_model_path)
print(f"Final model saved to: {final_model_path}.zip")

# --- Optional: Evaluate the trained agent (after training) ---
print("\n--- Evaluation (Optional) ---")
print("Loading the final trained model for evaluation...")
eval_env = None
try:
    # Load the trained model
    loaded_model = PPO.load(final_model_path)

    # The policy was trained on observations produced by VecFrameStack(n_stack=FRAME_STACK),
    # so the evaluation environment must go through the same wrappers; a bare gym.make()
    # env yields single frames whose shape the policy rejects.
    eval_env = make_vec_env(
        ENV_ID,
        n_envs=1,
        env_kwargs={"render_mode": "rgb_array"},  # Use rgb_array for rendering frames
    )
    eval_env = VecFrameStack(eval_env, n_stack=FRAME_STACK)
    print("Evaluation environment created.")

    num_episodes = 5
    for episode in range(num_episodes):
        episode_reward = 0.0
        episode_done = False
        # Vectorized envs return only the observation batch from reset().
        obs = eval_env.reset()
        print(f"Starting evaluation episode {episode + 1}/{num_episodes}...")
        while not episode_done:
            action, _states = loaded_model.predict(obs, deterministic=True)
            # Vectorized step() returns batched (obs, rewards, dones, infos); index 0 is
            # the single evaluation environment.
            obs, rewards, dones, infos = eval_env.step(action)
            episode_reward += float(rewards[0])
            episode_done = bool(dones[0])
            # You can optionally render frames here if you want to save a video
            # frame = eval_env.render() # This would return an array, you'd need to save it
        print(f"Episode {episode + 1} finished with reward: {episode_reward}")
    print("Evaluation complete.")

except Exception as e:
    print(f"Error during evaluation: {e}")
    print("Evaluation skipped. You can manually load and evaluate the model later.")
finally:
    # Release the emulator even when evaluation fails part-way through.
    if eval_env is not None:
        eval_env.close()

# Tell the user how to inspect the training curves written under LOG_DIR.
for hint_line in (
    "\nTo view training progress, you can use TensorBoard:",
    "Load TensorBoard in a new Colab cell with: %load_ext tensorboard",
    f"Then run: %tensorboard --logdir {LOG_DIR}",
):
    print(hint_line)

2025-07-21 14:49:33.381278: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-21 14:49:33.485231: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753134573.525056 3870342 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753134573.536612 3870342 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1753134573.624591 3870342 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

--- Starting Training for ALE/Pong-v5 ---
Logs and models will be saved in: data/pong_ppo_logs/
Total timesteps: 100000
Number of parallel environments: 4
Successfully created vectorized environment for ALE/Pong-v5
PPO model initialized with CnnPolicy and reduced CNN features.
Using cuda device
Wrapping the env in a VecTransposeImage.
Checkpoint callback set to save every 100000 total timesteps.

--- Starting Training Process ---
Logging to data/pong_ppo_logs/PPO_10


Output()

: 

: 

: 

In [None]:
# @title Record Atari Pong Game Video

# This script loads a trained Stable Baselines3 model for Atari Pong
# from the Colab runtime disk and records a video of its gameplay.

import gymnasium as gym
from stable_baselines3 import PPO
from gymnasium.wrappers import RecordVideo
import os

# --- Configuration ---
ENV_ID = "ALE/Pong-v5"  # Gymnasium environment id for Atari Pong

# Location of the trained model archive on the runtime disk; this must match the
# path the training script saved its final model to.
MODEL_PATH_ON_RUNTIME_DISK = "./data/pong_ppo_logs/pong_ppo_final_model.zip"

# Output folder for the recorded video and the prefix used for the video filename.
VIDEO_DIR = "./pong_game_videos/"
VIDEO_PREFIX = "pong_agent_game"

# Make sure the video folder exists before the recorder writes into it.
os.makedirs(VIDEO_DIR, exist_ok=True)

# Echo the settings so they are captured in the cell output.
for banner_line in (
    f"--- Starting Video Recording for {ENV_ID} ---",
    f"Loading model from: {MODEL_PATH_ON_RUNTIME_DISK}",
    f"Video will be saved to: {VIDEO_DIR}",
):
    print(banner_line)

# --- Load the Trained Model ---
try:
    model = PPO.load(MODEL_PATH_ON_RUNTIME_DISK)
    print("Model loaded successfully!")
except Exception as e:
    print(f"ERROR: Could not load model from {MODEL_PATH_ON_RUNTIME_DISK}: {e}")
    print("Please ensure the training script completed and saved the model to this path.")
    print("If you restarted the runtime, the model might have been deleted. You might need to re-run training or load from Google Drive.")
    # Re-raise instead of calling exit(): in a notebook exit() asks the kernel to shut
    # down (discarding all state) rather than simply stopping this cell with an error.
    raise

# --- Create Environment with Video Recording Wrapper ---
# Importing ale_py registers the "ALE/..." environment ids with Gymnasium. Doing it here
# keeps this cell usable right after a runtime restart, when no earlier cell has
# imported it yet.
import ale_py

if hasattr(gym, "register_envs"):
    # Explicit registration hook offered by newer Gymnasium releases; guarded because
    # older releases do not provide it.
    gym.register_envs(ale_py)

# The RecordVideo wrapper automatically saves a video of each recorded episode.
# `video_folder`: directory to save videos.
# `episode_trigger`: called with the episode index; returning True records that episode.
# `name_prefix`: prefix of the generated video filename.
# `disable_logger`: suppresses some logging messages from the wrapper.
# render_mode="rgb_array" is required so the wrapper can grab frames.
env = gym.make(ENV_ID, render_mode="rgb_array")
env = RecordVideo(
    env,
    video_folder=VIDEO_DIR,
    episode_trigger=lambda episode_index: True,  # Record every episode
    name_prefix=VIDEO_PREFIX,
    disable_logger=True,
)
print(f"Environment '{ENV_ID}' created and wrapped for video recording.")

# --- Play One Episode and Record ---
import numpy as np  # used only for the frame-stack buffer below

print("\nStarting game episode and recording video...")

try:
    # The policy may have been trained on several consecutive frames concatenated along
    # the channel axis (VecFrameStack), while this environment returns one frame at a
    # time. Derive the stack depth from the loaded model so the observations handed to
    # predict() have the shape the policy expects.
    # Assumes the environment's observations are channel-last images (H, W, C).
    frame_shape = env.observation_space.shape
    policy_shape = model.observation_space.shape
    frame_channels = frame_shape[-1]
    if policy_shape[1:] == frame_shape[:-1]:
        policy_channels = policy_shape[0]    # policy space stored channel-first
    elif policy_shape[:-1] == frame_shape[:-1]:
        policy_channels = policy_shape[-1]   # policy space stored channel-last
    else:
        raise ValueError(
            f"Model observation shape {policy_shape} is incompatible with "
            f"environment frames of shape {frame_shape}."
        )
    n_stack, leftover = divmod(policy_channels, frame_channels)
    if n_stack < 1 or leftover:
        raise ValueError(
            f"Model expects {policy_channels} channels, which is not a whole number "
            f"of {frame_channels}-channel frames."
        )

    # Rolling buffer: starts zero-filled with the newest frame in the last channel slot.
    # With n_stack == 1 this is just the current frame.
    stacked_obs = np.zeros(
        frame_shape[:-1] + (frame_channels * n_stack,),
        dtype=env.observation_space.dtype,
    )

    obs, info = env.reset()
    stacked_obs[..., -frame_channels:] = obs
    done = False
    truncated = False
    episode_reward = 0

    while not done and not truncated:
        action, _states = model.predict(stacked_obs, deterministic=True) # deterministic=True for consistent playback
        obs, reward, done, truncated, info = env.step(action)
        # Drop the oldest frame and append the newest one at the end.
        stacked_obs = np.roll(stacked_obs, -frame_channels, axis=-1)
        stacked_obs[..., -frame_channels:] = obs
        episode_reward += reward

    print(f"\nGame episode finished. Total reward: {episode_reward}")
finally:
    # --- Close Environment and Finalize Video ---
    # Closing is what lets the RecordVideo wrapper finalize the video file, so do it
    # even if the episode loop fails.
    env.close()
    print("Environment closed. Video recording finalized.")

# --- Instructions for Downloading Video ---
# Print a short how-to for fetching the recorded file from the runtime disk.
download_steps = (
    "\n--- Video Saved! ---",
    f"Your video should be saved in the '{VIDEO_DIR}' directory on the Colab runtime disk.",
    "To download it:",
    "1. Click the 'Files' icon (folder icon) on the left sidebar in Colab.",
    f"2. Navigate into the '{VIDEO_DIR}' folder.",
    f"3. Look for a file named something like '{VIDEO_PREFIX}-episode-0.mp4'.",
    "4. Right-click on the video file and select 'Download'.",
    "\nRemember: Colab runtime disk is temporary. Download your video before the session ends!",
)
for step_text in download_steps:
    print(step_text)

--- Starting Video Recording for ALE/Pong-v5 ---
Loading model from: ./data/pong_ppo_logs/pong_ppo_final_model.zip
Video will be saved to: ./pong_game_videos/
Model loaded successfully!
Environment 'ALE/Pong-v5' created and wrapped for video recording.

Starting game episode and recording video...


  logger.warn(



Game episode finished. Total reward: -16.0
Environment closed. Video recording finalized.

--- Video Saved! ---
Your video should be saved in the './pong_game_videos/' directory on the Colab runtime disk.
To download it:
1. Click the 'Files' icon (folder icon) on the left sidebar in Colab.
2. Navigate into the './pong_game_videos/' folder.
3. Look for a file named something like 'pong_agent_game-episode-0.mp4'.
4. Right-click on the video file and select 'Download'.

Remember: Colab runtime disk is temporary. Download your video before the session ends!
