In [2]:
import subprocess
import sys

print("Installing dependencies for Kaggle environment...")

# Uninstall conflicting packages first
print("Removing conflicting packages...")
subprocess.run([sys.executable, "-m", "pip", "uninstall", "-y", "-q", "keras", "tensorflow"], 
               stderr=subprocess.DEVNULL)

# Install compatible versions
dependencies = [
    "numpy<2.0",
    "scipy<1.14",
    "protobuf<4.0",
    "tensorboard==2.15.0",
    "stable-baselines3[extra]==2.3.2",
    "ale-py==0.8.1",
    "gymnasium[atari,accept-rom-license]==0.29.1",
    "shimmy[atari]",
]

for dep in dependencies:
    print(f"Installing {dep}...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", dep])

print("\nAll dependencies installed successfully")
print("\nVerifying installation...")
try:
    import ale_py
    import gymnasium as gym
    from stable_baselines3 import DQN
    print("All imports successful!")
except Exception as e:
    print(f"Import error: {e}")

Installing dependencies for Kaggle environment...
Removing conflicting packages...
Installing numpy<2.0...
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 61.0/61.0 kB 2.0 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 18.3/18.3 MB 80.6 MB/s eta 0:00:00


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
dopamine-rl 4.1.2 requires tensorflow>=2.2.0, which is not installed.
bigframes 2.12.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.
tensorflow-decision-forests 1.11.0 requires tensorflow==2.18.0, which is not installed.
datasets 4.4.1 requires pyarrow>=21.0.0, but you have pyarrow 19.0.1 which is incompatible.
onnx 1.18.0 requires protobuf>=4.25.1, but you have protobuf 3.20.3 which is incompatible.
kaggle-environments 1.18.0 requires stable-baselines3==2.1.0, but you have stable-baselines3 2.3.2 which is incompatible.
cesium 0.12.4 requires numpy<3.0,>=2.0, but you have numpy 1.26.4 which is incompatible.
google-colab 1.0.0 requires notebook==6.5.7, but you have notebook 6.5.4 which is incompatible.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.2.3 which is inc

Installing scipy<1.14...
Installing protobuf<4.0...
Installing tensorboard==2.15.0...
Installing stable-baselines3[extra]==2.3.2...


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
dopamine-rl 4.1.2 requires tensorflow>=2.2.0, which is not installed.
bigframes 2.12.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.
tensorflow-decision-forests 1.11.0 requires tensorflow==2.18.0, which is not installed.
mkl-umath 0.1.1 requires numpy<1.27.0,>=1.26.4, but you have numpy 2.2.6 which is incompatible.
mkl-random 1.2.4 requires numpy<1.27.0,>=1.26.4, but you have numpy 2.2.6 which is incompatible.
mkl-fft 1.3.8 requires numpy<1.27.0,>=1.26.4, but you have numpy 2.2.6 which is incompatible.
numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.2.6 which is incompatible.
datasets 4.4.1 requires pyarrow>=21.0.0, but you have pyarrow 19.0.1 which is incompatible.
onnx 1.18.0 requires protobuf>=4.25.1, but you have protobuf 3.20.3 which is incompatible.
ydata-profili

Installing ale-py==0.8.1...
Installing gymnasium[atari,accept-rom-license]==0.29.1...
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 953.9/953.9 kB 13.6 MB/s eta 0:00:00


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
dopamine-rl 4.1.2 requires tensorflow>=2.2.0, which is not installed.
kaggle-environments 1.18.0 requires gymnasium==0.29.0, but you have gymnasium 0.29.1 which is incompatible.
kaggle-environments 1.18.0 requires shimmy>=1.2.1, but you have shimmy 0.2.1 which is incompatible.
kaggle-environments 1.18.0 requires stable-baselines3==2.1.0, but you have stable-baselines3 2.3.2 which is incompatible.
dopamine-rl 4.1.2 requires ale-py>=0.10.1, but you have ale-py 0.8.1 which is incompatible.
dopamine-rl 4.1.2 requires gymnasium>=1.0.0, but you have gymnasium 0.29.1 which is incompatible.


Installing shimmy[atari]...

All dependencies installed successfully

Verifying installation...
All imports successful!


In [3]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import warnings
warnings.filterwarnings('ignore')

import ale_py
import gymnasium as gym
gym.register_envs(ale_py)
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import BaseCallback
import numpy as np
import json
import torch
import gc

class LoggingCallback(BaseCallback):
    def __init__(self, verbose=0):
        super(LoggingCallback, self).__init__(verbose)
        self.episode_rewards = []
        self.episode_lengths = []
        self.current_reward = 0
        self.current_length = 0
        
    def _on_step(self):
        self.current_reward += self.locals['rewards'][0]
        self.current_length += 1
        
        if self.locals['dones'][0]:
            self.episode_rewards.append(self.current_reward)
            self.episode_lengths.append(self.current_length)
            self.current_reward = 0
            self.current_length = 0
            
            if len(self.episode_rewards) % 10 == 0:
                gc.collect()
                torch.cuda.empty_cache()
        return True

CONFIG = {
    "lr": 0.0001,
    "gamma": 0.99,
    "batch_size": 32,
    "eps_start": 1.0,
    "eps_end": 0.01,
    "eps_decay": 0.1,
    "policy": "MlpPolicy"
}

print(f"Training Configuration 1:")
print(f"Learning Rate: {CONFIG['lr']}")
print(f"Gamma: {CONFIG['gamma']}")
print(f"Batch Size: {CONFIG['batch_size']}")
print(f"Epsilon: {CONFIG['eps_start']} -> {CONFIG['eps_end']} (decay: {CONFIG['eps_decay']})")

gc.collect()
torch.cuda.empty_cache()

env = gym.make("ALE/SpaceInvaders-v5", frameskip=4)
env = DummyVecEnv([lambda: env])

model = DQN(
    CONFIG["policy"],
    env,
    learning_rate=CONFIG["lr"],
    gamma=CONFIG["gamma"],
    batch_size=CONFIG["batch_size"],
    buffer_size=20000,
    learning_starts=5000,
    exploration_initial_eps=CONFIG["eps_start"],
    exploration_final_eps=CONFIG["eps_end"],
    exploration_fraction=CONFIG["eps_decay"],
    target_update_interval=500,
    train_freq=4,
    gradient_steps=1,
    tensorboard_log="./logs/mlp-policy-1/",
    verbose=1,
    device="cuda" if torch.cuda.is_available() else "cpu"
)

callback = LoggingCallback()

model.learn(
    total_timesteps=300000,
    tb_log_name="SpaceInvaders_MlpPolicy_1",
    callback=callback,
    progress_bar=True
)

mean_reward, std_reward = evaluate_policy(
    model,
    env,
    n_eval_episodes=10,
    deterministic=True
)

results = {
    "config": CONFIG,
    "mean_reward": float(mean_reward),
    "std_reward": float(std_reward),
    "episode_rewards": [float(r) for r in callback.episode_rewards],
    "episode_lengths": [int(l) for l in callback.episode_lengths],
    "total_episodes": len(callback.episode_rewards)
}

model.save("/kaggle/working/train-mlp-policy-1")

with open("/kaggle/working/train-mlp-policy-1-results.json", "w") as f:
    json.dump(results, f, indent=2)

print(f"\nTraining Complete")
print(f"Mean Reward: {mean_reward:.2f} +/- {std_reward:.2f}")
print(f"Total Episodes: {len(callback.episode_rewards)}")
print(f"Model saved to: /kaggle/working/train-mlp-policy-1.zip")
print(f"Results saved to: /kaggle/working/train-mlp-policy-1-results.json")

env.close()
gc.collect()
torch.cuda.empty_cache()

Training Configuration 1:
Learning Rate: 0.0001
Gamma: 0.99
Batch Size: 32
Epsilon: 1.0 -> 0.01 (decay: 0.1)


A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


Using cuda device
Wrapping the env in a VecTransposeImage.
Logging to ./logs/mlp-policy-1/SpaceInvaders_MlpPolicy_1_1


Output()

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.936    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 891      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1926     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.865    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 939      |
|    time_elapsed     | 4        |
|    total_timesteps  | 4104     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.803    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 688      |
|    time_elapsed     | 8        |
|    total_timesteps  | 5968     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss           


Training Complete
Mean Reward: 120.50 +/- 33.35
Total Episodes: 478
Model saved to: /kaggle/working/train-mlp-policy-1.zip
Results saved to: /kaggle/working/train-mlp-policy-1-results.json


In [None]:
from IPython.display import FileLink
FileLink('train-cnn-policy-5.zip')