# MsPacman

![MsPacman](https://media4.giphy.com/media/v1.Y2lkPTc5MGI3NjExYXl1dzV6ZzU3d25oODNieTUxNXhwMmY3aW42ZXV5ZGhjZWJrdmRobiZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/WPVKBGZYiIdSU/giphy.webp)

Install the main Gymnasium library, the Atari 2600 game family dependencies, and the other Python libraries we'll be using for data manipulation & visualization.

In [None]:
# Install the swig, cmake and ffmpeg system packages without prompting (-y).
!apt-get install -y swig cmake ffmpeg

In [None]:
# Install Gymnasium, its Atari extras (with ROM licence acceptance) and shimmy.
# NOTE(review): the import cell further down imports `gym`, not `gymnasium` — confirm which package is intended.
!pip install gymnasium
!pip install "gymnasium[atari, accept-rom-license]"
!pip install shimmy

In [None]:
!sudo apt-get update
!sudo apt-get install -y python3-opengl
!apt install ffmpeg
!apt install xvfb
!pip3 install pyvirtualdisplay

!pip install moviepy==1.0.3

In [None]:
# Send signal 9 to the current Python process, presumably to force the
# notebook runtime to restart after the installs above.
import os
os.kill(os.getpid(), 9)

## Import

In [29]:
import gym
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from collections import deque
import numpy as np
import random
import datetime
import base64
import IPython

# Multiplier DQNAgent.replay applies to non-zero-reward transitions; it is also
# embedded in the file names of the training videos.
reward_number = 0.37

In [None]:
# Show the keys of gym's environment registry.
gym.envs.registration.registry.keys()

# CNN

In [2]:
class CNN(nn.Module):
    """Convolutional network mapping an RGB frame to 9 action values.

    Three conv -> max-pool -> ReLU stages feed five fully connected layers.
    ``fc1`` expects 1792 flattened features, which is what the conv stack
    yields for a 3 x 210 x 160 input (64 channels x 7 x 4).
    """

    def __init__(self):
        super().__init__()
        self.convolution1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.convolution2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5)
        self.convolution3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=7)
        self.fc1 = nn.Linear(in_features=1792, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=256)
        self.fc4 = nn.Linear(in_features=256, out_features=32)
        self.fc5 = nn.Linear(in_features=32, out_features=9)

    def forward(self, x):
        """Return the raw (un-activated) action values for a batch of frames.

        :param x: float tensor of shape ``(batch, 3, H, W)``
        :return: tensor of shape ``(batch, 9)`` on the network's device
        """
        # Follow the weights instead of hard-coding CUDA, so the network also
        # runs on CPU-only machines; on a CUDA model this is the same move.
        x = x.to(self.convolution1.weight.device)
        x = F.relu(F.max_pool2d(self.convolution1(x), 3))
        x = F.relu(F.max_pool2d(self.convolution2(x), 3))
        x = F.relu(F.max_pool2d(self.convolution3(x), 3, 2))
        # Flatten everything except the batch dimension for the linear layers.
        x = x.reshape(x.size(0), - 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return x

In [3]:
# Put the network on the GPU when one is available and fall back to the CPU
# otherwise, instead of failing on machines without CUDA.
model = CNN()
model = model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

In [4]:
# Mean-squared-error criterion and an Adam optimizer over the global `model`'s parameters.
# NOTE(review): lr=0.1 is far above Adam's default of 1e-3 — confirm this is intentional.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.1)

# DQN 

In [42]:
class DQNAgent:
    """Epsilon-greedy agent with two replay buffers around the shared network.

    The agent wraps the module-level ``model`` and, when training, relies on
    the module-level ``criterion``, ``optimizer`` and ``reward_number``.
    """

    # Out of every 64 replayed transitions, this many are drawn from the
    # non-zero-reward buffer; the rest come from the zero-reward buffer.
    _NONZERO_PER_64 = 5

    def __init__(self, action_size = 9):
        """Create an agent choosing among ``action_size`` discrete actions."""
        self.state_size = 4         # not read anywhere in this class
        self.action_size = action_size
        self.memory_n = deque(maxlen=2000)  # transitions whose reward != 0
        self.memory_p = deque(maxlen=2000)  # transitions whose reward == 0
        self.gamma = 1.0    # discount rate (not read anywhere in this class)
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.5
        self.epsilon_decay = 0.995
        self.learning_rate = 0.1    # not read here; the global optimizer sets its own lr
        self.model = model

    def remember(self, state, action, reward, next_state, done):
        """Store a transition in ``memory_p`` (reward == 0) or ``memory_n`` (otherwise)."""
        if reward == 0:
            self.memory_p.append((state, action, reward, next_state, done))
        else:
            self.memory_n.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action for ``state`` (a numpy array the model accepts).

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the index of the largest model output for ``state``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        state_tensor = torch.from_numpy(state).float()
        act_values = self.model(state_tensor).cpu().detach().numpy()
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        """Train on ``batch_size`` stored transitions, then decay ``epsilon``.

        When ``memory_n`` holds more than ``batch_size / 2`` transitions the
        batch mixes both buffers (5 non-zero-reward samples per 64, at least
        one); otherwise it is drawn from ``memory_p`` only.

        :raises ValueError: if ``memory_p`` holds fewer transitions than the
            number that must be sampled from it.
        """
        if len(self.memory_n) > batch_size / 2:
            # Scale the original 5/59 split with batch_size instead of
            # assuming batch_size == 64.
            n_nonzero = max(1, batch_size * self._NONZERO_PER_64 // 64)
            n_zero = batch_size - n_nonzero
            minibatch = (random.sample(self.memory_p, n_zero)
                         + random.sample(self.memory_n, n_nonzero))
            random.shuffle(minibatch)
        else:
            minibatch = random.sample(self.memory_p, batch_size)
        for state, action, reward, next_state, done in minibatch:
            # The targets are built from the model's own output on next_state.
            ns_model = self.model(torch.from_numpy(next_state).float()).cpu().detach().numpy()
            if reward == 0:
                # Scale the currently best action value up by a factor 1.0001.
                best_action = np.argmax(ns_model[0])
                target = 1.0001 * np.amax(ns_model[0])
                target_f = ns_model
                target_f[0][best_action] = target
            else:
                # Replace the taken action's value with reward_number * min and
                # one randomly chosen other action's value with 0.0001 * max.
                target = reward_number * np.amin(ns_model[0])
                target_max = 0.0001 * np.amax(ns_model[0])
                target_f = ns_model
                target_f[0][action] = target
                other_actions = [i for i in range(self.action_size) if i != action]
                target_f[0][random.choice(other_actions)] = target_max
            # The network is fitted with next_state (not state) as its input.
            self.train(next_state, target_f, epochs=1)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def train(self, input, target, epochs = 1):
        """Run ``epochs`` optimizer steps on one ``(input, target)`` pair.

        :param input: numpy array fed to the model
        :param target: numpy array with the same shape as the model output
        :param epochs: number of gradient steps to take
        """
        # Follow the model's device rather than hard-coding CUDA.
        device = next(self.model.parameters()).device
        input = torch.from_numpy(input).float().to(device)
        target = torch.from_numpy(target).float().to(device)
        for _ in range(epochs):
            y_pred = self.model(input)
            # NOTE(review): the loss is the *negated* MSE, so each optimizer
            # step increases the prediction/target error — confirm intended.
            loss = - criterion(y_pred, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    def load_all(self, name):
        """Restore both replay buffers and the model weights saved by ``save_all``."""
        device = next(self.model.parameters()).device
        # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
        loaded = torch.load(name, map_location=device)
        self.memory_n = loaded['memory_n']
        self.memory_p = loaded['memory_p']
        self.model.load_state_dict(loaded['state'])

    def save_all(self, name):
        """Save the model weights together with both replay buffers to ``name``."""
        torch.save({'state': self.model.state_dict(),
                    'memory_n': self.memory_n,
                    'memory_p': self.memory_p
                   }, name)

    def load(self, name):
        """Load model weights previously written by ``save``."""
        device = next(self.model.parameters()).device
        # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
        self.model.load_state_dict(torch.load(name, map_location=device))

    def save(self, name):
        """Save only the model weights to ``name``."""
        torch.save(self.model.state_dict(), name)

# Environment

Create the MsPacman environment and inspect its observation and action spaces.
Passing a seed to `env.reset()` re-seeds the environment's RNG; without one, each reset produces a random initial state.

In [41]:
# Build the environment with off-screen RGB rendering and report the
# observation shape and the number of discrete actions.
env = gym.make("MsPacman-v4", render_mode="rgb_array")
state_size, action_size = env.observation_space.shape, env.action_space.n
print(state_size, action_size)

(210, 160, 3) 9


# Train Agent

In [49]:
# if you have already an agent
agent = DQNAgent()
agent.load('/kaggle/working/Pacman/Reward_number_0.37_Frames:_666_Episode_4_Date_2024-07-01 14:33:28.080896.pt')

In [None]:
# Train a fresh agent for EPISODES episodes of at most 1000 steps each.
# Every 10th episode is recorded to an .avi file in OUTPUT_DIR, and the model
# weights are saved there whenever an episode ends.
import os

agent = DQNAgent()

done = False
batch_size = 64

EPISODES = 50
OUTPUT_DIR = '/kaggle/working/Pacman/'
# Neither cv2.VideoWriter nor torch.save creates missing directories.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for e in range(EPISODES):
    # Only every 10th episode gets a video writer; the others record nothing.
    vw = None
    if e % 10 == 0:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        vw = cv2.VideoWriter(OUTPUT_DIR + "Reward_number_" + str(reward_number) + "_" + str(e) + str(datetime.datetime.now()) +  '.avi', fourcc, 4, (160,210))
    state, _ = env.reset()
    # (210, 160, 3) frame -> (1, 3, 210, 160) array scaled by 1/255.
    state = np.reshape(state, (1, 210,160,3)).transpose(0,3,1,2)/255
    try:
        for step in range(1000):
            if step % 100 == 0:
                print(step)
            action = agent.act(state)
            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            if vw is not None:
                # The environment renders RGB; OpenCV writers expect BGR.
                vw.write(cv2.cvtColor(next_state, cv2.COLOR_RGB2BGR))
            # Collapse the reward to two levels: untouched when it is zero and
            # the episode continues, 10 for any other outcome.
            reward = 10 if done or reward != 0 else reward
            next_state = np.reshape(next_state, (1, 210,160,3)).transpose(0,3,1,2)/255
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                agent.save(OUTPUT_DIR + 'agent.pt')
                print("episode: {}/{}, score: {}, e: {:.2}".format(e+1, EPISODES , step, agent.epsilon))
                break
            if len(agent.memory_p) > batch_size and len(agent.memory_n) > batch_size/2:
                agent.replay(batch_size)
    finally:
        # Close the recording even when the episode hits the step cap or an
        # exception interrupts training.
        if vw is not None:
            vw.release()

# Evaluate Agent

In [37]:
def _frame_to_model_input(frame):
  """Return an (H, W, C) ``frame`` as a (1, C, H, W) array divided by 255.

  This is the same layout and scaling the training loop applies before
  calling ``act``. Inputs that are not 3-dimensional are returned unchanged.
  """
  frame = np.asarray(frame)
  if frame.ndim != 3:
    return frame
  return np.expand_dims(frame, 0).transpose(0, 3, 1, 2) / 255


def evaluate_agent(env, max_steps, n_eval_episodes, policy):
  """
  Evaluate the agent for ``n_eval_episodes`` episodes and returns average reward and std of reward.
  :param env: The evaluation environment
  :param max_steps: Maximum number of steps played per episode
  :param n_eval_episodes: Number of episode to evaluate the agent
  :param policy: Object exposing ``act(state)`` that returns the action to play
  :return: Tuple ``(mean_reward, std_reward)`` over the per-episode reward sums
  """
  episode_rewards = []
  for _ in range(n_eval_episodes):
    state, _ = env.reset()
    total_rewards_ep = 0

    for _ in range(max_steps):
      # Raw environment frames must be converted to the model's input layout;
      # feeding them unchanged breaks as soon as the policy queries its network.
      action = policy.act(_frame_to_model_input(state))
      new_state, reward, terminated, truncated, _ = env.step(action)
      total_rewards_ep += reward

      if terminated or truncated:
        break
      state = new_state
    episode_rewards.append(total_rewards_ep)
  mean_reward = np.mean(episode_rewards)
  std_reward = np.std(episode_rewards)

  return mean_reward, std_reward

# Record Video

In [38]:
# Evaluate the agent for 10 episodes of at most 10000 steps on an environment
# wrapped in RecordVideo, with "videos/replay-pacman" as the video folder.
env = gym.wrappers.RecordVideo(
    gym.make("MsPacman-v4", render_mode="rgb_array"),
    "videos/replay-pacman",
)
evaluate_agent(env, max_steps=10000, n_eval_episodes=10, policy=agent)

  logger.warn(
  logger.warn(
  if not isinstance(terminated, (bool, np.bool8)):


Moviepy - Building video /kaggle/working/videos/replay-pacman/rl-video-episode-0.mp4.
Moviepy - Writing video /kaggle/working/videos/replay-pacman/rl-video-episode-0.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /kaggle/working/videos/replay-pacman/rl-video-episode-0.mp4
Moviepy - Building video /kaggle/working/videos/replay-pacman/rl-video-episode-1.mp4.
Moviepy - Writing video /kaggle/working/videos/replay-pacman/rl-video-episode-1.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /kaggle/working/videos/replay-pacman/rl-video-episode-1.mp4
Moviepy - Building video /kaggle/working/videos/replay-pacman/rl-video-episode-8.mp4.
Moviepy - Writing video /kaggle/working/videos/replay-pacman/rl-video-episode-8.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /kaggle/working/videos/replay-pacman/rl-video-episode-8.mp4


(217.0, 74.30343195303969)

In [39]:
def embed_mp4(filename):
    """Embeds an mp4 file in the notebook.

    :param filename: path of the mp4 file to embed
    :return: an ``IPython.display.HTML`` object holding a ``<video>`` tag with
        the file's contents inlined as a base64 data URI
    """
    # Read through a context manager so the file handle is always closed.
    with open(filename, 'rb') as video_file:
        video = video_file.read()
    b64 = base64.b64encode(video)
    tag = '''
    <video width="840" height="480" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4">
    Your browser does not support the video tag.
    </video>'''.format(b64.decode())
    return IPython.display.HTML(tag)

# Display the recorded replay of evaluation episode 8 inline.
embed_mp4('/kaggle/working/videos/replay-pacman/rl-video-episode-8.mp4')

# Push to HF

In [35]:
from huggingface_hub import notebook_login
from huggingface_hub import HfApi, snapshot_download
from huggingface_hub.repocard import metadata_eval_result, metadata_save

from pathlib import Path
import datetime
import json
import imageio

import tempfile

In [36]:
# Log in to the Hugging Face Hub, then configure git to store credentials.
notebook_login()
!git config --global credential.helper store

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [54]:
def push_to_hub(repo_id,
                model,
                hyperparameters,
                eval_env,
                video_path,
                video_fps=30
                ):
  """
  Evaluate, attach a replay video and Upload a model to Hugging Face Hub.
  This method does the complete pipeline:
  - It evaluates the model
  - It generates the model card
  - It copies an existing replay video of the agent
  - It pushes everything to the Hub

  :param repo_id: repo_id: id of the model repository from the Hugging Face Hub
  :param model: the object to save with ``torch.save``; it is also passed to
    ``evaluate_agent`` as the policy
  :param hyperparameters: training hyperparameters; must contain the keys
    "env_id", "max_t" and "n_evaluation_episodes" and be JSON-serializable
  :param eval_env: evaluation environment
  :param video_path: path of an already recorded video, uploaded as replay.mp4
  :param video_fps: accepted for backward compatibility; not used by this function
  """
  import shutil

  _, repo_name = repo_id.split("/")
  api = HfApi()

  # Step 1: Create the repo
  repo_url = api.create_repo(
        repo_id=repo_id,
        exist_ok=True,
  )

  with tempfile.TemporaryDirectory() as tmpdirname:
    local_directory = Path(tmpdirname)

    # Step 2: Save the model (the whole object is pickled, not just a state dict)
    torch.save(model, local_directory / "model.pt")

    # Step 3: Save the hyperparameters to JSON
    with open(local_directory / "hyperparameters.json", "w") as outfile:
      json.dump(hyperparameters, outfile)

    # Step 4: Evaluate the model and build JSON
    mean_reward, std_reward = evaluate_agent(eval_env,
                                            hyperparameters["max_t"],
                                            hyperparameters["n_evaluation_episodes"],
                                            model)
    # Get datetime
    eval_datetime = datetime.datetime.now()
    eval_form_datetime = eval_datetime.isoformat()

    evaluate_data = {
          "env_id": hyperparameters["env_id"],
          "mean_reward": mean_reward,
          "n_evaluation_episodes": hyperparameters["n_evaluation_episodes"],
          "eval_datetime": eval_form_datetime,
    }

    # Write a JSON file
    with open(local_directory / "results.json", "w") as outfile:
        json.dump(evaluate_data, outfile)

    # Step 5: Create the model card
    env_name = hyperparameters["env_id"]

    metadata = {}
    metadata["tags"] = [
          env_name,
          "dqn",
          "reinforcement-learning",
          "custom-implementation",
          "deep-rl-class"
      ]

    # Add metrics (named eval_metadata so the builtin eval() is not shadowed)
    eval_metadata = metadata_eval_result(
        model_pretty_name=repo_name,
        task_pretty_name="reinforcement-learning",
        task_id="reinforcement-learning",
        metrics_pretty_name="mean_reward",
        metrics_id="mean_reward",
        metrics_value=f"{mean_reward:.2f} +/- {std_reward:.2f}",
        dataset_pretty_name=env_name,
        dataset_id=env_name,
      )

    # Merges both dictionaries
    metadata = {**metadata, **eval_metadata}

    model_card = f"""
  # **DQN** Agent playing **{env_name}**
  Details see: https://www.kaggle.com/code/syedjarullahhisham/drl-huggingface-extra-unit-3-mspacmandqn-scratch
  """

    # Keep an existing README if there is one, otherwise start from the card.
    readme_path = local_directory / "README.md"
    readme = ""
    if readme_path.exists():
        with readme_path.open("r", encoding="utf8") as f:
          readme = f.read()
    else:
      readme = model_card

    with readme_path.open("w", encoding="utf-8") as f:
      f.write(readme)

    # Save our metrics to Readme metadata
    metadata_save(readme_path, metadata)

    # Step 6: Copy the already recorded video into the upload folder
    dst_path = local_directory / "replay.mp4"
    shutil.copy(video_path, dst_path)

    # Step 7. Push everything to the Hub
    api.upload_folder(
          repo_id=repo_id,
          folder_path=local_directory,
          path_in_repo=".",
    )

    print(f"Your model is pushed to the Hub. You can view your model here: {repo_url}")

In [55]:
# Settings uploaded with the model; push_to_hub reads "env_id", "max_t" and
# "n_evaluation_episodes" from this mapping.
hyperparameters = dict(
    h_size=64,
    n_training_episodes=5,
    n_evaluation_episodes=10,
    max_t=10000000,
    gamma=1.0,
    lr=0.1,
    env_id="MsPacman-v4",
    state_space=state_size,
    action_space=action_size,
)

repo_id = "hishamcse/DQN-MsPacman-v4"

# Evaluation environment wrapped in RecordVideo.
eval_env = gym.wrappers.RecordVideo(
    gym.make("MsPacman-v4", render_mode="rgb_array"),
    "videos/replay-pacman",
)

# Upload the agent, its hyperparameters and the episode-8 replay video.
push_to_hub(
    repo_id,
    agent,
    hyperparameters,
    eval_env,
    "/kaggle/working/videos/replay-pacman/rl-video-episode-8.mp4",
    video_fps=50,
)

  logger.warn(
  logger.warn(


Moviepy - Building video /kaggle/working/videos/replay-pacman/rl-video-episode-0.mp4.
Moviepy - Writing video /kaggle/working/videos/replay-pacman/rl-video-episode-0.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /kaggle/working/videos/replay-pacman/rl-video-episode-0.mp4
Moviepy - Building video /kaggle/working/videos/replay-pacman/rl-video-episode-1.mp4.
Moviepy - Writing video /kaggle/working/videos/replay-pacman/rl-video-episode-1.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /kaggle/working/videos/replay-pacman/rl-video-episode-1.mp4
Moviepy - Building video /kaggle/working/videos/replay-pacman/rl-video-episode-8.mp4.
Moviepy - Writing video /kaggle/working/videos/replay-pacman/rl-video-episode-8.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /kaggle/working/videos/replay-pacman/rl-video-episode-8.mp4


model.pt:   0%|          | 0.00/2.65M [00:00<?, ?B/s]

Your model is pushed to the Hub. You can view your model here: https://huggingface.co/hishamcse/DQN-MsPacman-v4
