## 1. Library Imports

In [None]:
# Import necessary libraries
import gym  
from stable_baselines3 import PPO  
from stable_baselines3.common.vec_env import VecFrameStack  
from stable_baselines3.common.evaluation import evaluate_policy 
from stable_baselines3.common.env_util import make_atari_env 
import os 

## 2. Environment Setup

In [None]:
# Build four seeded copies of the Atlantis environment and stack the last
# four frames of each, so a single observation carries temporal context.
env = VecFrameStack(
    make_atari_env('ALE/Atlantis-v5', n_envs=4, seed=0),
    n_stack=4,
)

## 3. Model Initialization

In [None]:
# TensorBoard logs are written under Training/Logs
log_path = os.path.join('Training', 'Logs')

# PPO agent with a CNN policy, bound to the frame-stacked environment
model = PPO(
    'CnnPolicy',
    env,
    tensorboard_log=log_path,
    verbose=1,
)

## 4. Training the Model

In [None]:
# Run PPO training for two million environment timesteps
model.learn(total_timesteps=2_000_000)

## 5. Model Saving and Loading

In [None]:
# Location of the saved model: Training/Saved Models/PPO_Atlantis_2MT
ppo_path = os.path.join(
    'Training',
    'Saved Models',
    'PPO_Atlantis_2MT',
)

In [None]:
# Write the trained model to ppo_path so it can be reloaded later without retraining
model.save(ppo_path)

In [None]:
# Drop the in-memory model instance; the following cell restores it from the
# file saved at ppo_path, which confirms that the saved copy is usable.
del model

In [None]:
# Restore the PPO model from ppo_path and attach the current environment to it
model = PPO.load(ppo_path, env=env)

In [None]:
# Release the previously created vectorized environment before rebinding
# `env`; otherwise its instances stay open for the rest of the session.
# The evaluation and gameplay cells pass the new `env` explicitly, so they
# do not depend on the environment that is closed here.
env.close()

# Build a single-instance environment for evaluation. Frame stacking uses the
# same n_stack=4 as the training environment so observations keep the same
# shape the model was trained on.
env = make_atari_env('ALE/Atlantis-v5', n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=4)

## 6. Evaluation and Rendering

In [None]:
# Run ten rendered evaluation episodes. The call is left as the cell's final
# expression so the notebook displays its return value.
evaluate_policy(
    model,
    env,
    render=True,
    n_eval_episodes=10,
)

## 7. Real-Time Interaction

In [None]:
# Import time library for real-time rendering
import time

# Watch the trained agent play until the user interrupts the kernel (or
# presses Ctrl+C). The loop is intentionally unbounded, but the interrupt is
# handled so that stopping does not raise a traceback, and the environment is
# always closed afterwards.
obs = env.reset()
try:
    while True:
        # Predict the next action based on the current observation
        action, _states = model.predict(obs)
        # Take the action and receive the next observation, reward, and done flag.
        # SB3 vectorized environments reset finished episodes automatically,
        # so no explicit reset is needed inside the loop.
        obs, rewards, dones, info = env.step(action)
        # Render the environment to visualize the gameplay
        env.render('human')
        # Pause briefly to slow down the rendering loop
        time.sleep(0.1)
except KeyboardInterrupt:
    # Interrupting is the expected way to stop watching; nothing to recover.
    pass
finally:
    # Release the emulator and any rendering resources held by the environment.
    env.close()