# 0. PIP Installs

In [None]:
#!pip install pygame
#!pip install numpy
#!pip install gymnasium
#!pip install torch (CHOOSE THE BEST INSTALL OPTION FOR YOUR SETUP ON THE PYTORCH WEBSITE [CUDA, PLATFORM, ETC.])
#!pip install stable-baselines3[extra]

Last tested on a machine with the following versions:

- PyGame: 2.5.2
- OS: Windows-10
- Python: 3.10.7
- Stable-Baselines3: 2.1.0
- PyTorch: 2.1.0+cu121
- GPU Enabled: True
- Numpy: 1.25.0
- Cloudpickle: 3.0.0
- Gymnasium: 0.29.1
- OpenAI Gym: 0.26.2


# 1. Imports

In [None]:
import paratroopergame
# Import environment base class for a wrapper 
from gymnasium import Env 

# Import the space shapes for the environment
from gymnasium.spaces import Discrete, Box
# Import numpy to calculate frame delta 
import numpy as np

from stable_baselines3 import DQN

from stable_baselines3.common.monitor import Monitor

from stable_baselines3.common.evaluation import evaluate_policy

from stable_baselines3.common.callbacks import BaseCallback

import os

from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack, VecTransposeImage

# 2. Directories

In [None]:
# Output locations shared by the training and evaluation cells:
#   LOG_DIR - passed to Monitor and used as the TensorBoard log directory
#   OPT_DIR - where model checkpoints are saved and later loaded from
LOG_DIR, OPT_DIR = './logs/', './models/'

# 3. (Optional) Normal Gaming (Play yourself, not necessary)

In [None]:
# The Env wrapper is not needed for manual play. SHOOT WITH SPACEBAR!
game_instance = paratroopergame.GameSystem('Paratrooper game', paratroopergame.GAME_MODE_NORMAL, 'human')

try:
    game_instance.reset()

    # Advance and redraw the game until it reports that it is no longer running.
    while game_instance.isRunning():
        game_instance.step()
        game_instance.render()
finally:
    # Always close the game, even if the loop raises or the kernel is interrupted.
    game_instance.close()




# 4. Game Model

In [None]:
class ParatrooperGym(Env):
    """Gymnasium environment wrapping ``paratroopergame.GameSystem``.

    Observations and the done/truncated flags are forwarded unchanged from the
    game. The reward is computed here from the change in the counters found in
    the game's ``info`` dict since the previous step:

    * +5 for every newly destroyed paratrooper
    * -10 for every newly escaped paratrooper
    """

    def __init__(self, render_mode = 'human'):
        """Create the game in external-action mode.

        Args:
            render_mode: Forwarded to ``GameSystem``. The notebook uses
                ``'human'`` for visible play and ``'ai'`` for training.
        """
        super().__init__()
        # Specify action space and observation space
        self.render_mode = render_mode
        self.observation_space = Box(low=0.0, high=1.0, shape=(90, 160,), dtype=np.float16)
        self.action_space = Discrete(4)

        # Start up an instance of the game
        self.game = paratroopergame.GameSystem('Paratrooper game', paratroopergame.GAME_MODE_EXT_ACTION, render_mode)

        # Define the counters up front so step() cannot hit an AttributeError
        # when it is called before the first reset().
        self._reset_counters()

    def _reset_counters(self):
        """Zero the per-episode snapshots of the game's info counters."""
        self.LastDP = 0  # DestroyedParatroopers seen at the previous step
        self.LastEP = 0  # EscapedParatroopers seen at the previous step
        self.LastMB = 0  # MissedBullets seen at the previous step (not used in the reward)
        self.LastDA = 0  # DestroyedAircrafts seen at the previous step (not used in the reward)

    def reset(self, seed = 0, options = None):
        """Start a new episode.

        Args:
            seed: Seed forwarded to both ``Env.reset`` and the game.
                NOTE(review): the default of 0 re-seeds on every bare
                ``reset()`` call; Gymnasium's convention is ``None``. Kept
                as-is for backward compatibility - confirm the intent.
            options: Accepted for compatibility with the Gymnasium
                ``Env.reset`` signature; forwarded to ``Env.reset`` and
                otherwise unused.

        Returns:
            ``(obs, info)`` where ``obs`` is whatever the game's ``reset``
            returns and ``info`` is a placeholder dict.
        """
        super().reset(seed=seed, options=options)

        # Return the first frame
        obs = self.game.reset(seed)

        self._reset_counters()

        info = {'none': 0}

        return obs, info

    def step(self, action):
        """Apply ``action`` to the game and compute the shaped reward.

        Returns:
            ``(obs, reward, done, truncated, info)``; everything except
            ``reward`` is passed through from the game's ``step``.
        """
        # Take a step
        obs, done, truncated, info = self.game.step(action)

        destroyed = info['DestroyedParatroopers']
        escaped = info['EscapedParatroopers']

        # Reward only what changed since the previous step
        reward = (destroyed - self.LastDP) * 5
        reward += (escaped - self.LastEP) * (-10)

        self.LastDP = destroyed
        self.LastEP = escaped
        self.LastMB = info['MissedBullets']
        self.LastDA = info['DestroyedAircrafts']

        return obs, reward, done, truncated, info

    def render(self, *args, **kwargs):
        """Delegate rendering to the game; extra arguments are ignored."""
        self.game.render()

    def close(self):
        """Close the underlying game."""
        self.game.close()

# 5. Callback Training

Don't worry if the game window seems frozen: training is in progress and can be monitored in the log folder during the whole process. Don't close the game window.

In [None]:
class TrainAndLoggingCallback(BaseCallback):
    """Save a checkpoint of the model every ``check_freq`` callback calls.

    Checkpoints are written to ``save_path`` as ``best_model_<n_calls>``.
    Despite the file name, no comparison is made: every checkpoint is saved.

    Args:
        check_freq: Save whenever ``n_calls`` is a multiple of this value.
        save_path: Directory for the checkpoints. If ``None``, the directory
            is not created and nothing is saved.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory if one was configured."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save a checkpoint when due; always return True to keep training."""
        # Skip saving when no path was given, consistent with _init_callback;
        # os.path.join(None, ...) would otherwise raise a TypeError here.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True


# Save a checkpoint into OPT_DIR every 20,000 callback calls.
callback = TrainAndLoggingCallback(check_freq=20000, save_path=OPT_DIR)

# Make sure the log directory exists before Monitor sees it, so the path is
# treated as a directory (monitor.csv inside it) rather than a filename prefix.
os.makedirs(LOG_DIR, exist_ok=True)

# Create environment. The 'ai' mode tells the game not to render or process
# window events, which makes FPS higher.
env = ParatrooperGym('ai')
env = Monitor(env, LOG_DIR)

try:
    # Create algo
    # NOTE(review): gradient_steps is left at its default; with SB3's DQN
    # default of 1 this means a single gradient update per 1024 collected
    # steps - confirm this is intended.
    model = DQN(
        'MlpPolicy',
        env,
        tensorboard_log=LOG_DIR,
        learning_starts=10000,
        learning_rate=0.0001,
        train_freq=1024,
        batch_size=1024,
        buffer_size=20000,
    )
    model.learn(total_timesteps=200000, callback=callback)
finally:
    # Close the game even if training fails or the kernel is interrupted.
    env.close()

# 6. Test Model

In [None]:
# The 'human' mode tells the game to render and process window events. The
# render argument of evaluate_policy makes it call the Env render() function
# periodically.
env = ParatrooperGym('human')

# Log evaluation episodes to their own file. Monitor overwrites an existing CSV
# by default, so pointing it at LOG_DIR itself could clobber the training log.
env = Monitor(env, os.path.join(LOG_DIR, 'eval'))

try:
    model = DQN.load(os.path.join(OPT_DIR, 'best_model_200000.zip'))
    mean_reward, std_reward = evaluate_policy(model, env, render=True, n_eval_episodes=5)
    print('Mean reward over 5 episodes: {:.2f} +/- {:.2f}'.format(mean_reward, std_reward))
finally:
    # Close the game even if loading or evaluation fails.
    env.close()