In [1]:
from Cnn import CustomCNN
from utils.LayerActivationMonitoring import LayerActivationMonitoring, register_hooks
from utils.LayerActivationMonitoring import plot_activations
import EnvironmentConfigurations as EnvConfig

# Resolve the active experiment configuration once; every setting below derives from it.
active_config = EnvConfig.configurations[EnvConfig.CURRENT_CONFIGURATION_INDEX]
active_config_name = active_config['name']

# Output locations are "<prefix><configuration name>".
model_save_path = f"{EnvConfig.AGENT_MODEL_PATH_PREFIX}{active_config_name}"
tensorboard_log_path = f"{EnvConfig.TENSORBOARD_LOG_PATH_PREFIX}{active_config_name}"

# Keyword arguments for the training environments: reward shaping on, game window hidden.
env_params = dict(
    env_config=active_config,
    is_reward_shaping_on=True,
    is_game_window_visible=False,
)

# Evaluation environments share the training settings, except reward shaping is off.
evaluation_env_params = {**env_params, "is_reward_shaping_on": False}

# Keyword arguments unpacked into the PPO constructor by the training cells.
agent_params = dict(
    tensorboard_log=tensorboard_log_path,
    verbose=1,
    n_epochs=3,
    n_steps=4096,
    learning_rate=1e-4,
    batch_size=64,
    seed=0,  # fixed seed for reproducible runs
    policy_kwargs=dict(features_extractor_class=CustomCNN),
)

KeyboardInterrupt: 

In [1]:
# RUN THE ENVIRONMENT IN PARALLEL MODE WITH 2 ENVS
from EnvironmentHelpers import create_vectorised_environment
from utils.AutomaticModelSavingCallback import AutomaticModelSavingCallback  
from utils.Initialisation import initialise_network_weights
from stable_baselines3 import PPO
from stable_baselines3.common import policies
from stable_baselines3.common.callbacks import EvalCallback

# Training budget and degree of parallelism for this run.
N_TRAINING_ENVS = 2
TOTAL_TRAINING_TIMESTEPS = 30_000_000

# Vectorised environments: parallel copies for training, a single one for evaluation.
env = create_vectorised_environment(**env_params, n_envs=N_TRAINING_ENVS)
evaluation_env = create_vectorised_environment(**evaluation_env_params, n_envs=1)

# Periodic checkpointing of the agent into model_save_path.
automatic_model_saving_callback = AutomaticModelSavingCallback(
    check_freq=EnvConfig.MODEL_SAVING_FREQUENCY,
    save_path=model_save_path,
)

# Periodic evaluation over 10 episodes on the evaluation environment.
# NOTE(review): EvalCallback counts eval_freq in callback calls (one per vectorised step),
# so with 2 training envs each call presumably covers 2 timesteps - confirm that
# EnvConfig.EVALUATION_FREQUENCY accounts for this.
# NOTE(review): the best-model directory is hard-coded under 'models/' instead of being
# derived from model_save_path, while the resume snippet at the bottom of this cell loads
# from model_save_path - confirm both resolve to the same location.
current_config_name = EnvConfig.configurations[EnvConfig.CURRENT_CONFIGURATION_INDEX]["name"]
evaluation_callback = EvalCallback(
    evaluation_env,
    n_eval_episodes=10,
    eval_freq=EnvConfig.EVALUATION_FREQUENCY,
    log_path=tensorboard_log_path,
    best_model_save_path=f'models/{current_config_name}',
)

# Build a fresh PPO agent on the GPU, attach the hooks, then initialise the policy weights.
model = PPO(policies.ActorCriticCnnPolicy, env, device="cuda", **agent_params)
register_hooks(model)
initialise_network_weights(model.policy)

training_callbacks = [automatic_model_saving_callback, evaluation_callback]
model.learn(total_timesteps=TOTAL_TRAINING_TIMESTEPS, callback=training_callbacks)

# To resume from a saved model instead of training from scratch, replace the
# "fresh PPO agent" section above with:
# model = PPO.load(f"{model_save_path}/best_model", **agent_params)
# model.set_env(env)
# register_hooks(model)
# model.learn(total_timesteps=TOTAL_TRAINING_TIMESTEPS, callback=training_callbacks, reset_num_timesteps=False)

NameError: name 'env_params' is not defined

In [6]:
# Release both vectorised environments. The evaluation environment is closed in a
# `finally` so it is still released when closing the training environment raises
# (for example after an interrupted training run).
try:
    env.close()
finally:
    evaluation_env.close()

In [None]:
# EVALUATE ACTIVATIONS ON NEW ENVIRONMENT
from utils.Initialisation import initialise_network_weights

# Fresh training-style environments used only for this short activation probe.
env = create_vectorised_environment(**env_params, n_envs=2)
# evaluation_env = create_vectorised_environment(**env_params, n_envs=1)

# Untrained agent (default device) with hooks attached and freshly initialised weights.
model = PPO(policies.ActorCriticCnnPolicy, env, **agent_params)

register_hooks(model)
initialise_network_weights(model.policy)

# Short run with the activation-monitoring callback attached.
# NOTE(review): agent_params sets n_steps=4096 and there are 2 envs, so PPO presumably
# collects a full rollout before checking the budget and runs for more than 1024
# timesteps - confirm that is intended.
try:
    model.learn(total_timesteps=1024, callback=[LayerActivationMonitoring()])
finally:
    # This cell rebinds `env`, so close it here; otherwise the environments created
    # above are never released, including when training fails or is interrupted.
    env.close()

# Plotting reads the hooks stored on the policy's feature extractor.
plot_activations(model.policy.features_extractor.hooks)