CAUSAL RL
==============
DELAWARE INCORPORATION
COPYRIGHT (c) 2022. CCNets, Inc. All Rights reserved.
Author:
PARK, JunHo

"""
Causal RL Model Insights:

Explore detailed modeling results, including performance metrics and analyses, to understand the advancements in our models across various environments. 

These insights highlight algorithmic enhancements and overall progress in reinforcement learning capabilities.

For a deep dive into our latest modeling outcomes: https://wandb.ai/causal-rl/causal-rl/

This documentation serves as a guide to the significant strides made in the field, especially with GPT models in reinforcement learning.
"""


In [None]:
from __future__ import print_function

from utils.setting.env_settings import analyze_env
from utils.init import set_seed

# NOTE(review): presumably seeds the random number generators for reproducibility;
# called with no arguments, so whatever default seed utils/init.py defines is used — confirm there.
set_seed()
# Number of GPUs requested. In the visible cells this value is only checked for `> 0`
# when the device is selected; the selected CUDA device is always "cuda:0".
ngpu = 2
from torch.utils.tensorboard import SummaryWriter

In [None]:
import torch
# Pick the compute device: the first CUDA GPU when CUDA is available and at
# least one GPU was requested via `ngpu`, otherwise fall back to the CPU.
if torch.cuda.is_available() and ngpu > 0:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

Available Environments: 
==============
    OpenAI Gymnasium (MuJoCo):
        "HalfCheetah-v4"
        "Hopper-v4"
        "Walker2d-v4"
        "Ant-v4"
        "HumanoidStandup-v4"
        "Humanoid-v4"
        "InvertedDoublePendulum-v4"
        "Reacher-v4"
        "Pusher-v4"
        
    Unity ML-Agents (download link: https://drive.google.com/drive/folders/1TGSfw7IgfmVZslvmqIDLr5jAneQpsVbb?usp=sharing):
        Place the downloaded folder next to the project directory, as shown below:
        your_projects/
            causal-rl/
            unity_environments/
        "3DBallHard"
        "Worm"
        "Crawler"
        "Walker"
        "Hallway"
        "PushBlock"
        "Pyramids"

In [None]:
# Analyze the specified environment to generate recommended RL parameters.
# 'env_name': The name of the environment to analyze; use one of the Gymnasium (MuJoCo)
#             or Unity ML-Agents names listed under "Available Environments" above.
# The result is stored in `rl_params`; its training, algorithm, network, optimization,
# and normalization settings are replaced in the configuration cell that follows.
rl_params = analyze_env(env_name="Walker2d-v4")

For more details on the configuration settings, see utils/setting/rl_config.py.

In [None]:
# Import configuration parameter classes for setting up the RL model
from utils.setting.rl_config import TrainingParameters, AlgorithmParameters, NetworkParameters, OptimizationParameters, NormalizationParameters
from nn.gpt import GPT

# Training: override only the batch size and the buffer size. Any other
# TrainingParameters field (e.g. max steps) keeps the default defined in
# utils/setting/rl_config.py.
rl_params.training = TrainingParameters(
    batch_size=64,
    buffer_size=320000,
)

# Algorithm: override only the GPT sequence length. Other algorithm settings
# (such as gamma and lambda for the advantage calculation) are not passed here
# and therefore stay at their defaults.
rl_params.algorithm = AlgorithmParameters(
    gpt_seq_length=16,
)

# Network: 5 layers with a model dimension of 256, using the GPT class
# imported from nn.gpt as the network type.
rl_params.network = NetworkParameters(
    num_layers=5,
    d_model=256,
    network_type=GPT,
)

# Optimization: learning rate and minimum learning rate. The maximum gradient
# norm used for gradient clipping is not passed here, so it stays at its default.
# These settings influence the speed and stability of the learning process.
rl_params.optimization = OptimizationParameters(
    lr=1e-4,
    min_lr=1e-5,
)

# Normalization: use the defaults as-is. This governs how state, reward, and
# advantage values are normalized, which impacts the training dynamics.
rl_params.normalization = NormalizationParameters()

In [None]:
# Import the Causal RL class for running training and testing
from causal_rl import CausalRL

# Build the RL model from the configured parameters and the selected device.
# It is used as a context manager, so CausalRL's own enter/exit handling
# wraps both the training and the testing phase.
# NOTE(review): use_print / use_wandb presumably toggle console output and
# Weights & Biases logging — confirm in causal_rl.py.
with CausalRL(
    rl_params,
    device,
    use_print=True,
    use_wandb=False,
) as causal_rl:
    # Training phase.
    #   resume_training: whether to resume from a saved checkpoint
    #   use_eval:        whether to evaluate the model periodically during training
    #   use_graphics:    whether to display graphical representations of training progress
    causal_rl.train(
        resume_training=False,
        use_eval=False,
        use_graphics=False,
    )

    # Testing phase, run on the model trained above.
    #   max_episodes: the number of episodes to run during the test
    #   use_graphics: whether to display graphical representations of the agent's performance
    causal_rl.test(
        max_episodes=100,
        use_graphics=False,
    )
