In [1]:
import sys
import os
import json
import torch
import argparse
import warnings

from lightsim2grid import LightSimBackend
import grid2op
import numpy as np
from agent import Track2PowerNetAgent

from ppo.ppo import PPO
from ppo.nnpytorch import FFN

In [2]:
# Power-line names forwarded to PPO through the 'lines_attacked' hyperparameter.
# Every entry has the form '<a>_<b>_<idx>'; the trailing <idx> runs 0..185 in list
# order (186 entries in total).
# NOTE(review): presumably <a>/<b> are the substation ids at the two ends of each line,
# matching the line names of the 'l2rpn_neurips_2020_track2_small' environment -- confirm
# against the environment before editing this list.
LINES = ['0_1_0', '0_2_1', '10_11_2', '69_70_3', '23_71_4', '70_71_5',
       '70_72_6', '69_73_7', '69_74_8', '68_74_9', '73_74_10', '75_76_11',
       '68_76_12', '1_11_13', '74_76_14', '76_77_15', '77_78_16',
       '76_79_17', '76_79_18', '78_79_19', '76_81_20', '81_82_21',
       '82_83_22', '82_84_23', '2_11_24', '83_84_25', '84_85_26',
       '84_87_27', '84_88_28', '87_88_29', '88_89_30', '88_89_31',
       '89_90_32', '88_91_33', '88_91_34', '6_11_35', '90_91_36',
       '91_92_37', '91_93_38', '92_93_39', '93_94_40', '79_95_41',
       '81_95_42', '93_95_43', '79_96_44', '79_97_45', '10_12_46',
       '79_98_47', '91_99_48', '93_99_49', '94_95_50', '95_96_51',
       '97_99_52', '98_99_53', '99_100_54', '91_101_55', '100_101_56',
       '11_13_57', '99_102_58', '99_103_59', '102_103_60', '102_104_61',
       '99_105_62', '103_104_63', '104_105_64', '104_106_65',
       '104_107_66', '105_106_67', '12_14_68', '107_108_69', '102_109_70',
       '108_109_71', '109_110_72', '109_111_73', '16_112_74', '31_112_75',
       '31_113_76', '26_114_77', '113_114_78', '13_14_79', '11_116_80',
       '74_117_81', '75_117_82', '11_15_83', '14_16_84', '3_4_85',
       '15_16_86', '16_17_87', '17_18_88', '18_19_89', '14_18_90',
       '19_20_91', '20_21_92', '21_22_93', '22_23_94', '22_24_95',
       '2_4_96', '24_26_97', '26_27_98', '27_28_99', '7_29_100',
       '25_29_101', '16_30_102', '28_30_103', '22_31_104', '30_31_105',
       '26_31_106', '4_5_107', '14_32_108', '18_33_109', '34_35_110',
       '34_36_111', '32_36_112', '33_35_113', '33_36_114', '36_38_115',
       '36_39_116', '29_37_117', '5_6_118', '38_39_119', '39_40_120',
       '39_41_121', '40_41_122', '42_43_123', '33_42_124', '43_44_125',
       '44_45_126', '45_46_127', '45_47_128', '7_8_129', '46_48_130',
       '41_48_131', '41_48_132', '44_48_133', '47_48_134', '48_49_135',
       '48_50_136', '50_51_137', '51_52_138', '52_53_139', '8_9_140',
       '48_53_141', '48_53_142', '53_54_143', '53_55_144', '54_55_145',
       '55_56_146', '49_56_147', '55_57_148', '50_57_149', '53_58_150',
       '3_10_151', '55_58_152', '55_58_153', '54_58_154', '58_59_155',
       '58_60_156', '59_60_157', '59_61_158', '60_61_159', '62_63_160',
       '37_64_161', '4_10_162', '63_64_163', '48_65_164', '48_65_165',
       '61_65_166', '61_66_167', '65_66_168', '46_68_169', '48_68_170',
       '68_69_171', '23_69_172', '7_4_173', '25_24_174', '80_79_175',
       '86_85_176', '115_67_177', '29_16_178', '37_36_179', '62_58_180',
       '63_60_181', '64_65_182', '64_67_183', '67_68_184', '80_67_185']

In [3]:
def train(env, agent, state_mean, state_std, hyperparameters, actor_model, critic_model,
          total_timesteps=200_000_000):
    """
        Trains a PPO model, optionally resuming from saved actor/critic weights.
        Parameters:
            env - the environment to train on
            agent - the agent object handed to PPO together with the environment
            state_mean - forwarded unchanged to PPO as its `state_mean` argument
            state_std - forwarded unchanged to PPO as its `state_std` argument
            hyperparameters - a dict of keyword arguments forwarded to PPO
            actor_model - path of the actor weights to resume from, or '' to start fresh
            critic_model - path of the critic weights to resume from, or '' to start fresh
            total_timesteps - number of timesteps passed to PPO.learn; the default is
                              deliberately huge so the run is stopped by hand once PPO
                              looks converged
        Return:
            None
        Raises:
            SystemExit - with status 1 when exactly one of actor_model / critic_model
                         is given
    """
    print("Training", flush=True)

    resume = actor_model != '' and critic_model != ''

    # Reject a half-specified checkpoint pair before building anything, so a forgotten
    # path can never lead to training from scratch over existing weights.
    if not resume and (actor_model != '' or critic_model != ''):
        print("Error: Either specify both actor/critic models or none at all. We don't want to accidentally override anything!", flush=True)
        sys.exit(1)  # non-zero status: this is a failure, not a clean exit

    # Create a model for PPO.
    model = PPO(env=env, agent=agent, policy_class=FFN, state_mean=state_mean, state_std=state_std, **hyperparameters)

    if resume:
        print(f"Loading in {actor_model} and {critic_model}...", flush=True)
        # map_location='cpu' lets checkpoints saved on a GPU load on CPU-only hosts;
        # load_state_dict then copies the values into the model's own parameters.
        model.actor.load_state_dict(torch.load(actor_model, map_location='cpu'))
        model.critic.load_state_dict(torch.load(critic_model, map_location='cpu'))
        print("Successfully loaded.", flush=True)
    else:
        print("Training from scratch.", flush=True)

    # Train the PPO model for the requested number of timesteps.
    model.learn(total_timesteps=total_timesteps)

In [4]:
# NOTE: Here's where you can set hyperparameters for PPO. They live in this cell so they
# can be edited in place between runs.
# To see a list of hyperparameters, look in ppo.py at function _init_hyperparameters

# Environment
# Warnings are silenced only while the backend and environment are created; the context
# manager restores the previous warning filters on exit instead of overwriting them.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    backend = LightSimBackend()
    env_name = 'l2rpn_neurips_2020_track2_small'
    env = grid2op.make(env_name, backend=backend)


# Agent
agent = Track2PowerNetAgent(env.action_space)

hyperparameters = {
    'timesteps_per_batch': 2048,
    'max_timesteps_per_episode': 200,
    'gamma': 0.99,
    'n_updates_per_iteration': 10,
    'lr': 3e-4,
    'clip': 0.2,
    'lines_attacked': LINES,
    'attack_duration': 10,
    'danger': 0.9,
    'state_dim': 3198
}


# Width of the state-normalization tensors handed to PPO.
# NOTE(review): this differs from hyperparameters['state_dim'] (3198) -- confirm that the
# two sizes are meant to be different.
STATE_NORM_DIM = 3868

# Zero mean / unit std; presumably a placeholder that leaves states unscaled -- confirm
# against how PPO applies state_mean / state_std.
state_mean = torch.zeros((1, STATE_NORM_DIM))
state_std = torch.ones((1, STATE_NORM_DIM))

# Start training from scratch (no actor/critic checkpoints supplied).
train(env=env, agent=agent, state_mean=state_mean, state_std=state_std,
      hyperparameters=hyperparameters, actor_model='',
      critic_model='')

[05-29 15:46:38 MainThread @machine_info.py:91] Cannot find available GPU devices, using CPU or other devices now.
[05-29 15:46:38 MainThread @machine_info.py:91] Cannot find available GPU devices, using CPU or other devices now.
Training
Training from scratch.
Learning... Running 200 timesteps per episode, 2048 timesteps per batch for a total of 200000000 timesteps

-------------------- Iteration #1 --------------------
Average Episodic Length: 41.46
Average Episodic Return: -40409.91
Average Loss: 0.09498
Timesteps So Far: 2073
Iteration took: 0.0 secs
------------------------------------------------------


-------------------- Iteration #2 --------------------
Average Episodic Length: 7.89
Average Episodic Return: -6967.98
Average Loss: -0.01921
Timesteps So Far: 4133
Iteration took: 0.0 secs
------------------------------------------------------


-------------------- Iteration #3 --------------------
Average Episodic Length: 5.03
Average Episodic Return: -3975.9

KeyboardInterrupt: 