# PPO with Stable-Baselines3

## Setup

In [1]:
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.logger import configure
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import json

%cd ..
from src.microgrid_env import MicrogridEnv
from src.get_data import get_data_dict, get_test_dict
from src.utils import RenderCallback, NormalizeObservation
from src.params import cutin_windspeed, cutoff_windspeed

C:\Code\in5460-mex1


### Specify config

In [2]:
# Experiment configuration. Every entry is also published as a notebook-level
# variable of the same name so later cells can refer to it directly.
parameters = dict(
    nr_households=100,
    region=("CA", "AZ", "QR", "NV", "NM", "TX", "FL", "LA", "IA"),
    wind=True,
    wind_generator=False,
    alternative_cost=False,
    random=True,
    save_name="ppo_q2",
)

# At notebook/module scope this is the same namespace that locals() returns.
globals().update(parameters)

In [3]:
# Derive the training budget and the entropy coefficient from the `random`
# flag, then persist the complete configuration as JSON.
if random:
    # Short run with a large entropy coefficient
    # (presumably to approximate a random baseline policy - confirm).
    total_timesteps = 1_000
    ent_coef = 1
    # Guard the suffix so that re-running this cell does not produce
    # "..._random_random".
    if not save_name.endswith("_random"):
        save_name += "_random"
else:
    total_timesteps = 75_000
    ent_coef = 0

# Store the final save_name as well: the load cell publishes every entry of
# the file as a variable, so a stale un-suffixed name in the file would
# silently rename the run after loading.
parameters.update({
    "total_timesteps": total_timesteps,
    "ent_coef": ent_coef,
    "save_name": save_name,
})
with open(f"output/configs/config_{save_name}.json", "w", encoding="utf-8") as json_file:
    json.dump(parameters, json_file, indent=4)

### Load config

In [4]:
# Name of the run whose saved config file is read by the following cell.
save_name = "ppo_q1.1"

In [5]:
# Load the configuration stored for `save_name` and publish each entry as a
# notebook-level variable of the same name.
config_name = save_name
with open(f"output/configs/config_{config_name}.json", "r", encoding="utf-8") as json_file:
    parameters = json.load(json_file)

# Keep the name that located the file authoritative. If the file carries a
# different save_name, publishing it would redirect every later output
# (logs, training CSV, model) to another run's paths.
parameters["save_name"] = config_name

# Note: JSON has no tuple type, so sequence values such as `region` come back
# as lists.
globals().update(parameters)

### Load data

In [6]:
# Environment switches taken from the loaded configuration.
env_options = dict(
    wind=wind,
    wind_generator=wind_generator,
    alternative_cost=alternative_cost,
)

# Fetch the training data for the configured households and regions, then
# build the microgrid environment on top of it.
data_dict = get_data_dict(nr_households, region)
env = MicrogridEnv(data_dict, **env_options)
# Optional observation wrapper, currently disabled:
#env = NormalizeObservation(env)

In [7]:
# Create the PPO agent with an MLP policy on the training environment.
# API reference:
# https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#stable_baselines3.ppo.PPO
agent_kwargs = {
    "verbose": 1,
    "ent_coef": ent_coef,
    # optional override, left disabled: "learning_rate": 0.003
}
model = PPO("MlpPolicy", env, **agent_kwargs)
# A2C alternative with the same settings:
#model = A2C("MlpPolicy", env, verbose=1, ent_coef=ent_coef)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




In [8]:
# Route the agent's training statistics to a JSON log in a per-run folder.
# Logger docs: https://stable-baselines3.readthedocs.io/en/master/common/logger.html
logs_path = f"output/logs/{save_name}"
log_formats = ["json"]
new_logger = configure(folder=logs_path, format_strings=log_formats)
model.set_logger(new_logger)

## Training

In [9]:
# Train the agent; the callback accumulates per-step information in its
# `info` attribute, which is then turned into a DataFrame and written to CSV.
render_callback = RenderCallback(env)
learn_kwargs = dict(
    total_timesteps=total_timesteps,
    callback=render_callback,
    progress_bar=True,
)
model.learn(**learn_kwargs)

train_df = pd.DataFrame.from_dict(render_callback.info)
train_csv_path = f"output/train_data/{save_name}.csv"
train_df.to_csv(train_csv_path)

Output()

In [None]:
# Read the training log (one JSON document per line) and collect every
# metric whose name contains "loss" into a DataFrame for plotting.
progress_path = f"output/logs/{save_name}/progress.json"
with open(progress_path, 'r') as log_file:
    data = list(map(json.loads, log_file))

loss_data_list = []
for entry in data:
    loss_data_list.append({key: value for key, value in entry.items() if 'loss' in key})

# The first log record is dropped before re-indexing.
loss_frame = pd.DataFrame(loss_data_list)
df = loss_frame.iloc[1:].reset_index(drop=True)

# Elapsed time as reported by the last record, converted by a factor of 60.
elapsed = data[-1]["time/time_elapsed"]
print("Training time (min.)", elapsed/60)

In [None]:
# Plot one loss curve (third loss column); skipped when the `random` flag is set.
if not random:
    i = 2
    loss_name = df.columns[i]
    loss_curve = df[loss_name]
    loss_curve.plot(title=loss_name)
    plt.show()

In [None]:
# Plot the mean training reward over consecutive, non-overlapping windows of
# `window_size` steps.
#train_df = pd.read_csv(f"output/train_data/{save_name}.csv")
window_size = 10
# Float dtype so that the tail of the last window can be padded with NaN.
reward_array = train_df.reward.to_numpy(dtype=float)

# Number of windows = ceil(len / window_size), via negated floor division.
num_rows = -(-len(reward_array) // window_size)
padding_elements = num_rows * window_size - len(reward_array)
# Pad with NaN instead of mode='empty': 'empty' leaves the padded cells
# uninitialised, which made the mean of the last (partial) window arbitrary.
reward_array_padded = np.pad(
    reward_array,
    (0, padding_elements),
    mode='constant',
    constant_values=np.nan,
)
reward_array_aggr = reward_array_padded.reshape(num_rows, window_size)
# nanmean ignores the padding, so a partial last window is averaged over
# its real rewards only.
reward_array_mean = np.nanmean(reward_array_aggr, axis=1)

plt.plot(reward_array_mean)
plt.title(f"Reward moving average over training process ({window_size=})")
plt.show()

## Model saving

In [None]:
# Persist the trained agent under this run's name.
model_path = f"output/models/model_{save_name}"
model.save(model_path)

## Model loading and testing

In [None]:
# Draw the test data with a fixed seed and sample size.
test_sampling = {"seed": 123, "sample_size": 800}
test_dict = get_test_dict(nr_households, region, **test_sampling)

# Rebuild the microgrid environment on the test data, with the same switches
# as used for training.
env = MicrogridEnv(
    test_dict,
    wind=wind,
    wind_generator=wind_generator,
    alternative_cost=alternative_cost,
)
# Optional observation wrapper, currently disabled:
#env = NormalizeObservation(env)

In [None]:
# Restore the agent that was saved under this run's name.
model_path = f"output/models/model_{save_name}"
model = PPO.load(model_path)
# A2C counterpart:
#model = A2C.load(f"output/models/model_{save_name}")

In [None]:
# Roll the loaded agent through the test environment, taking one step per
# entry of the test demand series, and record what env.render() returns
# after each step.
test_episodes = len(test_dict["energy_demand"])

step_info = []
obs = env.reset()

# NOTE(review): `done` is not checked, so the environment is never reset
# inside the loop - confirm that it cannot terminate early.
for _ in range(test_episodes):
    action, _ = model.predict(obs)
    obs, reward, done, info = env.step(action)

    snapshot = env.render()
    step_info.append(snapshot)

step_df = pd.DataFrame.from_dict(step_info)

## Analysis

In [None]:
# Show the per-step table without the columns at positions 10 to 18.
step_df.drop(columns=step_df.columns[10:19])

In [None]:
# Show only the columns at positions 10 to 18 of the per-step table.
step_df.iloc[:,10:19]

In [None]:
# Count blackouts: test steps whose `energy_demand` exceeds `energy_load`,
# reported as an absolute number and as a share of all test steps.
blackout_steps = step_df.energy_demand > step_df.energy_load
blackout_count = sum(blackout_steps)
print("absolute:", blackout_count)
print("relative:", blackout_count / test_episodes)

In [None]:
# Default figure size for the plots below.
plt.rcParams.update({"figure.figsize": (10, 6)})

In [None]:
# Energy produced per source over the test run, drawn in this order:
# (column, line colour, legend label, opacity).
energy_series = (
    ('energy_generated_generator', 'darkred', 'generator', 0.7),
    ('discharged', 'yellow', 'battery', 0.5),
    ('energy_generated_solar', 'orange', 'solar energy', 1),
    ('energy_generated_wind', 'darkblue', 'wind energy', 1),
)
for column, colour, legend_label, opacity in energy_series:
    plt.plot(step_df[column], color=colour, label=legend_label, alpha=opacity)

# Axis labels and title
plt.xlabel('Time')
plt.ylabel('Energy (kWh)')
plt.title('Energy mix')

# Legend outside the axes, vertically centred on the right-hand side
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Render the figure
plt.show()

In [None]:
# Summary statistics of the per-step reward on the test set.
step_df["reward"].describe()

In [None]:
# Reward obtained at each step of the test run.
reward_series = step_df["reward"]
plt.plot(reward_series)
plt.title("Reward over test set")
plt.show()

## Close

In [None]:
# Close the test environment once the analysis is finished.
env.close()