# Config and package loading

In [None]:
from flowing_basin.core import Instance
from cornflow_client.core.tools import load_json
from flowing_basin.solvers.rl import RLEnvironment, RLConfiguration
import numpy as np
from stable_baselines3.common.env_checker import check_env
from datetime import datetime
from flowing_basin.core import Instance
from cornflow_client.core.tools import load_json
from flowing_basin.solvers.rl import RLConfiguration, RLTrain
from datetime import datetime
from matplotlib import pyplot as plt
import numpy as np

# Experiment parameters

In [None]:
### DO NOT REMOVE THIS CELL ###
"""
This cell contains the default Papermill parameters to run the notebook. Parameters passed outside the template will override the default values.
In case of error, edit the metadata on this cell to add the TAG 'parameters'. Papermill will interpret that these are the values to replace/use as defaults.
"""

# Input data locations
PATH_CONSTANTS = "../data/constants/constants_2dams.json"
PATH_TRAIN_DATA = "../data/history/historical_data_clean_train.pickle"
PATH_TEST_DATA = "../data/history/historical_data_clean_test.pickle"

# Output switches and locations
PLOT_TRAINING_CURVE = True
SAVE_OBSERVATIONS = True
PATH_OBSERVATIONS = "../analyses/rl_pca/observations_data/observationsO2.npy"
PATH_OBSERVATIONS_CONFIG = "../analyses/rl_pca/observations_data/observationsO2_config.json"

# Default experiment name: the timestamp at which this cell is executed
experiment_name = datetime.now().strftime('%Y-%m-%d %H.%M')

# ENVIRONMENT 1 (WITH INSTANCE 1)
# The Instance itself is built from PATH_CONSTANTS in the configuration cell,
# i.e. after Papermill has injected its overrides (they are inserted right
# after this cell), so an overridden PATH_CONSTANTS is the one that gets loaded.

startups_penalty = 50
limit_zones_penalty = 50
mode = "linear"
flow_smoothing = 2
flow_smoothing_penalty = 25
flow_smoothing_clip = False
action_type = "exiting_flows"

features = [
    "past_vols", "past_flows", "past_variations", "future_prices",
    "future_inflows", "past_turbined", "past_groups", "past_powers", "past_clipped",
]

obs_box_shape = False
feature_extractor = 'mlp'
unique_features = ["future_prices", ]
length_episodes = 24 * 4 + 3
log_ep_freq = 5
eval_ep_freq = 5
num_episodes = 100
eval_num_episodes = 10
# A dead `do_history_updates = False` assignment that was immediately
# overwritten has been removed; the effective default is True.
do_history_updates = True
# NOTE(review): the configuration cell passes SAVE_OBSERVATIONS (not this value)
# as `update_observation_record` -- confirm which of the two is meant to drive it.
update_observation_record = True

# RL Experiment configuration

In [None]:
# Output files of this experiment. All three live in the same study folder;
# the training-data path previously pointed to "./studies/..." while the other
# two used "../studies/...", which scattered the outputs over two folders.
filepath_agent = f"../studies/{experiment_name}/RL_model.zip"
filepath_config = f"../studies/{experiment_name}/RL_model__config.json"
filepath_training = f"../studies/{experiment_name}/RL_model_training.json"

# Load the constants here (after parameter injection) so that an overridden
# PATH_CONSTANTS is honoured; they are needed for the per-dam sight lengths below.
constants = Instance.from_dict(load_json(PATH_CONSTANTS))
config = RLConfiguration(
    startups_penalty=startups_penalty,
    limit_zones_penalty=limit_zones_penalty,
    mode=mode,
    flow_smoothing=flow_smoothing,
    flow_smoothing_penalty=flow_smoothing_penalty,
    flow_smoothing_clip=flow_smoothing_clip,
    action_type=action_type,
    features=features,
    # Use the notebook parameter instead of a hard-coded False so that an
    # override of `obs_box_shape` actually takes effect (its default is False).
    obs_box_shape=obs_box_shape,
    feature_extractor=feature_extractor,
    unique_features=unique_features,
    num_steps_sight={
        # Past flows: one step more than the last verification lag of each dam
        ("past_flows", "dam1"): constants.get_verification_lags_of_dam("dam1")[-1] + 1,
        ("past_flows", "dam2"): constants.get_verification_lags_of_dam("dam2")[-1] + 1,
        "past_variations": 2, "future_prices": 16, "future_inflows": 16,
        "other": 1
    },
    length_episodes=length_episodes,
    log_ep_freq=log_ep_freq,
    eval_ep_freq=eval_ep_freq,
    eval_num_episodes=eval_num_episodes,
    do_history_updates=do_history_updates,
    # The observation record is saved later only when SAVE_OBSERVATIONS is set,
    # so the same flag is passed as `update_observation_record`.
    update_observation_record=SAVE_OBSERVATIONS,
)


# Train agent

In [None]:

# Build the trainer from the configuration and the constants/train/test data paths
train = RLTrain(
    path_test_data=PATH_TEST_DATA,
    path_train_data=PATH_TRAIN_DATA,
    path_constants=PATH_CONSTANTS,
    config=config,
)

# Train for `num_episodes` episodes with periodic evaluation enabled,
# passing `filepath_agent` as the agent path
train.solve(periodic_evaluation=True, path_agent=filepath_agent, num_episodes=num_episodes)
train.plot_training_curve()

# Store, in this order, the configuration used and the training curve data,
# reporting each JSON file once it has been written
for store, json_path in (
    (config.to_json, filepath_config),
    (train.save_training_data, filepath_training),
):
    store(json_path)
    print(f"Created JSON file '{json_path}'.")

# Store results

In [None]:

from pathlib import Path

# Plot training curve
if PLOT_TRAINING_CURVE:
    plt.show()
    # NOTE(review): the policy is only printed when plotting is enabled -- confirm this is intended
    print(train.model.policy)

# Save observation record for later PCA analysis
if SAVE_OBSERVATIONS:

    observation_record = train.train_env.observation_record
    print("Observation record shape:", observation_record.shape)
    print("Observation record:", observation_record)

    # np.save does not create missing folders; make sure the target folders
    # exist so the record is not lost to a FileNotFoundError after training.
    for output_path in (PATH_OBSERVATIONS, PATH_OBSERVATIONS_CONFIG):
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    np.save(PATH_OBSERVATIONS, observation_record)
    print(f"Created .npy file '{PATH_OBSERVATIONS}'.")

    # Keep the configuration next to the observations it produced
    config.to_json(PATH_OBSERVATIONS_CONFIG)
    print(f"Created JSON file '{PATH_OBSERVATIONS_CONFIG}'.")
