# Obtaining data for MA-DPG evaluation from example 02b


In [1]:
# Module imports
import os

import pandas as pd
import yaml
from sqlalchemy import create_engine

# assume module imports
import examples.examples as examples
from assume import World
from assume.scenario.loader_csv import load_scenario_folder, run_learning

## 1. Running example 02b

In [2]:
# Pick the example and look up its entry once; the entry supplies the
# scenario and study case names used by the following cells.
example = "small_learning_2"
example_entry = examples.available_examples[example]
scenario = example_entry["scenario"]
study_case = example_entry["study_case"]

# Locations of the input folder and the local SQLite database
inputs_dir = "../inputs"
db_uri = "sqlite:///../local_db/assume_db.db"

In [None]:
# Run example 02b: create the world, load the scenario into it,
# call the learning routine and finally run the world.
csv_export_path = "../" + examples.csv_path
world = World(database_uri=db_uri, export_csv_path=csv_export_path)
load_scenario_folder(world, inputs_dir, scenario, study_case)
run_learning(world, inputs_dir, scenario, study_case)
world.run()

## 2. Retrieving the actions of the actors from the best run

In [None]:
# Directory holding the actors of the best run, assembled from its components
best_actors_parts = (
    inputs_dir,
    scenario,
    "learned_strategies",
    study_case,
    "avg_reward_eval_policies/actors/",
)
best_actors_dir = os.path.join(*best_actors_parts)

# List the entries found in that directory and display them
actors = os.listdir(best_actors_dir)
actors

### 2.1 Option 1: Retrieving the actions from the database:

In [4]:
# Path of the scenario's config file
config_path = os.path.join(inputs_dir, scenario, "config.yaml")

# Load the config and pick the learning settings of the study case
with open(config_path) as config_file:
    config = yaml.safe_load(config_file)
learning_config = config[study_case]["learning_config"]

# Number of validation episodes: the training episodes left after the initial
# experience collection, floor-divided by the validation interval
# (5 when the interval is not given in the config).
training_episodes = learning_config["training_episodes"]
initial_experience_episodes = learning_config["episodes_collecting_initial_experience"]
validation_interval = learning_config.get("validation_episodes_interval", 5)
no_of_val_episodes = (
    training_episodes - initial_experience_episodes
) // validation_interval

In [None]:
# Set up the database connection
db = create_engine(db_uri)
simulation = f"{scenario}_{study_case}_eval"

# Get the average reward for each episode in order to determine the best episode.
# Each evaluation episode is stored under the simulation name suffixed with its
# episode number, so one AVG query is issued per episode.
#
# The values are collected first and stored with an explicit float dtype.
# Filling an empty frame cell by cell leaves the column as object dtype, which
# older pandas releases reject in idxmax(); a NULL average (no rows for an
# episode) would also be kept as None instead of NaN.
episodes = range(1, no_of_val_episodes + 1)
avg_rewards = []
for episode in episodes:
    query = f"SELECT AVG(reward) as avg_reward FROM rl_params where simulation = '{simulation}_{episode}'"
    avg_rewards.append(pd.read_sql(query, db).values[0][0])
reward_df = pd.DataFrame(
    {"avg_reward": pd.Series(avg_rewards, index=episodes, dtype=float)}
)
reward_df.head()

In [None]:
# The best episode is the one with the highest average reward
avg_reward_per_episode = reward_df["avg_reward"]
episode = avg_reward_per_episode.idxmax()

# Fetch timestamp, unit and both action columns recorded for that episode
query = (
    "SELECT datetime as dt, unit, actions_0, actions_1 "
    f"FROM rl_params where simulation = '{simulation}_{episode}'"
)
actions_df = pd.read_sql(query, db)
actions_df

### 2.2 Option 2: Getting the actions through the model parameters

In [15]:
# Initialize the model
# obs_dim = len(feature_names)
# act_dim = 2  # Adjust if your model outputs a different number of actions
# model = MLPActor(obs_dim=obs_dim, act_dim=act_dim, float_type=th.float)

# # Path to actors we want to get actions from
# for actor in actors:
#     actor_path = os.path.join(best_actors_dir, actor)

#     # Load the trained model parameters
#     model_state = th.load(actor_path, map_location=th.device("cpu"))
#     model.load_state_dict(model_state["actor"])

#     actions = []
#     for obs in input_data:
#         obs_tensor = th.tensor(obs, dtype=th.float)
#         action = model(obs_tensor)
#         actions.append(action)

## 3. Getting the demand dataframe

In [None]:
# Read the scenario's demand CSV from the inputs folder and display it
demand_csv_path = os.path.join(inputs_dir, scenario, "demand_df.csv")
demand_df = pd.read_csv(demand_csv_path)
demand_df