This is a subset of `notebooks/00_battery_sim_notebook-2020.ipynb`: train & evaluate a random agent and a price-average heuristic agent, then save the report.

In [None]:
%config InlineBackend.figure_format = 'retina'
%load_ext autoreload
%autoreload 2

import numpy as np
from typing import List
from battery_env_sm import SimpleBattery
from nbutils import Report, ReportIO, plot_reward, evaluate_episode, plot_analysis

# Settings dict handed to SimpleBattery(...) and evaluate_episode(...) further down.
# NOTE(review): 168 presumably means one week of hourly steps (7 * 24) — confirm against SimpleBattery.
# NOTE(review): this path climbs two directories ("../../refdata") while the pd.read_csv call
# later in this notebook reads the same file name from "../refdata" — confirm which base
# directory each path is resolved from.
env_config = {"MAX_STEPS_PER_EPISODE": 168, "LOCAL": True, "FILEPATH": "../../refdata/PRICE_AND_DEMAND_2020FULL_NSW1.csv"}

# Train

In [None]:
from typing import List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm

class SimpleAgent:
    """
    Random agent: picks uniformly among the actions it was constructed with.
    """

    def __init__(self, actions: List):
        """Store the candidate actions.

        Args:
            actions: Non-empty sequence of action ids the agent may return.
        """
        self.actions = actions

    def get_action(self, state):
        """Return one uniformly random element of ``self.actions``.

        Args:
            state: Current environment observation; ignored by this agent.

        Returns:
            An action drawn with ``np.random.choice``, i.e. from NumPy's
            global RNG, so ``np.random.seed`` controls reproducibility.

        Raises:
            ValueError: If ``self.actions`` is empty (raised by NumPy).
        """
        # Sample from the configured actions rather than a hard-coded
        # [0, 1, 2], so the agent stays correct if the action ids change.
        # For actions == [0, 1, 2] the draws are identical to the old code.
        action = np.random.choice(self.actions)

        return action


if __name__ == "__main__":
    # Seed NumPy's global RNG before any action is drawn.
    np.random.seed(1)
    env = SimpleBattery(env_config)
    agent = SimpleAgent([SimpleBattery.CHARGE, SimpleBattery.DISCHARGE, SimpleBattery.HOLD])
    EPISODE = 100
    rewards_list = []
    history_list: List = []

    for i in tqdm(range(EPISODE)):
        state = env.reset()
        total_rewards = 0
        done = False

        # Keep stepping until the environment flags the episode as finished.
        while not done:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            total_rewards += reward
            # One history row: episode index, running reward total, chosen
            # action, then the observation the action was chosen from.
            history_list.append([i, total_rewards, action, *state])
            state = next_state

        # Per-episode total, later consumed by plot_reward / ReportIO.
        rewards_list.append(total_rewards)

    average_reward = sum(rewards_list) / len(rewards_list)
    print(f"Average reward: {average_reward}")

    # Column names for the history rows built above, in the same order.
    df_cols = [
        "episode", "total_reward", "action",
        "energy", "cost", "price",
        "price_t1", "price_t2", "price_t3", "price_t4", "price_t5",
    ]
    df_history = pd.DataFrame(history_list, columns=df_cols)
    print("df_history", df_history.shape)

# Evaluation

In [None]:
# Re-seed NumPy's global RNG, then hand the current `agent` and the shared
# env_config to nbutils.evaluate_episode.
np.random.seed(2)
df_eval = evaluate_episode(agent, env_config)
# Cell output: sum of the 'reward' column of the evaluation result.
df_eval['reward'].sum()

In [None]:
# Pass the per-episode reward totals collected by the training loop to nbutils.plot_reward.
plot_reward(rewards_list)

**Observation**

    CHARGE = 0
    DISCHARGE = 1
    HOLD = 2
    
- The agent discharges (sell: 1) when the price is higher than the cost, and charges (buy: 0).

In [None]:
# Same seed and evaluation call as the earlier evaluation cell; the result is
# then passed to nbutils.plot_analysis.
np.random.seed(2)
df_eval = evaluate_episode(agent, env_config)
plot_analysis(df_eval)

In [None]:
# Cell output: sum of the 'reward' column of the most recent df_eval.
df_eval['reward'].sum()

In [None]:
from typing import List

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tqdm import tqdm

# from battery import SimpleBattery


class SimpleAgent:
    """
    Price-threshold agent.

    Compares the current market price (``state[2]``) with the mean of the
    last five entries of the state:

    Sell (DISCHARGE): market price above that mean
    Buy (CHARGE): market price below that mean
    HOLD: neither comparison is true
    """

    def __init__(self, actions: List):
        # Stored for interface parity; get_action does not read it.
        self.actions = actions

    def get_action(self, state):
        """Return the SimpleBattery action for the given observation."""
        recent_prices = state[-5:]
        reference_price = sum(recent_prices) / len(recent_prices)
        current_price = state[2]

        if current_price > reference_price:
            return SimpleBattery.DISCHARGE
        if current_price < reference_price:
            return SimpleBattery.CHARGE
        return SimpleBattery.HOLD


if __name__ == "__main__":
    # Seed NumPy's global RNG, then rebuild the environment and the agent.
    # This rebinds env, agent, rewards_list and history_list from the earlier
    # training cell.
    np.random.seed(1)
    env = SimpleBattery(env_config)
    agent = SimpleAgent([SimpleBattery.CHARGE, SimpleBattery.DISCHARGE, SimpleBattery.HOLD])
    EPISODE = 100
    rewards_list = []
    history_list: List = []

    for i in tqdm(range(EPISODE)):
        state = env.reset()
        total_rewards = 0
        done = False

        # Keep stepping until the environment flags the episode as finished.
        while not done:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            total_rewards += reward
            # One history row: episode index, running reward total, chosen
            # action, then the observation the action was chosen from.
            history_list.append([i, total_rewards, action, *state])
            state = next_state

        # Per-episode total, later consumed by plot_reward / ReportIO.
        rewards_list.append(total_rewards)

    average_reward = sum(rewards_list) / len(rewards_list)
    print(f"Average reward: {average_reward}")

    # Column names for the history rows built above, in the same order.
    df_cols = [
        "episode", "total_reward", "action",
        "energy", "cost", "price",
        "price_t1", "price_t2", "price_t3", "price_t4", "price_t5",
    ]
    df_history = pd.DataFrame(history_list, columns=df_cols)
    print("df_history", df_history.shape)

In [None]:
# Pass the per-episode reward totals from the preceding training loop to nbutils.plot_reward.
plot_reward(rewards_list)

**Observation**

    CHARGE = 0
    DISCHARGE = 1
    HOLD = 2
    
- The agent sells when the market price is higher than the average of the last 5 prices in its state, and buys when the market price is lower than that average.

In [None]:
# Re-seed NumPy's global RNG, evaluate the current `agent` with
# nbutils.evaluate_episode, and pass the result to nbutils.plot_analysis.
np.random.seed(2)
df_eval = evaluate_episode(agent, env_config)
plot_analysis(df_eval)

In [None]:
# Cell output: sum of the 'reward' column of the most recent df_eval.
df_eval['reward'].sum()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the raw price/demand CSV.
# NOTE(review): env_config["FILEPATH"] uses "../../refdata/..." for the same
# file name — confirm which relative path matches this notebook's working directory.
df = pd.read_csv("../refdata/PRICE_AND_DEMAND_2020FULL_NSW1.csv")
df["SETTLEMENTDATE"] = pd.to_datetime(df["SETTLEMENTDATE"])
# Aggregate to hourly means. numeric_only=True restricts the mean to numeric
# columns: on pandas >= 2.0 the default raises a TypeError as soon as the frame
# holds a non-numeric column (presumably the case for this CSV, e.g. a region
# label — confirm), whereas older pandas silently dropped such columns.
df = df.resample("1h", on="SETTLEMENTDATE").mean(numeric_only=True)
df = df.reset_index(drop=False)
df = df.rename(columns={"TOTALDEMAND": "demand", "RRP": "price", "SETTLEMENTDATE": "time"})
# Remove outlier (> $100). Hours whose mean price is NaN are dropped as well,
# because NaN <= 100 evaluates to False.
df = df[df['price']<=100]
print(df.shape)

In [None]:
# Line chart of the hourly mean price held in df (rows above $100 already removed).
fig, ax = plt.subplots(figsize=(15, 5))
sns.lineplot(ax=ax, data=df, x="time", y="price")
# Title the figure so it stands alone when skimmed; the trailing ';' stops the
# cell from echoing the return value as text output.
ax.set_title("Hourly mean price (values above $100 removed)");


# Save reports

In [None]:
# Hand the current rewards_list and df_eval to ReportIO('reports').save2.
# NOTE(review): when the notebook runs top to bottom, both names were last
# assigned by the price-average SimpleAgent cells, not by the random agent —
# confirm this is the run the report is meant to capture.
ReportIO('reports').save2(rewards_list, df_eval, close_fig=True)