# Energy Storage System


In this notebook, we will demonstrate how to train an RL agent for Energy Storage System (ESS) arbitrage.

The simulated energy environment is created based on the paper [Arbitrage of Energy Storage in Electricity Markets with Deep Reinforcement Learning](https://arxiv.org/abs/1904.12232)

## Prerequisite

### Python Package
Ensure that your Python virtual environment has the required Python packages listed in `requirements.txt` installed.

### Dataset
Download the dataset from [here](https://aemo.com.au/en/energy-systems/electricity/national-electricity-market-nem/data-nem/aggregated-data) and place it in the `data` folder as follows:

```
|-- data
|   `-- PRICE_AND_DEMAND_202105_NSW1.csv
```

## Battery Environment Simulator

We start by building an energy storage system environment.




You need to set `env_config={"LOCAL": True}` to use data from local src folder instead of S3.

Use full year data: `"FILEPATH":"data/PRICE_AND_DEMAND_2020FULL_NSW1.csv"`


In [None]:
import sys
sys.path.append("src")
from battery_env_sm import SimpleBattery
from typing import List

## Helper functions

In [None]:
import seaborn as sns    
import matplotlib.pyplot as plt

def plot_reward(rewards_list:List):
    """
    Print the mean of the per-episode rewards and plot them as a line chart.

    The mean is also drawn as a horizontal red line across the chart.

    Input:
        rewards_list: one reward value per episode, in episode order.
    """
    # An empty list has no mean; report it instead of dividing by zero.
    if len(rewards_list) == 0:
        print("No rewards to plot.")
        return

    average_reward = sum(rewards_list) / len(rewards_list)
    print(f"Average reward: {average_reward}")
    plt.figure(figsize=(20,5))
    ax = sns.lineplot(data=rewards_list)
    ax.set_ylabel('Mean reward per episode', fontsize=20)
    ax.set_xlabel('Iteration', fontsize=20)
    plt.axhline(y=average_reward, color='r')
    plt.show()

    
def plot_actions(df_history, episode:List=None):
    """
    Plot cost/price, action and energy for the selected episodes.

    Three separate figures are created: a line plot of the 'cost' and
    'price' columns, a scatter plot of 'action', and a line plot of 'energy'.

    Input:
        df_history: DataFrame with 'episode', 'cost', 'price', 'action'
            and 'energy' columns.
        episode: episode numbers to include; defaults to [0, 1, 2].
    """
    # None is the default so that no mutable list is shared between calls.
    if episode is None:
        episode = [0, 1, 2]

    df_temp = df_history[df_history['episode'].isin(episode)]

    plt.figure(figsize=(20,5))
    sns.lineplot(data=df_temp[['cost','price']])

    plt.figure(figsize=(20,3))
    sns.scatterplot(data=df_temp[['action']])

    plt.figure(figsize=(20,3))
    sns.lineplot(data=df_temp[['energy']])
    
def plot_analysis(df_history, episode:List=None):
    """
    Print the total reward and plot the main columns of a history frame.

    One figure is drawn per panel, in this order: 'cost' and 'price' (line),
    'action' (scatter), 'reward' (line), 'total_reward' (line) and
    'energy' (line).

    Input:
        df_history: DataFrame with 'reward', 'total_reward', 'action',
            'energy', 'cost' and 'price' columns (and an 'episode' column
            when `episode` is given).
        episode: episode numbers to keep; None keeps every row.
    """
    if episode is not None:
        df_temp = df_history[df_history['episode'].isin(episode)]
    else:
        df_temp = df_history

    # The printed value is the sum of the 'reward' column, so it is labelled
    # as a total rather than an average.
    print(f"Total reward: {df_temp['reward'].sum():.02f}")

    # (plotting function, columns) for each panel, in display order.
    panels = [
        (sns.lineplot, ['cost', 'price']),
        (sns.scatterplot, ['action']),
        (sns.lineplot, ['reward']),
        (sns.lineplot, ['total_reward']),
        (sns.lineplot, ['energy']),
    ]
    for draw, columns in panels:
        plt.figure(figsize=(20,3))
        draw(data=df_temp[columns])
    
    
def evaluate_episode(agent, env_config=None):
    """
    Run evaluation over a single episode.

    A fresh SimpleBattery environment is created, reset once, and stepped
    with the agent's actions until the environment reports `done`.

    Input:
        agent: trained agent exposing `get_action(state)`.
        env_config: optional SimpleBattery configuration dict. When omitted,
            a 168-step episode over the local 2020 full-year NSW price file
            is used.

    Output:
        DataFrame with one row per step: the step reward, the running total
        reward, the action taken and the state observed before the step.
    """
    # Imported locally so the helper does not depend on a later notebook cell
    # having imported pandas already.
    import pandas as pd

    if env_config is None:
        env_config = {"MAX_STEPS_PER_EPISODE": 168, "LOCAL": True, "FILEPATH": "data/PRICE_AND_DEMAND_2020FULL_NSW1.csv"}

    evaluation_list: List = []
    done = False
    env = SimpleBattery(env_config)
    state = env.reset()
    print(f"Index: {env.index}")
    total_rewards = 0

    while not done:
        action = agent.get_action(state)
        next_state, reward, done, info = env.step(action)
        total_rewards += reward
        # Record the state the action was chosen from, not the resulting one.
        evaluation_list.append([reward, total_rewards, action, *state])
        state = next_state

    df_cols = [
        "reward",
        "total_reward",
        "action",
        "energy",
        "cost",
        "price",
        "price_t1",
        "price_t2",
        "price_t3",
        "price_t4",
        "price_t5",
    ]
    df_eval = pd.DataFrame(evaluation_list, columns=df_cols)
    return df_eval


## (0) Random Agent Baseline

In [None]:
from typing import List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm

# from battery import SimpleBattery


class SimpleAgent:
    """
    Random agent

    Picks uniformly at random among the actions it was constructed with,
    ignoring the observed state.
    """

    def __init__(self, actions: List):
        """
        Input:
            actions: the actions the agent may choose from.
        """
        self.actions = actions

    def get_action(self, state):
        """
        Return a randomly chosen action.

        Input:
            state: current observation (unused by this agent).
        """
        # Sample from the configured actions instead of a hard-coded
        # [0, 1, 2], so the agent honours what it was constructed with.
        action = np.random.choice(self.actions)

        return action


if __name__ == "__main__":
    # Seed NumPy's global RNG before anything else runs.
    np.random.seed(1)

    EPISODE = 100
    env_config = {"MAX_STEPS_PER_EPISODE": 168, "LOCAL": True, "FILEPATH":"data/PRICE_AND_DEMAND_2020FULL_NSW1.csv"}
    env = SimpleBattery(env_config)
    agent = SimpleAgent([SimpleBattery.CHARGE, SimpleBattery.DISCHARGE, SimpleBattery.HOLD])

    rewards_list = []
    history_list: List = []
    for i in tqdm(range(EPISODE)):
        state = env.reset()
        total_rewards = 0
        done = False

        # Step the environment until it reports the episode is over.
        while not done:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            total_rewards += reward
            # Row layout: episode, running total reward, action, then the
            # state the action was chosen from.
            history_list.append([i, total_rewards, action] + state)
            state = next_state

        rewards_list.append(total_rewards)

    average_reward = sum(rewards_list) / len(rewards_list)
    print(f"Average reward: {average_reward}")

    # Column names line up with the row layout used in the loop.
    df_cols = ["episode", "total_reward", "action", "energy", "cost", "price"] + [
        f"price_t{lag}" for lag in range(1, 6)
    ]
    df_history = pd.DataFrame(history_list, columns=df_cols)
    print("df_history", df_history.shape)


In [None]:
# Plot the total reward of each episode from the random-agent run, with the mean as a red line.
plot_reward(rewards_list)

## Evaluation

**Observation**

The agent action is totally random, regardless of price and cost.

In [None]:
# Seed NumPy's global RNG so the random agent's action choices repeat between runs.
np.random.seed(2)
# Run the agent for one evaluation episode and plot the recorded history.
df_eval = evaluate_episode(agent)
plot_analysis(df_eval)

In [None]:
# Total of the per-step rewards recorded in the evaluation episode.
df_eval['reward'].sum()

## (1) Market price vs cost agent

- SELL: when market price is higher than cost
- BUY: when market price is lower than cost
- HOLD: others

In [None]:
from typing import List

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tqdm import tqdm

# from battery import SimpleBattery


class SimpleAgent:
    """
    Rule-based agent comparing the current electric price with the electric cost.

    Sell (discharge): electric price > electric cost
    Buy (charge): electric price < electric cost
    Hold: otherwise

    Open question: what should the initial energy cost be?
    """

    def __init__(self, actions: List):
        self.actions = actions

    def get_action(self, state):
        """
        Choose an action from the price and cost entries of the state.

        Input:
            state: observation where index 2 holds the price and index 1 the cost.
        """
        price_now = state[2]
        cost_now = state[1]

        # Guard clauses: sell above cost, buy below cost, otherwise hold.
        if price_now > cost_now:
            return SimpleBattery.DISCHARGE
        if price_now < cost_now:
            return SimpleBattery.CHARGE
        return SimpleBattery.HOLD


if __name__ == "__main__":
    # Seed NumPy's global RNG before the environment is created.
    np.random.seed(1)

    EPISODE = 100
    env_config = {"MAX_STEPS_PER_EPISODE": 168, "LOCAL": True, "FILEPATH":"data/PRICE_AND_DEMAND_2020FULL_NSW1.csv"}
    env = SimpleBattery(env_config)
    agent = SimpleAgent([SimpleBattery.CHARGE, SimpleBattery.DISCHARGE, SimpleBattery.HOLD])

    rewards_list = []
    history_list: List = []
    for i in tqdm(range(EPISODE)):
        state = env.reset()
        total_rewards = 0
        done = False

        # Play one full episode with the price-vs-cost rule.
        while not done:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            total_rewards += reward
            # Each row: episode, running total reward, action, then the
            # state seen before the step.
            history_list.append([i, total_rewards, action] + state)
            state = next_state

        rewards_list.append(total_rewards)

    average_reward = sum(rewards_list) / len(rewards_list)
    print(f"Average reward: {average_reward}")

    # Names for the history rows built in the loop.
    df_cols = ["episode", "total_reward", "action", "energy", "cost", "price"] + [
        f"price_t{lag}" for lag in range(1, 6)
    ]
    df_history = pd.DataFrame(history_list, columns=df_cols)
    print("df_history", df_history.shape)


In [None]:
# Plot the total reward of each episode from the price-vs-cost agent run, with the mean as a red line.
plot_reward(rewards_list)

**Observation**

    CHARGE = 0
    DISCHARGE = 1
    HOLD = 2
    
- The agent discharges (sell: 1) when the price is higher than the cost, and charges (buy: 0) when the price is lower than the cost.

In [None]:
# Seed NumPy's global RNG before evaluation.
# NOTE(review): presumably this fixes the episode start picked by env.reset() — confirm against SimpleBattery.
np.random.seed(2)
# Run the price-vs-cost agent for one evaluation episode and plot the recorded history.
df_eval = evaluate_episode(agent)
plot_analysis(df_eval)

In [None]:
# Total of the per-step rewards recorded in the evaluation episode.
df_eval['reward'].sum()

## (2) Market Price vs Historical price Agent

- SELL: when market price is higher than past 5 days average price
- BUY: when market price is lower than past 5 days average price
- HOLD: others

In [None]:
from typing import List

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tqdm import tqdm

# from battery import SimpleBattery


class SimpleAgent:
    """
    Rule-based agent comparing the market price with a recent average price.

    Buy (charge): market price < average of the last five state entries
    Sell (discharge): market price > average of the last five state entries
    Hold: otherwise
    """

    def __init__(self, actions: List):
        self.actions = actions

    def get_action(self, state):
        """
        Choose an action from the current price and the trailing price window.

        Input:
            state: observation where index 2 holds the market price and the
                last five entries are averaged as the reference price.
        """
        current_price = state[2]
        window = state[-5:]
        reference_price = sum(window) / len(window)

        # Guard clauses: sell above the reference, buy below it, else hold.
        if current_price > reference_price:
            return SimpleBattery.DISCHARGE
        if current_price < reference_price:
            return SimpleBattery.CHARGE
        return SimpleBattery.HOLD


if __name__ == "__main__":
    # Seed NumPy's global RNG before the environment is created.
    np.random.seed(1)

    EPISODE = 100
    env_config = {"MAX_STEPS_PER_EPISODE": 168, "LOCAL": True, "FILEPATH":"data/PRICE_AND_DEMAND_2020FULL_NSW1.csv"}
    env = SimpleBattery(env_config)
    agent = SimpleAgent([SimpleBattery.CHARGE, SimpleBattery.DISCHARGE, SimpleBattery.HOLD])

    rewards_list = []
    history_list: List = []
    for i in tqdm(range(EPISODE)):
        state = env.reset()
        total_rewards = 0
        done = False

        # Play one full episode with the price-vs-recent-average rule.
        while not done:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            total_rewards += reward
            # Each row: episode, running total reward, action, then the
            # state seen before the step.
            history_list.append([i, total_rewards, action] + state)
            state = next_state

        rewards_list.append(total_rewards)

    average_reward = sum(rewards_list) / len(rewards_list)
    print(f"Average reward: {average_reward}")

    # Names for the history rows built in the loop.
    df_cols = ["episode", "total_reward", "action", "energy", "cost", "price"] + [
        f"price_t{lag}" for lag in range(1, 6)
    ]
    df_history = pd.DataFrame(history_list, columns=df_cols)
    print("df_history", df_history.shape)

In [None]:
# Plot the total reward of each episode from the historical-price agent run, with the mean as a red line.
plot_reward(rewards_list)

**Observation**

    CHARGE = 0
    DISCHARGE = 1
    HOLD = 2
    
- The agent starts selling when the market price is rising (higher than the last 5 days' average) and buys when the market price is dropping.

In [None]:
# Seed NumPy's global RNG before evaluation.
# NOTE(review): presumably this fixes the episode start picked by env.reset() — confirm against SimpleBattery.
np.random.seed(2)
# Run the historical-price agent for one evaluation episode and plot the recorded history.
df_eval = evaluate_episode(agent)
plot_analysis(df_eval)

In [None]:
# Total of the per-step rewards recorded in the evaluation episode.
df_eval['reward'].sum()

## (3) SageMaker RL - DQN

The next step is to use the DQN algorithm running on SageMaker RL. Please refer to the separate notebook for more information.

