# Energy Storage System


In this notebook, we will demonstrate how to train an RL agent for Energy Storage System (ESS) arbitrage.

The simulated energy environment is created based on the paper [Arbitrage of Energy Storage in Electricity Markets with Deep Reinforcement Learning](https://arxiv.org/abs/1904.12232)

## Prerequisite

### Python Package
Ensure that your Python virtual environment has the required Python packages from `requirements.txt` installed.

### Dataset
Download the dataset from [here](https://aemo.com.au/en/energy-systems/electricity/national-electricity-market-nem/data-nem/aggregated-data) and place it in the `data` folder as follows:

```
|-- data
|   `-- PRICE_AND_DEMAND_202106_NSW1.csv
```

You can choose to use one month of data or manually concatenate multiple months depending on your use cases.

In [None]:
# Execute this cell to download the sample data to a local file called data/sample-data.csv
# `-p` keeps the cell re-runnable when the data/ folder already exists.
!mkdir -p data/
# `-f` makes curl fail on an HTTP error instead of saving the error page as the CSV,
# `-L` follows redirects, and `-o` writes the response body to the target file.
!curl -fL "https://aemo.com.au/aemo/data/nem/priceanddemand/PRICE_AND_DEMAND_202103_NSW1.csv" -o data/sample-data.csv

## Battery Environment Simulator

We start by building an energy storage system environment.


You need to set `env_config={"LOCAL": True}` to use data from local src folder instead of S3.

In [None]:
%config InlineBackend.figure_format = 'retina'
%load_ext autoreload
%autoreload 2

from typing import List
from battery_env_sm import SimpleBattery
from report import Report, ReportIO, plot_reward, plot_analysis
from nbutils import evaluate_episode

import seaborn as sns    
import matplotlib.pyplot as plt
import numpy as np

# Settings handed to SimpleBattery(env_config) in the cells below.
# "LOCAL": True reads the data from a local file instead of S3, and "FILEPATH"
# points at the CSV written by the download cell above.
# NOTE(review): 168 steps per episode presumably corresponds to one week of
# hourly data — confirm against battery_env_sm.
env_config = {"MAX_STEPS_PER_EPISODE": 168, "LOCAL": True, "FILEPATH": "data/sample-data.csv"}

# Number of episodes each baseline agent below is run for.
EPISODE = 3000

## (0) Random Agent Baseline

In [None]:
from typing import List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm

# from battery import SimpleBattery


class SimpleAgent:
    """
    Random agent: picks uniformly among the actions it was constructed with.
    """

    def __init__(self, actions: List):
        """Store the candidate actions.

        Args:
            actions: non-empty sequence of action identifiers to sample from.
        """
        self.actions = actions

    def get_action(self, state):
        """Return one action drawn uniformly at random.

        Args:
            state: current observation; accepted for interface parity with
                the other agents but not used by the random policy.

        Returns:
            An element of ``self.actions`` chosen with ``np.random.choice``.

        Raises:
            ValueError: raised by NumPy when ``self.actions`` is empty.
        """
        # Sample from the configured actions instead of a hard-coded
        # [0, 1, 2] so the agent honours its constructor argument. With
        # actions [0, 1, 2] the draws are identical to the previous version.
        return np.random.choice(self.actions)


if __name__ == "__main__":
    # Seed NumPy's global RNG so the random agent's draws are repeatable.
    np.random.seed(1)
    env = SimpleBattery(env_config)
    agent = SimpleAgent([SimpleBattery.CHARGE, SimpleBattery.DISCHARGE, SimpleBattery.HOLD])
    rewards_list = []
    history_list: List = []

    for i in tqdm(range(EPISODE)):
        total_rewards = 0
        state = env.reset()

        # Step the environment until it flags the episode as finished.
        while True:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            total_rewards += reward
            # One record per step: episode index, running reward, action, state.
            # Assumes `state` is a plain list so that `+` concatenates.
            history_list.append([i, total_rewards, action] + state)
            state = next_state
            if done:
                break

        rewards_list.append(total_rewards)

    # Mean of the per-episode reward totals.
    average_reward = sum(rewards_list) / len(rewards_list)
    print(f"Average reward: {average_reward}")


In [None]:
# Pass the per-episode reward totals from the run above to `plot_reward`
# (imported from `report`).
fig = plot_reward(rewards_list)

## Evaluation

**Observation**

The agent action is totally random, regardless of price and cost.

In [None]:
df_eval

In [None]:
np.random.seed(2)
df_eval = evaluate_episode(agent, env_config)
fig = plot_analysis(df_eval)

## (1) Market price vs cost agent

- SELL: when market price is higher than cost
- BUY: when market price is lower than cost
- HOLD: others

In [None]:
from typing import List

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tqdm import tqdm


class SimpleAgent:
    """
    Rule-based agent that compares the market price with the energy cost.

    Sell (discharge): electric price > electric cost
    Buy (charge): electric price < electric cost
    Hold: the two are equal

    Open question kept from the original notes: what should the initial
    energy cost be?
    """

    def __init__(self, actions: List):
        self.actions = actions

    def get_action(self, state):
        """Choose the SimpleBattery action for ``state``.

        The price is read from ``state[2]`` and the cost from ``state[1]``.
        """
        price, cost = state[2], state[1]

        if price > cost:
            return SimpleBattery.DISCHARGE
        if price < cost:
            return SimpleBattery.CHARGE
        return SimpleBattery.HOLD


if __name__ == "__main__":
    # Seed NumPy's global RNG (same seed as the other baseline runs).
    np.random.seed(1)
    env = SimpleBattery(env_config)
    agent = SimpleAgent([SimpleBattery.CHARGE, SimpleBattery.DISCHARGE, SimpleBattery.HOLD])
    rewards_list = []
    history_list: List = []

    for i in tqdm(range(EPISODE)):
        total_rewards = 0
        state = env.reset()

        # Keep stepping until the environment reports the episode is done.
        while True:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            total_rewards += reward
            # One record per step: episode index, running reward, action, state.
            # Assumes `state` is a plain list so that `+` concatenates.
            history_list.append([i, total_rewards, action] + state)
            state = next_state
            if done:
                break

        rewards_list.append(total_rewards)

    # Mean of the per-episode reward totals.
    average_reward = sum(rewards_list) / len(rewards_list)
    print(f"Average reward: {average_reward}")


In [None]:
# Pass the per-episode reward totals of the price-vs-cost agent to
# `plot_reward` (imported from `report`).
fig = plot_reward(rewards_list)

**Observation**

    CHARGE = 0
    DISCHARGE = 1
    HOLD = 2
    
- The agent discharges (sell: 1) when the price is higher than the cost, and charges (buy: 0) when the price is lower than the cost.

In [None]:
# Re-seed NumPy's global RNG (a different seed from the rollout loop above)
# before evaluating the current `agent`.
np.random.seed(2)
# `evaluate_episode` comes from `nbutils`; its result is later written with
# `.to_csv`, so it is presumably a pandas DataFrame — confirm in nbutils.
df_eval = evaluate_episode(agent, env_config)
fig = plot_analysis(df_eval)

In [None]:
# Save the price-vs-cost evaluation result next to the notebook.
# Assuming `df_eval` is a pandas DataFrame, index=False omits the row index.
df_eval.to_csv("result_price_vs_cost_agent.csv", index=False)

## (2) Market Price vs Historical price Agent

- SELL: when market price is higher than past 5 days average price
- BUY: when market price is lower than past 5 days average price
- HOLD: others

In [None]:
from typing import List

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tqdm import tqdm


class SimpleAgent:
    """
    Rule-based agent that trades against a trailing average price.

    Buy: market price < average of the last ``window`` state entries
    Sell: market price > average of the last ``window`` state entries
    Hold: market price equals that average
    """

    def __init__(self, actions: List, window: int = 5):
        """Store the actions and the averaging window.

        Args:
            actions: actions available to the agent (stored, but not
                consulted by the rule itself).
            window: number of trailing state entries to average. Defaults
                to 5, the value that was previously hard-coded.

        Raises:
            ValueError: if ``window`` is smaller than 1.
        """
        # A window of 0 would make ``state[-0:]`` select the whole state,
        # silently averaging far more than intended.
        if window < 1:
            raise ValueError(f"window must be at least 1, got {window}")
        self.actions = actions
        self.window = window

    def _past_average_price(self, state):
        """Return the mean of the last ``self.window`` entries of ``state``."""
        recent = state[-self.window:]
        return sum(recent) / len(recent)

    def get_action(self, state):
        """Choose the SimpleBattery action for ``state``.

        The market price is read from ``state[2]`` and compared with the
        average of the trailing ``self.window`` entries of ``state``.

        Returns:
            ``SimpleBattery.DISCHARGE`` when the price is above the average,
            ``SimpleBattery.CHARGE`` when it is below, and
            ``SimpleBattery.HOLD`` otherwise.
        """
        market_price = state[2]
        past_average_price = self._past_average_price(state)

        if market_price > past_average_price:
            action = SimpleBattery.DISCHARGE
        elif market_price < past_average_price:
            action = SimpleBattery.CHARGE
        else:
            action = SimpleBattery.HOLD

        return action


if __name__ == "__main__":
    # Seed NumPy's global RNG (same seed as the other baseline runs).
    np.random.seed(1)
    env = SimpleBattery(env_config)
    agent = SimpleAgent([SimpleBattery.CHARGE, SimpleBattery.DISCHARGE, SimpleBattery.HOLD])
    rewards_list = []
    history_list: List = []

    for i in tqdm(range(EPISODE)):
        total_rewards = 0
        state = env.reset()

        # Run one episode; the environment signals the end through `done`.
        while True:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            total_rewards += reward
            # One record per step: episode index, running reward, action, state.
            # Assumes `state` is a plain list so that `+` concatenates.
            history_list.append([i, total_rewards, action] + state)
            state = next_state
            if done:
                break

        rewards_list.append(total_rewards)

    # Mean of the per-episode reward totals.
    average_reward = sum(rewards_list) / len(rewards_list)
    print(f"Average reward: {average_reward}")


In [None]:
# Pass the per-episode reward totals of the historical-price agent to
# `plot_reward` (imported from `report`).
fig = plot_reward(rewards_list)

**Observation**

    CHARGE = 0
    DISCHARGE = 1
    HOLD = 2
    
- The agent starts selling when the market price is rising (higher than the average of the last 5 days) and buys when the market price is dropping (lower than that average).

In [None]:
# Re-seed NumPy's global RNG (a different seed from the rollout loop above)
# before evaluating the current `agent`.
np.random.seed(2)
# `evaluate_episode` comes from `nbutils`; its result is later written with
# `.to_csv`, so it is presumably a pandas DataFrame — confirm in nbutils.
df_eval = evaluate_episode(agent, env_config)
fig = plot_analysis(df_eval)

In [None]:
# Save the historical-price evaluation result next to the notebook.
# Assuming `df_eval` is a pandas DataFrame, index=False omits the row index.
df_eval.to_csv("result_hist_price_agent.csv", index=False)

## (3) SageMaker RL - DQN

The next step is to use the DQN algorithm running on SageMaker RL. Please refer to the separate notebook for more info.

