In [95]:
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras.optimizers import Adam
import math
import pandas as pd
import numpy as np
import random
from collections import deque
import sys

DATA_PATH = './data/sspsbpniv.csv'
LEARNING_RATE = 0.01
REPLAY_BATCH_SIZE = 32

In [96]:
def load_data():
    df = pd.read_csv(DATA_PATH)
    df['Settlement Date'] = pd.to_datetime(df['Settlement Date'], dayfirst=True)
    df.rename(columns={'System Sell Price(£/MWh)':'Sell Price'}, inplace=True)
    # df['Winter'] = df['Settlement Date'].dt.month.isin([1, 2, 12])
    # df['Spring'] = df['Settlement Date'].dt.month.isin([3, 4, 5])
    # df['Summer'] = df['Settlement Date'].dt.month.isin([6,7, 8])
    # df['Autumn'] = df['Settlement Date'].dt.month.isin([9, 10, 11])
    # df['Weekend'] = df['Settlement Date'].dt.weekday.isin([5, 6])
    df['Period (sin norm)'] = np.sin((df['Settlement Period'] - 1 )* (2 * np.pi / 48))
    df['Period (cos norm)'] = np.cos((df['Settlement Period'] - 1 )* (2 * np.pi / 48))

    # shifted Ratio of Sell Price to Rolling average 
    df['SP/SMA'] = (df['Sell Price'] / df['Sell Price'].rolling(48).mean() ) - 1
    df.dropna(inplace=True)
    df.drop(['Settlement Date', 'System Buy Price(£/MWh)', 'Net Imbalance Volume(MWh)', 'Settlement Period'], inplace=True, axis=1)

    df.head()
    return df


In [97]:
class Agent:
    def __init__(self, state_size, is_eval=False, model_name=''):
        self.state_size = state_size
        self.action_size = 3 # sit, buy, sell
        self.memory = deque(maxlen=1000)
        self.inventory = []
        self.is_eval = is_eval
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = load_model(model_name) if model_name != '' else self._model()
        self.target_model = self._model() if not is_eval else None

    def _model(self):
        model = Sequential()
        # input layer
        model.add(Dense(units=64, input_dim=self.state_size, activation="relu"))
        # hidden layers
        model.add(Dense(units=32, activation="relu"))
        model.add(Dense(units=8, activation="relu"))
        # output layer
        model.add(Dense(self.action_size, activation="linear"))
        model.compile(loss="mse", optimizer=Adam(learning_rate=LEARNING_RATE))
        return model
    
    def act(self, state):
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        if not self.is_eval and random.random()<= self.epsilon:
            return random.randrange(self.action_size)
        return np.argmax(self.model.predict(state.reshape(1, 5), verbose=0)[0])

    def remember(self, state, action, reward, new_state, done):
        self.memory.append([state, action, reward, new_state, done])
    
    def replay(self):
        batch_size = REPLAY_BATCH_SIZE
        if len(self.memory) < batch_size:
            return
        # random sampling of memory
        samples = random.sample(self.memory, batch_size)
        # TODO review loop - model.predict is intended to operate on batches
        for sample in samples:
            state, action, reward, new_state, done = sample
            # what does the target model predict
            target = self.target_model.predict(state.reshape(1, 5))
            # if no new state in sample, then Q(S_t+1) = 0
            if(done):
                target[0][action] = reward
            else:
                q_new = max(self.target_model.predict(new_state.reshape(1, 5), verbose=0)[0])
                target[0][action] = reward + q_new *self.gamma
            self.model.fit(state.reshape(1, 5), target, epochs=1, verbose=0)
            
    def update_target(self):
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = weights[i]
        self.target_model.set_weights(target_weights)
        

In [98]:
def formatPrice(n):
    return("-£" if n<0 else "£")+"{0:.2f}".format(abs(n))

In [103]:
BATTERY_MAX_CHARGE = 1000
BATTERY_CHARGE_RATE = 100
BATTERY_CHARGE_EFF = 0.9

class Environment:
    def __init__(self, states, prices) -> None:
        self.states = states
        self.prices = prices
        self.current_charge = 0
        self.max_charge = BATTERY_MAX_CHARGE
        self.charge_eff = BATTERY_CHARGE_EFF
        self.sc = self.charge_eff # sell cost/discharge efficiency
        self.bc = 1 / self.charge_eff #buy cost/charge efficiency
        self.sum_cost = 0
        self.profit = 0
        self.rate = BATTERY_CHARGE_RATE
        self.cur_index = 0
        self.length = states.shape[0] - 1
            
    def mean_value(self)->float:
        try:
            return self.sum_cost / self.current_charge
        except ZeroDivisionError:
            return 0
    
    def cur_price(self) -> float:
        return self.prices[self.cur_index]
    
    def comp_mean_to_current(self)->float:
        try: 
            return self.cur_price()/self.mean_value()
        except ZeroDivisionError:
            return 1
    
    def get_state(self):
        state = self.states.iloc[self.cur_index].to_numpy()
        return np.append(state, 
                         [self.current_charge / self.max_charge, 
                          self.comp_mean_to_current()])
    
    def get_new_state(self):
        self.cur_index += 1
        return self.get_state()
    
    def reset(self):
        self.cur_index = 47
        self.current_charge = random.randint(0, self.max_charge)
        self.sum_cost = self.current_charge * self.cur_price()
        self.profit = -self.sum_cost
    
        
    def step(self, action):
        reward = self.simple_reward(action, self.cur_price())
        return  (self.get_new_state(),
                 reward,
                 self.cur_index >= self.length)
    
    
    def simple_reward(self, action, price) -> float:
        reward = 0.
        if action == 0: # hold
            pass
        elif action == 1 and self.current_charge < self.max_charge: # buy
            charge = self.rate if self.current_charge + self.rate <= self.max_charge \
                else self.max_charge - self.current_charge
            cost = price * charge * self.bc
            self.sum_cost += cost
            self.profit -= cost
            self.current_charge += charge
            # print("Buy: " + formatPrice(price))
        elif action == 2 and self.current_charge > 0: # sell
            charge = self.rate if self.current_charge - self.rate >= 0 \
                else self.current_charge
            # we use the average value of held charge for reward
            average = self.mean_value()
            self.sum_cost -= average * charge
            self.current_charge -= charge
            sum_sell_price = price * charge * self.sc
            self.profit += sum_sell_price
            reward = max((sum_sell_price) - average, 0)
            # print("Sell: " + formatPrice(price) + " | Profit: " + formatPrice(reward))
        return reward
            
    def print_profit(self):
        print("--------------------------------")
        print("Total Profit: " + formatPrice(self.profit))
        print("--------------------------------")
            
    
    

In [100]:
def train(states, prices):
    env = Environment(states, prices)
    agent = Agent(5, False)
    
    models_path = []
    
    episode_count = 10
    l = len(states) - 1
    
    for e in range(episode_count + 1):
        print("Episode " + str(e) + "/" + str(episode_count))
        env.reset()
        state = env.get_state()
        total_profit = 0
        for t in range(l):
            action = agent.act(state)
            new_state, reward, done = env.step(action)
            agent.remember(state, action, reward, new_state, done)
            agent.replay()
            agent.update_target()
            state = new_state
            if done:
                env.print_profit()
    if e % 10 == 0:
        agent.model.save(str(e))
        models_path.append(str(e))
    
    return models_path

In [101]:

def eval(states, prices, model_path):
    env = Environment(states, prices)
    agent = Agent(5, True, model_path)
    l = len(states) - 1
    print("Model " + str(model_path))
    env.reset()
    state = env.get_state()
    total_profit = 0
    for t in range(l):
        action = agent.act(state)
        new_state, reward, done = env.step(action)

        state = new_state
        if done:
            env.print_profit()
    
    print("--------------------------------")
    print("Model " + str(model_path))
    print("Total Profit: " + formatPrice(env.profit))
    print("--------------------------------")
    


In [104]:
# main
df = load_data()
prices = df.pop('Sell Price')
states = df
models = train(states, prices)

for name in models:
    eval(states, prices, name)





Episode 0/10


  return self.sum_cost / self.current_charge




  return self.sum_cost / self.current_charge




KeyboardInterrupt: 