# Basic gym Environment and Data

In [1]:
from gym import Env
from gym.spaces import Discrete, Box, Dict, Tuple, MultiDiscrete, MultiBinary
from gym.spaces import flatdim, flatten_space, unflatten, flatten

import numpy as np
import sys
import pandas as pd
import random
from collections import OrderedDict

import logging
from importlib import reload
reload(logging)
logging.basicConfig(stream=sys.stdout, format='')

## Data Preparation

In [22]:
# Renewables: generation time series; the index is the raw "time" column of the CSV.
renewables_df = pd.read_csv("data/clean/renewables.csv", sep = ";").set_index("time")

# Tenders (Demand): one row per auctioned slot, indexed by the parsed slot start.
tenders_df = pd.read_csv("data/clean/tenders_all.csv", sep = ";", index_col = 0)
tenders_df.index = pd.to_datetime(tenders_df.index)
tenders_df = tenders_df.rename(columns={'TOTAL_DEMAND_[MW]': "total"})
#TODO: outsourcing of data cleaning to data cleaning notebook 

# Bids (Offers): one row per submitted bid, indexed by the parsed slot start.
bids_df = pd.read_csv("data/clean/bids_all.csv", sep = ";", index_col = 0).set_index("SLOT_START", drop = True)
bids_df.index = pd.to_datetime(bids_df.index)
# A bid is flagged indivisible when its NOTE column mentions "INDIVISIBLE".
bids_df["indivisible"] = bids_df['NOTE'].str.contains(r'INDIVISIBLE', na=False)
bids_df = bids_df.rename(columns={'OFFERED_CAPACITY_PRICE_[EUR/MW]': 'price', 'OFFERED_CAPACITY_[MW]': 'size', "COUNTRY" : "country", "SETTLEMENTCAPACITY_PRICE_[EUR/MW]": "settlement_price"})
bids_df["size"] = bids_df["size"].astype(int)
bids_df = bids_df[["size", "price", "country", "settlement_price",  "indivisible"]]

# get time features
time_features_df = pd.read_csv("data/clean/time_features.csv", sep = ";", index_col = 0)

hist_window_size = 1 # in days
forecast_window_size = 1 # in days

# Start of the episode range. The earlier start is kept as a commented-out
# alternative; previously it was assigned and immediately overwritten.
# first_slot_index = "2020-12-31 22:00:00+00:00"
first_slot_index = "2021-10-30 22:00:00+00:00"

last_slot_index = "2021-12-29 19:00:00+00:00"
frame_bound = (first_slot_index, last_slot_index)

## Environment Definition

In [23]:
class VPPBiddingEnv(Env):
    
    def __init__(self,
                 renewables_df, 
                 bids_df,
                 tenders_df,
                 time_features_df,
                 hist_window_size,
                 forecast_window_size,
                 frame_bound,
                 log_level="WARNING"
                ):
        """Set up data, episode cursor and the observation/action spaces.

        :param renewables_df: frame with at least the columns 'Hydro1' and 'WP1'
        :param bids_df: historic bids (columns size, price, country,
            settlement_price, indivisible), indexed by slot start
        :param tenders_df: tenders with the columns 'GATE_CLOSURE_TIME' and
            'total', indexed by slot start
        :param time_features_df: calendar features (weekday, week, month,
            is_holiday, followsHoliday, priorHoliday)
        :param hist_window_size: length of the historic window in days
        :param forecast_window_size: length of the forecast window in days
        :param frame_bound: (first, last) slot date, parsed with pd.to_datetime
        :param log_level: level handed to the root logger; defaults to
            "WARNING" so the environment can be built without it
        """
        logger = logging.getLogger()
        logger.setLevel(log_level)
        logging.debug("log level = debug")
        logging.info("log level = info")
        logging.warning("log level = warning")

        # data 
        self.renewables_df = renewables_df
        self.tenders_df = tenders_df
        self.bids_df = bids_df
        self.time_features_df = time_features_df
        self.total_slot_FCR_demand = None
        
        # window_size
        self.hist_window_size = hist_window_size
        self.forecast_window_size = forecast_window_size
        
        self.hydro_df, self.wind_df = self._process_data()
        
        # episode
        self.first_slot_date = pd.to_datetime(frame_bound[0])
        self.last_slot_date = pd.to_datetime(frame_bound[1])
        
        # slot start , gate closure, auction time 
        self.lower_slot_start_boundary = self.first_slot_date 
        # First tender at or after the boundary; .iloc[0] is an explicit
        # positional lookup (plain [0] on a datetime-indexed Series is ambiguous).
        upcoming_tenders = self.tenders_df[self.lower_slot_start_boundary:]
        self.gate_closure = pd.to_datetime(upcoming_tenders["GATE_CLOSURE_TIME"].iloc[0])
        self.slot_start = upcoming_tenders.index[0]
        # The agent submits its bid one hour before gate closure.
        self.bid_submission_time = self.gate_closure - pd.offsets.DateOffset(hours = 1)
        
        self.initial = True
        self.done = None
        self.total_reward = 0.
        self.total_profit = 0.
        self.history = None
        
        # Slots 
        self.slots_won = [0, 0, 0, 0, 0, 0]
        self.slot_prices_DE = [0., 0., 0., 0., 0., 0.]
                
        # Spaces
        
        # Observation Space
        obs_low = np.zeros(96, dtype=np.float32) #96 timesteps to min 0.0
        obs_high = np.ones(96, dtype=np.float32) #96 timesteps to max 1.0
        # INFO: TSOs allow divisible and indivisible bids. Biggest divisible bid was 188 MW , maximum price was 4257.07 
        max_price = np.float32(4257.07)
        
        # Create a nested observation space with all observations inside
        # NOTE(review): "month" is Discrete(12), i.e. values 0..11 - confirm the
        # month column of time_features_df is zero-based, otherwise December
        # falls outside the space.
        self.observation_space = Dict({
            "hydro_historic": Box(obs_low, obs_high, dtype=np.float32),
            "wind_historic":  Box(obs_low, obs_high, dtype=np.float32),
            "hydro_forecast": Box(obs_low, obs_high, dtype=np.float32),
            "wind_forecast": Box(obs_low, obs_high, dtype=np.float32),
            "predicted_market_prices":  Box(low=0.0, high=max_price, shape=(6,), dtype=np.float32), # for each slot, can be prices of same day last week 
            "weekday": Discrete(7), # for the days of the week
            "week": Discrete(54),  # for week of the year
            "month": Discrete(12),
            "isHoliday": Discrete(2), # holiday = 1, no holiday = 0
            "followsHoliday": Discrete(2), # followsHoliday = 1, no followsHoliday = 0
            "priorHoliday": Discrete(2), # priorHoliday = 1, no priorHoliday = 0
            "slots_won": MultiBinary(6), #boolean for each slot, 0 if loss , 1 if won 
            "slot_prices_DE": Box(low=0.0, high=max_price, shape=(6,), dtype=np.float32)
            })
        
        self.observation = None
        
        # Action Space: one flat vector of 12 values.
        # 12 values from  min 0.0
        action_low = np.zeros(12, dtype=np.float32)
        # 6 values to max 25.0 = the bid sizes 
        # 6 values to max 100. = the bid prices
        action_high = np.array([25.0] * 6 + [100.0] * 6, dtype=np.float32)
        self.action_space = Box(low=action_low, high=action_high, shape=(12,), dtype=np.float32)


    
    
    def reset(self):
        
        if self.initial is False: 
            self.lower_slot_start_boundary = self.lower_slot_start_boundary  + pd.offsets.DateOffset(days=1)
            self.gate_closure = pd.to_datetime(self.tenders_df[self.lower_slot_start_boundary:]["GATE_CLOSURE_TIME"][0])
            self.slot_start = self.tenders_df[self.lower_slot_start_boundary:].index[0]
            self.bid_submission_time = self.gate_closure - pd.offsets.DateOffset(hours = 1)
            
            logging.info("new self.lower_slot_start_boundary = " + str(self.lower_slot_start_boundary))
            logging.info("self.gate_closure = " + str(self.gate_closure))
            logging.info("self.slot_start = " + str(self.slot_start))
            logging.info("self.bid_submission_time = " + str(self.bid_submission_time))

        self.total_slot_FCR_demand = self.tenders_df[str(self.slot_start):]["total"][0] 
        self.done = False

        # reset for each episode 
        self._get_new_timestamps()
        
        # get new observation
        self._get_observation()
        
        self.slots_won = [0, 0, 0, 0, 0, 0]
        self.slot_prices_DE = [0., 0., 0., 0., 0., 0.]
        
        # self.slots_won = np.array([0, 0, 0, 0, 0, 0], dtype=np.int32)
        # self.slot_prices_DE = np.array([0., 0., 0., 0., 0., 0.], dtype=np.float32)
        
        # when first Episode is finished, set boolean.  
        self.initial = False
        
        return self.observation
                
    
    def _get_new_timestamps(self):
                
        self.historic_data_start = self.bid_submission_time - pd.offsets.DateOffset(days=self.hist_window_size)
        self.historic_data_end =  self.bid_submission_time - pd.offsets.DateOffset(minutes = 15)
        logging.debug("self.historic_data_start = " + str(self.historic_data_start))
        logging.debug("self.historic_data_end = " + str(self.historic_data_end))
        
        self.forecast_start = self.slot_start
        self.forecast_end = self.forecast_start + pd.offsets.DateOffset(days=self.forecast_window_size) - pd.offsets.DateOffset(minutes=15) 
        logging.debug("self.forecast_start = " + str(self.forecast_start))
        logging.debug("self.forecast_end = " + str(self.forecast_end))

        self.market_start = self.slot_start
        self.market_end = self.market_start + pd.offsets.DateOffset(hours=20)
        logging.debug("self.market_start = " + str(self.market_start))
        logging.debug("self.market_end = " + str(self.market_end))

        self.slot_date_list = self.tenders_df[self.market_start:][0:6].index
        
        '''self.slot_date_list = []
        slot_date = self.market_start 
        for i in range(0,6):
            self.slot_date_list.append(str(slot_date))
            slot_date = slot_date + pd.offsets.DateOffset(hours=4)  '''
            
        logging.debug(" self.slot_date_list = " + str( self.slot_date_list))
    
    def _get_observation(self):
        
        if (self.done is False) and (self.initial is False):
            self.observation["slots_won"] = np.array(self.slots_won, dtype=np.int32)
            self.observation["slot_prices_DE"] = np.array(self.slot_prices_DE, dtype=np.float32)
            
            
        if (self.done is True) or (self.initial is True):
            hydro_historic = self.hydro_df[str(self.historic_data_start) : str(self.historic_data_end)].to_numpy(dtype=np.float32)
            wind_historic = self.wind_df[str(self.historic_data_start) : str(self.historic_data_end)].to_numpy(dtype=np.float32)
        
            hydro_forecast = self.hydro_df[str(self.forecast_start) : str(self.forecast_end)].to_numpy(dtype=np.float32)
            wind_forecast =  self.wind_df[str(self.forecast_start) : str(self.forecast_end)].to_numpy(dtype=np.float32)
        
            logging.debug("daylightsaving")
            logging.debug(self.hydro_df[str(self.forecast_start) : str(self.forecast_end)].head(20))
            logging.debug(self.wind_df[str(self.forecast_start) : str(self.forecast_end)].head(20))

            predicted_market_prices = np.array([ 10.0, 30.0, 20.0, 30.0, 10.0, 10.0], dtype=np.float32) # TODO: naive prediction: retrieve price of same day last week 
            # or: self.observation_space["predicted_market_prices"].sample()
            time_features = self.time_features_df[str(self.market_start) : str(self.market_end)]
            logging.debug(self.time_features_df[str(self.market_start) : str(self.market_end)])
            
            weekday = int(time_features["weekday"][0])
            week = int(time_features["week"][0])
            month = int(time_features["month"][0])
            isHoliday = int(time_features["is_holiday"][0])
            followsHoliday = int(time_features["followsHoliday"][0])
            priorHoliday = int(time_features["priorHoliday"][0])
        
            slots_won =  np.array(self.slots_won, dtype=np.int32)
            slot_prices_DE = np.array(self.slot_prices_DE, dtype=np.float32)
        
            self.observation = OrderedDict({
                "hydro_historic": hydro_historic,
                "wind_historic": wind_historic,
                "hydro_forecast": hydro_forecast,
                "wind_forecast": wind_forecast,
                "predicted_market_prices": predicted_market_prices,
                "weekday": weekday, 
                "week": week, 
                "month": month,
                "isHoliday": isHoliday, 
                "followsHoliday": followsHoliday,
                "priorHoliday": priorHoliday,
                "slots_won": slots_won,
                "slot_prices_DE": slot_prices_DE
                })
            
    
    
    def step(self, action):
        
        # convert action list with shape (12,) into dict
        action_dict = {
            "size": action[0:6], 
            "price": action[6:]
        }
        
        # take the bid out of the action of the agent and resimulate the market clearing algorithm
        self._simulate_market(action_dict)
        
        # calculate reward from state and action 
        step_reward = self._calculate_reward(action_dict)
        
        self.total_reward += step_reward
    
        info = dict(
            bid_submission_time = str(self.bid_submission_time),
            step_reward = round(step_reward,2),
            total_reward = round(self.total_reward,2),
            total_profit = round(self.total_profit,2)
        )
        self._update_history(info)
        
        self.done = True
        self._get_observation()

        
        return self.observation, step_reward, self.done, info
    
    
    def _calculate_reward(self, action_dict):        
        # Step 1 of Reward Function: The Auction
        # did the agent win the auction? 
        # what was the revenue ?
        
        step_reward = 0
        
        # per slot won: + 100
        # per slot won: + (bid size *  marginal prize)
        # per slot lost: -100

        
        for slot in range(0, len(self.slots_won)):
            if self.slots_won[slot] == 0:
                step_reward -= 100
            if self.slots_won[slot] == 1:
                # award the agent for a won slot
                step_reward += 100
                # further etract the bid size of the agent 
                agents_bid_size = action_dict["size"][slot]
                # and calculate the reward by multiplying the bid size with the settlement price of the slot
                step_profit = (agents_bid_size * self.slot_prices_DE[slot])
                self._update_profit(step_profit)
                step_reward +=  step_profit
            logging.debug("step_reward = " + str(step_reward))
        
        # further rewards? 
            # diff to the settlement price
            # diff to the max. forecasted capacity of the VPP
            # incentive to go nearer to settlement price or forecasted capacity can be: 1- (abs(diff_to_capacity)/max_diff_to_capacity)^0.5
        
        
        # Step 2 of Reward Function: The Provision
        # could the VPP provide the required capacity in the Provision phase? 
        
        # ALternative solution: 
        # A reward function, that combines penalty and delivered FCR: 
        # compensation = (60 minutes   - penalty minutes / 60 ) * price * size 
        # penalty  = (penalty minutes / 60 ) * price * size 
        # reputation_damage = reputation_factor *  penalty_min/ 60 * size
            # penalty_min = number of minutes where capacity could not be provided
        # in total: r = compensation − penalty − reputation_damage,
        
        return step_reward
    
    
    def _update_profit(self, step_profit):
        
        self.total_profit += step_profit
        
    
    def _update_history(self, info):
        if not self.history:
            self.history = {key: [] for key in info.keys()}

        for key, value in info.items():
            self.history[key].append(value)

            
    def render(self, mode="human"):
        """Placeholder for a visualisation; currently does nothing.

        :param mode: render mode, accepted so that callers following the gym
            `render(mode=...)` convention do not fail; it is ignored for now
        """
        # TODO: Implement visualisation
        pass
    
    
    def _process_data(self):
        hydro_df = self.renewables_df.loc[:, 'Hydro1']
        wind_df = self.renewables_df.loc[:, 'WP1']
        # TODO: add more power plants
        return hydro_df, wind_df
    

    def _simulate_market(self, action_dict):
        
        auction_bids = self.bids_df[self.market_start : self.market_end]
        logging.debug("auction_bids = ")        
        logging.debug(self.bids_df[self.market_start : self.market_end])
        
        logging.info("Bid Submission time (D-1) = %s" % (self.bid_submission_time))
        logging.info("Gate Closure time (D-1) = %s" % (self.gate_closure))
        logging.info("Historic Data Window: from %s to %s " % (self.historic_data_start, self.historic_data_end))
        logging.info("Forecast Data Window: from %s to %s " % (self.forecast_start, self.forecast_end))

        
        for slot in range(0, len(self.slot_date_list)):
            slot_date = self.slot_date_list[slot]
            logging.info("Current Slot Time: (D) = %s" % (slot_date)) 
            slot_bids = auction_bids[slot_date : slot_date].reset_index(drop=True).reset_index(drop=False)
            logging.debug("slot_bids = " + str(slot_bids))
            slot_bids_list = slot_bids.to_dict('records')
            logging.debug("slot_bids_list = " + str(slot_bids_list))
            # extract the bid size out of the agents action
            # ROUND TO FULL INTEGER
            agents_bid_size = round(action_dict["size"][slot])
            # extract the bid price out of the agents action
            agents_bid_price = action_dict["price"][slot]
            logging.info("agents_bid_size = %s" % (agents_bid_size))
            logging.info("agents_bid_price = %s" % (agents_bid_price))            
            # get settlement price
            settlement_price_DE = [bid['settlement_price'] for bid in slot_bids_list if bid['country']== "DE"][0] 
            logging.info( "settlement_price_DE : " + str(settlement_price_DE))
            
            # First check if agents bid price is higher than the settlement price of Germany 
            # OR if agents bid size is 0 
            if (agents_bid_price > settlement_price_DE) or (agents_bid_size == 0):
                # if it is higher, the slot is lost. 
                self.slots_won[slot] = 0
                # set settlement price for the current auctioned slot in slot_prices_DE list
                self.slot_prices_DE[slot] = settlement_price_DE
            else: 
                # If agents bid price is lower than settlement price (bid could be in awarded bids)
                # get CBMP of countries without LMP
                unique_country_bids = list({v['country']:v for v in slot_bids_list}.values())
                grouped_prices = [x['settlement_price'] for x in unique_country_bids]
                cbmp = max(set(grouped_prices), key = grouped_prices.count)
                logging.info( "cbmp : " + str(cbmp))
                # check if settlement_price_DE is same as CBMP (no limit constraints where hit)
                if cbmp == settlement_price_DE:
                    price_filter = cbmp
                    logging.debug("DE has CBMP")
                else: 
                    # if Germany has a price based on limit constraints
                    price_filter = settlement_price_DE
                    logging.debug("DE has LMP")
                                
                # as the probability is high that the agents bid moved the last bid out of the list, 
                # we have to check which bids moved out of the list and what is the new settlement price
                
                # sort the bid list based on the price
                slot_bids_list_sorted_by_price = sorted(slot_bids_list, key=lambda x: x['price'])
                # filter the bid list by the settlement price of either the CBMP or the LMP of germany 
                #slot_bids_prices_filtered = [bid['price'] for bid in slot_bids_list_sorted_by_price if bid['settlement_price']== price_filter]
                #logging.debug(slot_bids_prices_filtered)
                slot_bids_filtered = [bid for bid in slot_bids_list_sorted_by_price if bid['settlement_price']== price_filter]
                accumulated_replaced_capacity = 0
                
                slot_bids_filtered_size_sum = sum([bid['size'] for bid in slot_bids_filtered])
                    # for the case the action_dict space is not dynamic and agent can choose any bid size,
                    # it needs to be checked here if 
                if agents_bid_size >= slot_bids_filtered_size_sum:
                    logging.debug("unrealistic bid size")
                    # set auction won to false
                    self.slots_won[slot] = 0
                    # set settlement price to zero as it is an unrealistic auciton
                    self.slot_prices_DE[slot] = 0
                else:
                    for bid in range(0, len(slot_bids_filtered)): 
                        logging.debug("bid size = " + str(slot_bids_filtered[-(bid+1)]["size"]))
                        logging.debug("bid price = " + str(slot_bids_filtered[-(bid+1)]["price"]))
                        bid_capacity = slot_bids_filtered[-(bid+1)]["size"]
                        accumulated_replaced_capacity += bid_capacity
                        logging.debug("accumulated_replaced_capacity = " + str( accumulated_replaced_capacity))
                            
                        if accumulated_replaced_capacity >= agents_bid_size:
                            logging.debug("realistic bid size")
                            if slot_bids_filtered[-(bid+1)]["indivisible"] is False:
                                logging.debug("bid is divisible, so current bids price is new settlement price")
                                new_settlement_price_DE = slot_bids_filtered[-(bid+1)]["price"]
                            else:
                                logging.debug("bid is INDIVISIBLE, so move one bids further is new settlement price")
                                accumulated_replaced_capacity -= bid_capacity
                                continue
                            logging.info("new_settlement_price_DE = " + str( new_settlement_price_DE))
                            # set boolean for auction win
                            self.slots_won[slot] = 1
                            # set settlement price for the current auctioned slot in slot_prices_DE list
                            self.slot_prices_DE[slot] = new_settlement_price_DE
                            break

            logging.info("self.slots_won = ")
            logging.info("\n".join("won: \t{}".format(k) for k in self.slots_won))
            logging.info("     agents bid_size = ")
            logging.info("\n".join("size: \t{}".format(round(k) )for k in action_dict["size"]))            
            logging.info("self.slot_prices_DE = ")
            logging.info("\n".join("price: \t{}".format(k) for k in self.slot_prices_DE))
                            
           

### Initialize Environment

In [24]:
# Build the environment on the frames and window settings prepared above.
env = VPPBiddingEnv(
    renewables_df=renewables_df,
    bids_df=bids_df,
    tenders_df=tenders_df,
    time_features_df=time_features_df,
    hist_window_size=hist_window_size,
    forecast_window_size=forecast_window_size,
    frame_bound=frame_bound,
    log_level="DEBUG",  # "DEBUG" , "INFO" or  "WARNING"
)

log level = debug
log level = info


### Run Episodes

In [25]:
episodes = 5

# Random-agent smoke test: one episode = one auction day = one step.
for episode in range(1, episodes+1):
    logging.info('Start of Episode:{} '.format(episode))
    observation = env.reset()
    # the score is per episode; it used to accumulate over all episodes
    score = 0

    # timestep defined as: 1 step = 1 day.
    for timestep in range(1):
        #env.render()
        #logging.info(observation)
        action = env.action_space.sample()

        observation, reward, done, info = env.step(action)
        score+=reward
        if done:
            logging.warning('Episode: {}  Score: {}  Info: {}'.format(episode, round(score,2), info))
            break
env.close()

Start of Episode:1 
self.historic_data_start = 2021-10-29 05:00:00+00:00
self.historic_data_end = 2021-10-30 04:45:00+00:00
self.forecast_start = 2021-10-30 22:00:00+00:00
self.forecast_end = 2021-10-31 21:45:00+00:00
self.market_start = 2021-10-30 22:00:00+00:00
self.market_end = 2021-10-31 18:00:00+00:00
 self.slot_date_list = DatetimeIndex(['2021-10-30 22:00:00+00:00', '2021-10-31 03:00:00+00:00',
               '2021-10-31 07:00:00+00:00', '2021-10-31 11:00:00+00:00',
               '2021-10-31 15:00:00+00:00', '2021-10-31 19:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='SLOT_START', freq=None)
daylightsaving
time
2021-10-30 22:00:00+00:00    0.007877
2021-10-30 22:15:00+00:00    0.007877
2021-10-30 22:30:00+00:00    0.007502
2021-10-30 22:45:00+00:00    0.007502
2021-10-30 23:00:00+00:00    0.007877
2021-10-30 23:15:00+00:00    0.007502
2021-10-30 23:30:00+00:00    0.007877
2021-10-30 23:45:00+00:00    0.007877
2021-10-31 00:00:00+00:00    0.007502
2021-10-31 00:

IndexError: list index out of range

## Stable Baselines

In [None]:
from stable_baselines3.common.env_checker import check_env

env = VPPBiddingEnv(renewables_df = renewables_df,
                    bids_df = bids_df,
                    tenders_df = tenders_df,
                    time_features_df = time_features_df,
                    hist_window_size = hist_window_size,
                    forecast_window_size = forecast_window_size,
                    frame_bound = frame_bound,
                    log_level = "WARNING" # was missing; the constructor call failed without it
                   )
# It will check your custom environment and output additional warnings if needed
check_env(env)


In [None]:
from stable_baselines3 import PPO
from stable_baselines3.ppo import MultiInputPolicy

# PPO agent with the multi-input policy on the environment created above.
model = PPO(policy=MultiInputPolicy, env=env, verbose=0)

In [None]:
def evaluate(model, episodes=100, deterministic=True):
    """
    Evaluate a RL agent
    :param model: (BaseRLModel object) the RL Agent
    :param episodes: (int) number of episodes to evaluate it
    :param deterministic: (bool) passed on to model.predict
    :return: (float) Mean reward over the evaluated episodes
    """
    # This function will only work for a single Environment
    env = model.get_env()
    all_episode_rewards = []

    for episode in range(1, episodes+1):
        logging.info('Start of Episode:{} '.format(episode))
        episode_rewards = []
        done = False
        info = None
        obs = env.reset()
        while not done:
            action, _states = model.predict(obs, deterministic=deterministic)
            obs, reward, done, info = env.step(action)
            episode_rewards.append(reward)
        episode_score = sum(episode_rewards)
        logging.info('Episode:{} Score:{} Info:{}'.format(episode, episode_score, info))

        all_episode_rewards.append(episode_score)

    mean_episode_reward = np.mean(all_episode_rewards)
    # the message is formatted up front; the old call passed extra positional
    # arguments without placeholders and referenced an undefined name
    logging.info("Mean reward: {} Num episodes: {}".format(mean_episode_reward, episodes))
    return mean_episode_reward

In [None]:
evaluate(model)
env.close()

# 2. Create a Deep Learning Model with Keras

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [None]:
# The observation space is a flat Dict; its entries are addressed directly by key.
states_hydro_historic = env.observation_space["hydro_historic"].shape
states_wind_historic = env.observation_space["wind_historic"].shape

# The action space is a single Box, so its shape is read from the space itself.
actions = env.action_space.shape

In [None]:
# show the three shapes, one after the other
display(states_hydro_historic, states_wind_historic, actions)

In [None]:
def build_model(states, actions):
    """Stack two hidden ReLU layers (24 units each) and a linear output layer.

    :param states: passed to the first Dense layer as input_shape
    :param actions: number of units of the output layer
    :return: the (uncompiled) Sequential model
    """
    # flatten? 
    layers = [
        Dense(24, activation='relu', input_shape=states),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)
    return model

In [None]:
del model 


In [None]:
# NOTE(review): `states` is not assigned in any cell above this one (it is only
# set in a later cell), so this call fails on a fresh kernel - confirm which
# shape was meant here.
model = build_model(states, actions)


In [None]:
model.summary()


# 3. Build Agent with Keras-RL


In [None]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory


In [None]:
def build_agent(model, actions):
    """Wrap model in a DQN agent with a Boltzmann policy and a sequential replay memory.

    :param model: the network handed to DQNAgent
    :param actions: passed to DQNAgent as nb_actions
    :return: the (uncompiled) DQNAgent
    """
    exploration_policy = BoltzmannQPolicy()
    replay_memory = SequentialMemory(limit=50000, window_length=1)
    return DQNAgent(
        model=model,
        memory=replay_memory,
        policy=exploration_policy,
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

In [None]:
dqn = build_agent(model, actions)
# `learning_rate` is the current name of the optimizer argument; `lr` is deprecated.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

In [None]:
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))

In [None]:
_ = dqn.test(env, nb_episodes=15, visualize=True)


# 4. Reloading Agent from Memory


In [None]:
dqn.save_weights('dqn_weights.h5f', overwrite=True)


In [None]:
del model
del dqn
del env

In [None]:
from gym.envs.registration import register

# Register the environment id 'vpp-v0' with gym.
# NOTE(review): the entry point 'gym_foo.envs:FooEnv' looks like a template
# placeholder — confirm it points at the module/class that holds the VPP environment.
register(
    id='vpp-v0',
    entry_point='gym_foo.envs:FooEnv',
)


In [None]:
# The import cells visible in this notebook only do `from gym import ...`, which does
# not bind the name `gym`; import the module here so `gym.make` does not raise NameError.
import gym

# Recreate environment, model and agent before loading the saved weights.
# NOTE(review): this still builds 'CartPole-v0' rather than the 'vpp-v0' id registered
# above, and `states` is an int although `build_model` forwards it as `input_shape` —
# confirm both against the intended setup.
env = gym.make('CartPole-v0')
actions = env.action_space.n
states = env.observation_space.shape[0]
model = build_model(states, actions)
dqn = build_agent(model, actions)
# `learning_rate` replaces the deprecated `lr` keyword of the tf.keras optimizers.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

In [None]:
dqn.load_weights('dqn_weights.h5f')


In [None]:
_ = dqn.test(env, nb_episodes=5, visualize=True)


# Archive


In [None]:
# Flattened size of the observation space and flattened form of the complex action
# space. These two results are not rendered: only the last expression of a cell is.
flatdim(env.observation_space)
flatten_space(env.complex_action_space)


# Flatten a random sample of the complex action space ...
flattened_datapoint = flatten(env.complex_action_space, env.complex_action_space.sample())
display(flattened_datapoint)

# ... and rebuild a structured action from a sample of `env.action_space`.
# NOTE(review): presumably `env.action_space` is the flattened counterpart of
# `env.complex_action_space` — confirm, otherwise the sample does not fit the space.
unflattened_datapoint = unflatten(env.complex_action_space, env.action_space.sample())
display(unflattened_datapoint)

# check if flattened data point is in space

flatten(env.observation_space, env.observation_space.sample())  in flatten_space(env.observation_space)


In [None]:
    def _simulate_market(self, action):
        """Work-in-progress simulation of the market clearing for every slot.

        For each entry of ``self.slot_date_list`` the bids of that slot are taken
        from ``self.bids_df``, one bid for the agent is appended, and bids are
        accepted in ascending price order until ``self.total_slot_FCR_demand`` is
        reached. Results are written to ``self.slot_prices`` and ``self.slots_won``;
        nothing is returned.

        Args:
            action: the agent's action. NOTE(review): currently unused — bid size
                and price are hard-coded below and the lines reading ``action``
                are commented out.
        """

        # market clearing algorithm:

        # for each slot
        # get all bids
        # bids to dict
        # add bid from action
        # bring in order by price
        # accumulate capacities until demand is filled
        # check if bid is in bid list
            # if yes, set auction_won = True and get SETTLEMENTCAPACITY_PRICE
            # if no, set auction_won = False

        ################################################

        # TODO: rewrite the market clearing algorithm; the bids of all countries (except DENMARK ??) must be considered to fill the total demand, only then is the price for the slot fixed.

        # for each slot
        # get all bids
        # bids to dict
        # add bid from action
        # bring in order by price
        # accumulate capacities until demand is filled  FOR ALL COUNTRIES
            # indivisible flag needs to be included and checked: indivisible offer needs to be fully included
            # check for every country, if Core Portion and export limit are satisfied
                # for each country
                    # at least the core portion needs to be satisfied
                    # if capacity is less than total demand, the settlement price for an underfilled country is the price of the last accepted bid
                # at most the export limit needs to be satisfied (all bids for Country - demand of country)

        # set prices for all countries

        ################################################

        # The optimisation algorithm calculates the optimal combination of FCR bids to be awarded under consideration of core shares and the maximum exchangeable FCR volumes (export limits of a country) with the goal to reduce total procurement cost of the cooperation.

        # 1. If no export limits or core share constraint are hit, one cross-border marginal price (CBMP) will be determined equalling the most expensive awarded bid in the overall cooperation.
        # Exceptions from having one CBMP may occur once export limits or core share constraint of one or more countries of the cooperation are hit. In this case, an LMP will be determined based on the local awarded bids within a country.

        def add_bid_to_acceppted_bids(bid,
                                     accepted_bids,
                                     sorted_bids_list_by_price,
                                     accumulated_capacities,
                                     LMPi=False):
            """Accept ``bid`` in full and update the running bookkeeping in place.

            Adds the bid's offered capacity to its country's and to the total
            accumulated capacity, records its price as the country's LMPi (when
            ``LMPi`` is true) or as the CBMP, sets ``bid["allocated"]`` to the
            offered amount, appends the bid to ``accepted_bids`` and removes it from
            ``sorted_bids_list_by_price``.

            Returns:
                The tuple ``(accepted_bids, sorted_bids_list_by_price,
                accumulated_capacities)`` — the same, mutated objects passed in.
            """
            country_prefix = bid["country"]
            # add the capacity of the bid to the accumulated capacity of a single country
            # NOTE(review): the agent's bid carries an "offered" key, but the
            # data-preparation cell names the capacity column of bids_df "size" —
            # confirm that records coming from bids_df have an "offered" key.
            accumulated_capacities[country_prefix + "_capacity"]  += bid["offered"]

            # add the capacity of each bid to the accumulated capacity
            accumulated_capacities["total_capacity"] += bid["offered"]
            if LMPi: 
                # if bid is evaluated for an LMPi
                accumulated_capacities[country_prefix + "_LMPi"] = bid["price"]
            else:
                # if it is a normal bid, the bids price is the new CBMP 
                accumulated_capacities["CBMP"] = bid["price"]
            # add the bid to the accepted bids list
            bid["allocated"] = bid["offered"]
            accepted_bids.append(bid)
            #print('bid["index"] = %s ' % (bid["index"]))
            #print("agents_bid_index = %s" % (agents_bid_index))
            #print("accumulated_capacities["total_capacity"] = %s" % (accumulated_capacities["total_capacity"]))

            # remove bid from list to not iterate over it again when searching for limit constraint replacement bids
            # (list.remove raises ValueError if the bid is no longer in the list)
            sorted_bids_list_by_price.remove(bid)

            return accepted_bids, sorted_bids_list_by_price, accumulated_capacities
            
        
        # restrict bids and tender constraints to the market window
        auction_bids = self.bids_df[self.market_start : self.market_end]
        country_constraints = self.tenders_df[self.market_start : self.market_end]

        
        
        for slot in range(0, len(self.slot_date_list)):
            slot_date = self.slot_date_list[slot]
            print("slot_date = %s" % (slot_date))
            # rows of this slot only; the time index is replaced by a positional one,
            # which is then exposed as an "index" column identifying each bid
            slot_bids = auction_bids[slot_date : slot_date].reset_index(drop=True).reset_index(drop=False)
            bids_list = slot_bids.to_dict('records')
            
            # only the first constraint row of the slot is used
            slot_constraints = country_constraints[slot_date : slot_date].reset_index(drop=True).reset_index(drop=False)
            slot_constraints = slot_constraints.to_dict('records')[0]            

            #print("bids_list = ")
            #print("\n".join(" \t{}".format(k) for k in bids_list))
            
            # get the length of the list to create an index for the agent's bid that will now be added
            agents_bid_index = len(bids_list)
            
            
            # extract the bid size out of the agents action
            # agents_bid_size = action[0][slot]
            # (hard-coded placeholder while the action wiring is disabled)
            agents_bid_size = 10

            
            # extract the bid price out of the agents action
            #agents_bid_price = action[1][slot]
            # (hard-coded placeholder while the action wiring is disabled)
            agents_bid_price = 0
            
            
            print("agents_bid_size = %s" % (agents_bid_size))
            print("agents_bid_price = %s" % (agents_bid_price))
            print("agents_bid_index = %s" % (agents_bid_index))
            # add the selected bid from the agent to the list of all bids
            bids_list.append({'index': agents_bid_index, 'offered': agents_bid_size, 'price': agents_bid_price, "country": "DE", "indivisible": False})
            # sort the list based on the price to later accumulate all bids' capacity (but ordered on price)
            sorted_bids_list_by_price = sorted(bids_list, key=lambda x: x['price'])
            
            #print("sorted_bids_list_by_price = ")
            #print("\n".join(" \t{}".format(k) for k in sorted_bids_list_by_price))
            
            # distinct countries present in this slot's bids (order is arbitrary: built from a set)
            country_list = list(set([x['country'] for x in sorted_bids_list_by_price]))
            # countries whose core share was found underfilled and that still need a local bid
            LMPi_list = []
            accepted_bids = []
            slot_finished = False
            
            # CBMP = cross-border marginal price
            # LMPi = Local Marginal Price of importing country
            # NOTE: the *_export and *_core entries are initialised here but never
            # updated anywhere in this method.
            accumulated_capacities = {
                 'total_capacity': 0,
                 'CBMP': 0,
                 'DE_capacity': 0,
                 'DE_export': 0,
                 'DE_core': 0,
                 'DE_LMPi': 0,
                 'BE_capacity': 0,
                 'BE_export': 0,
                 'BE_core': 0,
                 'BE_LMPi': 0,
                 'FR_capacity': 0,
                 'FR_export': 0,
                 'FR_core': 0,
                 'FR_LMPi': 0,
                 'NL_capacity': 0,
                 'NL_export': 0,
                 'NL_core': 0,
                 'NL_LMPi': 0,
                 'AT_capacity': 0,
                 'AT_export': 0,
                 'AT_core': 0,
                 'AT_LMPi': 0,
                 'CH_capacity': 0,
                 'CH_export': 0,
                 'CH_core': 0,
                 'CH_LMPi': 0,
                 'SI_capacity': 0,
                 'SI_export': 0,
                 'SI_core': 0,
                 'SI_LMPi': 0,
                 'DK_capacity': 0,
                 'DK_export': 0,
                 'DK_core': 0,
                 'DK_LMPi': 0,
            }

            # iterate over a copy: the helper removes accepted bids from the original list
            for bid in sorted_bids_list_by_price[:]:
                # check if LMPi_list contains countries that need to be checked
                if LMPi_list:
                    # check if current bid is from country
                    if bid["country"] != LMPi_list[0]:
                        # if not, go to next bid 
                        continue
                    # if bid is from country
                    else:
                        # add bid to accepted bids
                        accepted_bids, sorted_bids_list_by_price, accumulated_capacities = add_bid_to_acceppted_bids(bid,
                                                                                                                     accepted_bids,
                                                                                                                     sorted_bids_list_by_price,
                                                                                                                     accumulated_capacities,
                                                                                                                     LMPi = True
                                                                                                                    )
                        # after bid was added, remove country from LMPi list 
                        LMPi_list.pop(0)
                        
                        
                        
                # add bid to accepted bids
                # NOTE(review): this call is not in an else branch, so after the LMPi
                # branch above the same bid is passed to the helper a second time; the
                # helper's list.remove() then fails with ValueError unless an equal
                # bid dict is still in the list. Confirm the intended control flow.
                accepted_bids, sorted_bids_list_by_price, accumulated_capacities = add_bid_to_acceppted_bids(bid,
                                                                                                            accepted_bids,
                                                                                                            sorted_bids_list_by_price,
                                                                                                            accumulated_capacities,
                                                                                                            LMPi = False)
                    
                # 2.1 Case of hitting a limit constraint
                # It is important to understand that an export limit or core share constraint is hit whenever it influences the solution and not only when the quantity awarded in a country is exactly equal to the respective limit quantity of that country.
                
            
                if accumulated_capacities["total_capacity"] >= self.total_slot_FCR_demand:
                    # if accumulated_capacities["total_capacity"] is bigger than the demand, the last indivisible offer(s) need to be reduced
                    
                    # 2.1.2 Check if core share of every country is hit
                    for country in country_list: 
                        print("accumulated_capacities for " + country + ": " +  str(accumulated_capacities[country + "_capacity"]))
                        print("core constraint for " + country + ": " +  str(slot_constraints[country + "_core"]))
                        if accumulated_capacities[country + "_capacity"] < slot_constraints[country + "_core"]: 
                            print("CORE SHARE TOO SMALL FOR COUNTRY: " + country)
                            
                            # NOTE(review): this runs once per underfilled country, so
                            # several accepted bids may be dropped and the current bid's
                            # capacity subtracted several times; the per-country
                            # capacity is not reduced. Confirm.
                            del accepted_bids[-1]
                            accumulated_capacities["total_capacity"] -= bid["offered"]
                            LMPi_list.append(country)
                            
                            # TODO: set CBMP for all other countries. 
                            
                            # NOTE(review): this `continue` belongs to the inner
                            # `for country` loop (not the bid loop) and is the last
                            # statement of its body, so it has no effect.
                            continue
                            
                            # TODO: for every core-underfilled country the capacity needs to be filled and the LMPi has to be found 
                        
                    # TODO: set allocated price of all bids :   bid["allocated"
                    
                    # TODO: Check Over Procurement 
                    if accumulated_capacities["total_capacity"] > self.total_slot_FCR_demand: 
                        # get the overfilled capacity (difference)
                        # (computed but not used yet)
                        overfilled_capacity = accumulated_capacities["total_capacity"] - self.total_slot_FCR_demand
                        # get list of accepted bids that are divisible (= that is not indivisible)
                        accepted_bids_divisible = [bid for bid in accepted_bids if not bid['indivisible']]
                        # get last accepted bid that is divisible
                        # (bare expression: the value is discarded; raises IndexError if no divisible bid was accepted)
                        accepted_bids_divisible[-1]
                        
                        # TODO: proceed with over procurement 
                        
                        # TODO: place slot_finished boolean somewhere
                        # NOTE(review): only set on strict over-procurement; when the
                        # total equals the demand exactly, the bid loop keeps going.
                        slot_finished = True
                    
                    # TODO: check if a country has a CBMP or LMPi 
                    
                    # last accepted bid sets settlement price of auction
                    settlement_price = bid["price"]
                    # set settlement price for the current auctioned slot in slot_prices list
                    self.slot_prices[slot] = settlement_price
                    
                    if agents_bid_index in [x['index'] for x in accepted_bids]:
                        # set boolean for auction win
                        self.slots_won[slot] = 1
                    
                    print("accumulated_capacities['total_capacity'] = %s" % (accumulated_capacities["total_capacity"]))
                    print("self.slots_won = ")
                    print("\n".join("won: \t{}".format(k) for k in self.slots_won))
                    print("self.slot_prices = ")
                    print("\n".join("price: \t{}".format(k) for k in self.slot_prices))
                    
                    
                    
                if slot_finished: 
                    break
                    
                    

In [None]:
 '''
                # add the selected bid from the agent to the list of all bids
                bids_list.append({'index': agents_bid_index, 'offered': agents_bid_size, 'price': agents_bid_price, "country": "DE", "indivisible": False})
                # sort the list based on the price to later accumulate all bids' capacity (but ordered on price)
                sorted_bids_list_by_price = sorted(bids_list, key=lambda x: x['price'])


                # as we don't have enough data to simulate a realistic market clearing algorithm,
                # we calculate a new settlement price with
                #prices = [x['price'] for x in accepted_bids]
                #diffs = np.diff(np.array(list_a))
                #mean_diff = sum(diffs) / len(diffs)
                '''

In [None]:
# ----------------------
# other way of representing observation

'''
next_observation = Dict({
    'historic_data': Dict({
        "hydro_historic": Box(low, high, dtype=np.float32)
        "wind_historic":  Box(low, high, dtype=np.float32)
    }),
    'forecast_data':  Dict({
        "hydro_forecast": Box(low, high, dtype=np.float32),
        "wind_forecast": Box(low, high, dtype=np.float32),
        "soc_forecast": Box(low, high, dtype=np.float32)
        # TODO should I keep the Battery state of charge? 
    }),
    'market_data':  Dict({
        "market_demand": Discrete(3), # for the demands 573, 562 and 555 MW
        # TODO for 2021 it's always 562, how to handle different years? maybe set it as a global constant? 

        "predicted_market_prices":  Box(low=0.0, high=1634.52, shape=(6, 1), dtype=np.float32), # for each slot, can be prices of same day last week 
    }),
    'time_features':  Dict({
        "weekday": Discrete(7), # for the days of the week
        "holiday": Discrete(2), # holiday = 1, no holiday = 0
        "month": Discrete(12), # for the month
    })
})
'''


In [None]:
# Example tender-constraint record (one slot): total demand plus demand, export limit
# and core share per country. Missing SI / DK values are the string "nan", not a float NaN.
demand = [{'index': 0, 'total': 1409.0, 'DE_demand': 562.0, 'DE_export': 168.0, 'DE_core': 169.0, 'BE_demand': 87.0, 'BE_export': 100.0, 'BE_core': 27.0, 'FR_demand': 508.0, 'FR_export': 152.0, 'FR_core': 153.0, 'NL_demand': 114.0, 'NL_export': 100.0, 'NL_core': 35.0, 'AT_demand': 71.0, 'AT_export': 100.0, 'AT_core': 22.0, 'CH_demand': 67.0, 'CH_export': 100.0, 'CH_core': 21.0, 'SI_demand': "nan", 'SI_export': "nan", 'SI_core': "nan", 'DK_demand': "nan", 'DK_export': "nan", 'DK_core': "nan"}]
# From here on `demand` is the single record (a dict), no longer the one-element list.
demand = demand[0]
demand["DE_demand"]

In [None]:
demand

In [None]:
# Toy data for experimenting with the over-procurement reduction step.
accepted_bids = [
    {'index': 0, 'offered': 11, "allocated": 10, 'price': 0.0, 'country': 'DE', 'indivisible': False},
    {'index': 1, 'offered': 1, "allocated": 10, 'price': 0.0, 'country': 'DE', 'indivisible': True},
    {'index': 2, 'offered': 1, "allocated": 10, 'price': 0.0, 'country': 'FR', 'indivisible': True},
    {'index': 3, 'offered': 4, "allocated": 10, 'price': 0.0, 'country': 'DE', 'indivisible': False},
]

# Tender constraints keyed by country code. This was a one-element list before, which
# cannot be indexed with a country string (TypeError).
country_tenders = {
    "DE": {"total": 0, "export": 11, "core": 10},
    "BE": {"total": 0, "export": 11, "core": 10},
}

country_capacity = {"DE": 555, "BE": 200}

# Capacity procured beyond the demand. It was never defined in this cell (NameError
# on a fresh kernel); 1 is a placeholder matching the decrement applied below.
overfilled_capacity = 1

# go through all accepted bids in reversed order sorted by price
for bid in reversed(accepted_bids):
    # indivisible bids cannot be reduced, look at the next one
    if bid["indivisible"]:
        continue
    # check if the offered capacity of the bid is at least 1 bigger than the
    # overfilled capacity (so it can be subtracted)
    if bid["offered"] > overfilled_capacity:
        # TODO: not used yet — meant to guard the country's core share
        difference_to_core = country_tenders[bid["country"]]["core"]
    # TODO: still reduces by a fixed 1 regardless of the check above
    bid["allocated"] = bid["allocated"] - 1
    # only the last divisible bid is touched
    break

print(accepted_bids)


In [None]:
# Second toy bid list (rebinds `accepted_bids`): DE/BE at price 1.0, FR at price 2.0.
# The "allocated" values are placeholders — they exceed "offered" for some bids.
accepted_bids = [{'index': 0, 'offered': 11, "allocated" : 10, 'price': 1.0, 'country': 'DE', 'indivisible': False}
,{'index': 1, 'offered': 1, "allocated" : 10,'price': 1.0, 'country': 'BE', 'indivisible': True}
,{'index': 2, 'offered': 1,"allocated" : 10, 'price': 2.0, 'country': 'FR', 'indivisible': True}
,{'index': 3, 'offered': 4,"allocated" : 10, 'price': 1.0, 'country': 'DE', 'indivisible': False}]


In [None]:
accepted_bids

In [None]:
# Prices of all accepted DE bids, in list order ...
display([bid['price'] for bid in accepted_bids if bid['country']== "DE"])
# ... and the price of the last one (IndexError if there is no DE bid).
display([bid['price'] for bid in accepted_bids if bid['country']== "DE"][-1])


In [None]:
# Distinct countries, the country of every bid, and the price of every bid.
# Only the last expression (the price list) is rendered as the cell output.
set(([x['country'] for x in accepted_bids]))
(([x['country'] for x in accepted_bids]))
(([x['price'] for x in accepted_bids]))

In [None]:
# One bid per country: later bids overwrite earlier ones in the dict, so the last
# bid of each country in `accepted_bids` is kept.
unique_country_bids = list({v['country']:v for v in accepted_bids}.values())
display(unique_country_bids)
all_prices = [x['price'] for x in unique_country_bids]
display(all_prices)

# CBMP candidate = the price that occurs most often among the per-country bids.
# With a tie, the winner depends on the iteration order of the set.
cbmp = max(set(all_prices), key = all_prices.count)
display(cbmp)


In [None]:
unique_country_bids = [{'index': 665, 'offered': 1, 'price': 11.0, 'country': 'DE', 'settlement_price': 13.3, 'indivisible': False}, {'index': 598, 'offered': 1, 'price': 4.48, 'country': 'FR', 'settlement_price': 4.48, 'indivisible': False}, {'index': 677, 'offered': 23, 'price': 217.76, 'country': 'BE', 'settlement_price': 217.76, 'indivisible': True}, {'index': 671, 'offered': 1, 'price': 12.5, 'country': 'NL', 'settlement_price': 13.3, 'indivisible': False}, {'index': 673, 'offered': 5, 'price': 13.3, 'country': 'CH', 'settlement_price': 13.3, 'indivisible': False}, {'index': 670, 'offered': 2, 'price': 12.4, 'country': 'AT', 'settlement_price': 13.3, 'indivisible': False}, {'index': 675, 'offered': 1, 'price': 50.0, 'country': 'DK', 'settlement_price': 50.0, 'indivisible': True}]

In [None]:
# Same most-frequent-value computation as above, but on the 'settlement_price' of
# the per-country bids.
display(unique_country_bids)
all_prices = [d['settlement_price'] for d in unique_country_bids]
display(all_prices)

# Most frequent settlement price; ties are resolved by set iteration order.
cbmp = max(set(all_prices), key = all_prices.count)
display(cbmp)


In [None]:
# Settlement price of the first DE bid. The toy `accepted_bids` defined above carry
# no 'settlement_price' key (KeyError), so read from `unique_country_bids`, whose
# records do.
[bid['settlement_price'] for bid in unique_country_bids if bid['country'] == "DE"][0]
