In [1]:
%load_ext autoreload
%autoreload 2

#import sys
#sys.path.append('../../')

import pandas as pd
import numpy as np
from datetime import datetime
from src.data.helper_closed_transactions import read_epex_file, filter_lead_time, extract_transactions
import matplotlib.pyplot as plt

#set maximum columns display pandas
pd.options.display.max_columns = 50

In [23]:
#Helper functions
#Clean the paired buy/sell transaction columns

def clean_transactions(x):
    """Reduce the paired buy/sell columns of a transaction table to one set.

    The four sell-side columns ('Executed Volume_S', 'Execution Price_S',
    'Delivery Start_S', 'Instrument Type_S') are dropped and their buy-side
    counterparts lose the '_B' suffix. All other columns are left untouched.

    Parameters
    ----------
    x : pandas.DataFrame
        Table containing the '_B' and '_S' columns listed above.

    Returns
    -------
    pandas.DataFrame
        A new frame; `x` itself is not modified.

    Raises
    ------
    KeyError
        If any of the sell-side columns to drop is missing (e.g. when the
        function is applied twice to the same data).
    """
    sell_side_columns = ['Executed Volume_S', 'Execution Price_S',
                         'Delivery Start_S', 'Instrument Type_S']
    buy_side_renames = {'Delivery Start_B': 'Delivery Start',
                        'Executed Volume_B': 'Executed Volume',
                        'Execution Price_B': 'Execution Price',
                        'Instrument Type_B': 'Instrument Type'}

    # `columns=` instead of a positional axis argument: pandas 2.0 made every
    # argument of DataFrame.drop except `labels` keyword-only.
    return x.drop(columns=sell_side_columns).rename(columns=buy_side_renames)

#Read weekly prices

def read_weekly_prices_file(path):
    """Load the weekly price CSV and parse its 'End Date' column as UTC.

    The file is expected to be semicolon-separated with a comma as decimal
    mark and 'End Date' values formatted as ``dd/mm/YYYY HH:MM``.

    Parameters
    ----------
    path : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The file contents with 'End Date' converted to timezone-aware (UTC)
        timestamps.
    """
    df = pd.read_csv(path, sep=';', decimal=",")

    # Convert after loading rather than through read_csv's `date_parser`,
    # which is deprecated since pandas 2.0.
    df['End Date'] = pd.to_datetime(df['End Date'],
                                    utc=True,
                                    format='%d/%m/%Y %H:%M')
    return df
#Read NTC 

def read_NTC_file(path):
    """Load the NTC CSV and parse its 'Date from' column as UTC.

    The file is expected to be comma-separated with a dot as decimal mark and
    'Date from' values formatted as ``dd.mm.YYYY``.

    Parameters
    ----------
    path : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The file contents with 'Date from' converted to timezone-aware (UTC)
        timestamps at midnight of the given day.
    """
    df = pd.read_csv(path, sep=',', decimal=".")

    # Convert after loading rather than through read_csv's `date_parser`,
    # which is deprecated since pandas 2.0.
    df['Date from'] = pd.to_datetime(df['Date from'],
                                     utc=True,
                                     format='%d.%m.%Y')
    return df

### Read from EPEX

In [3]:
# Location of the order file loaded in this notebook
epex_orders_path = "../data/external/DE Continuous Orders 20191001.csv"
df = read_epex_file(epex_orders_path, fast_load=False)

In [4]:
# Apply the project's lead-time filter (imported from src.data.helper_closed_transactions) to the loaded orders
df_filtered=filter_lead_time(df)

In [17]:
# extract_transactions returns two objects; the cells below continue with `pivoted_levels`
pivoted, pivoted_levels=extract_transactions(df_filtered, new_data_type=False)

In [18]:
# Turn the current index into regular column(s) so it can be addressed by name
pivoted_levels = pivoted_levels.reset_index()

In [19]:
# Preview the first rows of the transaction table
pivoted_levels.head()


Unnamed: 0,End Validity Date,Executed Price & Volume,Delivery Start_B,Executed Volume_B,Execution Price_B,Initial ID_B,Instrument Type_B,Is Executed_B,Is block_B,Order ID_B,Parent ID_B,Price_B,Volume_B,lead_time_B,Delivery Start_S,Executed Volume_S,Execution Price_S,Initial ID_S,Instrument Type_S,Is Executed_S,Is block_S,Order ID_S,Parent ID_S,Price_S,Volume_S,lead_time_S
0,2019-09-30 23:16:56.314000+00:00,"(34.4, 0.25)",2019-10-01 00:15:00+00:00,0.25,34.4,10524151567,Quarter Hour,2,N,10524153932,10524153547.0,35.4,0.5,0 days 00:58:03.686000,2019-10-01 00:15:00+00:00,0.25,34.4,10524153440,Quarter Hour,1,N,10524153480,10524153440.0,34.4,0.25,0 days 00:59:30.068000
1,2019-09-30 23:16:56.314000+00:00,"(34.52, 0.2)",2019-10-01 00:15:00+00:00,0.2,34.52,10524151567,Quarter Hour,2,N,10524153932,10524153547.0,35.4,0.25,0 days 00:58:03.686000,2019-10-01 00:15:00+00:00,0.2,34.52,10524152127,Quarter Hour,1,N,10524153770,10524153474.0,34.52,0.2,0 days 00:58:30.896000
2,2019-09-30 23:16:56.314000+00:00,"(34.6, 0.05)",2019-10-01 00:15:00+00:00,0.05,34.6,10524151567,Quarter Hour,1,N,10524153932,10524153547.0,35.4,0.05,0 days 00:58:03.686000,2019-10-01 00:15:00+00:00,0.05,34.6,10524152775,Quarter Hour,2,N,10524153548,10524153479.0,34.6,0.175,0 days 00:58:51.766000
3,2019-09-30 23:16:56.398000+00:00,"(34.22, 0.25)",2019-10-01 00:15:00+00:00,0.25,34.22,10524153934,Quarter Hour,1,N,10524153934,,35.4,0.25,0 days 00:58:03.602000,2019-10-01 00:15:00+00:00,0.25,34.22,10524153933,Quarter Hour,1,N,10524153933,,34.22,0.25,0 days 00:58:03.609000
4,2019-09-30 23:17:20.690000+00:00,"(34.6, 0.125)",2019-10-01 00:15:00+00:00,0.125,34.6,10524154088,Quarter Hour,2,N,10524154088,,34.8,0.275,0 days 00:57:39.310000,2019-10-01 00:15:00+00:00,0.125,34.6,10524153936,Quarter Hour,1,N,10524153936,,34.6,0.125,0 days 00:58:03.452000


In [24]:
# Drop the duplicated sell-side columns and strip the '_B' suffix from the buy-side ones.
# NOTE: not re-runnable as-is: once the '_S' columns are gone, a second call to
# clean_transactions raises KeyError when it tries to drop them again.
pivoted_levels=clean_transactions(pivoted_levels)

In [26]:
# Distinct instrument types present in the cleaned transactions
pivoted_levels['Instrument Type'].unique()

array(['Quarter Hour', 'Half Hour'], dtype=object)

### Read from file - Weekly prices

In [8]:
# Load the weekly price table (the file name suggests CH hydro generation and prices for 2019)
wp=read_weekly_prices_file("../data/external/Hydro Generation and Price_CH_2019.csv")

In [9]:
# Each row covers the seven days that finish at its 'End Date'
wp['start_date'] = wp['End Date'] - pd.Timedelta(days=7)

### Read from file - NTC

In [10]:
# Load the NTC table (the file name suggests DE/CH values for 2019)
NTC=read_NTC_file("../data/external/NTC_DEandCH_2019.csv")

In [11]:
def NTC_preparation(NTC):
    """Add absolute interval bounds and a working capacity column to `NTC`.

    The 'Time from' / 'Time to' strings ("HH:MM...") are split into hour and
    minute columns and added to 'Date from' to build 'start_time' and
    'end_time'. An end time of exactly 00:00 is treated as hour 24, so the
    interval ends at midnight of the following day instead of at the start
    of 'Date from'.

    Parameters
    ----------
    NTC : pandas.DataFrame
        Must contain 'Date from' (datetime), 'Time from', 'Time to' (strings)
        and 'CH to DE_Actual value (MW)'.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in; it is modified in place and gains
        the columns 'Time from hour', 'Time from min', 'start_time',
        'Time to hour', 'Time to min', 'end_time' and
        'CH to DE_Actual value (MW) update'.
    """

    def _clock_part(column, position):
        # Integer at `position` of the ':'-separated clock strings in `column`
        return NTC[column].apply(lambda clock: int(clock.split(':')[position]))

    ###START TIME
    NTC['Time from hour'] = _clock_part('Time from', 0)
    NTC['Time from min'] = _clock_part('Time from', 1)
    NTC['start_time'] = (NTC['Date from']
                         + pd.to_timedelta(NTC['Time from hour'], unit='h')
                         + pd.to_timedelta(NTC['Time from min'], unit='min'))

    ###END TIME
    NTC['Time to hour'] = _clock_part('Time to', 0)
    NTC['Time to min'] = _clock_part('Time to', 1)
    ends_at_midnight = (NTC['Time to hour'] == 0) & (NTC['Time to min'] == 0)
    # Single .loc call on the frame: the chained form NTC[col].loc[mask] = 24
    # triggers SettingWithCopyWarning and does not write through under
    # pandas Copy-on-Write.
    NTC.loc[ends_at_midnight, 'Time to hour'] = 24
    NTC['end_time'] = (NTC['Date from']
                       + pd.to_timedelta(NTC['Time to hour'], unit='h')
                       + pd.to_timedelta(NTC['Time to min'], unit='min'))

    #create a column to store update capacity
    NTC['CH to DE_Actual value (MW) update'] = NTC['CH to DE_Actual value (MW)']

    return NTC

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


### Weekly price and capacity - skimmer

In [12]:


# Order the transactions from the highest to the lowest execution price
pivoted_levels_sort = pivoted_levels.sort_values(by=['Execution Price'], ascending=False)

# The first 'End Validity Date' of the day identifies the week whose prices apply
date = pivoted_levels['End Validity Date'].iloc[0]
in_week = (wp['start_date'] < date) & (wp['End Date'] >= date)

# weekly_price = [selling, pumping]; .item() raises unless exactly one week matches
weekly_price = [wp[price_column][in_week].item()
                for price_column in ('Average Weekly Price [Euro/MWh]',
                                     'Max Weekly Pumping Price [Euro/MWh]')]


In [16]:
'''# get actual capacity _ we don't need this function
def get_capacity(NTC, x):
    
    capacity=[]
    #selling capacity CH->DE
    capacity.append(NTC['CH to DE_Actual value (MW)'][(NTC['start_time']<x)&
                                                      (NTC['end_time']>=x)].item())
    #pumping capacity DE->CH
    capacity.append(NTC['DE to CH_Actual value (MW)'][(NTC['start_time']<x)&
                                     (NTC['end_time']>=x)].item())
    
        
    
    return capacity

pivoted_levels_sort['Actual Nominal Capacity CH-DE']=pivoted_levels_sort['End Validity Date'].apply(lambda x: get_capacity(NTC, x)[0])'''

In [42]:

# Scratch check of list repetition: [0]*2 evaluates to [0, 0]
[0]*2
                                                                             

[0, 0]

In [40]:
# Scratch check: the product of a boolean array is non-zero only if every entry is True.
# np.prod replaces np.product, which was removed in NumPy 2.0.
c = np.array([-1, 1, 3]) < 0
np.prod(c)

0

In [43]:
def optimize(NTC, ex_vol, time, instru_type):
    """Try to book one transaction against the remaining CH->DE capacity.

    The NTC rows whose interval is exactly [`time`, `time` + product length)
    are looked up, and ``ex_vol * multiplier`` is subtracted from their
    'CH to DE_Actual value (MW) update' value, where the multiplier is the
    number of such products per hour.

    Parameters
    ----------
    NTC : pandas.DataFrame
        Needs 'start_time', 'end_time' and
        'CH to DE_Actual value (MW) update'. Modified in place.
    ex_vol : float
        Executed volume of the transaction.
    time : pandas.Timestamp
        Delivery start of the transaction.
    instru_type : str
        One of 'Hour', 'Half Hour', 'Quarter Hour'.

    Returns
    -------
    int
        1 if the remaining capacity stays non-negative on every matching row
        (the rows are updated with the new remainder), otherwise 0 (the
        matching rows are set to 0).

    Raises
    ------
    KeyError
        If `instru_type` is not one of the three known instrument types.
    """
    capacity_col = 'CH to DE_Actual value (MW) update'
    multiplier_instr_type = {'Hour': 1, 'Half Hour': 2, 'Quarter Hour': 4}
    per_hour = multiplier_instr_type[instru_type]

    # Rows whose interval coincides with the delivery period of the product
    delivery_end = time + pd.Timedelta(minutes=60 / per_hour)
    match = (NTC['start_time'] == time) & (NTC['end_time'] == delivery_end)

    if not match.any():
        # NOTE(review): with no matching NTC interval the transaction is still
        # reported as accepted (the original all-of-nothing check behaved the
        # same way). Confirm that this is intended.
        return 1

    # The remainder can be fractional (e.g. 0.125 * 4 = 0.5), so make sure the
    # working column can hold floats before writing into it.
    if NTC[capacity_col].dtype.kind != 'f':
        NTC[capacity_col] = NTC[capacity_col].astype(float)

    # Label/boolean based .loc on the frame itself: the previous version fed
    # index labels to positional .iloc and assigned through a chained
    # NTC[col].iloc[...] expression, which is wrong for non-default indexes
    # and may not write back to NTC.
    remaining = NTC.loc[match, capacity_col].to_numpy() - ex_vol * per_hour

    if (remaining >= 0).all():
        NTC.loc[match, capacity_col] = remaining
        return 1

    # NOTE(review): a rejected transaction zeroes the remaining capacity of
    # the interval, as in the original logic. Confirm that this is intended.
    NTC.loc[match, capacity_col] = 0
    return 0
    
# Drop contracts priced below the weekly selling price (weekly_price[0]).
# .copy() makes the selection an independent frame, so adding a column below
# does not raise SettingWithCopyWarning.
pivoted_levels_sort_sel = pivoted_levels_sort[
    pivoted_levels_sort['Execution Price'] >= weekly_price[0]
].copy()

# Rows are visited in the order of pivoted_levels_sort, and optimize() updates
# NTC on every call, so re-running this cell books the same volumes again.
pivoted_levels_sort_sel['binary_outcome'] = pivoted_levels_sort_sel.apply(
    lambda x: optimize(NTC,
                       x['Executed Volume'],
                       x['Delivery Start'],
                       x['Instrument Type']),
    axis=1)


NameError: name 'pivoted_levels_sort' is not defined

In [None]:
#Check work in progress

'''NTC[NTC['CH to DE_Actual value (MW)']>0]

NTC[NTC['CH to DE_Actual value (MW)']!=NTC['CH to DE_Actual value (MW) update']]

np.sum(pivoted_levels_sort_sel['Executed Volume_B'][(pivoted_levels_sort_sel['binary_outcome']==1)&
                       (pivoted_levels_sort_sel['End Validity Date'].dt.tz_localize(None)>np.datetime64(datetime(2019, 10, 1, 17, 0)))&
                       (pivoted_levels_sort_sel['End Validity Date'].dt.tz_localize(None)<=np.datetime64(datetime(2019, 10, 1, 17, 15)))]*4)'''