In [1]:
import pandas as pd
import numpy as np
import src.in_out as io
import src.info as info
import src.auxiliary_functions as aux
import src.condition as con
from itertools import combinations

In [2]:
# Each data file is read together with its product name and date, and all
# files are concatenated into a single frame.
path_list = io.get_path('data')
cols = ["Time", "BID price", "ASK price"]
# Source column label -> column name used in the rest of the notebook.
column_names = dict(zip(cols, ['time', 'bid_price', 'ask_price']))
master_data = pd.concat(
    (
        pd.read_excel(file, usecols=cols).assign(
            name=info.get_productName(file),
            date=info.get_productDate(file),
        )
        for file in path_list
    )
).reset_index(drop=True)
# `usecols` selects columns but keeps the sheet's own column order, so the
# columns are renamed by label (not by position) and then put into a fixed
# order. Assigning `master_data.columns` positionally would mislabel the
# prices whenever a sheet stores the columns in a different order.
master_data = master_data.rename(columns=column_names)[
    ['time', 'bid_price', 'ask_price', 'name', 'date']
]

In [3]:
# Convert the 'date' column to pandas Timestamps. With errors='coerce',
# values that cannot be parsed become NaT instead of raising.
master_data['date']=pd.to_datetime(master_data['date'], errors='coerce')

In [4]:
# Combine date and time of day: the 'time' column is removed from master_data
# (pop), converted to str, parsed as a Timedelta and added to 'date'.
# Because 'time' is popped, this cell cannot be run twice on the same frame.
master_data['date'] += pd.to_timedelta(master_data.pop('time').astype(str))

In [5]:
# Add the time period: the hour component of each 'date' timestamp.
master_data['time_period']=master_data.date.dt.hour

In [6]:
# Compute the mid price with the project helper aux.get_mid_price.
# NOTE(review): presumably derived from 'bid_price' and 'ask_price' - confirm
# against src/auxiliary_functions.
master_data['mid_price']=aux.get_mid_price(master_data)

In [7]:
# Reshape master_data for the spread calculation: one row per timestamp
# ('date'), one column per product ('name'), cells hold 'mid_price'.
# pivot raises a ValueError if a (date, name) combination occurs more than once.
df_mid_price=master_data.pivot(index='date', columns='name', values='mid_price')

## Spread
<img src="img/spread_flowchart.png" alt="Spread calculation flowchart">

In [None]:
# Tick settings for the first ("a") and second ("b") product of a pair.
# find_spread scales each product's price change by PNLTICK / TICKSIZE.
values = dict(
    a_PNLTICK=1,
    a_TICKSIZE=1,
    b_PNLTICK=1,
    b_TICKSIZE=1,
)

In [223]:
# Build every unordered pair of product columns.
# combinations() returns a one-shot iterator; it is materialised as a list so
# that cells iterating over `pairs` can be re-run without finding it exhausted.
pairs = list(combinations(df_mid_price.columns, 2))

In [224]:
# Compute the spread frame of every product pair, then join the frames side
# by side. get_spread is defined in a later cell of this notebook, so that
# cell has to be executed before this one.
spread_list = [
    get_spread(df_mid_price.loc[:, list(pair)], values)
    for pair in pairs
]
df_spread = pd.concat(spread_list, axis=1)

In [226]:
# Show the combined spread table (one column per product pair).
df_spread

Unnamed: 0_level_0,6AU8_6BU8,6AU8_6CU8,6BU8_6CU8
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-09-06 00:00:00,0.000000,0.000000,0.000000
2018-09-06 00:00:01,0.000000,0.000000,0.000000
2018-09-06 00:00:02,0.000000,0.000000,0.000000
2018-09-06 00:00:03,0.000000,0.000000,0.000000
2018-09-06 00:00:04,-0.000109,0.000000,0.000109
...,...,...,...
2018-09-07 23:59:55,-0.000138,0.000962,0.001100
2018-09-07 23:59:56,-0.000138,0.000962,0.001100
2018-09-07 23:59:57,-0.000138,0.000962,0.001100
2018-09-07 23:59:58,-0.000138,0.000962,0.001100


In [191]:
def get_spread(df_mp,values):
    """Compute the spread of one product pair separately per day and hour.

    The rows are grouped by calendar day and by hour of day, and
    ``find_spread`` is applied to every group on its own.

    Parameters:
        df_mp(pandas.DataFrame): mid prices of two products. The index must be
            named ``date`` and hold timestamps, because it is read back as the
            ``date`` column after ``reset_index``.
        values(dict): tick settings, passed unchanged to ``find_spread``.
    Returns:
        pandas.DataFrame: the per-group results of ``find_spread`` combined,
        with the two grouping levels (day, hour) dropped from the index.
    """
    flat = df_mp.reset_index()
    day_key = flat.date.dt.floor('d')
    hour_key = flat.date.dt.hour

    def spread_of(chunk):
        # One group = the rows of a single hour of a single day.
        return find_spread(chunk, values)

    per_hour = flat.groupby([day_key, hour_key]).apply(spread_of)
    return pd.DataFrame(per_hour).droplevel([0, 1])

In [161]:
def find_spread(pair,values):
    """Build the cumulative spread series of a two-product mid-price frame.

    The spread is 0 on the first row. On every following row it changes by
    ``(a[i] - a[i-1]) * atick - (b[i] - b[i-1]) * btick``, where ``atick`` and
    ``btick`` are ``PNLTICK / TICKSIZE`` of the respective product.

    Parameters:
        pair(pandas.DataFrame): frame with a ``date`` column; the columns at
            positions 1 and 2 hold the prices of product "a" and product "b".
        values(dict): tick settings with the keys ``a_PNLTICK``,
            ``a_TICKSIZE``, ``b_PNLTICK`` and ``b_TICKSIZE``.
    Returns:
        pd.Series: float spread values indexed by ``pair.date`` and named
        ``"<a column name>_<b column name>"``.
    Raises:
        ValueError, TypeError: if a price column cannot be converted to float.
    """
    a_series=pair.iloc[:,1]
    b_series=pair.iloc[:,2]
    atick = values['a_PNLTICK']/values['a_TICKSIZE']
    btick = values['b_PNLTICK']/values['b_TICKSIZE']
    a_prices = a_series.to_numpy(dtype=float)
    b_prices = b_series.to_numpy(dtype=float)
    # Row-to-row change of the spread; there is one change fewer than rows.
    # The earlier loop indexed one element past the end and used a bare
    # ``except: pass`` to absorb the IndexError, which also hid real errors.
    step = np.diff(a_prices)*atick - np.diff(b_prices)*btick
    spread = np.zeros(len(a_prices))
    # Sequential running sum: a NaN price makes all later values NaN as well.
    spread[1:] = np.cumsum(step)
    return pd.Series(data=spread,index=pair.date,name=a_series.name+'_'+b_series.name)