## tech

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from tqdm import tqdm

In [2]:
# Open the newline-delimited JSON dump as a chunked reader: because `chunksize`
# is set, `df` is an iterator of DataFrame chunks rather than a single frame.
df = pd.read_json(
    'data/data_save.txt',
    lines=True,
    chunksize=100000,
    dtype={'E': 'datetime64[ms]'},  # force column E to millisecond datetimes
)

In [3]:
# Split the streamed chunks into order-book updates (`du`, rows whose `e` is
# "depthUpdate") and everything else (`trade`).
# The pieces are collected in lists and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0, and it re-copied the whole
# accumulated frame on every iteration.
du_parts = []
trade_parts = []
for data in tqdm(df):
    # dropna(axis=1) removes, per chunk, every column that has a missing value
    # within that subset of rows
    du_parts.append(data.query('e=="depthUpdate"').dropna(axis=1))
    trade_parts.append(data.query('e!="depthUpdate"').dropna(axis=1))

# pd.concat raises on an empty list; fall back to an empty frame when the
# reader yielded no chunks, which is what the append-based loop produced.
du = pd.concat(du_parts) if du_parts else pd.DataFrame()
trade = pd.concat(trade_parts) if trade_parts else pd.DataFrame()

# The per-chunk pieces are no longer needed once concatenated.
del du_parts, trade_parts


12it [00:15,  1.30s/it]


In [4]:
# Time covered by the trade stream: latest minus earliest value of column E
event_times = trade['E']
event_times.max() - event_times.min()

Timedelta('0 days 16:24:39.354000')

### data transform

In [4]:
# Label each trade by tick direction: 'buy' when the price rose versus the
# previous trade, 'sell' when it fell. Trades at an unchanged price inherit the
# direction of the most recent price change; rows before the first change stay NaN.
#
# np.sign replaces the former `.mul(100).clip(-1, 1)`, which only yielded an
# exact -1/+1 for moves of at least 0.01: smaller moves (or 0.01 moves affected
# by float rounding) produced a fractional value that missed the dict lookup
# and left the trade unlabeled. `.ffill()` replaces the deprecated
# `fillna(method='ffill')`.
price_change = trade['p'].diff()
last_move = price_change.where(price_change != 0).ffill()
trade['type'] = np.sign(last_move).map({-1: 'sell', 1: 'buy'})

In [5]:
# Bucket rows into consecutive groups of `n_ticks` rows (by position) and stamp
# every row with the first E timestamp of its bucket, for both frames.
n_ticks = 500
tick_col = f'ticks_{n_ticks}'
for frame in (trade, du):
    frame[tick_col] = np.arange(len(frame)) // n_ticks
    frame[f'{tick_col}_dt'] = frame.groupby(tick_col)['E'].transform('first')


In [6]:
# Same positional bucketing as the 500-row bars, with 10 rows per bucket:
# an integer bucket id plus the first E timestamp of the bucket, on both frames.
n_ticks = 10
bar_col = f'ticks_{n_ticks}'
for frame in (trade, du):
    row_position = np.arange(len(frame))
    frame[bar_col] = row_position // n_ticks
    frame[f'{bar_col}_dt'] = frame.groupby(bar_col)['E'].transform('first')

In [7]:
def _first_live_level(levels):
    """Return the first level with a non-zero quantity for every row.

    `levels` is a Series whose cells are lists of levels; each level is read as
    level[0] and level[1] (presumably price and quantity strings — verify
    against the feed). A level whose second element equals '0.00000000' is
    skipped.

    Returns a `(first_price, first_qty)` pair of float Series indexed by the
    rows that have at least one qualifying level.
    """
    # One row per level; the originating row label is repeated for each level
    exploded = levels.explode().dropna()
    price = exploded.str[0]
    qty = exploded.str[1]
    live = qty != '0.00000000'
    # groupby(level=0) regroups the levels by their originating row
    first_price = price[live].groupby(level=0).first().astype(float)
    first_qty = qty[live].groupby(level=0).first().astype(float)
    return first_price, first_qty


# Each side is exploded once and reused for both the price and the quantity
# column (previously every side was exploded and scanned twice).
b_first_p, b_first_q = _first_live_level(du['b'])
a_first_p, a_first_q = _first_live_level(du['a'])

# Rows without a qualifying level are absent from the results and become NaN
# through index alignment on assignment.
du['b_first_q'] = b_first_q
du['a_first_q'] = a_first_q

du['b_first_p'] = b_first_p
du['a_first_p'] = a_first_p

In [8]:
# Ask-minus-bid difference of the first-level price and of the first-level quantity
du['diff_p'] = du['a_first_p'] - du['b_first_p']
du['diff_q'] = du['a_first_q'] - du['b_first_q']

In [9]:
# Pressure proxies derived from the change between consecutive depth updates,
# computed with vectorized masks instead of Python-level (row-wise) `apply`:
#   is_seller_pres_<side>: the increase in first-level quantity, else 0
#   is_buyer_pres_<side> : minus the absolute quantity change when the
#                          first-level price fell, else 0
# A NaN change never satisfies either comparison, so it yields 0 as before.
for side, prefix in (('bid', 'b'), ('ask', 'a')):
    qty_change = du[f'{prefix}_first_q'].diff()
    price_change = du[f'{prefix}_first_p'].diff()
    du[f'is_seller_pres_{side}'] = qty_change.where(qty_change > 0, 0)
    du[f'is_buyer_pres_{side}'] = (-qty_change.abs()).where(price_change < 0, 0)

In [10]:
# Long-format table of the summed pressure columns per 500-tick bar, bid side
# first and ask side second. pd.concat replaces DataFrame.append, which was
# removed in pandas 2.0; row labels are left exactly as append produced them
# (each side keeps its own 0..n-1 labels).
pressure_by_side = [
    du.groupby('ticks_500_dt')[[f'is_buyer_pres_{side}', f'is_seller_pres_{side}']]
    .sum()
    .melt(ignore_index=False)
    .reset_index()
    for side in ('bid', 'ask')
]
ttl_pres_df = pd.concat(pressure_by_side)

# Tag each row with its book side, read from the pressure column name
ttl_pres_df['type'] = ttl_pres_df['variable'].str.contains('bid').map({True: 'bid', False: 'ask'})

## Plots

### 1

In [12]:
# Trade-speed indicator: number of distinct `t` values per 10-second bucket.
# NOTE(review): the animated multi-window chart further down writes to the same
# plots/1_speed.html path, so a top-to-bottom run overwrites this file.
trades_per_10s = trade.groupby(pd.Grouper(key='E', freq='10S'))['t'].nunique()
speed_fig = px.line(trades_per_10s, title='Индикатор скорости сделок за 10 секунд')
speed_fig.write_html('plots/1_speed.html')

In [14]:
# Distinct `t` values per time bucket for several bucket widths, one column per
# width (in seconds).
# The columns are combined with an outer join. Assigning them one at a time
# into an empty frame aligned every series to the index of the first (10 s)
# column and silently dropped each coarser bucket whose label was missing
# there — typically the first bucket of the wider windows.
windows_sec = [10, 30, 60, 120, 500, 1000, 2000, 3000, 5000]
plot_1_df = pd.concat(
    {
        i: trade.groupby(pd.Grouper(key='E', freq=f'{i}S'))['t'].nunique()
        for i in windows_sec
    },
    axis=1,
).sort_index()

In [21]:
# Animated trade-speed chart: one animation frame per bucket width.
# melt() stacks the per-width columns into (variable, value) pairs while
# keeping E; rows with no value for a given width are dropped.
speed_long = plot_1_df.melt(ignore_index=False).reset_index().dropna()
speed_anim = px.line(speed_long, x='E', y='value', animation_frame='variable')
speed_anim.write_html('plots/1_speed.html')

### 2

In [13]:
# Buy/sell intensity: count of trades per `type` label in every 10-second
# bucket, one line per label.
# NOTE(review): the animated multi-window chart further down writes to the same
# plots/2_buysell_value_count.html path, so a top-to-bottom run overwrites this file.
side_counts_10s = (
    trade.groupby([pd.Grouper(key='E', freq='10S')])['type']
    .value_counts()
    .unstack()
)
intensity_fig = px.line(side_counts_10s, title='Индикатор интенсивности продаж/покупок')
intensity_fig.write_html('plots/2_buysell_value_count.html')

In [29]:
# Count of trades per `type` label and time bucket for several bucket widths,
# one column per width (in seconds), indexed by (E, type).
# The columns are combined with an outer join. Assigning them one at a time
# into an empty frame aligned every series to the (E, type) index of the first
# (10 s) column, so any coarser (bucket, type) pair missing from that index was
# silently dropped.
windows_sec = [10, 30, 60, 120, 500, 1000, 2000, 3000, 5000]
plot_2_df = pd.concat(
    {
        i: trade.groupby(pd.Grouper(key='E', freq=f'{i}S'))['type'].value_counts()
        for i in tqdm(windows_sec)
    },
    axis=1,
).sort_index()  # keep rows in time order so the lines are drawn left to right

# One animation frame per bucket width, one line per trade type
plot_2_long = plot_2_df.melt(ignore_index=False).reset_index().dropna()
px.line(plot_2_long, x='E', y='value', color='type', animation_frame='variable').write_html('plots/2_buysell_value_count.html')

100%|██████████| 9/9 [00:03<00:00,  2.65it/s]


### 3

In [14]:
# Spread indicator: mean of `diff_p` over 10-second buckets.
# NOTE(review): the animated multi-window chart further down writes to the same
# plots/3_spread.html path, so a top-to-bottom run overwrites this file.
mean_spread_10s = du.groupby(pd.Grouper(key='E', freq='10S'))['diff_p'].mean()
spread_fig = px.line(mean_spread_10s, title='Индикатор спреда')
spread_fig.write_html('plots/3_spread.html')

In [32]:
# Mean `diff_p` per time bucket for several bucket widths, one column per width
# (in seconds).
# The columns are combined with an outer join. Assigning them one at a time
# into an empty frame aligned every series to the index of the first (10 s)
# column and silently dropped each coarser bucket whose label was missing
# there — typically the first bucket of the wider windows.
windows_sec = [10, 30, 60, 120, 500, 1000, 2000, 3000, 5000]
plot_3_df = pd.concat(
    {
        i: du.groupby(pd.Grouper(key='E', freq=f'{i}S'))['diff_p'].mean()
        for i in tqdm(windows_sec)
    },
    axis=1,
).sort_index()

100%|██████████| 9/9 [00:00<00:00, 118.60it/s]


In [37]:
# Animated spread chart: one animation frame per bucket width.
# melt() stacks the per-width columns into (variable, value) pairs while
# keeping E; rows with no value for a given width are dropped.
spread_long = plot_3_df.melt(ignore_index=False).reset_index().dropna()
spread_anim = px.line(
    spread_long,
    x='E',
    y='value',
    animation_frame='variable',
    title='Индикатор спреда',
)
spread_anim.write_html('plots/3_spread.html')

In [15]:
# Mean first-level bid/ask price per 10-second bucket, joined (on the depth
# buckets) with the last trade price `p` of the same bucket.
book_10s = du.groupby(pd.Grouper(key='E', freq='10S'))[['b_first_p', 'a_first_p']].mean()
last_price_10s = trade.groupby(pd.Grouper(key='E', freq='10S'))['p'].last()
bidask_fig = px.line(
    book_10s.join(last_price_10s),
    title='Линии поведение бида и аска относительно цены последней сделки',
)
bidask_fig.write_html('plots/3_bidask_price_comp.html')

### 4

In [16]:
# Raw pressure bars per 500-tick bar, one HTML file per book side (bid, then ask):
# the buyer and seller pressure columns are summed per bar and drawn as
# separately coloured bars.
for side in ('bid', 'ask'):
    pressure_cols = [f'is_buyer_pres_{side}', f'is_seller_pres_{side}']
    pressure_long = (
        du.groupby('ticks_500_dt')[pressure_cols]
        .sum()
        .melt(ignore_index=False)
        .reset_index()
    )
    raw_fig = px.bar(pressure_long, x='ticks_500_dt', y='value', color='variable', title='Чистые бары давлений')
    raw_fig.write_html(f'plots/4_raw_bar_pressure_{side}.html')

In [17]:
# Raw pressure bars for both book sides in one figure: one facet row per side,
# one colour per pressure column.
raw_facet_fig = px.bar(
    ttl_pres_df,
    x='ticks_500_dt',
    y='value',
    color='variable',
    facet_row='type',
    title='Чистые бары давлений',
)
raw_facet_fig.write_html('plots/4_raw_bar_pressure.html')

In [18]:
# Net pressure per 500-tick bar, one HTML file per book side (bid, then ask):
# the buyer and seller pressure columns are summed per bar and then added together.
for side in ('bid', 'ask'):
    pressure_cols = [f'is_buyer_pres_{side}', f'is_seller_pres_{side}']
    net_pressure = du.groupby('ticks_500_dt')[pressure_cols].sum().sum(axis=1)
    net_fig = px.bar(net_pressure, title='Разница давлений')
    net_fig.write_html(f'plots/4_diff_bar_pressure_{side}.html')

In [19]:
# Net pressure per 500-tick bar and book side, both sides in one figure
# (one facet row and one colour per side).
net_by_side = ttl_pres_df.groupby(['ticks_500_dt', 'type'])['value'].sum().reset_index()
net_facet_fig = px.bar(
    net_by_side,
    x='ticks_500_dt',
    y='value',
    facet_row='type',
    color='type',
    title='Разница давлений',
)
net_facet_fig.write_html('plots/4_diff_bar_pressure.html')

### 5

In [20]:
# First / last / lowest / highest trade price `p` per tick bar, for the
# 10-tick and the 500-tick bars.
price_aggs = ['first', 'last', 'min', 'max']
olhc = trade.groupby('ticks_10_dt')['p'].agg(price_aggs)
olhc_500 = trade.groupby('ticks_500_dt')['p'].agg(price_aggs)

In [21]:
# OHLC chart of the 10-tick bars: first -> open, max -> high, min -> low, last -> close
ohlc_trace = go.Ohlc(
    x=olhc.index,
    open=olhc['first'],
    high=olhc['max'],
    low=olhc['min'],
    close=olhc['last'],
)
fig = go.Figure(data=ohlc_trace)
fig.write_html('plots/5_ohlc.html')

In [22]:
# OHLC chart of the 500-tick bars: first -> open, max -> high, min -> low, last -> close
ohlc_500_trace = go.Ohlc(
    x=olhc_500.index,
    open=olhc_500['first'],
    high=olhc_500['max'],
    low=olhc_500['min'],
    close=olhc_500['last'],
)
fig = go.Figure(data=ohlc_500_trace)
fig.write_html('plots/5_ohlc_500.html')