In [2]:
import pandas as pd
import numpy as np
import csv
from scipy import stats
import math
import matplotlib.pyplot as plt
import datetime
from datetime import datetime, timedelta
# Notebook-wide display settings: cap rendered rows and show floats with two decimals.
pd.set_option('display.max_rows', 50)
pd.set_option('display.float_format', '{:.2f}'.format)

In [5]:
# Load the intraday bars and the daily totals (DATE is parsed to datetime on read).
df = pd.read_csv("/Users/zss/OneDrive - The University of Chicago/DRW_Trading/self_edit/sushan_intraday_data.csv")
daily_data = pd.read_csv('/Users/zss/OneDrive - The University of Chicago/DRW_Trading/self_edit/sushan_daily.csv', parse_dates=['DATE'])

# Keep the raw date string, then derive a full timestamp and a datetime-typed date.
df['str_date'] = df['date']
df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])
df['date'] = pd.to_datetime(df['date'])

# symbol = root + suffix; rows without a suffix fall back to the bare root.
has_suffix = df['sym_suffix'].notnull()
df['symbol'] = (df['sym_root'] + df['sym_suffix']).where(has_suffix, df['sym_root'])

In [6]:
# Attach each day's total volume to every intraday bar of the same symbol.
# validate='many_to_one' makes the merge fail loudly if daily_data carries
# duplicate (DATE, symbol) rows, instead of silently multiplying intraday bars.
data = df.merge(
    daily_data[['DATE', 'symbol', 'total_vol']],
    how='left',
    left_on=['date', 'symbol'],
    right_on=['DATE', 'symbol'],
    validate='many_to_one',
)

# Index by the merged frame's own timestamps (not df's) so rows and index
# labels cannot drift apart; set_index also removes the 'datetime' column.
data = data.set_index('datetime').drop(columns='DATE')

# Keep only bars stamped 09:31 through 15:59 (both ends inclusive).
data = data.between_time('09:31:00', '15:59:00').copy()

# Each bar's share of that day's total volume.
data['perc_vol'] = data['size'] / data['total_vol']

NKE = data.loc[data.symbol == 'NKE'].copy()

In [7]:
# No resample/asfreq step here: calling it without assigning the result leaves
# NKE unchanged, so it only cost time.
# TODO(review): if missing minutes are meant to be filled, that must be done
# per trading day and assigned back explicitly.
NKE = NKE.between_time('09:31:00', '15:59:00').copy()

# Distinct trading days, in order of first appearance (reused by later cells).
trading_days = list(pd.to_datetime(NKE.date.unique()))

# Running share of the day's volume, restarted at each new date.
# One grouped cumsum replaces a full boolean scan of the frame per day.
NKE['cum_vol'] = NKE.groupby('date')['perc_vol'].cumsum()

In [8]:
# The first trading day's bar timestamps define the time-of-day grid.
minutes = NKE.loc[NKE.date == trading_days[0]].index

# Time-of-day of every bar, computed once and reused for each comparison.
bar_times = NKE.index.time

avg_cum_dist = pd.Series(dtype='float64')
for minute in minutes:
    same_minute = bar_times == minute.time()
    # Mean cumulative share over all days at this time of day.
    minute_mean = NKE.loc[same_minute, 'cum_vol'].mean()
    avg_cum_dist.loc[minute] = minute_mean
    NKE.loc[same_minute, 'avg_cum_vol'] = minute_mean

In [9]:
# Per-minute average share = increment of the average cumulative curve.
# A positional diff is used instead of looking up "minute - 1min", so a missing
# bar on the reference day yields the increment since the previous available
# bar rather than a KeyError.
avg_per_dist = avg_cum_dist.diff()
if len(avg_per_dist):
    # The first bar has no predecessor: its share is the cumulative value itself.
    avg_per_dist.iloc[0] = avg_cum_dist.iloc[0]

# Broadcast each time-of-day value to every bar with that time of day.
bar_times = NKE.index.time
for minute in minutes:
    NKE.loc[bar_times == minute.time(), 'avg_perc_vol'] = avg_per_dist.loc[minute]

In [13]:
# Z(t): log of the day's total volume minus log of the cumulative share so far.
log_total = np.log(NKE['total_vol'])
log_cum = np.log(NKE['cum_vol'])
NKE['Z(t)'] = log_total.sub(log_cum)

In [22]:
# Preview the last rows of NKE.
# NOTE(review): the saved output lists a `disper_mean` column that no cell
# visible here creates - presumably leftover kernel state; confirm and either
# add the cell that builds it or re-run from a fresh kernel.
NKE.tail()

Unnamed: 0_level_0,date,time,sym_root,sym_suffix,size,str_date,symbol,total_vol,perc_vol,cum_vol,avg_cum_vol,avg_perc_vol,Z(t),disper_mean
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2021-12-31 15:55:00,2021-12-31,15:55:00,NKE,,20385.0,2021-12-31,NKE,1904513.0,0.01,0.88,0.91,0.01,14.58,
2021-12-31 15:56:00,2021-12-31,15:56:00,NKE,,32585.0,2021-12-31,NKE,1904513.0,0.02,0.9,0.92,0.01,14.56,
2021-12-31 15:57:00,2021-12-31,15:57:00,NKE,,40493.0,2021-12-31,NKE,1904513.0,0.02,0.92,0.93,0.01,14.54,
2021-12-31 15:58:00,2021-12-31,15:58:00,NKE,,34772.0,2021-12-31,NKE,1904513.0,0.02,0.94,0.94,0.01,14.52,
2021-12-31 15:59:00,2021-12-31,15:59:00,NKE,,71870.0,2021-12-31,NKE,1904513.0,0.04,0.98,0.97,0.03,14.48,


In [23]:
def generate_u_curve(data, column='total_vol', window=180):
    '''Rolling mean of one column per time of day, laid out as time x date.

    For every distinct time of day in ``data.index``, the values of ``column``
    at that time are averaged over a trailing window of ``window``
    observations, taken in the row order of ``data`` (assumed chronological -
    TODO confirm at call sites).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame indexed by a DatetimeIndex (the index name does not matter).
    column : str, default 'total_vol'
        Column to average.
    window : int, default 180
        Number of observations per time of day in each rolling window.

    Returns
    -------
    pandas.DataFrame
        Rows are times of day (index named 'time'), columns are calendar dates
        (named 'date'). Cells with fewer than ``window`` prior observations
        are NaN.
    '''
    # One rolling mean per time-of-day group; the result is indexed by
    # (time of day, original timestamp).
    rolled = data.groupby(data.index.time)[column].rolling(window).mean()

    # Re-key by (date, time) straight from the index levels, so nothing relies
    # on the auto-generated 'level_0' name or on the index being called 'datetime'.
    times = rolled.index.get_level_values(0)
    stamps = pd.DatetimeIndex(rolled.index.get_level_values(-1))
    rolled.index = pd.MultiIndex.from_arrays([stamps.date, times], names=['date', 'time'])

    # Pivot dates into columns: rows = time of day, columns = date.
    return rolled.unstack('date')

In [21]:
# Rolling per-minute average of Z(t).
# Called with NKE alone, generate_u_curve averages the 'total_vol' column, which
# would make NKE_z identical to the total-volume curve. Z(t) is therefore passed
# in under the 'total_vol' name so this cell yields the Z(t) curve on a fresh
# Restart & Run All.
NKE_z = generate_u_curve(NKE.assign(total_vol=NKE['Z(t)']))

# Keep only dates where every time of day has a full rolling window.
NKE_z = NKE_z.dropna(axis=1)

date,2019-09-20,2019-09-23,2019-09-24,2019-09-25,2019-09-26,2019-09-27,2019-09-30,2019-10-01,2019-10-02,2019-10-03,...,2021-12-16,2021-12-17,2021-12-21,2021-12-22,2021-12-23,2021-12-27,2021-12-28,2021-12-29,2021-12-30,2021-12-31
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
09:31:00,20.95,20.94,20.95,20.94,20.93,20.93,20.92,20.91,20.90,20.90,...,20.77,20.77,20.77,20.76,20.76,20.75,20.75,20.75,20.74,20.72
09:32:00,20.17,20.17,20.17,20.17,20.16,20.15,20.15,20.15,20.14,20.14,...,20.07,20.07,20.07,20.06,20.06,20.05,20.05,20.04,20.03,20.02
09:33:00,19.77,19.77,19.77,19.76,19.76,19.76,19.75,19.75,19.74,19.75,...,19.65,19.65,19.65,19.65,19.64,19.63,19.63,19.62,19.61,19.60
09:34:00,19.52,19.51,19.51,19.51,19.50,19.50,19.50,19.50,19.49,19.49,...,19.36,19.36,19.36,19.36,19.35,19.35,19.34,19.34,19.32,19.31
09:35:00,19.26,19.26,19.26,19.25,19.25,19.25,19.24,19.24,19.24,19.24,...,19.12,19.12,19.12,19.12,19.11,19.11,19.11,19.10,19.08,19.07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15:55:00,15.49,15.49,15.49,15.50,15.50,15.50,15.50,15.49,15.50,15.49,...,15.47,15.48,15.48,15.48,15.48,15.48,15.47,15.47,15.46,15.46
15:56:00,15.48,15.48,15.48,15.48,15.49,15.49,15.48,15.48,15.48,15.48,...,15.47,15.47,15.47,15.47,15.47,15.47,15.47,15.46,15.46,15.45
15:57:00,15.47,15.46,15.46,15.47,15.47,15.47,15.47,15.47,15.47,15.47,...,15.45,15.45,15.46,15.46,15.46,15.46,15.45,15.45,15.44,15.44
15:58:00,15.45,15.45,15.45,15.46,15.46,15.46,15.45,15.45,15.46,15.45,...,15.44,15.44,15.45,15.45,15.45,15.44,15.44,15.44,15.43,15.42


In [24]:
# Rolling per-minute average of the daily total volume; dates with any NaN
# cell are dropped.
NKE_tv = generate_u_curve(NKE).dropna(axis=1)
NKE_tv

date,2019-09-20,2019-09-23,2019-09-24,2019-09-25,2019-09-26,2019-09-27,2019-09-30,2019-10-01,2019-10-02,2019-10-03,...,2021-12-16,2021-12-17,2021-12-21,2021-12-22,2021-12-23,2021-12-27,2021-12-28,2021-12-29,2021-12-30,2021-12-31
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
09:31:00,5104492.26,5089811.78,5098632.11,5192449.66,5202313.97,5183622.46,5165432.10,5180879.21,5183064.70,5183025.46,...,5228968.13,5234302.83,5282374.43,5292147.41,5269942.33,5260662.12,5247584.61,5227282.70,5204081.03,5183548.39
09:32:00,5104492.26,5089811.78,5098632.11,5192449.66,5202313.97,5183622.46,5165432.10,5180879.21,5183064.70,5183025.46,...,5228968.13,5234302.83,5282374.43,5292147.41,5269942.33,5260662.12,5247584.61,5227282.70,5204081.03,5183548.39
09:33:00,5104492.26,5089811.78,5098632.11,5192449.66,5202313.97,5183622.46,5165432.10,5180879.21,5183064.70,5183025.46,...,5228968.13,5234302.83,5282374.43,5292147.41,5269942.33,5260662.12,5247584.61,5227282.70,5204081.03,5183548.39
09:34:00,5104492.26,5089811.78,5098632.11,5192449.66,5202313.97,5183622.46,5165432.10,5180879.21,5183064.70,5183025.46,...,5228968.13,5234302.83,5282374.43,5292147.41,5269942.33,5260662.12,5247584.61,5227282.70,5204081.03,5183548.39
09:35:00,5104492.26,5089811.78,5098632.11,5192449.66,5202313.97,5183622.46,5165432.10,5180879.21,5183064.70,5183025.46,...,5228968.13,5234302.83,5282374.43,5292147.41,5269942.33,5260662.12,5247584.61,5227282.70,5204081.03,5183548.39
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15:55:00,5122409.11,5103590.46,5108147.66,5207226.37,5223592.10,5212475.80,5195875.96,5182826.94,5192590.23,5186413.05,...,5228968.13,5234302.83,5282374.43,5292147.41,5269942.33,5260662.12,5247584.61,5227282.70,5204081.03,5183548.39
15:56:00,5122409.11,5103590.46,5108147.66,5207226.37,5223592.10,5212475.80,5195875.96,5182826.94,5192590.23,5186413.05,...,5233548.66,5232910.68,5280577.36,5288438.65,5287338.51,5264137.41,5249534.00,5242975.38,5217117.07,5191387.98
15:57:00,5122409.11,5103590.46,5108147.66,5207226.37,5223592.10,5212475.80,5195875.96,5182826.94,5192590.23,5186413.05,...,5228968.13,5234302.83,5282374.43,5292147.41,5269942.33,5260662.12,5247584.61,5227282.70,5204081.03,5183548.39
15:58:00,5122409.11,5103590.46,5108147.66,5207226.37,5223592.10,5212475.80,5195875.96,5182826.94,5192590.23,5186413.05,...,5228968.13,5234302.83,5282374.43,5292147.41,5269942.33,5260662.12,5247584.61,5227282.70,5204081.03,5183548.39


In [25]:
# Element-wise difference of the two time x date tables (aligned on labels).
new_data = NKE_z.sub(NKE_tv)
new_data

date,2019-09-20,2019-09-23,2019-09-24,2019-09-25,2019-09-26,2019-09-27,2019-09-30,2019-10-01,2019-10-02,2019-10-03,...,2021-12-16,2021-12-17,2021-12-21,2021-12-22,2021-12-23,2021-12-27,2021-12-28,2021-12-29,2021-12-30,2021-12-31
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
09:31:00,-5104471.31,-5089790.84,-5098611.16,-5192428.72,-5202293.04,-5183601.53,-5165411.18,-5180858.30,-5183043.80,-5183004.56,...,-5228947.36,-5234282.06,-5282353.66,-5292126.64,-5269921.58,-5260641.36,-5247563.86,-5227261.95,-5204060.30,-5183527.66
09:32:00,-5104472.09,-5089791.62,-5098611.93,-5192429.49,-5202293.82,-5183602.31,-5165411.95,-5180859.06,-5183044.56,-5183005.31,...,-5228948.06,-5234282.76,-5282354.36,-5292127.34,-5269922.28,-5260642.07,-5247564.56,-5227262.66,-5204061.00,-5183528.37
09:33:00,-5104472.49,-5089792.02,-5098612.33,-5192429.89,-5202294.21,-5183602.70,-5165412.35,-5180859.46,-5183044.96,-5183005.71,...,-5228948.48,-5234283.18,-5282354.78,-5292127.76,-5269922.70,-5260642.48,-5247564.98,-5227263.08,-5204061.43,-5183528.79
09:34:00,-5104472.75,-5089792.27,-5098612.59,-5192430.15,-5202294.47,-5183602.96,-5165412.60,-5180859.71,-5183045.21,-5183005.96,...,-5228948.77,-5234283.47,-5282355.07,-5292128.05,-5269922.98,-5260642.77,-5247565.26,-5227263.36,-5204061.71,-5183529.08
09:35:00,-5104473.00,-5089792.53,-5098612.85,-5192430.40,-5202294.72,-5183603.21,-5165412.86,-5180859.97,-5183045.46,-5183006.22,...,-5228949.01,-5234283.71,-5282355.31,-5292128.29,-5269923.22,-5260643.01,-5247565.50,-5227263.60,-5204061.95,-5183529.31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15:55:00,-5122393.61,-5103574.97,-5108132.17,-5207210.88,-5223576.60,-5212460.30,-5195860.47,-5182811.45,-5192574.74,-5186397.56,...,-5228952.66,-5234287.36,-5282358.95,-5292131.92,-5269926.85,-5260646.64,-5247569.13,-5227267.23,-5204065.57,-5183532.93
15:56:00,-5122393.63,-5103574.98,-5108132.18,-5207210.89,-5223576.61,-5212460.31,-5195860.48,-5182811.46,-5192574.75,-5186397.57,...,-5233533.20,-5232895.21,-5280561.89,-5288423.18,-5287323.04,-5264121.94,-5249518.53,-5242959.91,-5217101.61,-5191372.53
15:57:00,-5122393.64,-5103575.00,-5108132.20,-5207210.90,-5223576.63,-5212460.33,-5195860.49,-5182811.48,-5192574.76,-5186397.58,...,-5228952.68,-5234287.38,-5282358.97,-5292131.94,-5269926.87,-5260646.66,-5247569.15,-5227267.25,-5204065.59,-5183532.95
15:58:00,-5122393.65,-5103575.01,-5108132.21,-5207210.92,-5223576.64,-5212460.34,-5195860.51,-5182811.49,-5192574.78,-5186397.60,...,-5228952.69,-5234287.39,-5282358.98,-5292131.96,-5269926.89,-5260646.67,-5247569.17,-5227267.26,-5204065.60,-5183532.97


In [27]:
# Column totals scaled by 389, which is the count of one-minute marks from
# 09:31 through 15:59 inclusive (the between_time window used earlier).
# NOTE(review): this equals the per-date mean only if new_data has exactly 389
# non-NaN rows per column - confirm.
column_totals = new_data.sum(axis=0)
sum_subtract = column_totals.div(389)
sum_subtract

date
2019-09-20   -5109677.86
2019-09-23   -5094000.41
2019-09-24   -5101562.68
2019-09-25   -5196764.19
2019-09-26   -5208528.20
                 ...    
2021-12-27   -5260989.27
2021-12-28   -5247747.02
2021-12-29   -5228928.97
2021-12-30   -5205440.47
2021-12-31   -5184346.78
Length: 386, dtype: float64