In [1]:
import pandas as pd
import numpy as np
from barsampler import  barsampler as bs
from ExponentialSmoother import ExponentialSmoother as es
import matplotlib.pyplot as plt
def get_log_return(df):
    log_returns = pd.Series(np.log((df['Close'] - df['Open'])/df['Open'] + 1),name='log returns')
    return pd.concat([df,log_returns], axis=1)

In [2]:
# # Get the file name and preprocess the data for required stocks 
# # Store the tick data into csv file for future use

# filepath = 'EQY_US_ALL_TRADE_20191008'
# bs.prepare_data(filepath)
# tdata = pd.read_csv(filepath+'.csv', delimiter='|')
# AAPL = bs.get_ticks_symbols(tdata, 'AAPL')
# AMZN = bs.get_ticks_symbols(tdata, 'AMZN')
# MSFT = bs.get_ticks_symbols(tdata, 'MSFT')
# AMZN.to_csv('AMZN.csv')

# AAPL.to_csv('AAPL.csv')
# MSFT.to_csv('MSFT.csv')

In [3]:
# del tdata, AAPL, AMZN, MSFT

#### Read Data from stock CSV files into Pandas Data Frame

In [4]:
AMZNdf = pd.read_csv("./AMZN.csv").iloc[:,1:]
AAPLdf = pd.read_csv("./AAPL.csv").iloc[:,1:]
MSFTdf = pd.read_csv("./MSFT.csv").iloc[:,1:]

In [5]:
AMZNdf.head(2)

Unnamed: 0,Time,Exchange,Symbol,Sale Condition,Trade Volume,Trade Price,Trade Stop Stock Indicator,Trade Correction Indicator,Sequence Number,Trade Id,Source of Trade,Trade Reporting Facility,Participant Timestamp,Trade Reporting Facility TRF Timestamp,Trade Through Exempt Indicator
0,40000031614134,P,AMZN,@ TI,5,1732.86,,0,1145,1,N,,40000029500672,,0
1,40008758072535,Q,AMZN,@FTI,10,1732.02,,0,1152,1,N,,40008758049797,,1


In [6]:
## Clean data i.e Delete trades outside trading hours ( given Time Period ) or for given tradeType 
timePeriod = ["1900-01-01 09:30:00", "1900-01-01 16:00:00"]
AMZN = es.clean_data(AMZNdf, tradeType = None, timePeriod = timePeriod)
AAPL = es.clean_data(AAPLdf, tradeType = None, timePeriod = timePeriod)
MSFT = es.clean_data(MSFTdf, tradeType = None, timePeriod = timePeriod)

#### Creating Minute Bars

In [7]:
AMZN.head(5)

Unnamed: 0_level_0,Exchange,Symbol,Sale Condition,Trade Volume,Trade Price,Trade Stop Stock Indicator,Trade Correction Indicator,Sequence Number,Trade Id,Source of Trade,Trade Reporting Facility,Participant Timestamp,Trade Reporting Facility TRF Timestamp,Trade Through Exempt Indicator
Date_Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1900-01-01 09:30:00.113432261,Y,AMZN,@ I,7,1721.02,,0,13612,6,N,,93000113220000,,0
1900-01-01 09:30:00.165981559,Q,AMZN,@FTI,12,1722.49,,0,13732,288,N,,93000165953740,,1
1900-01-01 09:30:00.318257097,Q,AMZN,@O X,23133,1722.49,,0,14048,289,N,,93000318235506,,1
1900-01-01 09:30:00.318500147,Q,AMZN,@ Q,23133,1722.49,,0,14049,290,N,,93000318480641,,0
1900-01-01 09:30:00.318619304,Q,AMZN,@,200,1722.49,,0,14050,291,N,,93000318600978,,0


In [8]:
AMmbsdf = bs.min_resampler(AMZN, units = 'T', size = 1, volume = True).reset_index()
APmbsdf = bs.min_resampler(AAPL, units = 'T', size = 1, volume = True).reset_index()
MSmbsdf = bs.min_resampler(MSFT, units = 'T', size = 1, volume = True).reset_index()

In [9]:
AMmbs = AMmbsdf.rename(columns = {"open":"Open", "high":"High", "low":"Low", "close":"Close"})
APmbs = APmbsdf.rename(columns = {"open":"Open", "high":"High", "low":"Low", "close":"Close"})
MSmbs = MSmbsdf.rename(columns = {"open":"Open", "high":"High", "low":"Low", "close":"Close"})

In [10]:
AMmbs = es.get_log_return(AMmbs).set_index('Date_Time')
APmbs = es.get_log_return(APmbs).set_index('Date_Time')
MSmbs = es.get_log_return(MSmbs).set_index('Date_Time')

In [11]:
AMmbs.head()

Unnamed: 0_level_0,Open,High,Low,Close,Trade Volume,log returns
Date_Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1900-01-01 09:30:00,1721.02,1722.49,1717.71,1718.63,67763,-0.00139
1900-01-01 09:31:00,1718.6,1719.2599,1717.05,1717.9502,12760,-0.000378
1900-01-01 09:32:00,1717.61,1720.0,1717.04,1719.71,15063,0.001222
1900-01-01 09:33:00,1719.52,1722.73,1718.31,1721.4,14545,0.001093
1900-01-01 09:34:00,1721.0781,1722.28,1719.41,1721.04,11100,-2.2e-05


#### Aligning Calendar

In [12]:
# AMmbs = AMmbs.align(APmbs, 'outer',0)[0]

#### Create Db

In [13]:
symbol = ['AAPL', 'AMZN', 'MSFT']
dbLogReturns = es.create_db([APmbs, AMmbs, MSmbs], symbol, 'log returns')
dbLogReturns

  res.append(np.vstack((np.array(db[Metric]) for db in listdb)).T)


[DatetimeIndex(['1900-01-01 09:30:00', '1900-01-01 09:31:00',
                '1900-01-01 09:32:00', '1900-01-01 09:33:00',
                '1900-01-01 09:34:00', '1900-01-01 09:35:00',
                '1900-01-01 09:36:00', '1900-01-01 09:37:00',
                '1900-01-01 09:38:00', '1900-01-01 09:39:00',
                ...
                '1900-01-01 15:50:00', '1900-01-01 15:51:00',
                '1900-01-01 15:52:00', '1900-01-01 15:53:00',
                '1900-01-01 15:54:00', '1900-01-01 15:55:00',
                '1900-01-01 15:56:00', '1900-01-01 15:57:00',
                '1900-01-01 15:58:00', '1900-01-01 15:59:00'],
               dtype='datetime64[ns]', name='Date_Time', length=390, freq=None),
 ['AAPL', 'AMZN', 'MSFT'],
 array([[ 0.00181348, -0.00138968, -0.00277555],
        [ 0.00013256, -0.00037817,  0.00175387],
        [ 0.0011485 ,  0.00122188,  0.00196944],
        ...,
        [-0.00048984, -0.00110733, -0.00044189],
        [ 0.00095716,  0.00029893,  0.0002

In [14]:
symbol = ['AAPL', 'AMZN', 'MSFT']
dbTradeVolume = es.create_db([APmbs, AMmbs, MSmbs], symbol, 'Trade Volume')

In [15]:
dbLogReturns[2].transpose()

array([[ 0.00181348,  0.00013256,  0.0011485 , ..., -0.00048984,
         0.00095716, -0.00151407],
       [-0.00138968, -0.00037817,  0.00122188, ..., -0.00110733,
         0.00029893, -0.00090854],
       [-0.00277555,  0.00175387,  0.00196944, ..., -0.00044189,
         0.00029459, -0.00110509]])

#### Smooth till volatility is ~1

In [16]:
mnVarSmoothedLogReturns = []
for i in range(3):
    VarSmoothed = (dbLogReturns[2].transpose()[i][1:-1])/np.sqrt(es.exponential_smoothing(dbLogReturns[2].transpose()[i][0:-2]**2,
                                                                                         lambda_=0.0146))

    mnVarSmoothedLogReturns.append(VarSmoothed)
mnVarSmoothedLogReturnsm = np.array(mnVarSmoothedLogReturns)
np.std(mnVarSmoothedLogReturnsm, axis = 1)

array([1.17764672, 0.93624919, 0.98666885])

In [17]:
dbSmoothedLogReturnsm = [dbLogReturns[0][:mnVarSmoothedLogReturnsm.shape[1]], 
                         dbLogReturns[1], mnVarSmoothedLogReturnsm.T]

In [18]:
dbSmoothedLogReturnsm

[DatetimeIndex(['1900-01-01 09:30:00', '1900-01-01 09:31:00',
                '1900-01-01 09:32:00', '1900-01-01 09:33:00',
                '1900-01-01 09:34:00', '1900-01-01 09:35:00',
                '1900-01-01 09:36:00', '1900-01-01 09:37:00',
                '1900-01-01 09:38:00', '1900-01-01 09:39:00',
                ...
                '1900-01-01 15:48:00', '1900-01-01 15:49:00',
                '1900-01-01 15:50:00', '1900-01-01 15:51:00',
                '1900-01-01 15:52:00', '1900-01-01 15:53:00',
                '1900-01-01 15:54:00', '1900-01-01 15:55:00',
                '1900-01-01 15:56:00', '1900-01-01 15:57:00'],
               dtype='datetime64[ns]', name='Date_Time', length=388, freq=None),
 ['AAPL', 'AMZN', 'MSFT'],
 array([[ 0.07309635, -0.27212805,  0.63190179],
        [ 0.63796099,  0.88526086,  0.71269943],
        [ 0.02462298,  0.79294318,  0.54209918],
        ...,
        [ 0.22154227,  0.01305575,  0.23550005],
        [-0.49087755, -1.71525962, -0.4743

#### Creating Tick Bars

Note here we created a new function

In [19]:
AMtbs =  es.tick_resampler_aligned(AMZN, 500, volume = True).reset_index()
APtbs =  es.tick_resampler_aligned(AAPL, 500, volume = True).reset_index()
MStbs =  es.tick_resampler_aligned(MSFT, 500, volume = True).reset_index()

AMtbs = es.get_log_return(AMtbs)
APtbs = es.get_log_return(APtbs)
MStbs = es.get_log_return(MStbs)

#### Create Db

In [20]:
symbol = ['AAPL', 'AMZN', 'MSFT']
dbLogReturns = es.create_db([ APtbs, AMtbs, MStbs], symbol, 'log returns')

In [21]:
dbLogReturns

[RangeIndex(start=0, stop=500, step=1),
 ['AAPL', 'AMZN', 'MSFT'],
 array([[ 3.09851053e-04,  5.69267693e-04,  2.91715288e-04],
        [ 2.35865078e-04, -9.18177656e-04, -6.56479107e-04],
        [ 1.31195170e-03, -2.96552147e-04, -2.04513986e-03],
        ...,
        [-8.90194508e-05,  2.93216438e-05,  3.68344476e-05],
        [-3.56157069e-04, -5.86441473e-05,  0.00000000e+00],
        [-8.01853215e-04,  1.23156682e-04, -5.89535758e-04]])]

#### Smooth till the volatility ~ 1

In [22]:
mnVarSmoothedLogReturns = []
for i in range(3):
    VarSmoothed = (dbLogReturns[2].transpose()[i][1:-1])/np.sqrt(es.exponential_smoothing(dbLogReturns[2].transpose()[i][0:-2]**2,
                                                                                         lambda_=0.085))
    mnVarSmoothedLogReturns.append(VarSmoothed)
mnVarSmoothedLogReturnst = np.array(mnVarSmoothedLogReturns)
np.std(mnVarSmoothedLogReturnst, axis = 1)

array([1.06325449, 1.01856076, 1.05351586])

In [23]:
dbSmoothedLogReturnst = [dbLogReturns[0][:mnVarSmoothedLogReturnst.shape[1]], dbLogReturns[1],
                        mnVarSmoothedLogReturnst.T]

In [24]:
dbSmoothedLogReturnst

[RangeIndex(start=0, stop=498, step=1),
 ['AAPL', 'AMZN', 'MSFT'],
 array([[ 0.76122084, -1.61291018, -2.25041036],
        [ 4.31190495, -0.48873263, -6.04403398],
        [-0.94350804, -0.93182845,  1.8318802 ],
        ...,
        [-0.25586457, -1.64416978, -0.70969586],
        [-0.1778034 ,  0.09605904,  0.12087067],
        [-0.74259049, -0.20076037,  0.        ]])]