# Data Replication Paper
## Informed Trading in the Bitcoin Market
### Fintech Research Project
<p><b>MSc Banking and Finance</b></p>
<b>Explanation of the Notebook</b><br></br>
The Notebook is structured in such a way that every Table can be run independently from each other. This means that it is <i>not</i> required to run all the code from top to bottom before analysing our results. We make use of 'nbextensions' (for Jupyter Notebook) to structure our Notebook with collapsable headers which generates a great overview. (https://github.com/ipython-contrib/jupyter_contrib_nbextensions)

We hope you enjoy reading through our Notebook and enjoy the massive chunks of code we have written!

<i> Always run below's imports before starting</i>

In [3]:
# Libraries Required to Run Everything
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
import statsmodels
import statsmodels.formula.api as smf
import datetime as dt
from datetime import datetime

# Purely meant for Lay-Out
class color:
   PURPLE = '\033[95m'
   CYAN = '\033[96m'
   DARKCYAN = '\033[36m'
   BLUE = '\033[94m'
   GREEN = '\033[92m'
   YELLOW = '\033[93m'
   RED = '\033[91m'
   BOLD = '\033[1m'
   UNDERLINE = '\033[4m'
   END = '\033[0m'

# Gathering Data & Shaping It for Replication Paper
From: http://api.bitcoincharts.com/v1/csv/ --> bitstampUSD.csv.gz
<p><b> Perform these tasks once and continue to work with the new file. There is NO NEED to run this code after you have the new file.</b></p>

In [4]:
chunksize = 10**5 # define a chunksize -> read 100.000 rows per chunk

# text_file_reader represents all our chunks
text_file_reader = pd.read_csv('bitstampUSD.csv.gz', header=None, names=['Date', 'Price', 'Volume'],
                               chunksize=chunksize, iterator=True)

df = pd.concat(text_file_reader, ignore_index=True)

# Show all the Raw Data
df

Unnamed: 0,Date,Price,Volume
0,1315922016,5.80,1.000000
1,1315922024,5.83,3.000000
2,1315922029,5.90,1.000000
3,1315922034,6.00,20.000000
4,1315924373,5.95,12.452100
5,1315924504,5.88,7.458000
6,1315924614,5.88,0.176882
7,1315925663,5.76,2.267000
8,1315927898,5.65,2.542000
9,1315942379,5.92,0.450000


In [5]:
# Format Date to Date + Time
df['Date'] = pd.to_datetime(df['Date'], unit='s') 

# Remove Time 
df['Date'] = df['Date'].dt.date 

In [6]:
# Set Date to Correct Type
df['Date'] = df['Date'].astype('datetime64[ns]')

In [7]:
# Change Index to Date
df.set_index(["Date"], inplace=True)

In [8]:
# Put all data before 2017-07-18 in new DataFrame, remember: exclusive.
df_new = df.loc[(df.index < datetime(2017, 7, 18))] 

In [9]:
# Check out the new Data Format
df_new[:15]

Unnamed: 0_level_0,Price,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2011-09-13,5.8,1.0
2011-09-13,5.83,3.0
2011-09-13,5.9,1.0
2011-09-13,6.0,20.0
2011-09-13,5.95,12.4521
2011-09-13,5.88,7.458
2011-09-13,5.88,0.176882
2011-09-13,5.76,2.267
2011-09-13,5.65,2.542
2011-09-13,5.92,0.45


In [17]:
# Create a CSV file with all new information
df_new.to_csv("ReplicationPaper.csv") 

# Tables

## Table 1

In [18]:
# Read CSV file and return 'Date' as Index
df = pd.read_csv('ReplicationPaper.csv', parse_dates=True, index_col=0) 

# Show Head
df.head()

Unnamed: 0_level_0,Price,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2011-09-13,5.8,1.0
2011-09-13,5.83,3.0
2011-09-13,5.9,1.0
2011-09-13,6.0,20.0
2011-09-13,5.95,12.4521


In [19]:
# Create a New DataFrame that Calculates Daily Values
dfDaily = df.groupby(by='Date').agg({'Price':'mean',
                                     'Volume': 'sum'})

# Create new Column that calculates Returns
dfDaily['Returns (in %)'] = dfDaily['Price'].apply(np.log)

# Change Logs to % Returns
dfDaily['Returns (in %)'] = dfDaily['Returns (in %)'].diff().mul(100)

# Rounding
dfDaily['Returns (in %)'] = dfDaily['Returns (in %)'].round(2)

# Drop First Row Since it Contains NaN
dfDaily = dfDaily[1:]

# Show Table Head
dfDaily.head()

Unnamed: 0_level_0,Price,Volume,Returns (in %)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2011-09-14,5.582143,61.145984,-5.1
2011-09-15,5.12,80.140795,-8.64
2011-09-16,4.835,39.914007,-5.73
2011-09-17,4.87,0.3,0.72
2011-09-18,4.84,119.8128,-0.62


In [20]:
#Create List with all the Dates
Events = ['2011-12-19',
          '2012-03-01', 
          '2012-08-17',
          '2012-09-05',
          '2013-03-12',
          '2013-03-25',
          '2013-05-14',
          '2013-06-29',
          '2013-10-02',
          '2013-10-23',
          '2013-11-18',
          '2013-12-05',
          '2013-12-18',
          '2014-01-08',
          '2014-01-27',
          '2014-02-07',
          '2014-02-24',
          '2014-03-07',
          '2014-03-26',
          '2014-04-10',
          '2014-07-04',
          '2014-07-18',
          '2014-12-11',
          '2015-01-04',
          '2015-01-26',
          '2015-02-14',
          '2015-06-03',
          '2015-08-01',
          '2015-08-15',
          '2015-09-22',
          '2015-10-22',
          '2015-10-31',
          '2016-01-14',
          '2016-02-24',
          '2016-04-27',
          '2016-05-25',
          '2016-08-02',
          '2016-11-29',
          '2017-01-11',
          '2017-02-09',
          '2017-03-10',
          '2017-03-24']

# Create List with All Positive Events
PositiveEventsList = ['2011-12-19',
                      '2013-03-25',
                      '2013-06-29',
                      '2013-11-18',
                      '2014-01-08',
                      '2014-07-18',
                      '2014-12-11',
                      '2015-01-26',
                      '2015-06-03',
                      '2015-09-22',
                      '2015-10-22',
                      '2015-10-31',
                      '2016-02-24',
                      '2016-04-27',
                      '2016-05-25',
                      '2016-11-29',
                      '2017-03-24']


# Create List with All Negative Events
NegativeEventsList = ['2012-03-01', 
                      '2012-08-17',
                      '2012-09-05',
                      '2013-03-12',
                      '2013-05-14',
                      '2013-10-02',
                      '2013-10-23',
                      '2013-12-05',
                      '2013-12-18',
                      '2014-01-27',
                      '2014-02-07',
                      '2014-02-24',
                      '2014-03-07',
                      '2014-03-26',
                      '2014-04-10',
                      '2014-07-04',
                      '2015-01-04',
                      '2015-02-14',
                      '2015-08-01',
                      '2015-08-15',
                      '2016-01-14',
                      '2016-08-02',
                      '2017-01-11',
                      '2017-02-09',
                      '2017-03-10']

In [21]:
def Table1():
    
    # Create DataFrames for the Large Events
    dfPositive = dfDaily[dfDaily.index.isin(PositiveEventsList)]
    dfNegative = dfDaily[dfDaily.index.isin(NegativeEventsList)]
    
    # Count Total Events, Positive and Negative Events
    EventsCounted = len(Events)
    PositiveEvents = dfDaily[dfDaily.index.isin(PositiveEventsList)].count()['Returns (in %)']
    NegativeEvents = dfDaily[dfDaily.index.isin(NegativeEventsList)].count()['Returns (in %)']

    # Event Day Returns for Positive and Negative Events
    PosEventReturnsMean = dfDaily[dfDaily.index.isin(PositiveEventsList)].mean()['Returns (in %)']
    NegEventReturnsMean = dfDaily[dfDaily.index.isin(NegativeEventsList)].mean()['Returns (in %)']
    PosEventReturnsMedian = dfDaily[dfDaily.index.isin(PositiveEventsList)].median()['Returns (in %)']
    NegEventReturnsMedian = dfDaily[dfDaily.index.isin(NegativeEventsList)].median()['Returns (in %)']
    
    # Selects all Large Positive Events with Return Above 5% and apply Count/Mean/Median 
    LargePositiveEventsCount = dfPositive[dfPositive["Returns (in %)"] >= 5.0].count()['Returns (in %)']
    LargePositiveEventsMean = dfPositive[dfPositive["Returns (in %)"] >= 5.0].mean()['Returns (in %)']
    LargePositiveEventsMedian = dfPositive[dfPositive["Returns (in %)"] >= 5.0].median()['Returns (in %)']

    # Selects all Events with a Return Below -5% and Apply Count/Mean
    LargeNegativeEventsCount = dfNegative[dfNegative["Returns (in %)"] < -5.0].count()['Returns (in %)']
    LargeNegativeEventsMean = dfNegative[dfNegative["Returns (in %)"] < -5.0].mean()['Returns (in %)']
    LargeNegativeEventsMedian = dfNegative[dfNegative["Returns (in %)"] < -5.0].median()['Returns (in %)']
    
    # Gives all Output
    print("======== General ========")
    print("The Number of Events is:", EventsCounted)
    print("The Number of Positive Events is:",PositiveEvents)
    print("The Number of Negative Events is:",NegativeEvents)
    print("The Mean of Positive Event Day Returns is:",round(PosEventReturnsMean, 2),'%')
    print("The Mean of Negative Event Day Returns is:",round(NegEventReturnsMean, 2),'%')
    print("The Median of PositiveEvent Day Returns is:",round(PosEventReturnsMedian, 2),'%')
    print("The Median of NegativeEvent Day Returns is:",round(NegEventReturnsMedian, 2),'%')
    print("")
    print("======== Large Events ========")    
    print("The Number of Large Positive Events is:",round(LargePositiveEventsCount, 2))
    print("The Number of Large Negative Events is:",round(LargeNegativeEventsCount, 2))
    print("The Mean of Positive Large Event Day Returns is:",round(LargePositiveEventsMean, 2),'%')
    print("The Mean of Negative Large Event Day Returns is:",round(LargeNegativeEventsMean, 2),'%')
    print("The Median of Positive Large Event Day Returns is:",round(LargePositiveEventsMedian, 2),'%')
    print("The Median of Negative Large Event Day Returns is:",round(LargeNegativeEventsMedian, 2),'%')
    
Table1()

The Number of Events is: 42
The Number of Positive Events is: 17
The Number of Negative Events is: 25
The Mean of Positive Event Day Returns is: 2.06 %
The Mean of Negative Event Day Returns is: -4.35 %
The Median of PositiveEvent Day Returns is: 0.52 %
The Median of NegativeEvent Day Returns is: -4.11 %

The Number of Large Positive Events is: 4
The Number of Large Negative Events is: 10
The Mean of Positive Large Event Day Returns is: 12.38 %
The Mean of Negative Large Event Day Returns is: -11.06 %
The Median of Positive Large Event Day Returns is: 11.84 %
The Median of Negative Large Event Day Returns is: -9.59 %


## Table 2

<b>Quote that explains how Buy and Sell Volume is Defined:</b> When the data of transactions’ directions and historical bid-ask quotes are unavailable, a common way to discern a trade’s direction is the tick rule (e.g. Bernile et al., 2016). Namely, a trade whose trading price is higher (lower) than the previous trade, will be classified as buyer- (seller-) initiated; a trade whose trading price is the same as the previous trade will be classified as the same type of the previous trade. This rule is shown to perform remarkably well when order book data are unavailable (Lee and Ready, 1991).

In [48]:
# Read CSV file and return 'Date' as Index
df = pd.read_csv('ReplicationPaper.csv', parse_dates=True, index_col=0) 
# df.head()

In [49]:
# Create Variable that differentiate Buy and Sell
PriceDiff = df['Price'].diff(periods=-1)

# Create new column that says 'Buy' or 'Sell' based on PriceDiff
df['Order'] = np.where(PriceDiff >= 0, 'Buy', 'Sell')

# Create new column that shows the difference in price of the previous moment in time
df['Difference'] = PriceDiff

# Show Table Head
# df.head()

In [50]:
#Create List with all the Dates
Events = ['2011-12-19',
          '2012-03-01', 
          '2012-08-17',
          '2012-09-05',
          '2013-03-12',
          '2013-03-25',
          '2013-05-14',
          '2013-06-29',
          '2013-10-02',
          '2013-10-23',
          '2013-11-18',
          '2013-12-05',
          '2013-12-18',
          '2014-01-08',
          '2014-01-27',
          '2014-02-07',
          '2014-02-24',
          '2014-03-07',
          '2014-03-26',
          '2014-04-10',
          '2014-07-04',
          '2014-07-18',
          '2014-12-11',
          '2015-01-04',
          '2015-01-26',
          '2015-02-14',
          '2015-06-03',
          '2015-08-01',
          '2015-08-15',
          '2015-09-22',
          '2015-10-22',
          '2015-10-31',
          '2016-01-14',
          '2016-02-24',
          '2016-04-27',
          '2016-05-25',
          '2016-08-02',
          '2016-11-29',
          '2017-01-11',
          '2017-02-09',
          '2017-03-10',
          '2017-03-24']

# Create List with All Positive Events
PositiveEvents = ['2011-12-19',
                  '2013-03-25',
                  '2013-06-29',
                  '2013-11-18',
                  '2014-01-08',
                  '2014-07-18',
                  '2014-12-11',
                  '2015-01-26',
                  '2015-06-03',
                  '2015-09-22',
                  '2015-10-22',
                  '2015-10-31',
                  '2016-02-24',
                  '2016-04-27',
                  '2016-05-25',
                  '2016-11-29',
                  '2017-03-24']


# Create List with All Negative Events
NegativeEvents = ['2012-03-01', 
                  '2012-08-17',
                  '2012-09-05',
                  '2013-03-12',
                  '2013-05-14',
                  '2013-10-02',
                  '2013-10-23',
                  '2013-12-05',
                  '2013-12-18',
                  '2014-01-27',
                  '2014-02-07',
                  '2014-02-24',
                  '2014-03-07',
                  '2014-03-26',
                  '2014-04-10',
                  '2014-07-04',
                  '2015-01-04',
                  '2015-02-14',
                  '2015-08-01',
                  '2015-08-15',
                  '2016-01-14',
                  '2016-08-02',
                  '2017-01-11',
                  '2017-02-09',
                  '2017-03-10',]

In [51]:
# Create Columns that Display the Volume of Sell and Buy Moments
df['Sell Volume'] = np.where(df['Difference'] < 0, df['Volume'], 0)
df['Buy Volume'] = np.where(df['Difference'] >= 0, df['Volume'], 0)

In [52]:
# Define Function to Select Quantiles
def percentile(n):
    def percentile_(x):
        return np.quantile(x, n)
    percentile_.__name__ = 'percentile_%s' % n
    return percentile_

In [53]:
# Create a new DataFrame Containing Daily Trade Information Based on the 90th Quantile
dfDaily = df.groupby(by='Date').agg({'Price': percentile(.90),
                                    'Volume': percentile(.90),
                                    'Sell Volume': percentile(.90),
                                    'Buy Volume': percentile(.90)})

# Create the Novel Indicator OSI and add New Column
dfDaily['OSI'] = 100 * ((dfDaily['Buy Volume'] - dfDaily['Sell Volume']) / (dfDaily['Buy Volume'] + dfDaily['Sell Volume']))

In [54]:
# Adds a Column that gives False/True if Event is in Index
dfDaily['Events'] = dfDaily.index.isin(Events)
dfDaily['Positive Events'] = dfDaily.index.isin(PositiveEvents)
dfDaily['Negative Events'] = dfDaily.index.isin(NegativeEvents)

In [55]:
# Create Dictionary
EventNum = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42]
Dictionary = dict(zip(EventNum, Events))
# Dictionary

In [87]:
# Create New DataFrame Containing 42 Columns with Buy Volume
dfControlPeriod = dfDaily[[ 'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',]]

# Reset Index and Rename All Columns
dfControlPeriod = dfControlPeriod.reset_index()
dfControlPeriod.columns = ['Date',1,2,3,4,5,6,7,8,9,10,
                           11,12,13,14,15,16,17,18,19,20,
                           21,22,23,24,25,26,27,28,29,30,
                           31,32,33,34,35,36,37,38,39,40,
                           41,42]

# Create Control Window [-15,-5]
new_set = {}

for v, Event in Dictionary.items():
    row = dfControlPeriod.loc[dfControlPeriod['Date'] == Event]
    index = row.index[0]
    
    my_set = dfControlPeriod.loc[(index - 15) : (index - 6), v].reset_index(drop=True)
    new_set[v] = my_set
    
Buy = pd.DataFrame(new_set)
Buy.index = Buy.index - 15

In [62]:
# Create New DataFrame Containing 42 Columns with Sell Volume
dfControlPeriod = dfDaily[[ 'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume']]

# Reset Index and Rename All Columns
dfControlPeriod = dfControlPeriod.reset_index()
dfControlPeriod.columns = ['Date',1,2,3,4,5,6,7,8,9,10,
                           11,12,13,14,15,16,17,18,19,20,
                           21,22,23,24,25,26,27,28,29,30,
                           31,32,33,34,35,36,37,38,39,40,
                           41,42]

# Create Control Window [-15,-5]
new_set = {}

for v, Event in Dictionary.items():
    row = dfControlPeriod.loc[dfControlPeriod['Date'] == Event]
    index = row.index[0]
    
    my_set = dfControlPeriod.loc[(index - 15) : (index - 6), v].reset_index(drop=True)
    new_set[v] = my_set
    
Sell = pd.DataFrame(new_set)
Sell.index = Sell.index - 15

In [63]:
# Create New DataFrame Containing 42 Columns with Sell Volume
dfControlPeriod = dfDaily[[ 'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI']]

# Reset Index and Rename All Columns
dfControlPeriod = dfControlPeriod.reset_index()
dfControlPeriod.columns = ['Date',1,2,3,4,5,6,7,8,9,10,
                           11,12,13,14,15,16,17,18,19,20,
                           21,22,23,24,25,26,27,28,29,30,
                           31,32,33,34,35,36,37,38,39,40,
                           41,42]

# Create Control Window [-15,-5]
new_set = {}

for v, Event in Dictionary.items():
    row = dfControlPeriod.loc[dfControlPeriod['Date'] == Event]
    index = row.index[0]
    
    my_set = dfControlPeriod.loc[(index - 15) : (index - 6), v].reset_index(drop=True)
    new_set[v] = my_set
    
OSI = pd.DataFrame(new_set)
OSI.index = OSI.index - 15

In [89]:
# Create Dictionary
EventNum = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]
Dictionary = dict(zip(EventNum, PositiveEvents))
# Dictionary

In [90]:
# Create New DataFrame Containing 17 Columns with Buy Volume
dfControlPeriod = dfDaily[[ 'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',]]
# Reset Index and Rename All Columns
dfControlPeriod = dfControlPeriod.reset_index()
dfControlPeriod.columns = ['Date',1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]

# Create Control Window [-3,0]
new_set = {}

for v, Event in Dictionary.items():
    row = dfControlPeriod.loc[dfControlPeriod['Date'] == Event]
    index = row.index[0]
    
    my_set = dfControlPeriod.loc[(index - 3) : (index - 1), v].reset_index(drop=True)
    new_set[v] = my_set
    
Buy2 = pd.DataFrame(new_set)
Buy2.index = Buy2.index - 3

In [94]:
# Create New DataFrame Containing 17 Columns with Sell Volume
dfControlPeriod = dfDaily[[ 'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',]]

# Reset Index and Rename All Columns
dfControlPeriod = dfControlPeriod.reset_index()
dfControlPeriod.columns = ['Date',1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]

# Create Control Window [-3,0]
new_set = {}

for v, Event in Dictionary.items():
    row = dfControlPeriod.loc[dfControlPeriod['Date'] == Event]
    index = row.index[0]
    
    my_set = dfControlPeriod.loc[(index - 3) : (index - 1), v].reset_index(drop=True)
    new_set[v] = my_set
    
Sell2 = pd.DataFrame(new_set)
Sell2.index = Sell2.index - 3

In [95]:
# Create New DataFrame Containing 17 Columns with Sell Volume
dfControlPeriod = dfDaily[[ 'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI',]]

# Reset Index and Rename All Columns
dfControlPeriod = dfControlPeriod.reset_index()
dfControlPeriod.columns = ['Date',1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]


# Create Control Window [-3,0]
new_set = {}

for v, Event in Dictionary.items():
    row = dfControlPeriod.loc[dfControlPeriod['Date'] == Event]
    index = row.index[0]
    
    my_set = dfControlPeriod.loc[(index - 3) : (index - 1), v].reset_index(drop=True)
    new_set[v] = my_set
    
OSI2 = pd.DataFrame(new_set)
OSI2.index = OSI2.index - 3

In [96]:
# Create Dictionary
EventNum = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25]
Dictionary = dict(zip(EventNum, NegativeEvents))
# Dictionary

In [97]:
# Create New DataFrame Containing 42 Columns with Buy Volume
dfControlPeriod = dfDaily[[ 'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume', 
                            'Buy Volume',
                            'Buy Volume', 
                            'Buy Volume', ]]

# Reset Index and Rename All Columns
dfControlPeriod = dfControlPeriod.reset_index()
dfControlPeriod.columns = ['Date',1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25]

# Create Control Window [-3,0]
new_set = {}

for v, Event in Dictionary.items():
    row = dfControlPeriod.loc[dfControlPeriod['Date'] == Event]
    index = row.index[0]
    
    my_set = dfControlPeriod.loc[(index - 3) : (index - 1), v].reset_index(drop=True)
    new_set[v] = my_set
    
Buy3 = pd.DataFrame(new_set)
Buy3.index = Buy3.index - 3

In [98]:
# Create New DataFrame Containing 42 Columns with Buy Volume
dfControlPeriod = dfDaily[[ 'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume', 
                            'Sell Volume',
                            'Sell Volume', 
                            'Sell Volume']]

# Reset Index and Rename All Columns
dfControlPeriod = dfControlPeriod.reset_index()
dfControlPeriod.columns = ['Date',1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25]

# Create Control Window [-3,0]
new_set = {}

for v, Event in Dictionary.items():
    row = dfControlPeriod.loc[dfControlPeriod['Date'] == Event]
    index = row.index[0]
    
    my_set = dfControlPeriod.loc[(index - 3) : (index - 1), v].reset_index(drop=True)
    new_set[v] = my_set
    
Sell3 = pd.DataFrame(new_set)
Sell3.index = Sell3.index - 3

In [99]:
# Create New DataFrame Containing 42 Columns with Buy Volume
dfControlPeriod = dfDaily[[ 'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI', 
                            'OSI',
                            'OSI', 
                            'OSI']]

# Reset Index and Rename All Columns
dfControlPeriod = dfControlPeriod.reset_index()
dfControlPeriod.columns = ['Date',1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25]

# Create Control Window [-3,0]
new_set = {}

for v, Event in Dictionary.items():
    row = dfControlPeriod.loc[dfControlPeriod['Date'] == Event]
    index = row.index[0]
    
    my_set = dfControlPeriod.loc[(index - 3) : (index - 1), v].reset_index(drop=True)
    new_set[v] = my_set
    
OSI3 = pd.DataFrame(new_set)
OSI3.index = OSI3.index - 3

In [100]:
# Function for the Control Period part of Table 2

def Table2ControlPeriod():
    
    # Buy Side
    BuyTotal = len(Buy) * 42
    BuyMean = Buy.mean().mean()
    BuyStd = Buy.std().std()
    BuyMedian = Buy.median().median()
    BuyMin = Buy.min().min()
    BuyMax = Buy.max().max()
    #BuyADF = adfuller(Buy.mean())
    
    # Sell Side
    SellTotal = len(Sell) * 42
    SellMean = Sell.mean().mean()
    SellStd = Sell.std().std()
    SellMedian = Sell.median().median()
    SellMin = Sell.min().min()
    SellMax = Sell.max().max()
    #SellADF = adfuller(Sell.mean())
    
    # OSI Side
    OSITotal = len(OSI) * 42
    OSIMean = OSI.mean().mean()
    OSIStd = OSI.std().std()
    OSIMedian = OSI.median().median()
    OSIMin = OSI.min().min()
    OSIMax = OSI.max().max()
    #OSIADF = adfuller(OSI.mean())

    print("======== Buy ========")
    print("Number of Buy Orders are:",BuyTotal)
    print("The Mean of the Buy Orders is:",round(BuyMean, 4))
    print("The Std of the Buy Orders is:",round(BuyStd, 4))
    print("The Median of the Buy Orders is:",round(BuyMedian, 4))
    print("The Min of the Buy Orders is:",round(BuyMin, 4))
    print("The Max of the Buy Orders is:",round(BuyMax, 4))
    #print("The ADF is: %f" % BuyADF[1])
    print("")
    print("======== Sell ========")
    print("Number of Sell Orders are:",SellTotal)
    print("The Mean of the Sell Orders is:",round(SellMean, 4))
    print("The Std of the Sell Orders is:",round(SellStd, 4))
    print("The Median of the Sell Buy Orders is:",round(SellMedian, 4))
    print("The Min of the Sell Orders is:",round(SellMin, 4))
    print("The Max of the Sell Orders is:",round(SellMax, 4))
    #print("The ADF is: %f" % SellADF[1])
    print("")
    print("======== OSI ========")
    print("Number of OSI Orders are:",OSITotal)
    print("The Mean of the OSI Orders is:",round(OSIMean, 4))
    print("The Std of the OSI Orders is:",round(OSIStd, 4))
    print("The Median of the OSI Buy Orders is:",round(OSIMedian, 4))
    print("The Min of the OSI Orders is:",round(OSIMin, 4))
    print("The Max of the OSI Orders is:",round(OSIMax, 4)) 
    #print("The ADF is: %f" % OSIADF[1]) #[1] is so you grab only the p-value

In [101]:
# Function for the Pre Positive part of Table 2

def Table2PrePositive():
    
    # Buy Side
    BuyTotal = len(Buy2) * 17
    BuyMean = Buy2.mean().mean()
    BuyStd = Buy2.std().std()
    BuyMedian = Buy2.median().median()
    BuyMin = Buy2.min().min()
    BuyMax = Buy2.max().max()
    #BuyADF = adfuller(Buy2.mean())
    
    # Sell Side
    SellTotal = len(Sell2) * 17
    SellMean = Sell2.mean().mean()
    SellStd = Sell2.std().std()
    SellMedian = Sell2.median().median()
    SellMin = Sell2.min().min()
    SellMax = Sell2.max().max()
    #SellADF = adfuller(Sell2.mean())
    
    # OSI Side
    OSITotal = len(OSI2) * 17
    OSIMean = OSI2.mean().mean()
    OSIStd = OSI2.std().std()
    OSIMedian = OSI2.median().median()
    OSIMin = OSI2.min().min()
    OSIMax = OSI2.max().max()
    #OSIADF = adfuller(OSI2.mean())

    print("======== Buy ========")
    print("Number of Buy Orders are:",BuyTotal)
    print("The Mean of the Buy Orders is:",round(BuyMean, 4))
    print("The Std of the Buy Orders is:",round(BuyStd, 4))
    print("The Median of the Buy Orders is:",round(BuyMedian, 4))
    print("The Min of the Buy Orders is:",round(BuyMin, 4))
    print("The Max of the Buy Orders is:",round(BuyMax, 4))
    #print("The ADF is: %f" % BuyADF[1])
    print("")
    print("======== Sell ========")
    print("Number of Sell Orders are:",SellTotal)
    print("The Mean of the Sell Orders is:",round(SellMean, 4))
    print("The Std of the Sell Orders is:",round(SellStd, 4))
    print("The Median of the Sell Buy Orders is:",round(SellMedian, 4))
    print("The Min of the Sell Orders is:",round(SellMin, 4))
    print("The Max of the Sell Orders is:",round(SellMax, 4))
    #print("The ADF is: %f" % SellADF[1])
    print("")
    print("======== OSI ========")
    print("Number of OSI Orders are:",OSITotal)
    print("The Mean of the OSI Orders is:",round(OSIMean, 4))
    print("The Std of the OSI Orders is:",round(OSIStd, 4))
    print("The Median of the OSI Buy Orders is:",round(OSIMedian, 4))
    print("The Min of the OSI Orders is:",round(OSIMin, 4))
    print("The Max of the OSI Orders is:",round(OSIMax, 4)) 
    #print("The ADF is: %f" % OSIADF[1]) #[1] is so you grab only the p-value

In [102]:
# Function for the Pre-Negative Period part of Table 2

def Table2PreNegative():
    
    # Buy Side
    BuyTotal = len(Buy3) * 25
    BuyMean = Buy3.mean().mean()
    BuyStd = Buy3.std().std()
    BuyMedian = Buy3.median().median()
    BuyMin = Buy3.min().min()
    BuyMax = Buy3.max().max()
    #BuyADF = adfuller(Buy3.mean())
    
    # Sell Side
    SellTotal = len(Sell3) * 25
    SellMean = Sell3.mean().mean()
    SellStd = Sell3.std().std()
    SellMedian = Sell3.median().median()
    SellMin = Sell3.min().min()
    SellMax = Sell3.max().max()
    #SellADF = adfuller(Sell3.mean())
    
    # OSI Side
    OSITotal = len(OSI3) * 25
    OSIMean = OSI3.mean().mean()
    OSIStd = OSI3.std().std()
    OSIMedian = OSI3.median().median()
    OSIMin = OSI3.min().min()
    OSIMax = OSI3.max().max()
    #OSIADF = adfuller(OSI3.mean())
    
    print("======== Buy ========")
    print("Number of Buy Orders are:",round(BuyTotal, 4))
    print("The Mean of the Buy Orders is:",round(BuyMean, 4))
    print("The Std of the Buy Orders is:",round(BuyStd, 4))
    print("The Median of the Buy Orders is:",round(BuyMedian, 4))
    print("The Min of the Buy Orders is:",round(BuyMin, 4))
    print("The Max of the Buy Orders is:",round(BuyMax, 4))
    #print("The ADF is: %f" % BuyADF[1])
    print("")
    print("======== Sell ========")
    print("Number of Sell Orders are:",SellTotal)
    print("The Mean of the Sell Orders is:",round(SellMean, 4))
    print("The Std of the Sell Orders is:",round(SellStd, 4))
    print("The Median of the Sell Buy Orders is:",round(SellMedian, 4))
    print("The Min of the Sell Orders is:",round(SellMin, 4))
    print("The Max of the Sell Orders is:",round(SellMax, 4))
    #print("The ADF is: %f" % SellADF[1])
    print("")
    print("======== OSI ========")
    print("Number of OSI Orders are:",OSITotal)
    print("The Mean of the OSI Orders is:",round(OSIMean, 4))
    print("The Std of the OSI Orders is:",round(OSIStd, 4))
    print("The Median of the OSI Buy Orders is:",round(OSIMedian, 4))
    print("The Min of the OSI Orders is:",round(OSIMin, 4))
    print("The Max of the OSI Orders is:",round(OSIMax, 4)) 
    #print("The ADF is: %f" % OSIADF[1]) #[1] is so you grab only the p-value

In [103]:
print(color.BOLD + "Control Period" + color.END)
print("")
Table2ControlPeriod()
print("")
print(color.BOLD + "Pre-Positive Period" + color.END)
print("")
Table2PrePositive()
print("")
print(color.BOLD + "Pre-Negative Period" + color.END)
print("")
Table2PreNegative()

[1mControl Period[0m

Number of Buy Orders are: 420
The Mean of the Buy Orders is: 3.7451
The Std of the Buy Orders is: 1.5579
The Median of the Buy Orders is: 2.0015
The Min of the Buy Orders is: 0.423
The Max of the Buy Orders is: 25.3807

Number of Sell Orders are: 420
The Mean of the Sell Orders is: 1.2989
The Std of the Sell Orders is: 2.099
The Median of the Sell Buy Orders is: 0.7019
The Min of the Sell Orders is: 0.0
The Max of the Sell Orders is: 38.4524

Number of OSI Orders are: 420
The Mean of the OSI Orders is: 54.2603
The Std of the OSI Orders is: 6.8623
The Median of the OSI Buy Orders is: 56.9585
The Min of the OSI Orders is: 0.3346
The Max of the OSI Orders is: 100.0

[1mPre-Positive Period[0m

Number of Buy Orders are: 51
The Mean of the Buy Orders is: 4.3662
The Std of the Buy Orders is: 3.3757
The Median of the Buy Orders is: 2.2506
The Min of the Buy Orders is: 1.0
The Max of the Buy Orders is: 37.8326

Number of Sell Orders are: 51
The Mean of the Sell Orders 

## Table 3

<b>Quote that explains the Table:</b> This table reports the robust ordinary least square regressions of the daily buy-sell order size imbalance ratio (OSI) in Bitstamp, on the event indicators. Panel A examined all 42 selected events, while Panel B examined only 20 large events. A large event is an event which leads to an Bitcoin price appreciation (depreciation) of over 5% (less than −5%) on the positive (negative) event day. The samples consist of the three days in the pre-positive-event windows (Posevent Negevent = = 1, 0), the three days in the pre-negative-event windows (Posevent Negevent = = 0, 1), and the days in the control periods (Posevent Negevent = = 0, 0), namely [d d i i − − 15, 5] or [d d − + − 3, 5], i i 1 whichever is shorter, with [d d i i , 1] + being the ith event day. OSI is the daily buy-sell order size imbalance defined by OSI B S B S q t =× − + 100 ( )/( ) qt qt qt qt , , , ,, , where Bq, t (Sq.t) is the size of the q-quantile orders of all buyer- (seller-) initiated orders at day t, regarding to the order sizes (in BTC). q is set as 90% in this regression. In column (1) of both Panel A and B, the dependent variables are calculated daily in the three-day pre-event window and the in the control days. In column (2)-(4) of both Panel A and B, the dependent variables are calculated daily for a specific pre-event day ([−1,0], [− − 2, 1], or [− − 3, 2]), and each days in the control periods. The robust t-statistics are reported in the parentheses.

In [3]:
# Read CSV file and return 'Date' as Index
df = pd.read_csv('ReplicationPaper.csv', parse_dates=True, index_col=0) 
# df.head()

In [4]:
# RE-RUNNING ALL CODE WE HAVE CREATED PREVIOUSLY

# Create Variable that differentiate Buy and Sell
PriceDiff = df['Price'].diff(periods=-1)

# Create new column that says 'Buy' or 'Sell' based on PriceDiff
df['Order'] = np.where(PriceDiff >= 0, 'Buy', 'Sell')

# Create new column that shows the difference in price of the previous moment in time
df['Difference'] = PriceDiff

# Create Columns that Display the Volume of Sell and Buy Moments
df['Sell Volume'] = np.where(df['Difference'] < 0, df['Volume'], 0)
df['Buy Volume'] = np.where(df['Difference'] >= 0, df['Volume'], 0)

# Define Function to Select Quantiles
def percentile(n):
    def percentile_(x):
        return np.quantile(x, n)
    percentile_.__name__ = 'percentile_%s' % n
    return percentile_

# Create a new DataFrame Containing Daily Trade Information Based on the 90th Quantile
dfDaily = df.groupby(by='Date').agg({
                                    'Price': percentile(.90)
                                    'Volume': percentile(.90),
                                    'Sell Volume': percentile(.90),
                                    'Buy Volume': percentile(.90)})

# Create the Novel Indicator OSI and add New Column
dfDaily['OSI'] = 100 * ((dfDaily['Buy Volume'] - dfDaily['Sell Volume']) / (dfDaily['Buy Volume'] + dfDaily['Sell Volume']))

#Create List with all the Dates
Events = ['2011-12-19',
          '2012-03-01', 
          '2012-08-17',
          '2012-09-05',
          '2013-03-12',
          '2013-03-25',
          '2013-05-14',
          '2013-06-29',
          '2013-10-02',
          '2013-10-23',
          '2013-11-18',
          '2013-12-05',
          '2013-12-18',
          '2014-01-08',
          '2014-01-27',
          '2014-02-07',
          '2014-02-24',
          '2014-03-07',
          '2014-03-26',
          '2014-04-10',
          '2014-07-04',
          '2014-07-18',
          '2014-12-11',
          '2015-01-04',
          '2015-01-26',
          '2015-02-14',
          '2015-06-03',
          '2015-08-01',
          '2015-08-15',
          '2015-09-22',
          '2015-10-22',
          '2015-10-31',
          '2016-01-14',
          '2016-02-24',
          '2016-04-27',
          '2016-05-25',
          '2016-08-02',
          '2016-11-29',
          '2017-01-11',
          '2017-02-09',
          '2017-03-10',
          '2017-03-24']

# Create List with All Positive Events
PositiveEvents = ['2011-12-19',
                  '2013-03-25',
                  '2013-06-29',
                  '2013-11-18',
                  '2014-01-08',
                  '2014-07-18',
                  '2014-12-11',
                  '2015-01-26',
                  '2015-06-03',
                  '2015-09-22',
                  '2015-10-22',
                  '2015-10-31',
                  '2016-02-24',
                  '2016-04-27',
                  '2016-05-25',
                  '2016-11-29',
                  '2017-03-24']


# Create List with All Negative Events
NegativeEvents = ['2012-03-01', 
                  '2012-08-17',
                  '2012-09-05',
                  '2013-03-12',
                  '2013-05-14',
                  '2013-10-02',
                  '2013-10-23',
                  '2013-12-05',
                  '2013-12-18',
                  '2014-01-27',
                  '2014-02-07',
                  '2014-02-24',
                  '2014-03-07',
                  '2014-03-26',
                  '2014-04-10',
                  '2014-07-04',
                  '2015-01-04',
                  '2015-02-14',
                  '2015-08-01',
                  '2015-08-15',
                  '2016-01-14',
                  '2016-08-02',
                  '2017-01-11',
                  '2017-02-09',
                  '2017-03-10',]

# Adds a Column that gives False/True if Event is in Index
dfDaily['Events'] = dfDaily.index.isin(Events)
dfDaily['Positive Events'] = dfDaily.index.isin(PositiveEvents)
dfDaily['Negative Events'] = dfDaily.index.isin(NegativeEvents)

In [5]:
# Create new DataFrame for OLS Calculations
dfRegression = dfDaily

# Dropping stuff doesn't change a thing in the regression
dfRegression = dfRegression.drop(['Price','Volume','Sell Volume', 'Buy Volume', 'Events'], axis=1)
dfRegression.head()

Unnamed: 0_level_0,OSI,Positive Events,Negative Events
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2011-09-13,60.364637,False,False
2011-09-14,35.152317,False,False
2011-09-15,-28.321436,False,False
2011-09-16,69.096736,False,False
2011-09-17,100.0,False,False


In [8]:
def Table3Column1():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()

    print(Regression_Result)

Table3Column1()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        12:59:20                                         
No. Iterations:                    11                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           54.6482      0.550  

In [10]:
def Table3Column2():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
        
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()

    print(Regression_Result)

Table3Column2()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:00:31                                         
No. Iterations:                    11                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           54.7141      0.539  

In [35]:
def Table3Column3():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 2
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 2
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table3Column3()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Sat, 12 Jan 2019                                         
Time:                        15:55:07                                         
No. Iterations:                    11                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           54.6912      0.545  

In [11]:
def Table3Column4():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 3
    num_after = -3
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 3
    num_after = -3
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table3Column4()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:00:51                                         
No. Iterations:                    13                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           54.7546      0.540  

In [12]:
def Table3Column5():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table3Column5()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:00:58                                         
No. Iterations:                    11                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

In [13]:
def Table3Column6():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table3Column6()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:01:25                                         
No. Iterations:                    11                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

In [14]:
def Table3Column7():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 2
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 2
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table3Column7()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:01:31                                         
No. Iterations:                    11                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

In [40]:
def Table3Column8():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 3
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 3
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table3Column8()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Sat, 12 Jan 2019                                         
Time:                        15:55:07                                         
No. Iterations:                    11                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

## Table 4

### q = 80%

In [15]:
# Read CSV file and return 'Date' as Index
df = pd.read_csv('ReplicationPaper.csv', parse_dates=True, index_col=0) 
# df.head()

In [16]:
# RE-RUNNING ALL CODE WE HAVE CREATED PREVIOUSLY

# Create Variable that differentiate Buy and Sell
PriceDiff = df['Price'].diff(periods=-1)

# Create new column that says 'Buy' or 'Sell' based on PriceDiff
df['Order'] = np.where(PriceDiff >= 0, 'Buy', 'Sell')

# Create new column that shows the difference in price of the previous moment in time
df['Difference'] = PriceDiff

# Create Columns that Display the Volume of Sell and Buy Moments
df['Sell Volume'] = np.where(df['Difference'] < 0, df['Volume'], 0)
df['Buy Volume'] = np.where(df['Difference'] >= 0, df['Volume'], 0)

# Define Function to Select Quantiles
def percentile(n):
    def percentile_(x):
        return np.quantile(x, n)
    percentile_.__name__ = 'percentile_%s' % n
    return percentile_

# Create a new DataFrame Containing Daily Trade Information Based on the 90th Quantile
dfDaily = df.groupby(by='Date').agg({
                                    'Price':percentile(.80),
                                    'Volume': percentile(.80),
                                    'Sell Volume': percentile(.80),
                                    'Buy Volume': percentile(.80)})

# Create the Novel Indicator OSI and add New Column
dfDaily['OSI'] = 100 * ((dfDaily['Buy Volume'] - dfDaily['Sell Volume']) / (dfDaily['Buy Volume'] + dfDaily['Sell Volume']))

#Create List with all the Dates
Events = ['2011-12-19',
          '2012-03-01', 
          '2012-08-17',
          '2012-09-05',
          '2013-03-12',
          '2013-03-25',
          '2013-05-14',
          '2013-06-29',
          '2013-10-02',
          '2013-10-23',
          '2013-11-18',
          '2013-12-05',
          '2013-12-18',
          '2014-01-08',
          '2014-01-27',
          '2014-02-07',
          '2014-02-24',
          '2014-03-07',
          '2014-03-26',
          '2014-04-10',
          '2014-07-04',
          '2014-07-18',
          '2014-12-11',
          '2015-01-04',
          '2015-01-26',
          '2015-02-14',
          '2015-06-03',
          '2015-08-01',
          '2015-08-15',
          '2015-09-22',
          '2015-10-22',
          '2015-10-31',
          '2016-01-14',
          '2016-02-24',
          '2016-04-27',
          '2016-05-25',
          '2016-08-02',
          '2016-11-29',
          '2017-01-11',
          '2017-02-09',
          '2017-03-10',
          '2017-03-24']

# Create List with All Positive Events
PositiveEvents = ['2011-12-19',
                  '2013-03-25',
                  '2013-06-29',
                  '2013-11-18',
                  '2014-01-08',
                  '2014-07-18',
                  '2014-12-11',
                  '2015-01-26',
                  '2015-06-03',
                  '2015-09-22',
                  '2015-10-22',
                  '2015-10-31',
                  '2016-02-24',
                  '2016-04-27',
                  '2016-05-25',
                  '2016-11-29',
                  '2017-03-24']

# Create List with All Negative Events
NegativeEvents = ['2012-03-01', 
                  '2012-08-17',
                  '2012-09-05',
                  '2013-03-12',
                  '2013-05-14',
                  '2013-10-02',
                  '2013-10-23',
                  '2013-12-05',
                  '2013-12-18',
                  '2014-01-27',
                  '2014-02-07',
                  '2014-02-24',
                  '2014-03-07',
                  '2014-03-26',
                  '2014-04-10',
                  '2014-07-04',
                  '2015-01-04',
                  '2015-02-14',
                  '2015-08-01',
                  '2015-08-15',
                  '2016-01-14',
                  '2016-08-02',
                  '2017-01-11',
                  '2017-02-09',
                  '2017-03-10',]

# Adds a Column that gives False/True if Event is in Index
dfDaily['Events'] = dfDaily.index.isin(Events)
dfDaily['Positive Events'] = dfDaily.index.isin(PositiveEvents)
dfDaily['Negative Events'] = dfDaily.index.isin(NegativeEvents)

In [17]:
# Create new DataFrame for Robustness Check
dfRegression = dfDaily

# Dropping stuff doesn't change a thing in the regression
dfRegression = dfRegression.drop(['Price','Volume','Sell Volume', 'Buy Volume', 'Events'], axis=1)
dfRegression.head()

Unnamed: 0_level_0,OSI,Positive Events,Negative Events
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2011-09-13,50.852425,False,False
2011-09-14,8.559292,False,False
2011-09-15,-30.059586,False,False
2011-09-16,75.161694,False,False
2011-09-17,100.0,False,False


In [18]:
def Table4PanelAColumn1():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()

    print(Regression_Result)

Table4PanelAColumn1()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:03:39                                         
No. Iterations:                    10                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           78.9309      0.428  

In [19]:
def Table4PanelAColumn2():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
        
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()

    print(Regression_Result)

Table4PanelAColumn2()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:03:39                                         
No. Iterations:                    12                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           78.9450      0.419  

In [20]:
def Table4PanelAColumn3():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 2
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 2
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelAColumn3()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:03:39                                         
No. Iterations:                    12                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           78.9619      0.419  

In [21]:
def Table4PanelAColumn4():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 3
    num_after = -3
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 3
    num_after = -3
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelAColumn4()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:03:39                                         
No. Iterations:                    11                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           78.8823      0.419  

In [22]:
def Table4PanelBColumn1():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelBColumn1()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:03:39                                         
No. Iterations:                    12                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

In [23]:
def Table4PanelBColumn2():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelBColumn2()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:03:39                                         
No. Iterations:                    17                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

In [24]:
def Table4PanelBColumn3():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 2
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 2
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelBColumn3()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:03:40                                         
No. Iterations:                    12                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

In [25]:
def Table4PanelBColumn4():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 3
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 3
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelBColumn4()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:03:40                                         
No. Iterations:                    13                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

### q = 85%

In [26]:
# Read CSV file and return 'Date' as Index
df = pd.read_csv('ReplicationPaper.csv', parse_dates=True, index_col=0) 
# df.head()

In [27]:
# RE-RUNNING ALL CODE WE HAVE CREATED PREVIOUSLY

# Create Variable that differentiate Buy and Sell
PriceDiff = df['Price'].diff(periods=-1)

# Create new column that says 'Buy' or 'Sell' based on PriceDiff
df['Order'] = np.where(PriceDiff >= 0, 'Buy', 'Sell')

# Create new column that shows the difference in price of the previous moment in time
df['Difference'] = PriceDiff

# Create Columns that Display the Volume of Sell and Buy Moments
df['Sell Volume'] = np.where(df['Difference'] < 0, df['Volume'], 0)
df['Buy Volume'] = np.where(df['Difference'] >= 0, df['Volume'], 0)

# Define Function to Select Quantiles
def percentile(n):
    def percentile_(x):
        return np.quantile(x, n)
    percentile_.__name__ = 'percentile_%s' % n
    return percentile_

# Create a new DataFrame Containing Daily Trade Information Based on the 90th Quantile
dfDaily = df.groupby(by='Date').agg({
                                    'Price':percentile(.85),
                                    'Volume': percentile(.85),
                                    'Sell Volume': percentile(.85),
                                    'Buy Volume': percentile(.85)})

# Create the Novel Indicator OSI and add New Column
dfDaily['OSI'] = 100 * ((dfDaily['Buy Volume'] - dfDaily['Sell Volume']) / (dfDaily['Buy Volume'] + dfDaily['Sell Volume']))

#Create List with all the Dates
Events = ['2011-12-19',
          '2012-03-01', 
          '2012-08-17',
          '2012-09-05',
          '2013-03-12',
          '2013-03-25',
          '2013-05-14',
          '2013-06-29',
          '2013-10-02',
          '2013-10-23',
          '2013-11-18',
          '2013-12-05',
          '2013-12-18',
          '2014-01-08',
          '2014-01-27',
          '2014-02-07',
          '2014-02-24',
          '2014-03-07',
          '2014-03-26',
          '2014-04-10',
          '2014-07-04',
          '2014-07-18',
          '2014-12-11',
          '2015-01-04',
          '2015-01-26',
          '2015-02-14',
          '2015-06-03',
          '2015-08-01',
          '2015-08-15',
          '2015-09-22',
          '2015-10-22',
          '2015-10-31',
          '2016-01-14',
          '2016-02-24',
          '2016-04-27',
          '2016-05-25',
          '2016-08-02',
          '2016-11-29',
          '2017-01-11',
          '2017-02-09',
          '2017-03-10',
          '2017-03-24']

# Create List with All Positive Events
PositiveEvents = ['2011-12-19',
                  '2013-03-25',
                  '2013-06-29',
                  '2013-11-18',
                  '2014-01-08',
                  '2014-07-18',
                  '2014-12-11',
                  '2015-01-26',
                  '2015-06-03',
                  '2015-09-22',
                  '2015-10-22',
                  '2015-10-31',
                  '2016-02-24',
                  '2016-04-27',
                  '2016-05-25',
                  '2016-11-29',
                  '2017-03-24']

# Create List with All Negative Events
NegativeEvents = ['2012-03-01', 
                  '2012-08-17',
                  '2012-09-05',
                  '2013-03-12',
                  '2013-05-14',
                  '2013-10-02',
                  '2013-10-23',
                  '2013-12-05',
                  '2013-12-18',
                  '2014-01-27',
                  '2014-02-07',
                  '2014-02-24',
                  '2014-03-07',
                  '2014-03-26',
                  '2014-04-10',
                  '2014-07-04',
                  '2015-01-04',
                  '2015-02-14',
                  '2015-08-01',
                  '2015-08-15',
                  '2016-01-14',
                  '2016-08-02',
                  '2017-01-11',
                  '2017-02-09',
                  '2017-03-10',]

# Adds a Column that gives False/True if Event is in Index
dfDaily['Events'] = dfDaily.index.isin(Events)
dfDaily['Positive Events'] = dfDaily.index.isin(PositiveEvents)
dfDaily['Negative Events'] = dfDaily.index.isin(NegativeEvents)

In [28]:
# Create new DataFrame for Robustness Check
dfRegression = dfDaily

# Dropping stuff doesn't change a thing in the regression
dfRegression = dfRegression.drop(['Price','Volume','Sell Volume', 'Buy Volume', 'Events'], axis=1)
dfRegression.head()

Unnamed: 0_level_0,OSI,Positive Events,Negative Events
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2011-09-13,54.614601,False,False
2011-09-14,19.741592,False,False
2011-09-15,-29.163581,False,False
2011-09-16,71.704573,False,False
2011-09-17,100.0,False,False


In [29]:
def Table4PanelCColumn1():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()

    print(Regression_Result)

Table4PanelCColumn1()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:04:48                                         
No. Iterations:                    12                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           66.9951      0.569  

In [30]:
def Table4PanelCColumn2():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
        
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()

    print(Regression_Result)

Table4PanelCColumn2()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:04:48                                         
No. Iterations:                    14                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           67.0647      0.556  

In [31]:
def Table4PanelCColumn3():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 2
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 2
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelCColumn3()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:04:48                                         
No. Iterations:                    12                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           67.0682      0.558  

In [32]:
def Table4PanelCColumn4():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 3
    num_after = -3
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 3
    num_after = -3
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelCColumn4()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:04:48                                         
No. Iterations:                    12                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           67.0307      0.558  

In [33]:
def Table4PanelDColumn1():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelDColumn1()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:04:48                                         
No. Iterations:                    14                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

In [34]:
def Table4PanelDColumn2():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelDColumn2()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:04:48                                         
No. Iterations:                    12                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

In [35]:
def Table4PanelDColumn3():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 2
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 2
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelDColumn3()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:04:48                                         
No. Iterations:                    14                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

In [36]:
def Table4PanelDColumn4():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 3
    num_after = -3
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 3
    num_after = -3
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelDColumn4()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:04:49                                         
No. Iterations:                    12                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

### q = 95%

In [37]:
# Read CSV file and return 'Date' as Index
df = pd.read_csv('ReplicationPaper.csv', parse_dates=True, index_col=0) 
# df.head()

In [38]:
# RE-RUNNING ALL CODE WE HAVE CREATED PREVIOUSLY

# Create Variable that differentiate Buy and Sell
PriceDiff = df['Price'].diff(periods=-1)

# Create new column that says 'Buy' or 'Sell' based on PriceDiff
df['Order'] = np.where(PriceDiff >= 0, 'Buy', 'Sell')

# Create new column that shows the difference in price of the previous moment in time
df['Difference'] = PriceDiff

# Create Columns that Display the Volume of Sell and Buy Moments
df['Sell Volume'] = np.where(df['Difference'] < 0, df['Volume'], 0)
df['Buy Volume'] = np.where(df['Difference'] >= 0, df['Volume'], 0)

# Define Function to Select Quantiles
def percentile(n):
    def percentile_(x):
        return np.quantile(x, n)
    percentile_.__name__ = 'percentile_%s' % n
    return percentile_

# Create a new DataFrame Containing Daily Trade Information Based on the 90th Quantile
dfDaily = df.groupby(by='Date').agg({
                                    'Price': percentile(.95),
                                    'Volume': percentile(.95),
                                    'Sell Volume': percentile(.95),
                                    'Buy Volume': percentile(.95)})

# Create the Novel Indicator OSI and add New Column
dfDaily['OSI'] = 100 * ((dfDaily['Buy Volume'] - dfDaily['Sell Volume']) / (dfDaily['Buy Volume'] + dfDaily['Sell Volume']))

#Create List with all the Dates
Events = ['2011-12-19',
          '2012-03-01', 
          '2012-08-17',
          '2012-09-05',
          '2013-03-12',
          '2013-03-25',
          '2013-05-14',
          '2013-06-29',
          '2013-10-02',
          '2013-10-23',
          '2013-11-18',
          '2013-12-05',
          '2013-12-18',
          '2014-01-08',
          '2014-01-27',
          '2014-02-07',
          '2014-02-24',
          '2014-03-07',
          '2014-03-26',
          '2014-04-10',
          '2014-07-04',
          '2014-07-18',
          '2014-12-11',
          '2015-01-04',
          '2015-01-26',
          '2015-02-14',
          '2015-06-03',
          '2015-08-01',
          '2015-08-15',
          '2015-09-22',
          '2015-10-22',
          '2015-10-31',
          '2016-01-14',
          '2016-02-24',
          '2016-04-27',
          '2016-05-25',
          '2016-08-02',
          '2016-11-29',
          '2017-01-11',
          '2017-02-09',
          '2017-03-10',
          '2017-03-24']

# Create List with All Positive Events
PositiveEvents = ['2011-12-19',
                  '2013-03-25',
                  '2013-06-29',
                  '2013-11-18',
                  '2014-01-08',
                  '2014-07-18',
                  '2014-12-11',
                  '2015-01-26',
                  '2015-06-03',
                  '2015-09-22',
                  '2015-10-22',
                  '2015-10-31',
                  '2016-02-24',
                  '2016-04-27',
                  '2016-05-25',
                  '2016-11-29',
                  '2017-03-24']

# Create List with All Negative Events
NegativeEvents = ['2012-03-01', 
                  '2012-08-17',
                  '2012-09-05',
                  '2013-03-12',
                  '2013-05-14',
                  '2013-10-02',
                  '2013-10-23',
                  '2013-12-05',
                  '2013-12-18',
                  '2014-01-27',
                  '2014-02-07',
                  '2014-02-24',
                  '2014-03-07',
                  '2014-03-26',
                  '2014-04-10',
                  '2014-07-04',
                  '2015-01-04',
                  '2015-02-14',
                  '2015-08-01',
                  '2015-08-15',
                  '2016-01-14',
                  '2016-08-02',
                  '2017-01-11',
                  '2017-02-09',
                  '2017-03-10',]

# Adds a Column that gives False/True if Event is in Index
dfDaily['Events'] = dfDaily.index.isin(Events)
dfDaily['Positive Events'] = dfDaily.index.isin(PositiveEvents)
dfDaily['Negative Events'] = dfDaily.index.isin(NegativeEvents)

In [39]:
# Create new DataFrame for Robustness Check
dfRegression = dfDaily

# Dropping stuff doesn't change a thing in the regression
dfRegression = dfRegression.drop(['Price','Volume','Sell Volume', 'Buy Volume', 'Events'], axis=1)
dfRegression.head()

Unnamed: 0_level_0,OSI,Positive Events,Negative Events
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2011-09-13,65.876124,False,False
2011-09-14,40.259837,False,False
2011-09-15,-27.528436,False,False
2011-09-16,67.059463,False,False
2011-09-17,100.0,False,False


In [40]:
def Table4PanelEColumn1():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()

    print(Regression_Result)

Table4PanelEColumn1()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:05:57                                         
No. Iterations:                    11                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           45.1586      0.487  

In [41]:
def Table4PanelEColumn2():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
        
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()

    print(Regression_Result)

Table4PanelEColumn2()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:05:57                                         
No. Iterations:                    11                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           45.1219      0.477  

In [42]:
def Table4PanelEColumn3():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 2
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 2
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelEColumn3()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:05:57                                         
No. Iterations:                    11                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           45.1303      0.477  

In [43]:
def Table4PanelEColumn4():
    # Create new DataFrame for the Positive Events
    dfPositive = dfDaily
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfPositive[dfPositive['Positive Events']].index.values
    num_before = 3
    num_after = -3
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Positive'] = dfDaily.index.isin(dfPositive['Date'])

    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfNegative[dfNegative['Negative Events']].index.values
    num_before = 3
    num_after = -3
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['Negative'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ Positive + Negative", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelEColumn4()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:05:57                                         
No. Iterations:                    11                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept           45.0734      0.476  

In [44]:
def Table4PanelFColumn1():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Pre-Event Window [-3, 0]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 3
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelFColumn1()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:05:57                                         
No. Iterations:                    11                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

In [45]:
def Table4PanelFColumn2():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-1, 0]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 1
    num_after = -1
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelFColumn2()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:05:57                                         
No. Iterations:                    14                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

In [46]:
def Table4PanelFColumn3():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 2
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-2, -1]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 2
    num_after = -2
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelFColumn3()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:05:57                                         
No. Iterations:                    11                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

In [47]:
def Table4PanelFColumn4():
    # Create List of Large Positive Events
    LargePositiveEvents = ['2011-12-19',
                           '2012-08-17',
                           '2013-03-25',
                           '2013-11-18',
                           '2015-01-26',
                           '2015-02-15']
    
    # Create new DataFrame for the Negative Events
    dfPositive = dfDaily
    dfPositive['LargePositiveEvents'] = dfDaily.index.isin(LargePositiveEvents)  
    dfPositive = dfPositive.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfPositive[dfPositive['LargePositiveEvents']].index.values
    num_before = 3
    num_after = -3
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfPositive = dfPositive.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargePositiveEvents'] = dfDaily.index.isin(dfPositive['Date'])
    
    # Create List of Large Negative Events
    LargeNegativeEvents = ['2013-03-12',
                           '2013-10-02',
                           '2013-12-05',
                           '2013-12-18',
                           '2014-01-08',
                           '2014-02-07',
                           '2014-02-24',
                           '2014-04-10',
                           '2015-01-04',
                           '2016-08-02',
                           '2017-01-11',
                           '2017-03-24']
    
    # Create new DataFrame for the Negative Events
    dfNegative = dfDaily
    dfNegative['LargeNegativeEvents'] = dfDaily.index.isin(LargeNegativeEvents)  
    dfNegative = dfNegative.reset_index()

    # Create the Zoom in Pre-Event Window [-3, -2]
    ranges = dfNegative[dfNegative['LargeNegativeEvents']].index.values
    num_before = 3
    num_after = -3
    indexes = [range(x-num_before, x+num_after+1) for x in ranges]
    x = [list(rang) for rang in indexes]
    i = np.array(x).reshape(-1)

    # Locate All Rows that Fall in Above's Window
    dfNegative = dfNegative.iloc[i]

    # Create Column that Gives True if in Estimation Window
    dfRegression['LargeNegativeEvents'] = dfDaily.index.isin(dfNegative['Date'])
    
    model = smf.rlm(formula = "OSI ~ LargePositiveEvents + LargeNegativeEvents", data=dfRegression)
    fit = model.fit()
    Regression_Result = fit.summary()
    
    print(Regression_Result)
    
Table4PanelFColumn4()

                    Robust linear Model Regression Results                    
Dep. Variable:                    OSI   No. Observations:                 2114
Model:                            RLM   Df Residuals:                     2111
Method:                          IRLS   Df Model:                            2
Norm:                          HuberT                                         
Scale Est.:                       mad                                         
Cov Type:                          H1                                         
Date:                Mon, 14 Jan 2019                                         
Time:                        13:05:57                                         
No. Iterations:                    11                                         
                                  coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         