In [1]:
############### Import packages
import os, numpy as np, pandas as pd, time, glob, re, math, statsmodels.api as sm, patsy as ps
from tqdm import tqdm
from time import process_time
from datetime import datetime
from datetime import date
from openpyxl import load_workbook
from patsy import dmatrices
from scipy.stats import skew

##########################################################
##################### parameter ##########################
##########################################################
obj_type = '10-Q'
data_type_text = 'text_data'
data_type_id = 'id_data'

############### Set working directory to parent directory
if os.getcwd() != 'F:\\github\\narrative_conservatism\\code':
    os.chdir('F:\\github\\narrative_conservatism\\code')
    
############### Set pandas column printing constraint
pd.set_option('display.max_columns', None)

In [2]:
# ##################### Append a variable to the raw dataset ###############################
# comp_xrdq_cols = ['gvkey', 'datadate', 'tic']
# comp_xrdq = pd.read_csv(r'C:\Users\fengzhi\Desktop\ZFZGPK4WUK5PMUOD.csv', usecols = comp_xrdq_cols)
# # comp_xrdq = comp_vol[comp_vol.duplicated(subset=['PERMNO', 'PERMCO', 'date']) == False]
# # comp_cols = ['PERMNO','date', 'PERMCO', 'CUSIP', 'RET', 'vwretd']
# comp = pd.read_csv('..\\filings\\compustat.csv')
# comp = pd.merge(comp, comp_xrdq, on = ['gvkey', 'datadate'], how='left', validate = '1:1')
# comp = comp.drop(columns=['saley'])
# comp.to_csv('..\\filings\\compustat.csv', index = 0)
# comp

In [3]:
########### Read CRSP raw data files
crsp_cols = ['date', 'PERMCO', 'CUSIP', 'RET', 'vwretd', 'VOL']
crsp = pd.read_csv('..\\filings\\crsp.csv', usecols = crsp_cols)
crsp.columns = ['date', 'permco', 'cusip', 'ret', 'vwretd', 'vol']

### Delete the two 'day digits' of crsp filings' data date and create the date_key
crsp['date_key'] = crsp['date'].astype(str).str[:-2]

### Drop CRSP raw rows that contains non-numeric returns ('B' and 'C'), fill NA with 0
print('number of monthly data in CRSP: ' + str(len(crsp.index)))
crsp = crsp[(crsp['ret'] != 'B') & (crsp['ret'] != 'C')]
crsp['ret'] = crsp['ret'].fillna(0)
print('number of monthly data in CRSP that contains only numeric returns: ' + str(len(crsp.index)))

### Mutate adjusted monthly returns and delete ret and vwretd
crsp = crsp.assign(adj_ret_m = crsp['ret'].astype(float) - crsp['vwretd'].astype(float))
crsp = crsp.drop(columns=['ret', 'vwretd'])

### Mutate age: number of days elapsed since the first entry date of the firm into CRSP monthly database
crsp['date'] = crsp['date'].apply(lambda x: pd.to_datetime(str(x), format='%Y%m%d'))
birth = crsp.groupby(['permco'])['date'].min()
crsp = pd.merge(crsp, birth, on = ['permco'], how='left', validate = 'm:1')
crsp = crsp.rename(columns={'date_x': 'date', 'date_y': 'birth'})
crsp['age'] = (crsp['date'] - crsp['birth']).dt.days

crsp

number of monthly data in CRSP: 4606907
number of monthly data in CRSP that contains only numeric returns: 4511394


Unnamed: 0,date,permco,cusip,vol,date_key,adj_ret_m,birth,age
0,1985-12-31,7952,68391610,,198512,-0.043061,1985-12-31,0
1,1986-02-28,7952,68391610,828.0,198602,-0.329643,1985-12-31,59
2,1986-03-31,7952,68391610,1078.0,198603,0.311500,1985-12-31,90
3,1986-04-30,7952,68391610,957.0,198604,-0.090689,1985-12-31,120
4,1986-05-30,7952,68391610,1074.0,198605,-0.273500,1985-12-31,150
...,...,...,...,...,...,...,...,...
4511389,2019-08-30,53453,88160R10,1340932.0,201908,-0.045951,2010-05-28,3381
4511390,2019-09-30,53453,88160R10,1365915.0,201909,0.051615,2010-05-28,3412
4511391,2019-10-31,53453,88160R10,2351125.0,201910,0.288162,2010-05-28,3443
4511392,2019-11-29,53453,88160R10,1578851.0,201911,0.012724,2010-05-28,3472


In [4]:
########################################################################################
############ Merge COMPUSTAT quarterly data with CRSP monthly data #####################
########################################################################################

########### Read compustat raw data files
comp_cols = ['gvkey', 'datadate', 'fyearq', 'fqtr', 'fyr', 'cusip', 'conm', 'actq', \
             'atq', 'aqcy', 'ceqq', 'cheq', 'cshoq', 'dlcq', 'dlttq', 'dpq', 'ibq', 'intanq', 'lctq', 'ppegtq', 'rectq', 'sstky', 'saleq', 'xrdq', \
             'revtq', 'txditcq', 'xsgaq', 'iby', 'oancfy', 'xidocy', 'exchg', 'cik', 'costat', 'prccq', 'addzip', 'incorp', 'sic', 'ipodate']
comp = pd.read_csv('..\\filings\\compustat.csv', usecols = comp_cols)

### Reorder compustat column
# 1st line: merge keys
# 2nd line: extra id info
# 3rd line: financial data
# 4th line: financial data (CONT.)
comp = comp[['cusip', 'cik', 'datadate', \
'gvkey', 'conm', 'sic', 'incorp', 'addzip', 'fyearq', 'fqtr', 'fyr', 'ipodate', 'costat', 'exchg', \
'actq', 'atq', 'aqcy', 'ceqq', 'cheq', 'cshoq', 'dlcq', 'dlttq', 'dpq', 'ibq', 'intanq', 'lctq', 'revtq', 'sstky', 'txditcq', 'xsgaq', 'oancfy', 'prccq', \
'iby', 'xidocy', 'rectq', 'ppegtq', 'saleq', 'xrdq']]

print('number of quarterly filings in Compustat: ' + str(comp.shape[0]))

### Creat lagged variables in compustat raw data
comp['lag_prccq'] = comp.groupby(['gvkey'])['prccq'].shift(1)
comp['lag_cshoq'] = comp.groupby(['gvkey'])['cshoq'].shift(1)
comp['lag_ceqq'] = comp.groupby(['gvkey'])['ceqq'].shift(1)
comp['lag_dlcq'] = comp.groupby(['gvkey'])['dlcq'].shift(1)
comp['lag_dlttq'] = comp.groupby(['gvkey'])['dlttq'].shift(1)
comp['lag_atq'] = comp.groupby(['gvkey'])['atq'].shift(1)
comp['lag_ibq'] = comp.groupby(['gvkey'])['ibq'].shift(1)
comp['lag_revtq'] = comp.groupby(['gvkey'])['revtq'].shift(1)
comp['lag_rectq'] = comp.groupby(['gvkey'])['rectq'].shift(1)
comp['lag_oancfy'] = comp.groupby(['gvkey'])['oancfy'].shift(1)
comp['lag_xidocy'] = comp.groupby(['gvkey'])['xidocy'].shift(1)
comp['lag_saleq'] = comp.groupby(['gvkey'])['saleq'].shift(1)
comp['lag2_saleq'] = comp.groupby(['gvkey'])['saleq'].shift(2)

####################### Create ABTONE variables for Huang et al. 2014
### EARN: earnings before extraordinary items (Compustat data item ibq) scaled by lagged total assets (Compustat data item atq)
comp['EARN'] = comp['ibq']/comp['lag_atq']
### LOSS, an indicator variable set to 1 when EARN is negative, and is 0 otherwise
comp['LOSS'] = 0 
comp.loc[comp['EARN'] < 0, 'LOSS'] = 1
### DEARN: change in earnings before extraordinary item scaled by beginning total assets (Compustat data item atq)
comp['DEARN'] = (comp['ibq'] - comp['lag_ibq'])/comp['lag_atq']
### STD_EARN: standard deviation of EARN calculated over the last five quarters
comp['STD_EARN'] = comp['EARN'].rolling(5).std()
### CFO: quarterly operating cash flows (Compustat data item oancfy) scaled by beginning total assets (Compustat data item atq);
comp['CFO'] = (comp['oancfy'] - comp['lag_oancfy'])/comp['lag_atq']
# ### TACC: total accruals, defined as quarterly income before extraordinary items (Compustat data item ibq) minus \
# ### the difference between quarterly operating cash flows (Compustat data item oancfy) and \
# ### quarterly extraordinary items and discontinued operations included in CFO (Compustat data item xidocy);
# comp['TACC'] = comp['ibq'] - ((comp['oancfy']-comp['lag_oancfy']) - (comp['xidocy'] - comp['lag_xidocy']))
# ### TA: total assets, scaled by lagged total assets (Compustat data item atq);
# comp['TA'] = comp['atq']/comp['lag_atq']
# comp['LAG_TA'] = comp.groupby(['gvkey'])['TA'].shift(1)
# comp['LAG_TA_REV'] = 1/comp['LAG_TA']
# ### DSALES: quarterly change in revenue (Compustat data item revtq), scaled by lagged total assets (Compustat data item atq);
# comp['DSALES'] = (comp['revtq'] - comp['lag_revtq'])/comp['lag_atq']
# ### DAR: quarterly change in accounts receivable (Compustat data item rectq), scaled by lagged total assets (Compustat data item atq);
# comp['DAR'] = (comp['rectq'] - comp['lag_rectq'])/comp['lag_atq']
# ### DSAR = DSALES - DAR
# comp['DSAR'] = comp['DSALES'] - comp['DAR']
### PPE: gross property, plant, and equipment (Compustat data item ppegtq), scaled by lagged total assets (Compustat data item atq);
comp['PPE'] = comp['ppegtq']/comp['lag_atq']

################################## Create variables for measuring litigation risk (Kim and Skinner 2012)
### SG: Year t-1 sales (Compustat data item saleq) less year t-2 sales scaled by beginning of year t-1 total assets (Compustat data item atq)
comp['LAG_SG'] = (comp['lag_saleq'] - comp['lag2_saleq'])/comp['lag_atq']
comp['SG'] = (comp['saleq'] - comp['lag_saleq'])/comp['atq']

### leap1_EARN, leap2_EARN, leap3_EARN and leap1_CFO, leap2_CFO, leap3_CFO for Huang et al. 2014 TABLE 4 replication
# comp['leap1_EARN'] = comp.groupby(['gvkey'])['EARN'].shift(-1)
# comp['leap2_EARN'] = comp.groupby(['gvkey'])['EARN'].shift(-2)
# comp['leap3_EARN'] = comp.groupby(['gvkey'])['EARN'].shift(-3)

comp['lag1_CFO'] = comp.groupby(['gvkey'])['CFO'].shift(1)
comp['lag2_CFO'] = comp.groupby(['gvkey'])['CFO'].shift(2)
comp['lag3_CFO'] = comp.groupby(['gvkey'])['CFO'].shift(3)
# comp['leap1_CFO'] = comp.groupby(['gvkey'])['CFO'].shift(-1)
# comp['leap2_CFO'] = comp.groupby(['gvkey'])['CFO'].shift(-2)
# comp['leap3_CFO'] = comp.groupby(['gvkey'])['CFO'].shift(-3)

### Delete the 9th digit of compustat filings' cusip, and filter filings that have 8-digits cusip after deletion 
comp['cusip'] = comp['cusip'].astype(str).str[:-1]
comp = comp.loc[comp['cusip'].str.len() == 8]
print('number of quarterly filings in Compustat after deleting non-8-digits cusips: ' + str(comp.shape[0]))

### Delete the two 'day digits' of compustat filings' data date and create the date_key
comp['date_key'] = comp['datadate'].astype(str).str[:-2]
### Create first 2-digits SIC:SIC2, first 3-digits SIC:SIC3
comp['SIC2'] = comp['sic'].astype(str).str[:-2]

comp.shape

number of quarterly filings in Compustat: 1189346
number of quarterly filings in Compustat after deleting non-8-digits cusips: 1188941


(1188941, 64)

In [5]:
##################### Left merge CRSP and Compstat, key not unique in Compustat
crsp_comp = pd.merge(crsp, comp, on = ['cusip', 'date_key'], how='left', validate = '1:m')
crsp_comp

Unnamed: 0,date,permco,cusip,vol,date_key,adj_ret_m,birth,age,cik,datadate,gvkey,conm,sic,incorp,addzip,fyearq,fqtr,fyr,ipodate,costat,exchg,actq,atq,aqcy,ceqq,cheq,cshoq,dlcq,dlttq,dpq,ibq,intanq,lctq,revtq,sstky,txditcq,xsgaq,oancfy,prccq,iby,xidocy,rectq,ppegtq,saleq,xrdq,lag_prccq,lag_cshoq,lag_ceqq,lag_dlcq,lag_dlttq,lag_atq,lag_ibq,lag_revtq,lag_rectq,lag_oancfy,lag_xidocy,lag_saleq,lag2_saleq,EARN,LOSS,DEARN,STD_EARN,CFO,PPE,LAG_SG,SG,lag1_CFO,lag2_CFO,lag3_CFO,SIC2
0,1985-12-31,7952,68391610,,198512,-0.043061,1985-12-31,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1986-02-28,7952,68391610,828.0,198602,-0.329643,1985-12-31,59,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1986-03-31,7952,68391610,1078.0,198603,0.311500,1985-12-31,90,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1986-04-30,7952,68391610,957.0,198604,-0.090689,1985-12-31,120,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1986-05-30,7952,68391610,1074.0,198605,-0.273500,1985-12-31,150,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4534180,2019-08-30,53453,88160R10,1340932.0,201908,-0.045951,2010-05-28,3381,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4534181,2019-09-30,53453,88160R10,1365915.0,201909,0.051615,2010-05-28,3412,1318605.0,20190930.0,184996.0,TESLA INC,3711.0,DE,94304,2019.0,3.0,12.0,20100629.0,A,14.0,10940.0,32795.0,45.0,6040.0,5571.0,180.0,2253.0,12383.0,530.851,143.469,537.0,10146.0,6302.86,1015.0,0.0,929.738,980.0,240.87,-967.0,0.0,1128.0,24453.0,6302.86,333.954,223.46,179.118,5715.393,2011.177,12309.747,31872.597,-408.334,6349.676,1147.1,224.0,0.0,6349.676,4541.464,0.004501,0.0,0.017313,0.014702,0.023719,0.767211,0.056732,-0.001428,0.029870,-0.092046,0.042189,37
4534182,2019-10-31,53453,88160R10,2351125.0,201910,0.288162,2010-05-28,3443,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4534183,2019-11-29,53453,88160R10,1578851.0,201911,0.012724,2010-05-28,3472,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [6]:
########## Aggregate the monthly returns in CRSP to quarterly returns by summing up 3-months returns in a quarter
########## STD_RET: Standard deviation of RET over all months in the quarter;
adj_ret = list()
std_ret = list()
turnover = list()
skew_ret = list()
gvkey = crsp_comp['gvkey'].values.tolist()
adj_ret_m = crsp_comp['adj_ret_m'].values.tolist()
vol_m = crsp_comp['vol'].values.tolist()

for index, value in enumerate(gvkey):
    ### requires a quarter-end flag to be non-zero, and the quarter-end flag of previous two months to be zero (avoid time-slot mismatch)
    if math.isnan(value) == False and math.isnan(gvkey[index-1]) == True and math.isnan(gvkey[index-2]) == True:
        adj_ret_i = adj_ret_m[index] + adj_ret_m[index-1] + adj_ret_m[index-2]
        turnover_i = vol_m[index] + vol_m[index-1] + vol_m[index-2]
        std_ret_i = np.std([adj_ret_m[index], adj_ret_m[index-1], adj_ret_m[index-2]])
        skew_ret_i = skew([adj_ret_m[index], adj_ret_m[index-1], adj_ret_m[index-2]])
    else:
        adj_ret_i = float('NaN')
        turnover_i = float('NaN')
        std_ret_i = float('NaN')
        skew_ret_i = float('NaN')
    adj_ret.append(adj_ret_i)
    turnover.append(turnover_i)
    std_ret.append(std_ret_i)
    skew_ret.append(skew_ret_i)

crsp_comp = crsp_comp.assign(RET = adj_ret)
crsp_comp = crsp_comp.assign(TURNOVER = turnover)
crsp_comp = crsp_comp.assign(STD_RET = std_ret)
crsp_comp = crsp_comp.assign(SKEW_RET = skew_ret)

########## Delete adj_ret_m column, and any rows that contains missing values of gvkey or RET
# crsp_comp = crsp_comp.drop(columns=['adj_ret_m'])
crsp_comp = crsp_comp[crsp_comp['gvkey'].notnull()]
crsp_comp = crsp_comp[crsp_comp['RET'].notnull()]

##### Generate LAG1_RET, LAG2_RET, LAG3_RET
crsp_comp['LAG1_RET'] = crsp_comp.groupby(['gvkey'])['RET'].shift(1)
crsp_comp['LAG2_RET'] = crsp_comp.groupby(['gvkey'])['RET'].shift(2)
crsp_comp['LAG3_RET'] = crsp_comp.groupby(['gvkey'])['RET'].shift(3)

##### Delete last two digit (.0) of datadate
crsp_comp['datadate'] = crsp_comp['datadate'].astype(str).str[:-2]

############# Save merged CRSP_COMP dataframe into local file crsp_comp_10-Q.csv
crsp_comp.to_csv('..\\filings\\crsp_comp_' + obj_type + '.csv', index = 0)

################# Inspect crsp_comp 
crsp_comp

In [8]:
########################################################################################
############ Concatenate and prepare merge: ID_DATA and TEXT_DATA ######################
########################################################################################

############## Define a function to concatenate all csv files with file name that matches a certain pattern into one data frame
def concatenate (indir, file_name_match):
    os.chdir(indir)
    file_list = glob.glob(file_name_match)
    df_list = list()
    colnames = pd.read_csv(file_list[0], header = None).loc[0]
    
    for filename in file_list:
        # print(filename)
        df = pd.read_csv(filename, low_memory = False)
        df_list.append(df)

    df_concat = pd.concat(df_list, axis = 0)
    df_concat.columns = colnames
    return df_concat

############## Concatenate id_data and text_data files and create two data frames
id_data = concatenate('..\\filings', data_type_id + '_'+ obj_type + '_' + '*.csv')
text_data = concatenate('..\\filings', data_type_text + '_'+ obj_type + '_' + '*.csv')
############## Save id_data dataframe into local file id_data_10-Q.csv
id_data.to_csv('..\\filings\\' + data_type_id + '_'+ obj_type + '.csv', index = 0)

############## text_data modifications #####################
############## Calculate tone : tone = (n_pos - n_negation - n_neg)/nw
text_data['tone'] = (text_data['n_pos'] - text_data['n_negation'] - text_data['n_neg'])/text_data['nw']
text_data['tone_gi'] = (text_data['n_pos_gi'] - text_data['n_negation'] - text_data['n_neg_gi'])/text_data['nw']
text_data['tone_he'] = (text_data['n_pos_he'] - text_data['n_negation'] - text_data['n_neg_he'])/text_data['nw']

############## Correct modal words labels in text_data
text_data = text_data.rename(columns={'n_modal_weak': 'n_modal_strong', 'n_modal_strong': 'n_modal_weak'})
############## Save text_data dataframe into local file text_data_10-Q.csv
text_data.to_csv('..\\filings\\' + data_type_text + '_'+ obj_type + '.csv', index = 0)

print('Number of ' + obj_type + ' in edgar from 1993 Q1 to 2020 Q1: ' + str(len(id_data.index)))
print('Number of ' + obj_type + ' parsed and downloaded: ' + str(len(text_data.index)))

########################################################################################
######################### Merge ID_DATA with TEXT_DATA #################################
########################################################################################

############## Left merge ID_DATA and TEXT_DATA, key not unique in left data sets
edgar = pd.merge(id_data, text_data, on = ['accnum'], how = 'inner', validate = 'm:1')

edgar = edgar.sort_values(by = ['cik', 'rp'])

########## Convert date variables to date format
edgar['fd'] = pd.to_datetime(edgar['fd'], format='%Y%m%d')
edgar['rp'] = pd.to_datetime(edgar['rp'], format='%Y%m%d')

############################## Create Textual Variables ##########################################
######## NW: natural log of 1 + total number of words in the document
edgar['NW'] = np.log(1 + edgar['nw'])

######## TONE: number of net positive words (n_pos - n_neg - n_negations) per 1000 total words
edgar['TONE'] = edgar['tone']*1000
edgar['TONE_GI'] = edgar['tone_gi']*1000
edgar['TONE_HE'] = edgar['tone_he']*1000

######## TLAG: Time lag between the news release date (CRSP date) and document filing date (EDGAR filing date)
edgar['TLAG'] = (edgar['fd'] - edgar['rp']).dt.days

edgar['LAG1_NW'] = edgar.groupby(['cik'])['NW'].shift(1)
edgar['LAG2_NW'] = edgar.groupby(['cik'])['NW'].shift(2)
edgar['LAG3_NW'] = edgar.groupby(['cik'])['NW'].shift(3)
edgar['LAG1_TONE'] = edgar.groupby(['cik'])['TONE'].shift(1)
edgar['LAG2_TONE'] = edgar.groupby(['cik'])['TONE'].shift(2)
edgar['LAG3_TONE'] = edgar.groupby(['cik'])['TONE'].shift(3)
edgar['LAG1_TLAG'] = edgar.groupby(['cik'])['TLAG'].shift(1)
edgar['LAG2_TLAG'] = edgar.groupby(['cik'])['TLAG'].shift(2)
edgar['LAG3_TLAG'] = edgar.groupby(['cik'])['TLAG'].shift(3)

############## prepare merge: edgar
edgar['fd'] = edgar['fd'].str.replace('-', '')
edgar['rp'] = edgar['rp'].str.replace('-', '')
edgar['date_key'] = edgar['rp'].astype(str).str[:-2]

Number of 10-Q in edgar from 1993 Q1 to 2020 Q1: 594017
Number of 10-Q parsed and downloaded: 575579


In [11]:
########################################################################################
######################## Merge EDGAR with CRSP_COMPUSTAT ###############################
########################################################################################

############## Left merge ID_DATA and CRSP_COMP, key not unique in both data sets
crsp_comp_edgar = pd.merge(edgar, crsp_comp, on = ['cik', 'date_key'], how = 'inner', validate = 'm:m')

### Drop duplicated rows in accnum (EDGAR) and cik-rp (COMPUSTAT), and rename sic, date_key, date and datadate
crsp_comp_edgar = crsp_comp_edgar[crsp_comp_edgar.duplicated('accnum') == False]
crsp_comp_edgar = crsp_comp_edgar[crsp_comp_edgar.duplicated(subset=['cik', 'rp']) == False]
crsp_comp_edgar = crsp_comp_edgar.rename(columns={'sic_y': 'SIC', 'date_key': 'cquarter', 'date': 'date_crsp', 'datadate': 'date_comp'})

### Reorder crsp_comp_edgar columns
# 1st line: merge keys
# 2nd line: extra id info
# 3rd line: financial raw data (not lagged variables)
# 4th line: financial raw data (lagged variables)
# 5th line: ready-to-use regression variables
# 6th line: ready-to-use regressional lag variables
crsp_comp_edgar = crsp_comp_edgar[['cusip', 'cik', 'rp', 'accnum', \
'name', 'gvkey', 'SIC', 'fd', 'date_crsp', 'date_comp', 'cquarter', 'fyearq', 'fqtr', 'incorp', 'state', 'addzip', 'costat', 'age', \
'actq', 'aqcy', 'cheq', 'dpq', 'ibq', 'intanq', 'lctq', 'revtq', 'txditcq', 'xsgaq', 'sstky', 'xrdq',\
'atq', 'lag_atq', 'ceqq', 'lag_ceqq', 'cshoq', 'lag_cshoq', 'dlcq', 'lag_dlcq', 'dlttq', 'lag_dlttq', 'prccq', 'lag_prccq', 'ibq', 'lag_ibq', \
'nw', 'NW', 'TONE', 'TONE_GI', 'TONE_HE', 'TLAG', 'RET', 'STD_RET', 'EARN', 'LOSS', 'DEARN', 'STD_EARN', 'CFO', 'lag1_CFO', 'lag2_CFO', 'lag3_CFO', 'PPE', 'SIC2', 'SG', 'LAG_SG', 'SKEW_RET', 'TURNOVER', \
'LAG1_RET', 'LAG2_RET', 'LAG3_RET', 'LAG1_NW', 'LAG2_NW', 'LAG3_NW', 'LAG1_TONE', 'LAG2_TONE', 'LAG3_TONE', 'LAG1_TLAG', 'LAG2_TLAG', 'LAG3_TLAG']]

################## Inspect crsp_comp_edgar 
print('number of observations after merging with edgar data: ' + str(crsp_comp_edgar.shape[0]))
crsp_comp_edgar

number of observations after merging with edgar data: 303034


Unnamed: 0,cusip,cik,rp,accnum,name,gvkey,SIC,fd,date_crsp,date_comp,cquarter,fyearq,fqtr,incorp,state,addzip,costat,age,actq,aqcy,cheq,dpq,ibq,intanq,lctq,revtq,txditcq,xsgaq,sstky,xrdq,atq,lag_atq,ceqq,lag_ceqq,cshoq,lag_cshoq,dlcq,lag_dlcq,dlttq,lag_dlttq,prccq,lag_prccq,ibq.1,lag_ibq,NW,TONE,TONE_GI,TONE_HE,TLAG,RET,STD_RET,EARN,LOSS,DEARN,STD_EARN,CFO,lag1_CFO,lag2_CFO,lag3_CFO,PPE,SIC2,SG,LAG_SG,SKEW_RET,TURNOVER,LAG1_RET,LAG2_RET,LAG3_RET,LAG1_NW,LAG2_NW,LAG3_NW,LAG1_TONE,LAG2_TONE,LAG3_TONE,LAG1_TLAG,LAG2_TLAG,LAG3_TLAG
0,48273010,20,1995-09-30,0000893220-95-000710,K TRON INTERNATIONAL INC,6314.0,3823.0,1995-11-03,1995-09-29,19950930,199509,1995.0,3.0,NJ,NJ,08071-0888,I,5418,48.464,0.000,2.507,0.974,0.375,,63.869,22.297,0.199,7.615,0.074,0.535,75.132,76.265,8.439,8.152,3.104,3.104,41.114,42.890,0.189,0.225,5.5000,5.50,0.375,-8.950,7.926964,-10.108303,21.299639,7.581227,34.0,-0.171164,0.038383,0.004917,0.0,0.122271,0.034862,0.033764,0.006208,-0.008558,0.014989,,38,-0.142216,0.032505,0.059820,910.0,-0.079184,-0.583745,-0.030765,7.820440,9.328123,,-12.449799,-14.845764,,45.0,45.0,
1,48273010,20,1996-03-30,0000893220-96-000686,K TRON INTERNATIONAL INC,6314.0,3823.0,1996-05-02,1996-03-29,19960331,199603,1996.0,1.0,NJ,NJ,08071-0888,I,5600,42.388,0.000,2.450,0.796,0.752,,26.358,23.579,0.466,8.125,0.000,0.613,65.438,69.296,9.897,9.421,3.113,3.113,7.882,2.133,26.593,35.004,7.7500,6.25,0.752,0.561,7.869019,-8.033665,22.953328,8.798776,33.0,0.197571,0.156629,0.010852,0.0,0.002756,0.034785,-0.068547,0.048288,0.033764,0.006208,0.633096,38,-0.015786,0.033407,0.524379,2519.0,0.118911,-0.171164,-0.079184,7.926964,7.820440,9.328123,-10.108303,-12.449799,-14.845764,34.0,45.0,45.0
2,48273010,20,1996-06-29,0000893220-96-001241,K TRON INTERNATIONAL INC,6314.0,3823.0,1996-07-26,1996-06-28,19960630,199606,1996.0,2.0,NJ,NJ,08071-0888,I,5691,39.157,0.000,3.325,0.771,0.928,,45.448,21.880,0.466,8.025,0.070,0.619,61.142,65.438,10.592,9.897,3.127,3.113,27.322,7.882,2.725,26.593,8.5000,7.75,0.928,0.752,8.053887,-7.949126,21.939587,4.133545,27.0,0.053657,0.054066,0.014181,0.0,0.002690,0.036744,0.070127,-0.068547,0.048288,0.033764,0.656346,38,-0.027788,-0.015786,-0.518552,3788.0,0.197571,0.118911,-0.171164,7.869019,7.926964,7.820440,-8.033665,-10.108303,-12.449799,33.0,34.0,45.0
3,48273010,20,1996-09-28,0000893220-96-001772,K TRON INTERNATIONAL INC,6314.0,3823.0,1996-10-30,1996-09-30,19960930,199609,1996.0,3.0,NJ,NJ,08071-0888,I,5785,37.346,0.000,2.253,0.894,1.161,,42.412,22.547,0.466,7.668,0.070,0.523,59.275,61.142,11.824,10.592,3.127,3.127,23.326,27.322,2.772,2.725,9.2500,8.50,1.161,0.928,9.420277,-9.809485,22.294285,5.755979,32.0,0.055580,0.021281,0.018989,0.0,0.003811,0.005546,0.060155,0.070127,-0.068547,0.048288,0.722482,38,0.011253,-0.027788,-0.546126,5791.0,0.053657,0.197571,0.118911,8.053887,7.869019,7.926964,-7.949126,-8.033665,-10.108303,27.0,33.0,34.0
4,48273010,20,1997-03-29,0000893220-97-000850,K TRON INTERNATIONAL INC,6314.0,3823.0,1997-04-30,1997-03-31,19970331,199703,1997.0,1.0,NJ,NJ,08071-0888,I,5967,33.912,0.000,4.426,0.770,1.050,,19.139,21.344,0.459,7.684,0.050,0.722,53.037,55.330,13.770,13.194,3.143,3.137,0.325,0.861,18.316,20.807,10.3750,10.25,1.050,1.185,7.471363,-2.277904,21.640091,-3.416856,32.0,0.006413,0.006035,0.018977,0.0,-0.002440,0.003929,-0.189301,0.073235,0.060155,0.070127,0.732171,38,-0.009823,-0.012326,0.247687,3587.0,0.041001,0.055580,0.053657,9.420277,8.053887,7.869019,-9.809485,-7.949126,-8.033665,32.0,27.0,33.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
306668,73931J10,1774170,2019-09-30,0001493152-19-017037,"PowerFleet, Inc.",121759.0,3812.0,2019-11-13,2019-09-30,20190930,201909,2019.0,3.0,DE,DE,07677,A,7367,37.089,4.350,5.867,0.447,-2.099,15.654,25.527,16.043,0.000,8.145,0.177,1.824,63.278,60.412,27.610,27.860,18.597,18.425,0.849,0.831,1.122,1.341,5.4703,5.88,-2.099,-2.585,10.231495,-11.093902,28.166985,5.727047,44.0,-0.071427,0.053872,-0.034745,1.0,0.008045,0.012112,-0.033487,-0.010651,0.001488,-0.025361,0.199878,38,0.012911,0.056495,-0.483720,18786.0,-0.023351,-0.040491,-0.063266,,,,,,,,,
306669,74383L10,1778784,2019-09-30,0001104659-19-061274,"Provident Bancorp, Inc. /MD/",25327.0,6036.0,2019-11-08,2019-09-30,20190930,201909,2019.0,3.0,MD,MD,01913,A,1553,,0.000,38.967,0.192,3.509,0.000,,,,4.956,0.000,,1078.365,1031.175,135.851,131.763,9.382,9.372,0.000,0.000,33.864,85.864,24.0300,27.99,3.509,2.531,9.373224,-11.216859,31.611149,10.112169,39.0,-0.154998,0.019580,0.003403,0.0,0.000948,0.000516,0.006784,0.001915,-0.014683,0.005118,,60,0.000528,0.000593,0.694883,5352.0,0.187937,-0.088997,-0.106560,,,,,,,,,
306670,G8136L10,1779474,2019-09-30,0001140361-19-020475,Silver Spike Acquisition Corp.,35489.0,9995.0,2019-11-13,2019-09-30,20190930,201909,2019.0,3.0,,E9,10022,A,0,1.299,,0.987,0.000,0.553,0.000,0.076,0.000,0.000,0.134,,,251.986,0.315,243.160,0.020,31.250,31.250,0.000,0.069,0.000,0.000,10.0900,,0.553,,9.213037,-7.480551,41.492120,2.792739,44.0,0.281796,0.082823,1.755556,0.0,,,,,,,0.000000,99,,,-0.348554,,,,,,,,,,,,,
306671,03836J10,1781983,2019-09-30,0001104659-19-064239,"Aprea Therapeutics, Inc.",35648.0,2836.0,2019-11-14,2019-09-30,20190930,201909,2019.0,3.0,DE,DE,02116,A,0,56.745,0.000,52.334,0.004,-6.249,0.000,9.584,0.000,0.000,7.213,5.621,4.910,57.336,62.215,-70.796,-61.852,19.877,1.182,0.227,0.112,0.359,0.182,,,-6.249,-8.738,10.880102,-19.224976,25.438729,2.636138,45.0,-0.346108,0.091127,-0.100442,1.0,0.040006,,-0.103078,,,,,28,0.000000,0.000000,-0.339588,,,,,,,,,,,,,


In [12]:
####################### Modify data type in crsp_comp_edgar
########### Define a function that changes pandas series data type to string
def columns_to_str (df, colnames):
    for col in colnames:
        df[col] = df[col].astype(str)
    return df

########### Apply columns_to_str to various identification variables
crsp_comp_edgar = columns_to_str(crsp_comp_edgar, ['cik', 'gvkey', 'fyearq', 'fqtr'])

########## Convert date variables to date format
crsp_comp_edgar['date_comp'] = pd.to_datetime(crsp_comp_edgar['date_comp'], format='%Y%m%d')
# crsp_comp_edgar['ipodate'] = pd.to_datetime(crsp_comp_edgar['ipodate'], format='%Y%m%d')

########## Convert SIC variables to integer
# print('number of observations dropped because of missing SIC ' + str(crsp_comp_edgar[crsp_comp_edgar['SIC'].isnull()].shape[0]))
# crsp_comp_edgar = crsp_comp_edgar[crsp_comp_edgar['SIC'].notnull()]
crsp_comp_edgar['SIC'] = crsp_comp_edgar['SIC'].astype(int)

########### Inspect column data types
# print(crsp_comp_edgar.dtypes)

In [13]:
########################################################################################
############################### Variable Creation ######################################
########################################################################################

######## NEG: An indicator variable takes the value of 1 when market-adjusted stock return (RET) is negative and is 0 otherwise
crsp_comp_edgar['NEG'] = 0 
crsp_comp_edgar.loc[crsp_comp_edgar['RET'] < 0, 'NEG'] = 1

############################## Control Variables #######################################
######## Size: Firm size, defined as the natural logarithm of market value of equity [at the beginning of the quarter] \
######## defined as [beginning-of-quarter] common share price (Compustat data item prccq) \
######## times [beginning-of-quarter] common shares outstanding (Compustat data item cshoq)
crsp_comp_edgar['SIZE'] = np.log(crsp_comp_edgar['lag_prccq']*crsp_comp_edgar['lag_cshoq'])

######## MTB: Market-to-book ratio, defined as [beginning-of-quarter] market value of equity \
######## defined as common share price (Compustat data item prccq) times common shares outstanding (Compustat data item cshoq) \
######## divided by [beginning-of-quarter] book value of equity (Compustat data item ceqq) 
crsp_comp_edgar['MTB'] = (crsp_comp_edgar['lag_prccq']*crsp_comp_edgar['lag_cshoq'])/crsp_comp_edgar['lag_ceqq']

######## LEV: Leverage, defined as [beginning-of-quarter] short term debt plus [beginning-of-quarter] long term debt \
######## (Compustat data item dlcq + Compustat data item dlttq) scaled by [beginning-of-quarter] total assets (Compustat data item atq)
crsp_comp_edgar['LEV'] = (crsp_comp_edgar['lag_dlcq'] + crsp_comp_edgar['lag_dlttq'])/crsp_comp_edgar['lag_atq']

######## AGE: log(1 + age from the first year the firm entered the CRSP dataset)
crsp_comp_edgar['AGE'] = np.log(1 + crsp_comp_edgar['age'])

In [14]:
crsp_comp_edgar[crsp_comp_edgar['TLAG'] == crsp_comp_edgar['TLAG'].max()]

Unnamed: 0,cusip,cik,rp,accnum,name,gvkey,SIC,fd,date_crsp,date_comp,cquarter,fyearq,fqtr,incorp,state,addzip,costat,age,actq,aqcy,cheq,dpq,ibq,intanq,lctq,revtq,txditcq,xsgaq,sstky,xrdq,atq,lag_atq,ceqq,lag_ceqq,cshoq,lag_cshoq,dlcq,lag_dlcq,dlttq,lag_dlttq,prccq,lag_prccq,ibq.1,lag_ibq,NW,TONE,TONE_GI,TONE_HE,TLAG,RET,STD_RET,EARN,LOSS,DEARN,STD_EARN,CFO,lag1_CFO,lag2_CFO,lag3_CFO,PPE,SIC2,SG,LAG_SG,SKEW_RET,TURNOVER,LAG1_RET,LAG2_RET,LAG3_RET,LAG1_NW,LAG2_NW,LAG3_NW,LAG1_TONE,LAG2_TONE,LAG3_TONE,LAG1_TLAG,LAG2_TLAG,LAG3_TLAG,NEG,SIZE,MTB,LEV,AGE
12687,12590210,25354,1996-04-27,0001140361-07-012753,CPI CORP,2555.0,7200,2007-06-21,1996-04-30,1996-04-30,199604,1996.0,1.0,DE,DE,63103,I,4991,71.409,0.0,5.565,9.441,-2.12,,67.453,104.668,2.258,69.071,0.867,,299.77,300.488,171.242,174.168,13.914,13.867,16.1,7.875,54.824,54.804,17.375,14.75,-2.12,12.261,7.372118,-8.805031,19.496855,3.144654,4072.0,0.125765,0.01386,-0.007055,1.0,-0.047859,0.017487,-0.180367,0.108983,0.017021,0.046292,1.129682,72,-0.151376,-0.053613,0.378317,25605.0,-0.246276,-0.178387,0.184839,7.70886,10.600901,7.571988,-1.796138,-14.687578,-2.059732,40.0,41.0,41.0,0,5.320755,1.174373,0.208591,8.515592


In [15]:
########################################################################################
############################### Variable Screening #####################################
########################################################################################

############## Drop financial and utility firms (SIC codes between 6000 and 6999 and between 4900 and 4999, respectively)
del_fin = crsp_comp_edgar.loc[(crsp_comp_edgar['SIC'] >= 6000) & (crsp_comp_edgar['SIC'] <= 6999)].shape[0]
crsp_comp_edgar = crsp_comp_edgar.loc[(crsp_comp_edgar['SIC'] < 6000) | (crsp_comp_edgar['SIC'] > 6999)] # financial
del_ut = crsp_comp_edgar.loc[(crsp_comp_edgar['SIC'] >= 4900) & (crsp_comp_edgar['SIC'] <= 4999)].shape[0]
crsp_comp_edgar = crsp_comp_edgar.loc[(crsp_comp_edgar['SIC'] < 4900) | (crsp_comp_edgar['SIC'] > 4999)] # utility
print('number of firm-quarters from utility and financial firms: ' + str(del_fin + del_ut))

############## Drop files (firm-quarter) that have missing SIZE, MTB, LEV, or with non-positive total assets or book value of equity, \
############## or with [beginning-of-quarter] common share price less than $1
del_size = crsp_comp_edgar[crsp_comp_edgar['SIZE'].isnull()].shape[0]
crsp_comp_edgar = crsp_comp_edgar[crsp_comp_edgar['SIZE'].isnull() == False]
del_mtb = crsp_comp_edgar[crsp_comp_edgar['MTB'].isnull()].shape[0]
crsp_comp_edgar = crsp_comp_edgar[crsp_comp_edgar['MTB'].isnull() == False]
del_lev = crsp_comp_edgar[crsp_comp_edgar['LEV'].isnull()].shape[0]
crsp_comp_edgar = crsp_comp_edgar[crsp_comp_edgar['LEV'].isnull() == False]
del_atq = crsp_comp_edgar.loc[(crsp_comp_edgar['atq'] <= 0) | (crsp_comp_edgar['atq'].isnull())].shape[0]
crsp_comp_edgar = crsp_comp_edgar.loc[crsp_comp_edgar['atq'] > 0]
del_ceqq = crsp_comp_edgar.loc[(crsp_comp_edgar['ceqq'] <= 0) | (crsp_comp_edgar['ceqq'].isnull())].shape[0]
crsp_comp_edgar = crsp_comp_edgar.loc[crsp_comp_edgar['ceqq'] > 0]
del_prccq = crsp_comp_edgar.loc[(crsp_comp_edgar['lag_prccq'] < 1) | (crsp_comp_edgar['lag_prccq'].isnull())].shape[0]
crsp_comp_edgar = crsp_comp_edgar.loc[crsp_comp_edgar['lag_prccq'] >= 1]
print('number of firm-quarters with missing SIZE, MTB, LEV or non-positive total assets or non-positive book value of equity, or lag_prcc < 1: ' \
      + str(del_size + del_mtb + del_lev + del_atq + del_ceqq + del_prccq))

## Change SIC back to str
crsp_comp_edgar['SIC'] = crsp_comp_edgar['SIC'].astype(str)

########## Drop files (firm-quarter) that contain number of words less than 1% threshold
nwq01 = crsp_comp_edgar['nw'].quantile(.01)
print('number of words, 1% quantile: ' + str(nwq01))
del_word01 = crsp_comp_edgar.loc[crsp_comp_edgar['nw'] < nwq01].shape[0]
print('number of files that contain total words less than 1% threshold: ' + str(del_word01))
crsp_comp_edgar = crsp_comp_edgar.loc[crsp_comp_edgar['nw'] >= nwq01]

########## Drop files (firm-quarter) that contain negative TLAG
# Rationale to drop negative TLAG: By construction, filings with filing date prior to news release date cannot be addressing the news. 
# ANTICIPATION is not purpose of the paper.
del_TLAG0 = crsp_comp_edgar[crsp_comp_edgar['TLAG'] < 0].shape[0]
# print('number of files that contain negative TLAG: ' + str(del_TLAG0))
crsp_comp_edgar = crsp_comp_edgar[crsp_comp_edgar['TLAG'] >= 0]

########## Drop files (firm-quarter) that contain larger than 99% TLAG
tlagq99 = crsp_comp_edgar['TLAG'].quantile(.99)
print('TLAG 99% quantile: ' + str(tlagq99))
del_TLAG99 = crsp_comp_edgar.loc[crsp_comp_edgar['TLAG'] > tlagq99].shape[0]
print('number of files that contain negative or larger than 99% TLAG: ' + str(del_TLAG99 + del_TLAG0))
crsp_comp_edgar = crsp_comp_edgar.loc[crsp_comp_edgar['TLAG'] <= tlagq99]

############## Inspect sample size after variable screening
print('Number of firm-quarters after variable screening: ' + str(crsp_comp_edgar.shape[0]))

############## Winsorize SIZE, MTB, LEV
###### Define a function that winsorize a variable at 1% and 99% 
def winsorize (df, colnames):
    for col in colnames:
        varq01 = df[col].quantile(.01)
        varq99 = df[col].quantile(.99)
        df[col] = df[col].clip(varq01, varq99)
    return df

############## Drop infinite MTB and winsorize 
crsp_comp_edgar = crsp_comp_edgar[crsp_comp_edgar['MTB'] != float('inf')]
# crsp_comp_edgar = winsorize(crsp_comp_edgar, ['SIZE', 'MTB', 'LEV'])

############## Save merged crsp_comp_edgar to csv file
crsp_comp_edgar.to_csv('..\\filings\\crsp_comp_edgar_' + obj_type + '.csv', index = 0)

number of firm-quarters from utility and financial firms: 82612
number of firm-quarters with missing SIZE, MTB, LEV or non-positive total assets or non-positive book value of equity, or lag_prcc < 1: 26450


KeyError: 'nw'

In [17]:
########################################################################################
############### Table 2: Summary Statistics and Correlation Matrix #####################
########################################################################################

############# Table 2 Panel A: Summary statistics for selected variables
######### Variable groups:
# 1st line: textual variables, generally consistent with LM's summary statistics
# 2nd line: fundamental variables (main)
# 3rd line: abtone
selected_vars = crsp_comp_edgar[['NW','nw', 'TONE','TLAG', 'READ', \
                                   'RET', 'NEG', 'SIZE', 'MTB', 'LEV' \
                                 # 'AGE', 'age', 'EARN', 'STD_RET', 'STD_EARN', 'LOSS', 'DEARN' \
                                 ]]

T2PA = selected_vars.describe().transpose() 

############# Summary statistics for all raw and processed variables
full_summary = crsp_comp_edgar.describe().transpose()

############# Save T2PA
table_path = '..\\output\\Tables.xlsx'
if os.path.exists(table_path) == True:
    book = load_workbook(table_path)
    writer = pd.ExcelWriter(table_path, engine = 'openpyxl')
    writer.book = book
    writer.sheets = dict((ws.title, ws) for ws in book.worksheets)

    T2PA.to_excel(writer, sheet_name='T2PA_raw', float_format="%.4f")

    writer.save()
    writer.close()
    
else:
    T2PA.to_excel(table_path, sheet_name='T2PA_raw', float_format="%.4f")

T2PA

PermissionError: [Errno 13] Permission denied: '..\\output\\Tables.xlsx'

In [18]:
# full_summary

In [19]:
############# Table 1 Panel B: Correlation matrix for selected variables
######### pearson correlation
T2PB_pearson = selected_vars.loc[:, selected_vars.columns != 'nw'].corr(method='pearson')

# T1PB_pearson

In [20]:
######### spearman correlation
T2PB_spearman = selected_vars.loc[:, selected_vars.columns != 'nw'].corr(method='spearman')

# T1PB_spearman

In [21]:
######### Combine two correlation matrices. right-up matrix: pearson; left-down matrix: spearman 
for row in list(range(0, len(T2PB_spearman.index))):
    T2PB_spearman.iloc[row, row+1:] = T2PB_pearson.iloc[row, row+1:]
    
##### Save T2PB
table_path = '..\\output\\Tables.xlsx'
if os.path.exists(table_path) == True:
    book = load_workbook(table_path)
    writer = pd.ExcelWriter(table_path, engine = 'openpyxl')
    writer.book = book
    writer.sheets = dict((ws.title, ws) for ws in book.worksheets)

    T2PB_spearman.to_excel(writer, sheet_name='T2PB_raw', float_format="%.4f")

    writer.save()
    writer.close()
    
else:
    T2PB_spearman.to_excel(table_path, sheet_name='T2PB_raw', float_format="%.4f")

T2PB_spearman

Unnamed: 0,NW,TONE,TLAG,READ,RET,NEG,SIZE,MTB,LEV
NW,1.0,-0.43695,-0.218368,-0.066191,-0.01778,0.006934,0.325124,0.001852,0.064511
TONE,-0.448985,1.0,0.004051,0.063639,0.024358,-0.019707,-0.062134,0.000228,0.062558
TLAG,-0.288556,0.007148,1.0,0.036475,-0.030902,0.045327,-0.408406,0.002707,-0.025436
READ,-0.253252,0.150973,0.130605,1.0,-0.003532,0.004504,-0.018003,-0.001895,0.064338
RET,-0.014593,0.033008,-0.051923,-0.013885,1.0,-0.66473,-0.031996,-0.001128,-0.00517
NEG,0.008231,-0.022716,0.044928,0.017293,-0.865965,1.0,-0.025118,0.002079,-0.002787
SIZE,0.337262,-0.048299,-0.423067,-0.112303,0.015577,-0.027368,1.0,0.002434,0.114442
MTB,0.112102,0.017449,-0.090028,-0.047888,-0.045844,0.026336,0.39809,1.0,0.000367
LEV,0.053513,0.063628,-0.03399,0.07084,0.002292,-0.004502,0.15088,-0.075327,1.0
