In [1]:
############### Import packages
import os, numpy as np, pandas as pd, time, glob, re, math, statsmodels.api as sm, patsy as ps
from tqdm import tqdm
from time import process_time
from datetime import datetime
from datetime import date
from openpyxl import load_workbook
from patsy import dmatrices

##########################################################
##################### parameter ##########################
##########################################################
obj_type = '10-Q'
data_type_text = 'text_data'
data_type_id = 'id_data'

############### Set working directory to parent directory
if os.getcwd() != 'F:\\github\\narrative_conservatism\\code':
    os.chdir('F:\\github\\narrative_conservatism\\code')
    
############### Set pandas column printing constraint
pd.set_option('display.max_columns', None)

In [4]:
########### Read CRSP raw data files
crsp_cols = ['date', 'PERMCO', 'CUSIP', 'RET', 'vwretd']
crsp = pd.read_csv('..\\filings\\crsp.csv', usecols = crsp_cols)
crsp.columns = ['date', 'permco', 'cusip', 'ret', 'vwretd']

### Delete the two 'day digits' of crsp filings' data date and create the date_key
crsp['date_key'] = crsp['date'].astype(str).str[:-2]

### Drop CRSP raw rows that contains non-numeric returns ('B' and 'C'), fill NA with 0
print('number of monthly data in CRSP: ' + str(len(crsp.index)))
crsp = crsp[(crsp['ret'] != 'B') & (crsp['ret'] != 'C')]
crsp['ret'] = crsp['ret'].fillna(0)
print('number of monthly data in CRSP that contains only numeric returns: ' + str(len(crsp.index)))

### Mutate adjusted monthly returns and delete ret and vwretd
crsp = crsp.assign(adj_ret_m = crsp['ret'].astype(float) - crsp['vwretd'].astype(float))
crsp = crsp.drop(columns=['ret', 'vwretd'])

### Mutate age from the first entry date of the firm in CRSP
crsp['date'] = crsp['date'].apply(lambda x: pd.to_datetime(str(x), format='%Y%m%d'))
birth = crsp.groupby(['permco'])['date'].min()
crsp = pd.merge(crsp, birth, on = ['permco'], how='left', validate = 'm:1')
crsp = crsp.rename(columns={'date_x': 'date', 'date_y': 'birth'})
crsp['age'] = (crsp['date'] - crsp['birth']).dt.days

crsp

number of monthly data in CRSP: 4606907
number of monthly data in CRSP that contains only numeric returns: 4511394


Unnamed: 0,date,permco,cusip,date_key,adj_ret_m,birth,age
0,1985-12-31,7952,68391610,198512,-0.043061,1985-12-31,0
1,1986-02-28,7952,68391610,198602,-0.329643,1985-12-31,59
2,1986-03-31,7952,68391610,198603,0.311500,1985-12-31,90
3,1986-04-30,7952,68391610,198604,-0.090689,1985-12-31,120
4,1986-05-30,7952,68391610,198605,-0.273500,1985-12-31,150
...,...,...,...,...,...,...,...
4511389,2019-08-30,53453,88160R10,201908,-0.045951,2010-05-28,3381
4511390,2019-09-30,53453,88160R10,201909,0.051615,2010-05-28,3412
4511391,2019-10-31,53453,88160R10,201910,0.288162,2010-05-28,3443
4511392,2019-11-29,53453,88160R10,201911,0.012724,2010-05-28,3472


In [2]:
########################################################################################
############ Merge COMPUSTAT quarterly data with CRSP monthly data #####################
########################################################################################

########### Read compustat raw data files
comp_cols = ['gvkey', 'datadate', 'fyearq', 'fqtr', 'fyr', 'cusip', 'conm', 'actq', \
             'atq', 'ceqq', 'cheq', 'cshoq', 'dlcq', 'dlttq', 'dpq', 'ibq', 'intanq', 'lctq', 'ppegtq', 'rectq', \
             'revtq', 'txditcq', 'xsgaq', 'iby', 'oancfy', 'xidocy', 'exchg', 'cik', 'costat', 'prccq', 'addzip', 'incorp', 'sic', 'ipodate']
comp = pd.read_csv('..\\filings\\compustat.csv', usecols = comp_cols)

### Reorder compustat column
# 1st line: merge keys
# 2nd line: extra id info
# 3rd line: financial data
# 4th line: financial data (CONT.)
comp = comp[['cusip', 'cik', 'datadate', \
'gvkey', 'conm', 'sic', 'incorp', 'addzip', 'fyearq', 'fqtr', 'fyr', 'ipodate', 'costat', 'exchg', \
'actq', 'atq', 'ceqq', 'cheq', 'cshoq', 'dlcq', 'dlttq', 'dpq', 'ibq', 'intanq', 'lctq', 'revtq', 'txditcq', 'xsgaq', 'oancfy', 'prccq', \
'iby', 'xidocy', 'rectq', 'ppegtq']]

print('number of quarterly filings in Compustat: ' + str(comp.shape[0]))

### Creat lagged variables in compustat raw data
comp['lag_prccq'] = comp.groupby(['gvkey'])['prccq'].shift(1)
comp['lag_cshoq'] = comp.groupby(['gvkey'])['cshoq'].shift(1)
comp['lag_ceqq'] = comp.groupby(['gvkey'])['ceqq'].shift(1)
comp['lag_dlcq'] = comp.groupby(['gvkey'])['dlcq'].shift(1)
comp['lag_dlttq'] = comp.groupby(['gvkey'])['dlttq'].shift(1)
comp['lag_atq'] = comp.groupby(['gvkey'])['atq'].shift(1)
comp['lag_ibq'] = comp.groupby(['gvkey'])['ibq'].shift(1)
comp['lag_revtq'] = comp.groupby(['gvkey'])['revtq'].shift(1)
comp['lag_rectq'] = comp.groupby(['gvkey'])['rectq'].shift(1)
comp['lag_oancfy'] = comp.groupby(['gvkey'])['oancfy'].shift(1)
comp['lag_xidocy'] = comp.groupby(['gvkey'])['xidocy'].shift(1)

####################### Create ABTONE variables for Huang et al. 2014
### EARN: earnings before extraordinary items (Compustat data item ibq) scaled by lagged total assets (Compustat data item atq)
comp['EARN'] = comp['ibq']/comp['lag_atq']
### LOSS, an indicator variable set to 1 when EARN is negative, and is 0 otherwise
comp['LOSS'] = 0 
comp.loc[comp['EARN'] < 0, 'LOSS'] = 1
### DEARN: change in earnings before extraordinary item scaled by beginning total assets (Compustat data item atq)
comp['DEARN'] = (comp['ibq'] - comp['lag_ibq'])/comp['lag_atq']
### STD_EARN: standard deviation of EARN calculated over the last five quarters
comp['STD_EARN'] = comp['EARN'].rolling(5).std()
### CFO: quarterly operating cash flows (Compustat data item oancfy) scaled by beginning total assets (Compustat data item atq);
comp['CFO'] = (comp['oancfy'] - comp['lag_oancfy'])/comp['lag_atq']
### TACC: total accruals, defined as quarterly income before extraordinary items (Compustat data item ibq) minus \
### the difference between quarterly operating cash flows (Compustat data item oancfy) and \
### quarterly extraordinary items and discontinued operations included in CFO (Compustat data item xidocy);
comp['TACC'] = comp['ibq'] - ((comp['oancfy']-comp['lag_oancfy']) - (comp['xidocy'] - comp['lag_xidocy']))
### TA: total assets, scaled by lagged total assets (Compustat data item atq);
comp['TA'] = comp['atq']/comp['lag_atq']
comp['LAG_TA'] = comp.groupby(['gvkey'])['TA'].shift(1)
comp['LAG_TA_REV'] = 1/comp['LAG_TA']
### DSALES: quarterly change in revenue (Compustat data item revtq), scaled by lagged total assets (Compustat data item atq);
comp['DSALES'] = (comp['revtq'] - comp['lag_revtq'])/comp['lag_atq']
### DAR: quarterly change in accounts receivable (Compustat data item rectq), scaled by lagged total assets (Compustat data item atq);
comp['DAR'] = (comp['rectq'] - comp['lag_rectq'])/comp['lag_atq']
### DSAR = DSALES - DAR
comp['DSAR'] = comp['DSALES'] - comp['DAR']
### PPE: gross property, plant, and equipment (Compustat data item ppegtq), scaled by lagged total assets (Compustat data item atq);
comp['PPE'] = comp['ppegtq']/comp['lag_atq']

### leap1_EARN, leap2_EARN, leap3_EARN and leap1_CFO, leap2_CFO, leap3_CFO for Huang et al. 2014 TABLE 4 replication
comp['leap1_EARN'] = comp.groupby(['gvkey'])['EARN'].shift(-1)
comp['leap2_EARN'] = comp.groupby(['gvkey'])['EARN'].shift(-2)
comp['leap3_EARN'] = comp.groupby(['gvkey'])['EARN'].shift(-3)

comp['leap1_CFO'] = comp.groupby(['gvkey'])['CFO'].shift(-1)
comp['leap2_CFO'] = comp.groupby(['gvkey'])['CFO'].shift(-2)
comp['leap3_CFO'] = comp.groupby(['gvkey'])['CFO'].shift(-3)

### Delete the 9th digit of compustat filings' cusip, and filter filings that have 8-digits cusip after deletion 
comp['cusip'] = comp['cusip'].astype(str).str[:-1]
comp = comp.loc[comp['cusip'].str.len() == 8]
print('number of quarterly filings in Compustat after deleting non-9-digits cusips: ' + str(comp.shape[0]))

### Delete the two 'day digits' of compustat filings' data date and create the date_key
comp['date_key'] = comp['datadate'].astype(str).str[:-2]
### Create first 2-digits SIC:SIC2
comp['SIC2'] = comp['sic'].astype(str).str[:-2]

comp.shape

number of quarterly filings in Compustat: 1142966
number of quarterly filings in Compustat after deleting non-9-digits cusips: 1142561


(1142561, 66)

In [5]:
##################### Left merge CRSP and Compstat, key not unique in Compustat
crsp_comp = pd.merge(crsp, comp, on = ['cusip', 'date_key'], how='left', validate = '1:m')
crsp_comp

Unnamed: 0,date,permco,cusip,date_key,adj_ret_m,birth,age,cik,datadate,gvkey,conm,sic,incorp,addzip,fyearq,fqtr,fyr,ipodate,costat,exchg,actq,atq,ceqq,cheq,cshoq,dlcq,dlttq,dpq,ibq,intanq,lctq,revtq,txditcq,xsgaq,oancfy,prccq,iby,xidocy,rectq,ppegtq,lag_prccq,lag_cshoq,lag_ceqq,lag_dlcq,lag_dlttq,lag_atq,lag_ibq,lag_revtq,lag_rectq,lag_oancfy,lag_xidocy,EARN,LOSS,DEARN,STD_EARN,CFO,TACC,TA,LAG_TA,LAG_TA_REV,DSALES,DAR,DSAR,PPE,leap1_EARN,leap2_EARN,leap3_EARN,leap1_CFO,leap2_CFO,leap3_CFO,SIC2
0,1985-12-31,7952,68391610,198512,-0.043061,1985-12-31,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1986-02-28,7952,68391610,198602,-0.329643,1985-12-31,59,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1986-03-31,7952,68391610,198603,0.311500,1985-12-31,90,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1986-04-30,7952,68391610,198604,-0.090689,1985-12-31,120,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1986-05-30,7952,68391610,198605,-0.273500,1985-12-31,150,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4512445,2019-08-30,53453,88160R10,201908,-0.045951,2010-05-28,3381,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4512446,2019-09-30,53453,88160R10,201909,0.051615,2010-05-28,3412,1318605.0,20190930.0,184996.0,TESLA INC,3711.0,DE,94304,2019.0,3.0,12.0,20100629.0,A,14.0,10940.0,32795.0,6040.0,5571.0,180.0,2253.0,12383.0,530.851,143.469,537.0,10146.0,6302.86,0.0,929.738,980.0,240.87,-967.0,0.0,1128.0,24453.0,223.46,179.118,5715.393,2011.177,12309.747,31872.597,-408.334,6349.676,1147.1,224.0,0.0,0.004501,0.0,0.017313,0.014702,0.023719,-612.531,1.028940,1.10238,0.907128,-0.001469,-0.000599,-0.00087,0.767211,0.003202,,,0.043452,,,37
4512447,2019-10-31,53453,88160R10,201910,0.288162,2010-05-28,3443,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4512448,2019-11-29,53453,88160R10,201911,0.012724,2010-05-28,3472,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [6]:
########## Aggregate the monthly returns in CRSP to quarterly returns by summing up 3-months returns in a quarter
########## STD_RET: Standard deviation of RET over all months in the quarter;
adj_ret = list()
std_ret = list()
gvkey = crsp_comp['gvkey'].values.tolist()
adj_ret_m = crsp_comp['adj_ret_m'].values.tolist()

for index, value in enumerate(gvkey):
    ### requires a quarter-end flag to be non-zero, and the quarter-end flag of previous two months to be zero (avoid time-slot mismatch)
    if math.isnan(value) == False and math.isnan(gvkey[index-1]) == True and math.isnan(gvkey[index-2]) == True:
        adj_ret_i = adj_ret_m[index] + adj_ret_m[index-1] + adj_ret_m[index-2]
        std_ret_i = np.std([adj_ret_m[index], adj_ret_m[index-1], adj_ret_m[index-2]])
    else:
        adj_ret_i = float('NaN')
        std_ret_i = float('NaN')
    adj_ret.append(adj_ret_i)
    std_ret.append(std_ret_i)

crsp_comp = crsp_comp.assign(RET = adj_ret)
crsp_comp = crsp_comp.assign(STD_RET = std_ret)

########## Delete adj_ret_m column, and any rows that contains missing values of gvkey or RET
# crsp_comp = crsp_comp.drop(columns=['adj_ret_m'])
crsp_comp = crsp_comp[crsp_comp['gvkey'].notnull()]
crsp_comp = crsp_comp[crsp_comp['RET'].notnull()]

##### Delete last two digit (.0) of datadate
crsp_comp['datadate'] = crsp_comp['datadate'].astype(str).str[:-2]

############## Save merged CRSP_COMP dataframe into local file crsp_comp_10-Q.csv
crsp_comp.to_csv('..\\filings\\crsp_comp_' + obj_type + '.csv', index = 0)

In [7]:
################# Inspect crsp_comp 
print('number of quarterly fillings after merging CRSP and Compustat: ' + str(crsp_comp.shape[0]))
crsp_comp

number of quarterly fillings after merging CRSP and Compustat: 740697


Unnamed: 0,date,permco,cusip,date_key,adj_ret_m,birth,age,cik,datadate,gvkey,conm,sic,incorp,addzip,fyearq,fqtr,fyr,ipodate,costat,exchg,actq,atq,ceqq,cheq,cshoq,dlcq,dlttq,dpq,ibq,intanq,lctq,revtq,txditcq,xsgaq,oancfy,prccq,iby,xidocy,rectq,ppegtq,lag_prccq,lag_cshoq,lag_ceqq,lag_dlcq,lag_dlttq,lag_atq,lag_ibq,lag_revtq,lag_rectq,lag_oancfy,lag_xidocy,EARN,LOSS,DEARN,STD_EARN,CFO,TACC,TA,LAG_TA,LAG_TA_REV,DSALES,DAR,DSAR,PPE,leap1_EARN,leap2_EARN,leap3_EARN,leap1_CFO,leap2_CFO,leap3_CFO,SIC2,RET,STD_RET
80,1991-03-28,7953,36720410,199103,-0.040461,1985-12-31,1913,43350.0,19910331,12994.0,GAS NATURAL INC,4924.0,OH,44114,1991.0,3.0,6.0,,I,12.0,5.374,21.068,8.054,1.164,1.054,0.485,6.960,0.154,0.744,0.000,3.684,8.452,2.363,,,9.50,,,3.841,25.008,,,,,,,,,,,,,0.0,,,,,,,,,,,,-0.001282,-0.016276,0.032164,,,,49,-0.139241,0.011776
83,1991-06-28,7953,36720410,199106,0.122510,1985-12-31,2005,43350.0,19910630,12994.0,GAS NATURAL INC,4924.0,OH,44114,1991.0,4.0,6.0,,I,12.0,3.869,19.599,7.982,1.595,1.073,0.210,6.965,0.163,-0.027,0.000,2.215,4.499,2.429,,,10.50,,,1.882,25.224,9.50,1.054,8.054,0.485,6.960,21.068,0.744,8.452,3.841,,,-0.001282,1.0,-0.036596,,,,0.930273,,,-0.187631,-0.092985,-0.094646,1.197266,-0.016276,0.032164,0.027662,,,,49,0.117936,0.066681
86,1991-09-30,7953,36720410,199109,0.024018,1985-12-31,2099,43350.0,19910930,12994.0,GAS NATURAL INC,4924.0,OH,44114,1992.0,1.0,6.0,,I,12.0,2.823,19.494,7.509,0.566,1.075,0.630,6.965,0.224,-0.319,0.000,2.512,2.771,2.478,,,11.50,-0.319,,1.384,26.340,10.50,1.073,7.982,0.210,6.965,19.599,-0.027,4.499,1.882,,,-0.016276,1.0,-0.014899,,,,0.994643,0.930273,1.074953,-0.088168,-0.025409,-0.062758,1.343946,0.032164,0.027662,-0.004938,,,,49,0.050494,0.078342
89,1991-12-31,7953,36720410,199112,-0.113558,1985-12-31,2191,43350.0,19911231,12994.0,GAS NATURAL INC,4924.0,OH,44114,1992.0,2.0,6.0,,I,12.0,6.188,22.992,7.975,1.170,1.075,2.164,6.760,0.234,0.627,0.000,5.379,8.333,2.532,,,14.50,,,4.046,26.718,11.50,1.075,7.509,0.630,6.965,19.494,-0.319,2.771,1.384,,,0.032164,0.0,0.048528,,,,1.179440,0.994643,1.005386,0.285319,0.136555,0.148764,1.370576,0.027662,-0.004938,-0.020273,,,,49,0.170952,0.122995
92,1992-03-31,7953,36720410,199203,0.105518,1985-12-31,2282,43350.0,19920331,12994.0,GAS NATURAL INC,4924.0,OH,44114,1992.0,3.0,6.0,,I,12.0,5.904,22.683,8.450,1.341,1.075,1.347,6.735,0.237,0.636,0.000,4.943,8.124,2.550,,,11.75,,,3.226,26.884,14.50,1.075,7.975,2.164,6.760,22.992,0.627,8.333,4.046,,,0.027662,0.0,0.000391,,,,0.986561,1.179440,0.847860,-0.009090,-0.035665,0.026574,1.169276,-0.004938,-0.020273,0.029008,,,,49,-0.158378,0.130189
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4512437,2018-12-31,53453,88160R10,201812,0.039362,2010-05-28,3139,1318605.0,20181231,184996.0,TESLA INC,3711.0,DE,94304,2018.0,4.0,12.0,20100629.0,A,14.0,8306.308,29739.614,4923.243,3878.169,172.603,2629.460,9454.055,496.737,139.483,350.651,9992.136,7225.873,0.000,1023.749,2097.802,332.80,-976.091,0.0,949.022,23343.447,264.77,171.578,4508.838,2106.538,9726.589,29262.713,311.516,6824.413,1155.001,863.241,0.0,0.004767,0.0,-0.005879,0.018233,0.042189,-1095.078,1.016297,1.048467,0.953773,0.013719,-0.007039,0.020758,0.797720,-0.023609,-0.014123,0.004501,-0.092046,0.029870,0.023719,37,0.407883,0.150163
4512440,2019-03-29,53453,88160R10,201903,-0.138087,2010-05-28,3227,1318605.0,20190331,184996.0,TESLA INC,3711.0,DE,94304,2019.0,1.0,12.0,20100629.0,A,14.0,7677.822,28912.524,4605.596,2329.119,173.682,1914.073,10834.402,467.577,-702.135,347.880,9242.800,4541.464,0.000,1044.103,-639.606,279.86,-702.135,0.0,1046.945,23233.540,332.80,172.603,4923.243,2629.460,9454.055,29739.614,139.483,7225.873,949.022,2097.802,0.0,-0.023609,1.0,-0.028300,0.018164,-0.092046,2035.273,0.972189,1.016297,0.983964,-0.090264,0.003293,-0.093556,0.781232,-0.014123,0.004501,0.003202,0.029870,0.023719,0.043452,37,-0.294656,0.076771
4512443,2019-06-28,53453,88160R10,201906,0.139577,2010-05-28,3318,1318605.0,20190630,184996.0,TESLA INC,3711.0,DE,94304,2019.0,2.0,12.0,20100629.0,A,14.0,10181.952,31872.597,5715.393,5082.746,179.118,2011.177,12309.747,578.572,-408.334,480.833,9588.773,6349.676,0.000,971.159,224.000,223.46,-1110.469,0.0,1147.100,23893.195,279.86,173.682,4605.596,1914.073,10834.402,28912.524,-702.135,4541.464,1046.945,-639.606,0.0,-0.014123,1.0,0.010162,0.016833,0.029870,-1271.940,1.102380,0.972189,1.028607,0.062541,0.003464,0.059077,0.826396,0.004501,0.003202,,0.023719,0.043452,,37,-0.208027,0.148010
4512446,2019-09-30,53453,88160R10,201909,0.051615,2010-05-28,3412,1318605.0,20190930,184996.0,TESLA INC,3711.0,DE,94304,2019.0,3.0,12.0,20100629.0,A,14.0,10940.000,32795.000,6040.000,5571.000,180.000,2253.000,12383.000,530.851,143.469,537.000,10146.000,6302.860,0.000,929.738,980.000,240.87,-967.000,0.0,1128.000,24453.000,223.46,179.118,5715.393,2011.177,12309.747,31872.597,-408.334,6349.676,1147.100,224.000,0.0,0.004501,0.0,0.017313,0.014702,0.023719,-612.531,1.028940,1.102380,0.907128,-0.001469,-0.000599,-0.000870,0.767211,0.003202,,,0.043452,,,37,0.075005,0.050690


In [8]:
########################################################################################
############ Concatenate and prepare merge: ID_DATA and TEXT_DATA ######################
########################################################################################

############## Define a function to concatenate all csv files with file name that matches a certain pattern into one data frame
def concatenate (indir, file_name_match):
    os.chdir(indir)
    file_list = glob.glob(file_name_match)
    df_list = list()
    colnames = pd.read_csv(file_list[0], header = None).loc[0]
    
    for filename in file_list:
        # print(filename)
        df = pd.read_csv(filename, low_memory = False)
        df_list.append(df)

    df_concat = pd.concat(df_list, axis = 0)
    df_concat.columns = colnames
    return df_concat

############## Concatenate id_data and text_data files and create two data frames
id_data = concatenate('..\\filings', data_type_id + '_'+ obj_type + '_' + '*.csv')
text_data = concatenate('..\\filings', data_type_text + '_'+ obj_type + '_' + '*.csv')
############## Save id_data dataframe into local file id_data_10-Q.csv
id_data.to_csv('..\\filings\\' + data_type_id + '_'+ obj_type + '.csv', index = 0)

############## text_data modifications #####################
############## Calculate tone : tone = (n_pos - n_negation - n_neg)/nw
text_data['tone'] = (text_data['n_pos'] - text_data['n_negation'] - text_data['n_neg'])/text_data['nw']

############## Correct modal words labels in text_data
text_data.columns = ['accnum', 'nw', 'nvocab', 'n_neg', 'n_pos', 'n_uctt', 'n_lit', 'n_cstr', \
                     'n_modal_strong', 'n_modal_moderate', 'n_modal_weak', 'n_negation', 'tone']

############## Save text_data dataframe into local file text_data_10-Q.csv
text_data.to_csv('..\\filings\\' + data_type_text + '_'+ obj_type + '.csv', index = 0)

print('Number of ' + obj_type + ' in edgar from 1993 Q1 to 2020 Q1: ' + str(len(id_data.index)))
print('Number of ' + obj_type + ' parsed and downloaded: ' + str(len(text_data.index)))

Number of 10-Q in edgar from 1993 Q1 to 2020 Q1: 594017
Number of 10-Q parsed and downloaded: 575579


In [9]:
########################################################################################
######################## Merge ID_DATA with CRSP_COMPUSTAT #############################
########################################################################################

# id_data = concatenate('..\\filings', data_type_id + '_'+ obj_type + '_' + '*.csv')

############## prepare merge: ID_DATA
id_data['fd'] = id_data['fd'].str.replace('-', '')
id_data['rp'] = id_data['rp'].str.replace('-', '')
id_data['date_key'] = id_data['rp'].astype(str).str[:-2]

In [24]:
############## Left merge ID_DATA and CRSP_COMP, key not unique in both data sets
id_crsp_comp = pd.merge(id_data, crsp_comp, on = ['cik', 'date_key'], how = 'inner', validate = 'm:m')

### Drop duplicated rows in accnum (EDGAR) and cik-rp (COMPUSTAT), and rename sic, date_key, date and datadate
id_crsp_comp = id_crsp_comp[id_crsp_comp.duplicated('accnum') == False]
id_crsp_comp = id_crsp_comp[id_crsp_comp.duplicated(subset=['cik', 'rp']) == False]
id_crsp_comp = id_crsp_comp.rename(columns={'sic_y': 'SIC', 'date_key': 'cquarter', 'date': 'date_crsp', 'datadate': 'date_comp'})

### Reorder id_crsp_comp columns
# 1st line: merge keys
# 2nd line: extra id info
# 3rd line: financial raw data (not lagged variables)
# 4th line: financial raw data (lagged variables)
# 5th line: ready-to-use regression variables
id_crsp_comp = id_crsp_comp[['cusip', 'cik', 'rp', 'accnum', \
'name', 'gvkey', 'SIC', 'fd', 'date_crsp', 'date_comp', 'cquarter', 'fyearq', 'fqtr', 'incorp', 'state', 'addzip', 'costat', 'age', \
'actq', 'cheq', 'dpq', 'ibq', 'intanq', 'lctq', 'revtq', 'txditcq', 'xsgaq', \
'atq', 'lag_atq', 'ceqq', 'lag_ceqq', 'cshoq', 'lag_cshoq', 'dlcq', 'lag_dlcq', 'dlttq', 'lag_dlttq', 'prccq', 'lag_prccq', 'ibq', 'lag_ibq', \
'RET', 'STD_RET', 'EARN', 'LOSS', 'DEARN', 'STD_EARN', 'CFO', 'leap1_EARN', 'leap2_EARN', 'leap3_EARN', 'leap1_CFO', 'leap2_CFO', 'leap3_CFO', 'TACC', 'LAG_TA_REV', 'DSAR', 'PPE', 'SIC2']]

In [25]:
################## Inspect id_crsp_comp 
print('number of observations after merging with edgar data: ' + str(id_crsp_comp.shape[0]))
id_crsp_comp

number of observations after merging with edgar data: 303034


Unnamed: 0,cusip,cik,rp,accnum,name,gvkey,SIC,fd,date_crsp,date_comp,cquarter,fyearq,fqtr,incorp,state,addzip,costat,age,actq,cheq,dpq,ibq,intanq,lctq,revtq,txditcq,xsgaq,atq,lag_atq,ceqq,lag_ceqq,cshoq,lag_cshoq,dlcq,lag_dlcq,dlttq,lag_dlttq,prccq,lag_prccq,ibq.1,lag_ibq,RET,STD_RET,EARN,LOSS,DEARN,STD_EARN,CFO,leap1_EARN,leap2_EARN,leap3_EARN,leap1_CFO,leap2_CFO,leap3_CFO,TACC,LAG_TA_REV,DSAR,PPE,SIC2
0,54626810,60512,19930630,0000060512-94-000005,LOUISIANA LAND & EXPLORATION CO,6819.0,1311.0,19930813,1993-06-30,19930630,199306,1993.0,2.0,MD,MD,70112,I,11324,193.800,64.300,27.300,5.600,,208.200,189.300,136.100,23.900,1278.000,1203.400,424.200,421.300,28.729,28.647,86.500,0.000,356.300,379.500,42.5000,45.25,5.600,2.700,-0.059108,0.054464,0.004653,0.0,0.002410,0.002744,0.031660,-0.001408,0.003730,0.003372,0.037089,0.041509,-0.075597,-32.500,1.004737,-0.000249,1.996925,13
1,88579Y10,66740,19930630,0000066740-94-000015,MINNESOTA MINING & MANUFACTURING CO,7435.0,2670.0,19930813,1993-06-30,19930630,199306,1993.0,2.0,DE,DE,55144,A,17348,6382.000,650.000,264.000,331.000,,3365.000,3540.000,0.000,893.000,12145.000,12037.000,6590.000,6710.000,216.975,218.753,829.000,657.000,679.000,635.000,108.0000,109.50,331.000,330.000,-0.014779,0.036793,0.027499,0.0,0.000083,0.001605,0.038797,0.026019,0.023387,0.025088,0.050391,0.040150,-0.135689,-136.000,0.993188,-0.000249,0.932375,26
2,08750910,11860,19930930,0000011860-94-000005,BETHLEHEM STEEL CORP /DE/,2189.0,3312.0,19931112,1993-09-30,19930930,199309,1993.0,3.0,DE,DE,18016-7699,I,24715,1074.900,232.500,69.100,30.700,,908.400,1055.300,0.000,38.400,5168.098,5463.199,580.800,958.900,91.307,91.025,100.300,88.400,728.500,650.200,14.3750,18.75,30.700,-13.600,-0.270146,0.119124,0.005619,0.0,0.008109,0.007031,0.007871,-0.046942,0.002195,0.004469,0.025212,-0.013205,0.004332,-12.300,1.004723,-0.008823,1.315529,33
3,54626810,60512,19930930,0000060512-94-000007,LOUISIANA LAND & EXPLORATION CO,6819.0,1311.0,19931110,1993-09-30,19930930,199309,1993.0,3.0,MD,MD,70112,I,11416,180.200,9.400,28.200,-1.800,,195.100,187.900,124.800,23.700,1662.300,1278.000,417.100,424.200,33.137,28.729,0.000,86.500,747.700,356.300,44.6250,42.50,-1.800,5.600,0.025969,0.094378,-0.001408,1.0,-0.005790,0.004136,0.037089,0.003730,0.003372,0.000335,0.041509,-0.075597,0.026837,-49.200,0.941628,-0.031768,2.206729,13
4,88579Y10,66740,19930930,0000066740-94-000016,MINNESOTA MINING & MANUFACTURING CO,7435.0,2670.0,19931112,1993-09-30,19930930,199309,1993.0,3.0,DE,DE,55144,A,17440,6445.000,665.000,262.000,316.000,,3404.000,3481.000,0.000,859.000,12229.000,12145.000,6600.000,6590.000,215.791,216.975,796.000,829.000,682.000,679.000,102.8750,108.00,316.000,331.000,-0.076728,0.030508,0.026019,0.0,-0.001235,0.001592,0.050391,0.023387,0.025088,0.026336,0.040150,-0.135689,0.031711,-296.000,0.991107,-0.006669,0.944257,26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
306668,87182910,96021,20191228,0000096021-20-000017,SYSCO CORP,10247.0,5140.0,20200205,2019-12-31,20191231,201912,2020.0,2.0,DE,DE,77077,A,17807,8661.610,539.625,185.011,383.410,4879.128,6931.968,15025.042,142.301,2218.801,19372.034,18956.575,2527.526,2454.748,508.843,510.864,897.619,160.338,8654.524,9183.272,85.5400,79.40,383.410,453.781,-0.001390,0.025118,0.020226,0.0,-0.003712,0.005675,0.030749,,,,,,,-199.480,0.947773,-0.013420,,51
306669,87840910,96699,20191228,0001171843-20-000876,TECHNICAL COMMUNICATIONS CORP,10364.0,3663.0,20200211,2019-12-31,20191231,201912,2020.0,1.0,MA,MA,01742-2892,A,13271,2.146,0.748,0.006,-0.480,0.000,0.574,0.666,0.000,0.789,2.847,2.917,1.752,2.219,1.850,1.850,0.148,0.000,0.521,0.000,5.0500,2.50,-0.480,1.032,1.266332,0.797053,-0.164553,1.0,-0.518341,0.266651,-0.162153,,,,,,,-0.007,0.705519,-0.763456,1.803565,36
306670,86737U10,96793,20191231,0001564590-20-004619,SUNLINK HEALTH SYSTEMS INC,10380.0,5912.0,20200213,2019-12-31,20191231,201912,2020.0,2.0,OH,OH,30339,A,17197,14.384,4.607,0.350,0.203,1.295,6.440,12.805,0.000,0.994,22.823,25.811,14.762,14.810,6.983,6.987,0.782,3.270,0.851,0.937,1.0700,1.13,0.203,-0.143,-0.135859,0.028797,0.007865,0.0,0.013405,0.021058,-0.013483,,,,,,,,0.946070,0.025648,0.831467,59
306671,87288520,98338,20191130,0001213900-20-000888,TSR INC,10305.0,7371.0,20200113,2019-11-29,20191130,201911,2019.0,2.0,DE,DE,11788,A,15218,10.365,2.988,0.002,0.061,0.000,5.241,15.233,0.000,2.634,11.826,12.109,6.287,6.225,1.962,1.962,0.252,0.281,0.271,0.327,3.2001,4.29,0.061,-0.663,-0.315755,0.142523,0.005038,0.0,0.059790,0.027598,-0.008671,-0.079909,,,-0.032133,,,0.166,1.035098,0.046577,0.065654,73


In [26]:
########################################################################################
###################### Merge TEXT_DATA with ID_CRSP_COMPUSTAT ##########################
########################################################################################

############## Left merge ID_CRSP_COMP and TEXT_DATA, key unique in both data sets
id_crsp_comp_text = pd.merge(id_crsp_comp, text_data, on = ['accnum'], how = 'inner', validate = '1:1')
print('number of firm-quarters of merged id_crsp_comp_text: ' + str(id_crsp_comp_text.shape[0]))

number of firm-quarters of merged id_crsp_comp_text: 303034


In [27]:
############### Inspect if firm-quarter key is unique : YES, key is unique
# print(id_crsp_comp_text[id_crsp_comp_text.duplicated('accnum')])
# print(id_crsp_comp_text[id_crsp_comp_text.duplicated(subset=['cik', 'rp'])])

In [28]:
####################### Modify data type in ID_CRSP_COMP_TEXT
########### Define a function that changes pandas series data type to string
def columns_to_str (df, colnames):
    for col in colnames:
        df[col] = df[col].astype(str)
    return df

########### Apply columns_to_str to various identification variables
id_crsp_comp_text = columns_to_str(id_crsp_comp_text, ['cik', 'gvkey', 'fyearq', 'fqtr'])

########## Convert date variables to date format
id_crsp_comp_text['fd'] = pd.to_datetime(id_crsp_comp_text['fd'], format='%Y%m%d')
id_crsp_comp_text['rp'] = pd.to_datetime(id_crsp_comp_text['rp'], format='%Y%m%d')
id_crsp_comp_text['date_comp'] = pd.to_datetime(id_crsp_comp_text['date_comp'], format='%Y%m%d')

# id_crsp_comp_text['ipodate'] = pd.to_datetime(id_crsp_comp_text['ipodate'], format='%Y%m%d')

########## Convert SIC variables to integer
id_crsp_comp_text['SIC'] = id_crsp_comp_text['SIC'].astype(int)

########### Inspect column data types
# print(id_crsp_comp_text.dtypes)

In [29]:
########################################################################################
############################### Variable Creation ######################################
########################################################################################

############################## Main Variables ##########################################
######## NW: natural log of 1 + total number of words in the document
id_crsp_comp_text['NW'] = np.log(1 + id_crsp_comp_text['nw'])

######## TONE: number of net positive words (n_pos - n_neg - n_negations) per 1000 total words
id_crsp_comp_text['TONE'] = id_crsp_comp_text['tone']*1000

######## TLAG: Time lag between the news release date (CRSP date) and document filing date (EDGAR filing date)
id_crsp_comp_text['TLAG'] = (id_crsp_comp_text['fd'] - id_crsp_comp_text['date_crsp']).dt.days

######## NEG: An indicator variable takes the value of 1 when market-adjusted stock return (RET) is negative and is 0 otherwise
id_crsp_comp_text['NEG'] = 0 
id_crsp_comp_text.loc[id_crsp_comp_text['RET'] < 0, 'NEG'] = 1

############################## Control Variables #######################################
######## Size: Firm size, defined as the natural logarithm of market value of equity [at the beginning of the quarter] \
######## defined as [beginning-of-quarter] common share price (Compustat data item prccq) \
######## times [beginning-of-quarter] common shares outstanding (Compustat data item cshoq)
id_crsp_comp_text['SIZE'] = np.log(id_crsp_comp_text['lag_prccq']*id_crsp_comp_text['lag_cshoq'])

######## MTB: Market-to-book ratio, defined as [beginning-of-quarter] market value of equity \
######## defined as common share price (Compustat data item prccq) times common shares outstanding (Compustat data item cshoq) \
######## divided by [beginning-of-quarter] book value of equity (Compustat data item ceqq) 
id_crsp_comp_text['MTB'] = (id_crsp_comp_text['lag_prccq']*id_crsp_comp_text['lag_cshoq'])/id_crsp_comp_text['lag_ceqq']

######## LEV: Leverage, defined as [beginning-of-quarter] short term debt plus [beginning-of-quarter] long term debt \
######## (Compustat data item dlcq + Compustat data item dlttq) scaled by [beginning-of-quarter] total assets (Compustat data item atq)
id_crsp_comp_text['LEV'] = (id_crsp_comp_text['lag_dlcq'] + id_crsp_comp_text['lag_dlttq'])/id_crsp_comp_text['lag_atq']

######## AGE: log(1 + age from the first year the firm entered the CRSP dataset)
id_crsp_comp_text['AGE'] = np.log(1 + id_crsp_comp_text['age'])

In [30]:
########################################################################################
############################### Variable Screening #####################################
########################################################################################

############## Drop financial and utility firms (SIC codes between 6000 and 6999 and between 4900 and 4999, respectively)
del_fin = id_crsp_comp_text.loc[(id_crsp_comp_text['SIC'] >= 6000) & (id_crsp_comp_text['SIC'] <= 6999)].shape[0]
id_crsp_comp_text = id_crsp_comp_text.loc[(id_crsp_comp_text['SIC'] < 6000) | (id_crsp_comp_text['SIC'] > 6999)] # financial
del_ut = id_crsp_comp_text.loc[(id_crsp_comp_text['SIC'] >= 4900) & (id_crsp_comp_text['SIC'] <= 4999)].shape[0]
id_crsp_comp_text = id_crsp_comp_text.loc[(id_crsp_comp_text['SIC'] < 4900) | (id_crsp_comp_text['SIC'] > 4999)] # utility
print('number of firm-quarters from utility and financial firms: ' + str(del_fin + del_ut))

############## Drop files (firm-quarter) that have missing SIZE, MTB, LEV, or with non-positive total assets or book value of equity, \
############## or with [beginning-of-quarter] common share price less than $1
del_size = id_crsp_comp_text[id_crsp_comp_text['SIZE'].isnull()].shape[0]
id_crsp_comp_text = id_crsp_comp_text[id_crsp_comp_text['SIZE'].isnull() == False]
del_mtb = id_crsp_comp_text[id_crsp_comp_text['MTB'].isnull()].shape[0]
id_crsp_comp_text = id_crsp_comp_text[id_crsp_comp_text['MTB'].isnull() == False]
del_lev = id_crsp_comp_text[id_crsp_comp_text['LEV'].isnull()].shape[0]
id_crsp_comp_text = id_crsp_comp_text[id_crsp_comp_text['LEV'].isnull() == False]
del_atq = id_crsp_comp_text.loc[(id_crsp_comp_text['atq'] <= 0) | (id_crsp_comp_text['atq'].isnull())].shape[0]
id_crsp_comp_text = id_crsp_comp_text.loc[id_crsp_comp_text['atq'] > 0]
del_ceqq = id_crsp_comp_text.loc[(id_crsp_comp_text['ceqq'] <= 0) | (id_crsp_comp_text['ceqq'].isnull())].shape[0]
id_crsp_comp_text = id_crsp_comp_text.loc[id_crsp_comp_text['ceqq'] > 0]
del_prccq = id_crsp_comp_text.loc[(id_crsp_comp_text['lag_prccq'] < 1) | (id_crsp_comp_text['lag_prccq'].isnull())].shape[0]
id_crsp_comp_text = id_crsp_comp_text.loc[id_crsp_comp_text['lag_prccq'] >= 1]
print('number of firm-quarters with missing SIZE, MTB, LEV or non-positive total assets or non-positive book value of equity, or lag_prcc < 1: ' \
      + str(del_size + del_mtb + del_lev + del_atq + del_ceqq + del_prccq))

## Change SIC back to str
id_crsp_comp_text['SIC'] = id_crsp_comp_text['SIC'].astype(str)

########## Drop files (firm-quarter) that contain number of words less than 1% threshold
nwq01 = id_crsp_comp_text['nw'].quantile(.01)
print('number of words, 1% quantile: ' + str(nwq01))
del_word01 = id_crsp_comp_text.loc[id_crsp_comp_text['nw'] < nwq01].shape[0]
print('number of files that contain total words less than 1% threshold: ' + str(del_word01))
id_crsp_comp_text = id_crsp_comp_text.loc[id_crsp_comp_text['nw'] >= nwq01]

########## Drop files (firm-quarter) that contain negative TLAG
# Rationale to drop negative TLAG: By construction, filings with filing date prior to news release date cannot be addressing the news. 
# ANTICIPATION is not purpose of the paper.
del_TLAG0 = id_crsp_comp_text[id_crsp_comp_text['TLAG'] < 0].shape[0]
# print('number of files that contain negative TLAG: ' + str(del_TLAG0))
id_crsp_comp_text = id_crsp_comp_text[id_crsp_comp_text['TLAG'] >= 0]

########## Drop files (firm-quarter) that contain larger than 99% TLAG
tlagq99 = id_crsp_comp_text['TLAG'].quantile(.99)
print('TLAG 99% quantile: ' + str(tlagq99))
del_TLAG99 = id_crsp_comp_text.loc[id_crsp_comp_text['TLAG'] > tlagq99].shape[0]
print('number of files that contain negative or larger than 99% TLAG: ' + str(del_TLAG99 + del_TLAG0))
id_crsp_comp_text = id_crsp_comp_text.loc[id_crsp_comp_text['TLAG'] <= tlagq99]

############## Inspect sample size after variable screening
print('Number of firm-quarters after variable screening: ' + str(id_crsp_comp_text.shape[0]))

############## Winsorize SIZE, MTB, LEV
###### Define a function that winsorize a variable at 1% and 99% 
def winsorize (df, colnames):
    for col in colnames:
        varq01 = df[col].quantile(.01)
        varq99 = df[col].quantile(.99)
        df[col] = df[col].clip(varq01, varq99)
    return df

id_crsp_comp_text = winsorize(id_crsp_comp_text, ['SIZE', 'MTB', 'LEV'])

############## Save merged ID_CRSP_COMP_TEXT to csv file
id_crsp_comp_text.to_csv('..\\filings\\crsp_comp_edgar_' + obj_type + '.csv', index = 0)

number of firm-quarters from utility and financial firms: 82612
number of firm-quarters with missing SIZE, MTB, LEV or non-positive total assets or non-positive book value of equity, or lag_prcc < 1: 26450
number of words, 1% quantile: 1145.0
number of files that contain total words less than 1% threshold: 1934
TLAG 99% quantile: 52.0
number of files that contain negative or larger than 99% TLAG: 1697
Number of firm-quarters after variable screening: 190341


In [18]:
########################################################################################
############### Table 1: Summary Statistics and Correlation Matrix #####################
########################################################################################

############# Table 1 Panel A: Summary statistics for selected variables
######### Variable groups:
# 1st line: textual variables, generally consistent with LM's summary statistics
# 2nd line: fundamental variables (main)
# 3rd line: abtone
selected_vars = id_crsp_comp_text[['NW','nw', 'TONE','TLAG', \
                                   'RET', 'NEG', 'SIZE', 'MTB', 'LEV' \
                                 # 'AGE', 'age', 'EARN', 'STD_RET', 'STD_EARN', 'LOSS', 'DEARN' \
                                 ]]

T1PA = selected_vars.describe().transpose() 

############# Summary statistics for all raw and processed variables
full_summary = id_crsp_comp_text.describe().transpose()

############# Save T1PA
table_path = '..\\output\\Tables.xlsx'
if os.path.exists(table_path) == True:
    book = load_workbook(table_path)
    writer = pd.ExcelWriter(table_path, engine = 'openpyxl')
    writer.book = book
    writer.sheets = dict((ws.title, ws) for ws in book.worksheets)

    T1PA.to_excel(writer, sheet_name='T1PA_raw', float_format="%.4f")

    writer.save()
    writer.close()
    
else:
    T1PA.to_excel(table_path, sheet_name='T1PA_raw', float_format="%.4f")

T1PA

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
NW,190341.0,8.998194,0.784043,7.044033,8.458928,9.068777,9.538924,13.490002
nw,190341.0,10887.473256,9968.794523,1145.0,4716.0,8679.0,13889.0,722159.0
TONE,190341.0,-8.540406,6.800588,-64.54289,-12.440758,-7.436261,-3.745318,22.28739
TLAG,190341.0,39.45554,6.176093,0.0,36.0,40.0,45.0,52.0
RET,190341.0,0.008255,0.285653,-1.833079,-0.131376,-0.002584,0.12766,18.312252
NEG,190341.0,0.505861,0.499967,0.0,0.0,1.0,1.0,1.0
SIZE,190341.0,6.005431,1.979078,2.001575,4.546041,5.906342,7.304254,11.206073
MTB,190341.0,3.647577,4.505002,0.288135,1.355554,2.261444,3.992795,30.900821
LEV,190341.0,0.197492,0.189621,0.0,0.009608,0.161434,0.32721,0.724242


In [19]:
# full_summary

In [20]:
############# Table 1 Panel B: Correlation matrix for selected variables
######### pearson correlation
T1PB_pearson = selected_vars.corr(method='pearson')

# T1PB_pearson

In [21]:
######### spearman correlation
T1PB_spearman = selected_vars.corr(method='spearman')

# T1PB_spearman

In [22]:
######### Combine two correlation matrices. right-up matrix: pearson; left-down matrix: spearman 
for row in list(range(0, len(T1PB_spearman.index))):
    T1PB_spearman.iloc[row, row+1:] = T1PB_pearson.iloc[row, row+1:]
    
##### Save T1PB
table_path = '..\\output\\Tables.xlsx'
if os.path.exists(table_path) == True:
    book = load_workbook(table_path)
    writer = pd.ExcelWriter(table_path, engine = 'openpyxl')
    writer.book = book
    writer.sheets = dict((ws.title, ws) for ws in book.worksheets)

    T1PB_spearman.to_excel(writer, sheet_name='T1PB_raw', float_format="%.4f")

    writer.save()
    writer.close()
    
else:
    T1PB_spearman.to_excel(table_path, sheet_name='T1PB_raw', float_format="%.4f")

T1PB_spearman

Unnamed: 0,NW,nw,TONE,TLAG,RET,NEG,SIZE,MTB,LEV
NW,1.0,0.818686,-0.445295,-0.224474,-0.018596,0.007607,0.330459,0.095023,0.066745
nw,1.0,1.0,-0.377403,-0.121655,-0.013106,0.007425,0.234296,0.083641,0.069477
TONE,-0.454737,-0.454737,1.0,0.017566,0.024041,-0.019339,-0.072872,-0.028026,0.059154
TLAG,-0.295092,-0.295092,0.018932,1.0,-0.030915,0.045384,-0.409253,-0.038506,-0.026983
RET,-0.015184,-0.015184,0.032852,-0.05196,1.0,-0.664697,-0.031521,-0.018909,-0.005821
NEG,0.008848,0.008848,-0.022683,0.044987,-0.865966,1.0,-0.02546,0.012195,-0.002767
SIZE,0.342373,0.342373,-0.057625,-0.423105,0.015557,-0.027306,1.0,0.233385,0.119777
MTB,0.114767,0.114767,0.014027,-0.090102,-0.04585,0.026327,0.398114,1.0,0.056133
LEV,0.056743,0.056743,0.059015,-0.033975,0.002259,-0.004462,0.150865,-0.075261,1.0
