In [26]:
import pandas as pd
import numpy as np
from copy import copy

In [27]:
pd.set_option('mode.chained_assignment', None)

# Import the data

In [28]:
crsp_raw = pd.read_csv('CRSP stock price421.csv')

In [29]:
compustat_raw = pd.read_csv('fundamentals2010cc.csv')

In [30]:
def clean_data(df, type_dict):
    print('Cleaning date variables:')
    for v in type_dict['date_vars']:
        print(v)
        df[v] = pd.to_datetime(df[v], format = '%Y%m%d', errors = 'coerce')
        
    print('Cleaning numeric variables:')
    for v in type_dict['float_vars']:
        print(v)
        df[v] = pd.to_numeric(df[v], errors = 'coerce')
    
    print('Cleaning integer variables:')
    for v in type_dict['int_vars']:
        print(v)
        df[v] = pd.to_numeric(df[v], downcast = 'signed', errors = 'coerce')
        
    print('Final data types:')
    print(df.dtypes)
    
    return df

## Cleaning Up CRSP

In [31]:
crsp_raw

Unnamed: 0,PERMNO,date,NCUSIP,TICKER,COMNAM,DIVAMT,PRC,VOL,RET,BID,ASK,SHROUT,CFACPR,CFACSHR
0,10104,20100129,68389X10,ORCL,ORACLE CORP,0.05,23.06000,6068156.0,-0.057888,23.06000,23.07000,5011220.0,1.0,1.0
1,10104,20100226,68389X10,ORCL,ORACLE CORP,,24.65000,5771538.0,0.068951,24.64000,24.65000,5015000.0,1.0,1.0
2,10104,20100331,68389X10,ORCL,ORACLE CORP,,25.71000,6618577.0,0.043002,25.68000,25.69000,5019091.0,1.0,1.0
3,10104,20100430,68389X10,ORCL,ORACLE CORP,0.05,25.86750,5580407.0,0.008071,25.86000,25.87000,5029523.0,1.0,1.0
4,10104,20100528,68389X10,ORCL,ORACLE CORP,,22.57000,7406752.0,-0.127477,22.56000,22.58000,5026000.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60553,93436,20190830,88160R10,TSLA,TESLA INC,,225.61000,1340932.0,-0.066222,225.56000,225.62000,179127.0,5.0,5.0
60554,93436,20190930,88160R10,TSLA,TESLA INC,,240.87000,1365915.0,0.067639,240.78000,240.84000,180000.0,5.0,5.0
60555,93436,20191031,88160R10,TSLA,TESLA INC,,314.92001,2351125.0,0.307427,314.94000,315.07999,180245.0,5.0,5.0
60556,93436,20191129,88160R10,TSLA,TESLA INC,,329.94000,1578851.0,0.047695,329.94000,330.04999,180245.0,5.0,5.0


In [32]:
crsp_datatypes = {'date_vars': ['date'],
                 'float_vars': ['PRC', 'DIVAMT', 'BID', 'ASK', 'CFACPR', 'CFACSHR', "RET"],
                 'int_vars': ['SHROUT', 'VOL']}
crsp = clean_data(copy(crsp_raw), crsp_datatypes)

Cleaning date variables:
date
Cleaning numeric variables:
PRC
DIVAMT
BID
ASK
CFACPR
CFACSHR
RET
Cleaning integer variables:
SHROUT
VOL
Final data types:
PERMNO              int64
date       datetime64[ns]
NCUSIP             object
TICKER             object
COMNAM             object
DIVAMT            float64
PRC               float64
VOL               float64
RET               float64
BID               float64
ASK               float64
SHROUT            float64
CFACPR            float64
CFACSHR           float64
dtype: object


In [33]:
# Choose the right variables
crsp_names = {
              "RET" : "Return",
              'SHROUT': 'Shares Outstanding on Trading Day', 
              'COMNAM': 'Company Name',\
              'date': 'datadate', 
              'NCUSIP': 'cusip',
              "TICKER": "Ticker",
              'DIVAMT': 'Dividend Cash Amount',
              'PRC': 'Price',
              'BID': 'Bid',
              'ASK': 'Ask',
              'VOL': 'Volume on Trading Day',
              'CFACPR': 'Price Adjustment Factor',
              'CFACSHR': 'Share Adjustment Factor'}

crsp = crsp.rename(index = str, columns = crsp_names)
crsp = crsp[list(crsp_names.values())]

In [34]:
# Make a few more useful variables
crsp['Price'] = np.abs(crsp['Price'])
crsp['Shares Outstanding'] = crsp['Shares Outstanding on Trading Day'] * crsp['Share Adjustment Factor']
crsp['Volume'] = crsp['Volume on Trading Day'] * crsp['Share Adjustment Factor']
crsp['Market Cap'] = crsp['Shares Outstanding'] * crsp['Price'] / 1e6   #million

In [35]:
# Drop a few variables
crsp = crsp.drop(['Shares Outstanding on Trading Day', 'Volume on Trading Day', 'Bid', 'Ask', "Volume", "Price Adjustment Factor", "Share Adjustment Factor"], axis = 1)

In [36]:
# Check for unique identifier

def check_unique(dataframe, identifier_list):
    unique_identifier = dataframe.groupby(by = identifier_list).count()
    return unique_identifier.shape[0] == dataframe.shape[0]

print(check_unique(crsp, ['cusip', 'datadate']))

False


In [37]:
crsp["Dividend Cash Amount"].fillna(0,inplace=True)

In [38]:
crsp

Unnamed: 0,Return,Company Name,datadate,cusip,Ticker,Dividend Cash Amount,Price,Shares Outstanding,Market Cap
0,-0.057888,ORACLE CORP,2010-01-29,68389X10,ORCL,0.05,23.06000,5011220.0,115.558733
1,0.068951,ORACLE CORP,2010-02-26,68389X10,ORCL,0.00,24.65000,5015000.0,123.619750
2,0.043002,ORACLE CORP,2010-03-31,68389X10,ORCL,0.00,25.71000,5019091.0,129.040830
3,0.008071,ORACLE CORP,2010-04-30,68389X10,ORCL,0.05,25.86750,5029523.0,130.101186
4,-0.127477,ORACLE CORP,2010-05-28,68389X10,ORCL,0.00,22.57000,5026000.0,113.436820
...,...,...,...,...,...,...,...,...,...
60553,-0.066222,TESLA INC,2019-08-30,88160R10,TSLA,0.00,225.61000,895635.0,202.064212
60554,0.067639,TESLA INC,2019-09-30,88160R10,TSLA,0.00,240.87000,900000.0,216.783000
60555,0.307427,TESLA INC,2019-10-31,88160R10,TSLA,0.00,314.92001,901225.0,283.813786
60556,0.047695,TESLA INC,2019-11-29,88160R10,TSLA,0.00,329.94000,901225.0,297.350176


In [39]:
crsp.dropna(inplace = True)

In [40]:
crsp = crsp[crsp["Market Cap"] > 0]

In [41]:
crsp

Unnamed: 0,Return,Company Name,datadate,cusip,Ticker,Dividend Cash Amount,Price,Shares Outstanding,Market Cap
0,-0.057888,ORACLE CORP,2010-01-29,68389X10,ORCL,0.05,23.06000,5011220.0,115.558733
1,0.068951,ORACLE CORP,2010-02-26,68389X10,ORCL,0.00,24.65000,5015000.0,123.619750
2,0.043002,ORACLE CORP,2010-03-31,68389X10,ORCL,0.00,25.71000,5019091.0,129.040830
3,0.008071,ORACLE CORP,2010-04-30,68389X10,ORCL,0.05,25.86750,5029523.0,130.101186
4,-0.127477,ORACLE CORP,2010-05-28,68389X10,ORCL,0.00,22.57000,5026000.0,113.436820
...,...,...,...,...,...,...,...,...,...
60553,-0.066222,TESLA INC,2019-08-30,88160R10,TSLA,0.00,225.61000,895635.0,202.064212
60554,0.067639,TESLA INC,2019-09-30,88160R10,TSLA,0.00,240.87000,900000.0,216.783000
60555,0.307427,TESLA INC,2019-10-31,88160R10,TSLA,0.00,314.92001,901225.0,283.813786
60556,0.047695,TESLA INC,2019-11-29,88160R10,TSLA,0.00,329.94000,901225.0,297.350176


In [42]:
crsp.shape

(60259, 9)

## Cleaning up Compustat

In [43]:
compustat_raw

Unnamed: 0,gvkey,datadate,fyear,indfmt,consol,popsrc,datafmt,tic,cusip,conm,...,bkvlps,csho,dltt,dpvieb,ibc,lt,sale,wcap,costat,mkvalt
0,1045,20101231,2010.0,INDL,C,D,STD,AAL,02376R102,AMERICAN AIRLINES GROUP INC,...,-11.8309,333.450,9253.0,11635.0,-471.0,29033.0,22170.0,-1942.0,A,2597.5755
1,1045,20111231,2011.0,INDL,C,D,STD,AAL,02376R102,AMERICAN AIRLINES GROUP INC,...,-21.2099,335.268,6702.0,10548.0,-1979.0,30959.0,24022.0,-1873.0,A,117.3438
2,1045,20121231,2012.0,INDL,C,D,STD,AAL,02376R102,AMERICAN AIRLINES GROUP INC,...,-23.8210,335.292,7116.0,10831.0,-1876.0,31497.0,24855.0,-2232.0,A,266.5571
3,1045,20131231,2013.0,INDL,C,D,STD,AAL,02376R102,AMERICAN AIRLINES GROUP INC,...,-10.4608,261.069,15353.0,11133.0,-1834.0,45009.0,26712.0,517.0,A,6591.9923
4,1045,20141231,2014.0,INDL,C,D,STD,AAL,02376R102,AMERICAN AIRLINES GROUP INC,...,2.8976,697.475,16196.0,12259.0,2882.0,41750.0,42650.0,-1323.0,A,37405.5843
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5839,316056,20151231,2015.0,INDL,C,D,STD,ALLE,G0176J109,ALLEGION PLC,...,0.2667,95.991,1479.8,391.7,154.7,2255.6,2068.1,288.0,A,6327.7267
5840,316056,20161231,2016.0,INDL,C,D,STD,ALLE,G0176J109,ALLEGION PLC,...,1.1892,95.274,1415.6,413.8,231.2,2131.0,2238.0,399.7,A,6097.5360
5841,316056,20171231,2017.0,INDL,C,D,STD,ALLE,G0176J109,ALLEGION PLC,...,4.2246,95.062,1442.3,455.7,276.7,2136.5,2408.2,571.9,A,7563.1327
5842,316056,20181231,2018.0,INDL,C,D,STD,ALLE,G0176J109,ALLEGION PLC,...,6.8789,94.637,1409.5,472.1,435.4,2156.2,2731.7,410.8,A,7543.5153


In [44]:
# Define the data types
compustat_datatypes = {'date_vars': ['datadate'],
                 'float_vars': ['at', 'dltt', 'wcap', 'sale', 'csho', 'mkvalt', 'bkvlps', "lt", "dpvieb", "ibc"],
                 'int_vars': ['gvkey', 'fyear']}
compustat = clean_data(copy(compustat_raw), compustat_datatypes)

Cleaning date variables:
datadate
Cleaning numeric variables:
at
dltt
wcap
sale
csho
mkvalt
bkvlps
lt
dpvieb
ibc
Cleaning integer variables:
gvkey
fyear
Final data types:
gvkey                int32
datadate    datetime64[ns]
fyear              float64
indfmt              object
consol              object
popsrc              object
datafmt             object
tic                 object
cusip               object
conm                object
curcd               object
at                 float64
bkvlps             float64
csho               float64
dltt               float64
dpvieb             float64
ibc                float64
lt                 float64
sale               float64
wcap               float64
costat              object
mkvalt             float64
dtype: object


In [45]:
compustat_names = \
    {# Identifiers
     'gvkey': 'Gvkey',
     "tic": "Ticker",
     'fyear': 'Fiscal Year',
     'curcd': 'Currency', 
     'datadate': 'datadate',
     "cusip" : "cusip",
     
     # Balance Sheet
     'at': 'Total Assets',
     'dltt': 'Long Term Debt',
     'wcap':'Working Capital',
     "lt": "Total liabilities",
     "dpvieb": "depreciation",
     "ibc" : "Income before extraordinary items",
        
     # Income Statement
     'sale': 'Sales/Turnover',  
     'ibc' : 'Income before extraordinary items',
     
     # Cash Flow Statement
     # 'dqc': 'Depreciation and Amortization',
        
     # Market Data
     'csho': 'Shares Outstanding (Compustat)',
     # 'prccq': 'Price (Compustat)',
     'mkvalt': 'Market Value',
     'bkvlps': 'Book Value Per Share'}
compustat = compustat.rename(index = str, columns = compustat_names)
compustat = compustat[list(compustat_names.values())]

In [46]:
#compustat = compustat.set_index(['Cusip', 'datadate'])

In [47]:
compustat["cusip"] = compustat["cusip"].apply(lambda x: x[:8])

In [48]:
compustat.shape

(5844, 16)

In [49]:
compustat = compustat[compustat["Book Value Per Share"] != 0]
compustat["Book Equity"] = compustat["Shares Outstanding (Compustat)"] * compustat["Book Value Per Share"]
compustat = compustat[compustat["Book Equity"] > 0]

In [50]:
import datetime
import dateutil.relativedelta

data_compu = pd.DataFrame(columns = compustat.columns)
k = 0
p = 0
for i in range(len(compustat)):
    for o in range(12):
        data_compu = data_compu.append(compustat.iloc[i:i+1, :])
        data_compu["datadate"].iloc[p:p+1][0] = data_compu["datadate"].iloc[p:p+1][0] - dateutil.relativedelta.relativedelta(months=11-k)
        k+=1
        p+=1
        if k >=12:
            k = 0
        

In [51]:
data_compu

Unnamed: 0,Gvkey,Ticker,Fiscal Year,Currency,datadate,cusip,Total Assets,Long Term Debt,Working Capital,Total liabilities,depreciation,Income before extraordinary items,Sales/Turnover,Shares Outstanding (Compustat),Market Value,Book Value Per Share,Book Equity
4,1045,AAL,2014.0,USD,2014-01-31,02376R10,43771.0,16196.0,-1323.0,41750.0,12259.0,2882.0,42650.0,697.475,37405.5843,2.8976,2021.003560
4,1045,AAL,2014.0,USD,2014-02-28,02376R10,43771.0,16196.0,-1323.0,41750.0,12259.0,2882.0,42650.0,697.475,37405.5843,2.8976,2021.003560
4,1045,AAL,2014.0,USD,2014-03-31,02376R10,43771.0,16196.0,-1323.0,41750.0,12259.0,2882.0,42650.0,697.475,37405.5843,2.8976,2021.003560
4,1045,AAL,2014.0,USD,2014-04-30,02376R10,43771.0,16196.0,-1323.0,41750.0,12259.0,2882.0,42650.0,697.475,37405.5843,2.8976,2021.003560
4,1045,AAL,2014.0,USD,2014-05-31,02376R10,43771.0,16196.0,-1323.0,41750.0,12259.0,2882.0,42650.0,697.475,37405.5843,2.8976,2021.003560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5843,316056,ALLE,2019.0,USD,2019-08-31,G0176J10,2967.2,1483.2,494.8,2206.8,494.6,402.1,2854.0,92.724,11547.8470,8.1683,757.397449
5843,316056,ALLE,2019.0,USD,2019-09-30,G0176J10,2967.2,1483.2,494.8,2206.8,494.6,402.1,2854.0,92.724,11547.8470,8.1683,757.397449
5843,316056,ALLE,2019.0,USD,2019-10-31,G0176J10,2967.2,1483.2,494.8,2206.8,494.6,402.1,2854.0,92.724,11547.8470,8.1683,757.397449
5843,316056,ALLE,2019.0,USD,2019-11-30,G0176J10,2967.2,1483.2,494.8,2206.8,494.6,402.1,2854.0,92.724,11547.8470,8.1683,757.397449


In [52]:
data_compu["Working Capital"] = data_compu["Working Capital"] / 1e6
data_compu["depreciation"] = data_compu["depreciation"] / 1e6
data_compu["Sales/Turnover"] = data_compu["Sales/Turnover"] / 1e6
data_compu

Unnamed: 0,Gvkey,Ticker,Fiscal Year,Currency,datadate,cusip,Total Assets,Long Term Debt,Working Capital,Total liabilities,depreciation,Income before extraordinary items,Sales/Turnover,Shares Outstanding (Compustat),Market Value,Book Value Per Share,Book Equity
4,1045,AAL,2014.0,USD,2014-01-31,02376R10,43771.0,16196.0,-0.001323,41750.0,0.012259,2882.0,0.042650,697.475,37405.5843,2.8976,2021.003560
4,1045,AAL,2014.0,USD,2014-02-28,02376R10,43771.0,16196.0,-0.001323,41750.0,0.012259,2882.0,0.042650,697.475,37405.5843,2.8976,2021.003560
4,1045,AAL,2014.0,USD,2014-03-31,02376R10,43771.0,16196.0,-0.001323,41750.0,0.012259,2882.0,0.042650,697.475,37405.5843,2.8976,2021.003560
4,1045,AAL,2014.0,USD,2014-04-30,02376R10,43771.0,16196.0,-0.001323,41750.0,0.012259,2882.0,0.042650,697.475,37405.5843,2.8976,2021.003560
4,1045,AAL,2014.0,USD,2014-05-31,02376R10,43771.0,16196.0,-0.001323,41750.0,0.012259,2882.0,0.042650,697.475,37405.5843,2.8976,2021.003560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5843,316056,ALLE,2019.0,USD,2019-08-31,G0176J10,2967.2,1483.2,0.000495,2206.8,0.000495,402.1,0.002854,92.724,11547.8470,8.1683,757.397449
5843,316056,ALLE,2019.0,USD,2019-09-30,G0176J10,2967.2,1483.2,0.000495,2206.8,0.000495,402.1,0.002854,92.724,11547.8470,8.1683,757.397449
5843,316056,ALLE,2019.0,USD,2019-10-31,G0176J10,2967.2,1483.2,0.000495,2206.8,0.000495,402.1,0.002854,92.724,11547.8470,8.1683,757.397449
5843,316056,ALLE,2019.0,USD,2019-11-30,G0176J10,2967.2,1483.2,0.000495,2206.8,0.000495,402.1,0.002854,92.724,11547.8470,8.1683,757.397449


In [53]:
compustat_model_1 = data_compu.drop(["Long Term Debt","Total Assets","Working Capital","Total liabilities","depreciation","Income before extraordinary items","Sales/Turnover","Market Value"], axis = 1)
compustat_model_1.dropna(inplace = True)
compustat_model_2 = data_compu.drop(["Long Term Debt","Total liabilities","Sales/Turnover","Market Value"], axis = 1)
compustat_model_2.dropna(inplace = True)
compustat_model_3 = data_compu
compustat_model_3.dropna(inplace = True)

In [54]:
print("model_1:", compustat_model_1.shape)
print("model_2:", compustat_model_2.shape)
print("model_3:", compustat_model_3.shape)

model_1: (56148, 9)
model_2: (42384, 13)
model_3: (41088, 17)


## Firm characteristics

In [55]:
crsp_sorted = crsp.sort_values(["cusip", "datadate"])
crsp_sorted.drop_duplicates(subset = ["cusip", "datadate"], keep='first', inplace = True)
crsp_sorted.shape

(60154, 9)

In [56]:
compustat_model_1_sorted = compustat_model_1.sort_values(["cusip", "datadate"])
compustat_model_2_sorted = compustat_model_2.sort_values(["cusip", "datadate"])
compustat_model_3_sorted = compustat_model_3.sort_values(["cusip", "datadate"])
print("model_1:", compustat_model_1.shape)
print("model_2:", compustat_model_2.shape)
print("model_3:", compustat_model_3.shape)

model_1: (56148, 9)
model_2: (42384, 13)
model_3: (41088, 17)


Create a new dataframe to store results.

In [57]:
def last_day_of_month(any_day):
    next_month = any_day.replace(day=28) + datetime.timedelta(days=4)  # this will never fail
    return next_month - datetime.timedelta(days=next_month.day)

In [58]:
crsp_sorted["datadate"] = crsp_sorted["datadate"].apply(lambda x: last_day_of_month(x))

In [59]:
data_crsp_comp_model_1 = pd.merge(compustat_model_1_sorted, crsp_sorted, on = ["cusip","datadate"], how = "inner")
data_crsp_comp_model_2 = pd.merge(compustat_model_2_sorted, crsp_sorted, on = ["cusip","datadate"], how = "inner")
data_crsp_comp_model_3 = pd.merge(compustat_model_3_sorted, crsp_sorted, on = ["cusip","datadate"], how = "inner")
del[data_crsp_comp_model_1["Ticker_y"]]
del[data_crsp_comp_model_2["Ticker_y"]]
del[data_crsp_comp_model_3["Ticker_y"]]
#data_crsp_comp["Book equity"] = data_crsp_comp["Shares Outstanding"] * data_crsp_comp["Book Value Per Share"]

#data_crsp_comp = data_crsp_comp[data_crsp_comp["Book equity"] > 0]
print("model_1:", data_crsp_comp_model_1.shape)
print("model_2:", data_crsp_comp_model_2.shape)
print("model_3:", data_crsp_comp_model_3.shape)

model_1: (46359, 15)
model_2: (34116, 19)
model_3: (33337, 23)


In [60]:
data_crsp_comp_model_1.reset_index(drop=True).to_csv("data_crsp_comp_1.csv")
data_crsp_comp_model_2.reset_index(drop=True).to_csv("data_crsp_comp_2.csv")
data_crsp_comp_model_3.reset_index(drop=True).to_csv("data_crsp_comp_3.csv")

In [61]:
data_model_list = ["data_crsp_comp_model_1", "data_crsp_comp_model_2", "data_crsp_comp_model_3"]

## new table

In [123]:
charac_1 = data_crsp_comp_model_1[["Ticker_x", "datadate"]]
charac_2 = data_crsp_comp_model_2[["Ticker_x", "datadate"]]
charac_3 = data_crsp_comp_model_3[["Ticker_x", "datadate"]]
charac_list = ["charac_1", "charac_2", "charac_3"]

### logsize

In [124]:
for i, (cha, mod) in enumerate(zip(charac_list, data_model_list)):
    eval(cha)["LogSize"] = np.log(eval(mod)["Market Cap"])
    

### return2-12

In [126]:
for i, (cha, mod) in enumerate(zip(charac_list, data_model_list)):
    eval(cha).insert(len(eval(cha).columns), 'Return_2_12', 0)
    for name, group in eval(mod).groupby('Ticker_x'):
        if len(group) > 12:
            for i in range(12, len(group)):
                return_2_12 = group.iloc[i - 2].loc["Price"] / group.iloc[i - 12].loc["Price"] - 1
            #print(return_2_12)
                eval(cha).loc[group.index[i], "Return_2_12"] = return_2_12

### logissues

In [127]:
for i in range(1,3):
    cha, mod = charac_list[i], data_model_list[i]
    eval(cha).insert(len(eval(cha).columns),  'LogIssues', 0)
    for name, group in eval(mod).groupby('Ticker_x'):
        if len(group) > 36:
            for i in range(36, len(group)):
                LogIssues = np.log(group.iloc[i - 1].loc["Shares Outstanding"] / group.iloc[i - 36].loc["Shares Outstanding"])
                eval(cha).loc[group.index[i], "LogIssues"] = LogIssues

### logB/M

In [128]:
for i, (cha, mod) in enumerate(zip(charac_list, data_model_list)):
    eval(cha)["LogBM"] = np.log(eval(mod)["Book Equity"]/ eval(mod)["Market Cap"])

### Accruals

In [129]:
for i in range(1,3):
    cha, mod = charac_list[i], data_model_list[i]
    eval(cha)["Accruals"] = eval(mod)["Working Capital"] - eval(mod)["depreciation"]

### ROA

In [131]:
for i in range(1,3):
    cha, mod = charac_list[i], data_model_list[i]
    eval(cha)["ROA"] = eval(mod)["Income before extraordinary items"] / eval(mod)["Total Assets"]

### Return%

In [133]:
for i in range(0,3):
    cha, mod = charac_list[i], data_model_list[i]
    eval(cha)["Return"] = eval(mod)["Return"]*100

### LogAG

In [135]:
for i in range(1,3):
    cha, mod = charac_list[i], data_model_list[i]
    eval(cha).insert(len(eval(cha).columns),  'LogAG', 0)
    for name, group in eval(mod).groupby('Ticker_x'):
        if len(group) > 12:
            for i in range(12, len(group)):
                LogAG = np.log(group.iloc[i - 1].loc["Total Assets"] / group.iloc[i - 12].loc["Total Assets"])
                eval(cha).loc[group.index[i], "LogAG"] = LogAG

### DY

In [136]:
charac_3.insert(len(charac_3.columns), 'DY', 0)
for name, group in data_crsp_comp_model_3.groupby('Ticker_x'):
    if len(group) > 12:
        for i in range(12, len(group)):
            DY = group.iloc[i].loc["Dividend Cash Amount"] / group.iloc[i].loc["Price"]
            charac_3.loc[group.index[i], "DY"] =  DY


Unnamed: 0,Ticker_x,datadate,LogSize,Return_2_12,LogIssues,LogBM,Accruals,ROA,Return,LogAG,DY
0,AES,2010-01-31,2.131983,0.000000,0.000000,6.643416,-0.007792,0.022710,-5.1089,0.000000,0.000000
1,AES,2010-02-28,2.055969,0.000000,0.000000,6.719431,-0.007792,0.022710,-7.4426,0.000000,0.000000
2,AES,2010-03-31,1.996891,0.000000,0.000000,6.778508,-0.007792,0.022710,-5.9025,0.000000,0.000000
3,AES,2010-04-30,2.216551,0.000000,0.000000,6.558848,-0.007792,0.022710,4.9091,0.000000,0.000000
4,AES,2010-05-31,2.100289,0.000000,0.000000,6.675110,-0.007792,0.022710,-11.0052,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
33332,RCL,2019-08-31,3.084339,-0.011176,-0.026855,6.321884,-0.016873,0.062915,-10.3662,0.090447,0.000000
33333,RCL,2019-09-30,3.122442,-0.104664,-0.026855,6.283782,-0.016873,0.062915,4.6318,0.090447,0.007200
33334,RCL,2019-10-31,3.127381,-0.004297,-0.023677,6.278843,-0.016873,0.062915,0.4616,0.090447,0.000000
33335,RCL,2019-11-30,3.225252,-0.041921,-0.023343,6.180972,-0.016873,0.062915,10.2821,0.090447,0.000000


### Logreturn13-36

In [137]:
charac_3.insert(len(charac_3.columns), 'Return_13_36', 0)
for name, group in data_crsp_comp_model_3.groupby('Ticker_x'):
    if len(group) > 36:
        for i in range(36, len(group)):
            return_13_36 = np.log(group.iloc[i - 13].loc["Price"] / group.iloc[i - 36].loc["Price"])
            #print(return_2_12)
            charac_3.loc[group.index[i], "Return_13_36"] = return_13_36


Unnamed: 0,Ticker_x,datadate,LogSize,Return_2_12,LogIssues,LogBM,Accruals,ROA,Return,LogAG,DY,Return_13_36
0,AES,2010-01-31,2.131983,0.000000,0.000000,6.643416,-0.007792,0.022710,-5.1089,0.000000,0.000000,0.000000
1,AES,2010-02-28,2.055969,0.000000,0.000000,6.719431,-0.007792,0.022710,-7.4426,0.000000,0.000000,0.000000
2,AES,2010-03-31,1.996891,0.000000,0.000000,6.778508,-0.007792,0.022710,-5.9025,0.000000,0.000000,0.000000
3,AES,2010-04-30,2.216551,0.000000,0.000000,6.558848,-0.007792,0.022710,4.9091,0.000000,0.000000,0.000000
4,AES,2010-05-31,2.100289,0.000000,0.000000,6.675110,-0.007792,0.022710,-11.0052,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
33332,RCL,2019-08-31,3.084339,-0.011176,-0.026855,6.321884,-0.016873,0.062915,-10.3662,0.090447,0.000000,0.461034
33333,RCL,2019-09-30,3.122442,-0.104664,-0.026855,6.283782,-0.016873,0.062915,4.6318,0.090447,0.007200,0.491943
33334,RCL,2019-10-31,3.127381,-0.004297,-0.023677,6.278843,-0.016873,0.062915,0.4616,0.090447,0.000000,0.524957
33335,RCL,2019-11-30,3.225252,-0.041921,-0.023343,6.180972,-0.016873,0.062915,10.2821,0.090447,0.000000,0.257307


### LogIssues_1y

In [138]:
charac_3.insert(len(charac_3.columns), 'LogIssues_1y', 0)
for name, group in data_crsp_comp_model_3.groupby('Ticker_x'):
    if len(group) > 12:
        for i in range(12, len(group)):
            LogIssues_1y = np.log(group.iloc[i - 1].loc["Shares Outstanding"] / group.iloc[i - 12].loc["Shares Outstanding"])
            charac_3.loc[group.index[i], "LogIssues_1y"] =  LogIssues_1y



Unnamed: 0,Ticker_x,datadate,LogSize,Return_2_12,LogIssues,LogBM,Accruals,ROA,Return,LogAG,DY,Return_13_36,LogIssues_1y
0,AES,2010-01-31,2.131983,0.000000,0.000000,6.643416,-0.007792,0.022710,-5.1089,0.000000,0.000000,0.000000,0.000000
1,AES,2010-02-28,2.055969,0.000000,0.000000,6.719431,-0.007792,0.022710,-7.4426,0.000000,0.000000,0.000000,0.000000
2,AES,2010-03-31,1.996891,0.000000,0.000000,6.778508,-0.007792,0.022710,-5.9025,0.000000,0.000000,0.000000,0.000000
3,AES,2010-04-30,2.216551,0.000000,0.000000,6.558848,-0.007792,0.022710,4.9091,0.000000,0.000000,0.000000,0.000000
4,AES,2010-05-31,2.100289,0.000000,0.000000,6.675110,-0.007792,0.022710,-11.0052,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
33332,RCL,2019-08-31,3.084339,-0.011176,-0.026855,6.321884,-0.016873,0.062915,-10.3662,0.090447,0.000000,0.461034,0.002824
33333,RCL,2019-09-30,3.122442,-0.104664,-0.026855,6.283782,-0.016873,0.062915,4.6318,0.090447,0.007200,0.491943,0.002824
33334,RCL,2019-10-31,3.127381,-0.004297,-0.023677,6.278843,-0.016873,0.062915,0.4616,0.090447,0.000000,0.524957,0.002700
33335,RCL,2019-11-30,3.225252,-0.041921,-0.023343,6.180972,-0.016873,0.062915,10.2821,0.090447,0.000000,0.257307,0.003034


### Turnover

In [139]:
charac_3.insert(len(charac_3.columns), 'Turnover', 0)
for name, group in data_crsp_comp_model_3.groupby('Ticker_x'):
    if len(group) > 12:
        for i in range(12, len(group)):
            Turnover = np.mean(group.iloc[i - 12 : i - 1]["Sales/Turnover"])
            charac_3.loc[group.index[i], "Turnover"] =  Turnover



Unnamed: 0,Ticker_x,datadate,LogSize,Return_2_12,LogIssues,LogBM,Accruals,ROA,Return,LogAG,DY,Return_13_36,LogIssues_1y,Turnover
0,AES,2010-01-31,2.131983,0.000000,0.000000,6.643416,-0.007792,0.022710,-5.1089,0.000000,0.000000,0.000000,0.000000,0.000000
1,AES,2010-02-28,2.055969,0.000000,0.000000,6.719431,-0.007792,0.022710,-7.4426,0.000000,0.000000,0.000000,0.000000,0.000000
2,AES,2010-03-31,1.996891,0.000000,0.000000,6.778508,-0.007792,0.022710,-5.9025,0.000000,0.000000,0.000000,0.000000,0.000000
3,AES,2010-04-30,2.216551,0.000000,0.000000,6.558848,-0.007792,0.022710,4.9091,0.000000,0.000000,0.000000,0.000000,0.000000
4,AES,2010-05-31,2.100289,0.000000,0.000000,6.675110,-0.007792,0.022710,-11.0052,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33332,RCL,2019-08-31,3.084339,-0.011176,-0.026855,6.321884,-0.016873,0.062915,-10.3662,0.090447,0.000000,0.461034,0.002824,0.010288
33333,RCL,2019-09-30,3.122442,-0.104664,-0.026855,6.283782,-0.016873,0.062915,4.6318,0.090447,0.007200,0.491943,0.002824,0.010421
33334,RCL,2019-10-31,3.127381,-0.004297,-0.023677,6.278843,-0.016873,0.062915,0.4616,0.090447,0.000000,0.524957,0.002700,0.010553
33335,RCL,2019-11-30,3.225252,-0.041921,-0.023343,6.180972,-0.016873,0.062915,10.2821,0.090447,0.000000,0.257307,0.003034,0.010686


### Debtprice

In [140]:
charac_3["Debtprice"] = data_crsp_comp_model_3["Total liabilities"] / data_crsp_comp_model_3["Market Value"]


Unnamed: 0,Ticker_x,datadate,LogSize,Return_2_12,LogIssues,LogBM,Accruals,ROA,Return,LogAG,DY,Return_13_36,LogIssues_1y,Turnover,Debtprice
0,AES,2010-01-31,2.131983,0.000000,0.000000,6.643416,-0.007792,0.022710,-5.1089,0.000000,0.000000,0.000000,0.000000,0.000000,3.131224
1,AES,2010-02-28,2.055969,0.000000,0.000000,6.719431,-0.007792,0.022710,-7.4426,0.000000,0.000000,0.000000,0.000000,0.000000,3.131224
2,AES,2010-03-31,1.996891,0.000000,0.000000,6.778508,-0.007792,0.022710,-5.9025,0.000000,0.000000,0.000000,0.000000,0.000000,3.131224
3,AES,2010-04-30,2.216551,0.000000,0.000000,6.558848,-0.007792,0.022710,4.9091,0.000000,0.000000,0.000000,0.000000,0.000000,3.131224
4,AES,2010-05-31,2.100289,0.000000,0.000000,6.675110,-0.007792,0.022710,-11.0052,0.000000,0.000000,0.000000,0.000000,0.000000,3.131224
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33332,RCL,2019-08-31,3.084339,-0.011176,-0.026855,6.321884,-0.016873,0.062915,-10.3662,0.090447,0.000000,0.461034,0.002824,0.010288,0.630859
33333,RCL,2019-09-30,3.122442,-0.104664,-0.026855,6.283782,-0.016873,0.062915,4.6318,0.090447,0.007200,0.491943,0.002824,0.010421,0.630859
33334,RCL,2019-10-31,3.127381,-0.004297,-0.023677,6.278843,-0.016873,0.062915,0.4616,0.090447,0.000000,0.524957,0.002700,0.010553,0.630859
33335,RCL,2019-11-30,3.225252,-0.041921,-0.023343,6.180972,-0.016873,0.062915,10.2821,0.090447,0.000000,0.257307,0.003034,0.010686,0.630859


### Salesprice

In [141]:
charac_3["Salesprice"] = data_crsp_comp_model_3["Sales/Turnover"] / data_crsp_comp_model_3["Market Value"] *1e6


Unnamed: 0,Ticker_x,datadate,LogSize,Return_2_12,LogIssues,LogBM,Accruals,ROA,Return,LogAG,DY,Return_13_36,LogIssues_1y,Turnover,Debtprice,Salesprice
0,AES,2010-01-31,2.131983,0.000000,0.000000,6.643416,-0.007792,0.022710,-5.1089,0.000000,0.000000,0.000000,0.000000,0.000000,3.131224,1.735318
1,AES,2010-02-28,2.055969,0.000000,0.000000,6.719431,-0.007792,0.022710,-7.4426,0.000000,0.000000,0.000000,0.000000,0.000000,3.131224,1.735318
2,AES,2010-03-31,1.996891,0.000000,0.000000,6.778508,-0.007792,0.022710,-5.9025,0.000000,0.000000,0.000000,0.000000,0.000000,3.131224,1.735318
3,AES,2010-04-30,2.216551,0.000000,0.000000,6.558848,-0.007792,0.022710,4.9091,0.000000,0.000000,0.000000,0.000000,0.000000,3.131224,1.735318
4,AES,2010-05-31,2.100289,0.000000,0.000000,6.675110,-0.007792,0.022710,-11.0052,0.000000,0.000000,0.000000,0.000000,0.000000,3.131224,1.735318
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33332,RCL,2019-08-31,3.084339,-0.011176,-0.026855,6.321884,-0.016873,0.062915,-10.3662,0.090447,0.000000,0.461034,0.002824,0.010288,0.630859,0.392820
33333,RCL,2019-09-30,3.122442,-0.104664,-0.026855,6.283782,-0.016873,0.062915,4.6318,0.090447,0.007200,0.491943,0.002824,0.010421,0.630859,0.392820
33334,RCL,2019-10-31,3.127381,-0.004297,-0.023677,6.278843,-0.016873,0.062915,0.4616,0.090447,0.000000,0.524957,0.002700,0.010553,0.630859,0.392820
33335,RCL,2019-11-30,3.225252,-0.041921,-0.023343,6.180972,-0.016873,0.062915,10.2821,0.090447,0.000000,0.257307,0.003034,0.010686,0.630859,0.392820


In [143]:
print("model_1:", charac_1.shape)
print("model_2:", charac_2.shape)
print("model_3:", charac_3.shape)

model_1: (46359, 6)
model_2: (34116, 10)
model_3: (33337, 16)


In [144]:
## model 1
for i in range(3):
    eval(charac_list[i])["LogBM"] = eval(charac_list[i])["LogBM"]/10
    eval(charac_list[i])["LogBM"] = eval(charac_list[i])["LogBM"]*(-1)
    #eval(charac_list[i])["Return"] = eval(charac_list[i])["Return"] + 1

In [150]:
## model 2
for i in range(1,3):
    eval(charac_list[i])["LogIssues"] = eval(charac_list[i])["LogIssues"]* -1
    eval(charac_list[i])["Accruals"] = eval(charac_list[i])["Accruals"]* 4


In [146]:
## model 3
for i in range(2,3):
    eval(charac_list[i])["DY"] = eval(charac_list[i])["DY"]* 10
    eval(charac_list[i])["Turnover"] = eval(charac_list[i])["Turnover"]* 4
    eval(charac_list[i])["LogIssues_1y"] = eval(charac_list[i])["LogIssues_1y"]* -10
    eval(charac_list[i])["Salesprice"] = eval(charac_list[i])["Salesprice"]* 4
    #eval(charac_list[i])["LogIssues_1y"] = eval(charac_list[i])["LogIssues_1y"]* -1
    

In [151]:
for i in range(3):
    eval(charac_list[i]).reset_index(drop = True).to_csv(charac_list[i]+str(".csv"))

In [152]:
print("model_1", "\n", np.mean(charac_1))
print("model_2", "\n", np.mean(charac_2))
print("model_3", "\n", np.mean(charac_3))

model_1 
 LogSize        2.934720
Return_2_12    0.111625
LogBM         -0.565993
Return         1.467716
dtype: float64
model_2 
 LogSize        2.953313
Return_2_12    0.121850
LogIssues      0.009103
LogBM         -0.542149
Accruals      -0.020479
ROA            0.074050
Return         1.567177
LogAG          0.075497
dtype: float64
model_3 
 LogSize         2.944763
Return_2_12     0.124923
LogIssues       0.009580
LogBM          -0.541997
Accruals       -0.020917
ROA             0.073417
Return          1.564073
LogAG           0.076484
DY              0.015185
Return_13_36    0.134708
LogIssues_1y    0.046170
Turnover        0.077602
Debtprice       0.515623
Salesprice      2.850461
dtype: float64


In [153]:
print("model_1", "\n", np.std(charac_1))
print("model_2", "\n", np.std(charac_2))
print("model_3", "\n", np.std(charac_3))

model_1 
 LogSize        1.150383
Return_2_12    0.276324
LogBM          0.093749
Return         7.485350
dtype: float64
model_2 
 LogSize        1.201619
Return_2_12    0.297889
LogIssues      0.104605
LogBM          0.091243
Accruals       0.091216
ROA            0.074154
Return         7.846301
LogAG          0.183047
dtype: float64
model_3 
 LogSize         1.193928
Return_2_12     0.320525
LogIssues       0.102328
LogBM           0.090621
Accruals        0.092144
ROA             0.072508
Return          7.864085
LogAG           0.186510
DY              0.235377
Return_13_36    0.333819
LogIssues_1y    0.627729
Turnover        0.172311
Debtprice       0.543717
Salesprice      3.603817
dtype: float64
