In [155]:
import pandas as pd
import numpy as np
from copy import copy

In [156]:
pd.set_option('mode.chained_assignment', None)

# Import the data

In [157]:
#crsp_raw = pd.read_csv('CRSP stock price421.csv')
crsp_raw = pd.read_csv('stockdata.csv')
crsp_raw = crsp_raw.drop(columns = 'Unnamed: 0')
crsp_raw

Unnamed: 0,PERMNO,date,NCUSIP,TICKER,COMNAM,DIVAMT,PRC,VOL,RET,BID,ASK,SHROUT,CFACPR,CFACSHR
0,10104,2012-01-31,68389X10,ORCL,ORACLE CORP,0.06,28.21000,7248771.0,0.102144,28.19000,28.20000,5025837.0,1.0,1.0
1,10104,2012-02-29,68389X10,ORCL,ORACLE CORP,,29.25500,5679684.0,0.037044,29.26000,29.27000,4979000.0,1.0,1.0
2,10104,2012-03-30,68389X10,ORCL,ORACLE CORP,,29.16000,8170098.0,-0.003247,29.15000,29.16000,4975106.0,1.0,1.0
3,10104,2012-04-30,68389X10,ORCL,ORACLE CORP,0.06,29.40000,5364162.0,0.010288,29.39000,29.40000,4975106.0,1.0,1.0
4,10104,2012-05-31,68389X10,ORCL,ORACLE CORP,,26.47000,6501551.0,-0.09966,26.48000,26.48000,4905000.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48868,93436,2019-08-30,88160R10,TSLA,TESLA INC,,225.61000,1340932.0,-0.066222,225.56000,225.62000,179127.0,5.0,5.0
48869,93436,2019-09-30,88160R10,TSLA,TESLA INC,,240.87000,1365915.0,0.067639,240.78000,240.84000,180000.0,5.0,5.0
48870,93436,2019-10-31,88160R10,TSLA,TESLA INC,,314.92001,2351125.0,0.307427,314.94000,315.07999,180245.0,5.0,5.0
48871,93436,2019-11-29,88160R10,TSLA,TESLA INC,,329.94000,1578851.0,0.047695,329.94000,330.04999,180245.0,5.0,5.0


In [158]:
compustat_raw = pd.read_csv('fundamentals2010c.csv')

In [159]:
def clean_data(df, type_dict):
    print('Cleaning date variables:')
    for v in type_dict['date_vars']:
        print(v)
        df[v] = pd.to_datetime(df[v], format = '%Y%m%d', errors = 'coerce')
        
    print('Cleaning numeric variables:')
    for v in type_dict['float_vars']:
        print(v)
        df[v] = pd.to_numeric(df[v], errors = 'coerce')
    
    print('Cleaning integer variables:')
    for v in type_dict['int_vars']:
        print(v)
        df[v] = pd.to_numeric(df[v], downcast = 'signed', errors = 'coerce')
        
    print('Final data types:')
    print(df.dtypes)
    
    return df

## Cleaning Up CRSP

In [160]:
crsp_datatypes = {'date_vars': ['date'],
                 'float_vars': ['PRC', 'DIVAMT', 'BID', 'ASK', 'CFACPR', 'CFACSHR', "RET"],
                 'int_vars': ['SHROUT', 'VOL']}
crsp = clean_data(copy(crsp_raw), crsp_datatypes)

Cleaning date variables:
date
Cleaning numeric variables:
PRC
DIVAMT
BID
ASK
CFACPR
CFACSHR
RET
Cleaning integer variables:
SHROUT
VOL
Final data types:
PERMNO              int64
date       datetime64[ns]
NCUSIP             object
TICKER             object
COMNAM             object
DIVAMT            float64
PRC               float64
VOL               float64
RET               float64
BID               float64
ASK               float64
SHROUT            float64
CFACPR            float64
CFACSHR           float64
dtype: object


In [161]:
# Choose the right variables
crsp_names = {
              "RET" : "Return",
              'SHROUT': 'Shares Outstanding on Trading Day', 
              'COMNAM': 'Company Name',\
              'date': 'datadate', 
              'NCUSIP': 'cusip',
              "TICKER": "Ticker",
              'DIVAMT': 'Dividend Cash Amount',
              'PRC': 'Price',
              'BID': 'Bid',
              'ASK': 'Ask',
              'VOL': 'Volume on Trading Day',
              'CFACPR': 'Price Adjustment Factor',
              'CFACSHR': 'Share Adjustment Factor'}

crsp = crsp.rename(index = str, columns = crsp_names)
crsp = crsp[list(crsp_names.values())]

In [162]:
# Make a few more useful variables
crsp['Price'] = np.abs(crsp['Price'])
crsp['Shares Outstanding'] = crsp['Shares Outstanding on Trading Day'] * crsp['Share Adjustment Factor']
crsp['Volume'] = crsp['Volume on Trading Day'] * crsp['Share Adjustment Factor']
crsp['Market Cap'] = crsp['Shares Outstanding'] * crsp['Price'] / 1e6   #million

In [163]:
# Drop a few variables
crsp = crsp.drop(['Shares Outstanding on Trading Day', 'Volume on Trading Day', 'Bid', 'Ask', "Volume", "Price Adjustment Factor", "Share Adjustment Factor"], axis = 1)

In [164]:
# Check for unique identifier

def check_unique(dataframe, identifier_list):
    unique_identifier = dataframe.groupby(by = identifier_list).count()
    return unique_identifier.shape[0] == dataframe.shape[0]

print(check_unique(crsp, ['cusip', 'datadate']))

False


In [165]:
crsp["Dividend Cash Amount"].fillna(0,inplace=True)

In [166]:
crsp

Unnamed: 0,Return,Company Name,datadate,cusip,Ticker,Dividend Cash Amount,Price,Shares Outstanding,Market Cap
0,0.102144,ORACLE CORP,NaT,68389X10,ORCL,0.06,28.21000,5025837.0,141.778862
1,0.037044,ORACLE CORP,NaT,68389X10,ORCL,0.00,29.25500,4979000.0,145.660645
2,-0.003247,ORACLE CORP,NaT,68389X10,ORCL,0.00,29.16000,4975106.0,145.074091
3,0.010288,ORACLE CORP,NaT,68389X10,ORCL,0.06,29.40000,4975106.0,146.268116
4,-0.099660,ORACLE CORP,NaT,68389X10,ORCL,0.00,26.47000,4905000.0,129.835350
...,...,...,...,...,...,...,...,...,...
48868,-0.066222,TESLA INC,NaT,88160R10,TSLA,0.00,225.61000,895635.0,202.064212
48869,0.067639,TESLA INC,NaT,88160R10,TSLA,0.00,240.87000,900000.0,216.783000
48870,0.307427,TESLA INC,NaT,88160R10,TSLA,0.00,314.92001,901225.0,283.813786
48871,0.047695,TESLA INC,NaT,88160R10,TSLA,0.00,329.94000,901225.0,297.350176


In [167]:
crsp.dropna(inplace = True)

In [168]:
crsp = crsp[crsp["Market Cap"] > 0]

In [169]:
crsp

Unnamed: 0,Return,Company Name,datadate,cusip,Ticker,Dividend Cash Amount,Price,Shares Outstanding,Market Cap


In [170]:
crsp.shape

(0, 9)

## Cleaning up Compustat

In [171]:
compustat_raw

Unnamed: 0,gvkey,datadate,fyear,indfmt,consol,popsrc,datafmt,tic,cusip,conm,...,at,bkvlps,csho,dltt,dpvieb,lt,sale,wcap,costat,mkvalt
0,1045,20141231,2014.0,INDL,C,D,STD,AAL,02376R102,AMERICAN AIRLINES GROUP INC,...,43771.0,2.8976,697.475,16196.0,12259.0,41750.0,42650.0,-1323.0,A,37405.5843
1,1045,20151231,2015.0,INDL,C,D,STD,AAL,02376R102,AMERICAN AIRLINES GROUP INC,...,48415.0,9.0215,624.622,18330.0,13144.0,42780.0,40990.0,-3620.0,A,26452.7417
2,1045,20161231,2016.0,INDL,C,D,STD,AAL,02376R102,AMERICAN AIRLINES GROUP INC,...,51274.0,7.4612,507.294,22489.0,14194.0,47489.0,40180.0,-3548.0,A,23685.5569
3,1045,20171231,2017.0,INDL,C,D,STD,AAL,02376R102,AMERICAN AIRLINES GROUP INC,...,51396.0,8.2564,475.508,22511.0,15646.0,47470.0,42207.0,-5818.0,A,24740.6812
4,1045,20181231,2018.0,INDL,C,D,STD,AAL,02376R102,AMERICAN AIRLINES GROUP INC,...,60580.0,-0.3669,460.611,29081.0,17443.0,60749.0,44541.0,-9459.0,A,14790.2192
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5835,316056,20151231,2015.0,INDL,C,D,STD,ALLE,G0176J109,ALLEGION PLC,...,2285.3,0.2667,95.991,1479.8,391.7,2255.6,2068.1,288.0,A,6327.7267
5836,316056,20161231,2016.0,INDL,C,D,STD,ALLE,G0176J109,ALLEGION PLC,...,2247.4,1.1892,95.274,1415.6,413.8,2131.0,2238.0,399.7,A,6097.5360
5837,316056,20171231,2017.0,INDL,C,D,STD,ALLE,G0176J109,ALLEGION PLC,...,2542.0,4.2246,95.062,1442.3,455.7,2136.5,2408.2,571.9,A,7563.1327
5838,316056,20181231,2018.0,INDL,C,D,STD,ALLE,G0176J109,ALLEGION PLC,...,2810.2,6.8789,94.637,1409.5,472.1,2156.2,2731.7,410.8,A,7543.5153


In [172]:
# Define the data types
compustat_datatypes = {'date_vars': ['datadate'],
                 'float_vars': ['at', 'dltt', 'wcap', 'sale', 'csho', 'mkvalt', 'bkvlps', "lt", "dpvieb"],
                 'int_vars': ['gvkey', 'fyear']}
compustat = clean_data(copy(compustat_raw), compustat_datatypes)

Cleaning date variables:
datadate
Cleaning numeric variables:
at
dltt
wcap
sale
csho
mkvalt
bkvlps
lt
dpvieb
Cleaning integer variables:
gvkey
fyear
Final data types:
gvkey                int32
datadate    datetime64[ns]
fyear              float64
indfmt              object
consol              object
popsrc              object
datafmt             object
tic                 object
cusip               object
conm                object
curcd               object
at                 float64
bkvlps             float64
csho               float64
dltt               float64
dpvieb             float64
lt                 float64
sale               float64
wcap               float64
costat              object
mkvalt             float64
dtype: object


In [173]:
compustat_names = \
    {# Identifiers
     'gvkey': 'Gvkey',
     "tic": "Ticker",
     'fyear': 'Fiscal Year',
     'curcd': 'Currency', 
     'datadate': 'datadate',
     "cusip" : "cusip",
     
     # Balance Sheet
     'at': 'Total Assets',
     'dltt': 'Long Term Debt',
     'wcap':'Working Capital',
     "lt": "Total liabilities",
     "dpvieb": "depreciation",
     
        
     # Income Statement
     'sale': 'Sales/Turnover',  
    
     
     # Cash Flow Statement
     # 'dqc': 'Depreciation and Amortization',
        
     # Market Data
     'csho': 'Shares Outstanding (Compustat)',
     # 'prccq': 'Price (Compustat)',
     'mkvalt': 'Market Value',
     'bkvlps': 'Book Value Per Share'}
compustat = compustat.rename(index = str, columns = compustat_names)
compustat = compustat[list(compustat_names.values())]

In [174]:
#compustat = compustat.set_index(['Cusip', 'datadate'])

In [175]:
compustat["cusip"] = compustat["cusip"].apply(lambda x: x[:8])

In [176]:
compustat.shape

(5840, 15)

In [177]:
compustat = compustat[compustat["Book Value Per Share"] != 0]
compustat["Book Equity"] = compustat["Shares Outstanding (Compustat)"] * compustat["Book Value Per Share"]
compustat = compustat[compustat["Book Equity"] > 0]

In [178]:
import datetime
import dateutil.relativedelta

data_compu = pd.DataFrame(columns = compustat.columns)
k = 0
p = 0
for i in range(len(compustat)):
    for o in range(12):
        data_compu = data_compu.append(compustat.iloc[i:i+1, :])
        data_compu["datadate"].iloc[p:p+1][0] = data_compu["datadate"].iloc[p:p+1][0] - dateutil.relativedelta.relativedelta(months=11-k)
        k+=1
        p+=1
        if k >=12:
            k = 0
        

In [179]:
data_compu

Unnamed: 0,Gvkey,Ticker,Fiscal Year,Currency,datadate,cusip,Total Assets,Long Term Debt,Working Capital,Total liabilities,depreciation,Sales/Turnover,Shares Outstanding (Compustat),Market Value,Book Value Per Share,Book Equity
0,1045,AAL,2014.0,USD,2014-01-31,02376R10,43771.0,16196.0,-1323.0,41750.0,12259.0,42650.0,697.475,37405.5843,2.8976,2021.003560
0,1045,AAL,2014.0,USD,2014-02-28,02376R10,43771.0,16196.0,-1323.0,41750.0,12259.0,42650.0,697.475,37405.5843,2.8976,2021.003560
0,1045,AAL,2014.0,USD,2014-03-31,02376R10,43771.0,16196.0,-1323.0,41750.0,12259.0,42650.0,697.475,37405.5843,2.8976,2021.003560
0,1045,AAL,2014.0,USD,2014-04-30,02376R10,43771.0,16196.0,-1323.0,41750.0,12259.0,42650.0,697.475,37405.5843,2.8976,2021.003560
0,1045,AAL,2014.0,USD,2014-05-31,02376R10,43771.0,16196.0,-1323.0,41750.0,12259.0,42650.0,697.475,37405.5843,2.8976,2021.003560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5839,316056,ALLE,2019.0,USD,2019-08-31,G0176J10,2967.2,1483.2,494.8,2206.8,494.6,2854.0,92.724,11547.8470,8.1683,757.397449
5839,316056,ALLE,2019.0,USD,2019-09-30,G0176J10,2967.2,1483.2,494.8,2206.8,494.6,2854.0,92.724,11547.8470,8.1683,757.397449
5839,316056,ALLE,2019.0,USD,2019-10-31,G0176J10,2967.2,1483.2,494.8,2206.8,494.6,2854.0,92.724,11547.8470,8.1683,757.397449
5839,316056,ALLE,2019.0,USD,2019-11-30,G0176J10,2967.2,1483.2,494.8,2206.8,494.6,2854.0,92.724,11547.8470,8.1683,757.397449


In [180]:
data_compu["Working Capital"] = data_compu["Working Capital"] / 1e6
data_compu["depreciation"] = data_compu["depreciation"] / 1e6
data_compu["Sales/Turnover"] = data_compu["Sales/Turnover"] / 1e6
data_compu

Unnamed: 0,Gvkey,Ticker,Fiscal Year,Currency,datadate,cusip,Total Assets,Long Term Debt,Working Capital,Total liabilities,depreciation,Sales/Turnover,Shares Outstanding (Compustat),Market Value,Book Value Per Share,Book Equity
0,1045,AAL,2014.0,USD,2014-01-31,02376R10,43771.0,16196.0,-0.001323,41750.0,0.012259,0.042650,697.475,37405.5843,2.8976,2021.003560
0,1045,AAL,2014.0,USD,2014-02-28,02376R10,43771.0,16196.0,-0.001323,41750.0,0.012259,0.042650,697.475,37405.5843,2.8976,2021.003560
0,1045,AAL,2014.0,USD,2014-03-31,02376R10,43771.0,16196.0,-0.001323,41750.0,0.012259,0.042650,697.475,37405.5843,2.8976,2021.003560
0,1045,AAL,2014.0,USD,2014-04-30,02376R10,43771.0,16196.0,-0.001323,41750.0,0.012259,0.042650,697.475,37405.5843,2.8976,2021.003560
0,1045,AAL,2014.0,USD,2014-05-31,02376R10,43771.0,16196.0,-0.001323,41750.0,0.012259,0.042650,697.475,37405.5843,2.8976,2021.003560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5839,316056,ALLE,2019.0,USD,2019-08-31,G0176J10,2967.2,1483.2,0.000495,2206.8,0.000495,0.002854,92.724,11547.8470,8.1683,757.397449
5839,316056,ALLE,2019.0,USD,2019-09-30,G0176J10,2967.2,1483.2,0.000495,2206.8,0.000495,0.002854,92.724,11547.8470,8.1683,757.397449
5839,316056,ALLE,2019.0,USD,2019-10-31,G0176J10,2967.2,1483.2,0.000495,2206.8,0.000495,0.002854,92.724,11547.8470,8.1683,757.397449
5839,316056,ALLE,2019.0,USD,2019-11-30,G0176J10,2967.2,1483.2,0.000495,2206.8,0.000495,0.002854,92.724,11547.8470,8.1683,757.397449


In [181]:
compustat_model_1 = data_compu.drop(["Long Term Debt","Total Assets","Working Capital","Total liabilities","depreciation","Sales/Turnover","Market Value"], axis = 1)
compustat_model_1.dropna(inplace = True)
compustat_model_2 = data_compu.drop(["Long Term Debt","Total liabilities","Sales/Turnover","Market Value"], axis = 1)
compustat_model_2.dropna(inplace = True)
compustat_model_3 = data_compu
compustat_model_3.dropna(inplace = True)

In [182]:
print("model_1:", compustat_model_1.shape)
print("model_2:", compustat_model_2.shape)
print("model_3:", compustat_model_3.shape)

model_1: (56148, 9)
model_2: (42420, 12)
model_3: (41112, 16)


## Firm characteristics

In [183]:
crsp_sorted = crsp.sort_values(["cusip", "datadate"])
crsp_sorted.drop_duplicates(subset = ["cusip", "datadate"], keep='first', inplace = True)
crsp_sorted.shape

(0, 9)

In [184]:
compustat_model_1_sorted = compustat_model_1.sort_values(["cusip", "datadate"])
compustat_model_2_sorted = compustat_model_2.sort_values(["cusip", "datadate"])
compustat_model_3_sorted = compustat_model_3.sort_values(["cusip", "datadate"])
print("model_1:", compustat_model_1.shape)
print("model_2:", compustat_model_2.shape)
print("model_3:", compustat_model_3.shape)

model_1: (56148, 9)
model_2: (42420, 12)
model_3: (41112, 16)


Create a new dataframe to store results.

In [185]:
def last_day_of_month(any_day):
    next_month = any_day.replace(day=28) + datetime.timedelta(days=4)  # this will never fail
    return next_month - datetime.timedelta(days=next_month.day)

In [186]:
crsp_sorted["datadate"] = crsp_sorted["datadate"].apply(lambda x: last_day_of_month(x))

In [187]:
data_crsp_comp_model_1 = pd.merge(compustat_model_1_sorted, crsp_sorted, on = ["cusip","datadate"], how = "inner")
data_crsp_comp_model_2 = pd.merge(compustat_model_2_sorted, crsp_sorted, on = ["cusip","datadate"], how = "inner")
data_crsp_comp_model_3 = pd.merge(compustat_model_3_sorted, crsp_sorted, on = ["cusip","datadate"], how = "inner")
del[data_crsp_comp_model_1["Ticker_y"]]
del[data_crsp_comp_model_2["Ticker_y"]]
del[data_crsp_comp_model_3["Ticker_y"]]
#data_crsp_comp["Book equity"] = data_crsp_comp["Shares Outstanding"] * data_crsp_comp["Book Value Per Share"]

#data_crsp_comp = data_crsp_comp[data_crsp_comp["Book equity"] > 0]
print("model_1:", data_crsp_comp_model_1.shape)
print("model_2:", data_crsp_comp_model_2.shape)
print("model_3:", data_crsp_comp_model_3.shape)

model_1: (0, 15)
model_2: (0, 18)
model_3: (0, 22)


In [188]:
data_crsp_comp_model_1.reset_index(drop=True).to_csv("data_crsp_comp_1.csv")
data_crsp_comp_model_2.reset_index(drop=True).to_csv("data_crsp_comp_2.csv")
data_crsp_comp_model_3.reset_index(drop=True).to_csv("data_crsp_comp_3.csv")

In [189]:
data_model_list = ["data_crsp_comp_model_1", "data_crsp_comp_model_2", "data_crsp_comp_model_3"]

## new table

In [190]:
charac_1 = data_crsp_comp_model_1[["Ticker_x", "datadate"]]
charac_2 = data_crsp_comp_model_2[["Ticker_x", "datadate"]]
charac_3 = data_crsp_comp_model_3[["Ticker_x", "datadate"]]
charac_list = ["charac_1", "charac_2", "charac_3"]

### logsize

In [191]:
for i, (cha, mod) in enumerate(zip(charac_list, data_model_list)):
    eval(cha)["LogSize"] = np.log(eval(mod)["Market Cap"])
    

### return2-12

In [192]:
for i, (cha, mod) in enumerate(zip(charac_list, data_model_list)):
    eval(cha).insert(len(eval(cha).columns), 'Return_2_12', 0)
    for name, group in eval(mod).groupby('Ticker_x'):
        if len(group) > 12:
            for i in range(12, len(group)):
                return_2_12 = group.iloc[i - 2].loc["Price"] / group.iloc[i - 12].loc["Price"] - 1
            #print(return_2_12)
                eval(cha).loc[group.index[i], "Return_2_12"] = return_2_12

### logissues

In [193]:
for i in range(1,3):
    cha, mod = charac_list[i], data_model_list[i]
    eval(cha).insert(len(eval(cha).columns),  'LogIssues', 0)
    for name, group in eval(mod).groupby('Ticker_x'):
        if len(group) > 36:
            for i in range(36, len(group)):
                LogIssues = np.log(group.iloc[i - 1].loc["Shares Outstanding"] / group.iloc[i - 36].loc["Shares Outstanding"])
                eval(cha).loc[group.index[i], "LogIssues"] = LogIssues

### logB/M

In [194]:
for i, (cha, mod) in enumerate(zip(charac_list, data_model_list)):
    eval(cha)["LogBM"] = np.log(eval(mod)["Book Equity"]/ eval(mod)["Market Cap"])

### Accruals

In [195]:
for i in range(1,3):
    cha, mod = charac_list[i], data_model_list[i]
    eval(cha)["Accruals"] = eval(mod)["Working Capital"] - eval(mod)["depreciation"]

### ROA

In [196]:
for i in range(1,3):
    cha, mod = charac_list[i], data_model_list[i]
    eval(cha)["ROA"] = eval(mod)["Income before extraordinary items"] / eval(mod)["Total Assets"]

KeyError: 'Income before extraordinary items'

### Return%

In [100]:
for i in range(0,3):
    cha, mod = charac_list[i], data_model_list[i]
    eval(cha)["Return"] = eval(mod)["Return"]*100

### LogAG

In [101]:
for i in range(1,3):
    cha, mod = charac_list[i], data_model_list[i]
    eval(cha).insert(len(eval(cha).columns),  'LogAG', 0)
    for name, group in eval(mod).groupby('Ticker_x'):
        if len(group) > 12:
            for i in range(12, len(group)):
                LogAG = np.log(group.iloc[i - 1].loc["Total Assets"] / group.iloc[i - 12].loc["Total Assets"])
                eval(cha).loc[group.index[i], "LogAG"] = LogAG

### DY

In [102]:
charac_3.insert(len(charac_3.columns), 'DY', 0)
for name, group in data_crsp_comp_model_3.groupby('Ticker_x'):
    if len(group) > 12:
        for i in range(12, len(group)):
            DY = group.iloc[i].loc["Dividend Cash Amount"] / group.iloc[i].loc["Price"]
            charac_3.loc[group.index[i], "DY"] =  DY


### Logreturn13-36

In [103]:
charac_3.insert(len(charac_3.columns), 'Return_13_36', 0)
for name, group in data_crsp_comp_model_3.groupby('Ticker_x'):
    if len(group) > 36:
        for i in range(36, len(group)):
            return_13_36 = np.log(group.iloc[i - 13].loc["Price"] / group.iloc[i - 36].loc["Price"])
            #print(return_2_12)
            charac_3.loc[group.index[i], "Return_13_36"] = return_13_36


### LogIssues_1y

In [104]:
charac_3.insert(len(charac_3.columns), 'LogIssues_1y', 0)
for name, group in data_crsp_comp_model_3.groupby('Ticker_x'):
    if len(group) > 12:
        for i in range(12, len(group)):
            LogIssues_1y = np.log(group.iloc[i - 1].loc["Shares Outstanding"] / group.iloc[i - 12].loc["Shares Outstanding"])
            charac_3.loc[group.index[i], "LogIssues_1y"] =  LogIssues_1y



### Turnover

In [105]:
charac_3.insert(len(charac_3.columns), 'Turnover', 0)
for name, group in data_crsp_comp_model_3.groupby('Ticker_x'):
    if len(group) > 12:
        for i in range(12, len(group)):
            Turnover = np.mean(group.iloc[i - 12 : i - 1]["Sales/Turnover"])
            charac_3.loc[group.index[i], "Turnover"] =  Turnover



### Debtprice

In [106]:
charac_3["Debtprice"] = data_crsp_comp_model_3["Total liabilities"] / data_crsp_comp_model_3["Market Value"]


### Salesprice

In [107]:
charac_3["Salesprice"] = data_crsp_comp_model_3["Sales/Turnover"] / data_crsp_comp_model_3["Market Value"] *1e6


In [108]:
print("model_1:", charac_1.shape)
print("model_2:", charac_2.shape)
print("model_3:", charac_3.shape)

model_1: (41141, 6)
model_2: (30656, 9)
model_3: (29913, 15)


In [109]:
## model 1
for i in range(3):
    eval(charac_list[i])["LogBM"] = eval(charac_list[i])["LogBM"]/10
    eval(charac_list[i])["LogBM"] = eval(charac_list[i])["LogBM"]*(-1)
    #eval(charac_list[i])["Return"] = eval(charac_list[i])["Return"] + 1

In [110]:
## model 2
for i in range(1,3):
    eval(charac_list[i])["LogIssues"] = eval(charac_list[i])["LogIssues"]* -1
    eval(charac_list[i])["Accruals"] = eval(charac_list[i])["Accruals"]* 4


In [111]:
## model 3
for i in range(2,3):
    eval(charac_list[i])["DY"] = eval(charac_list[i])["DY"]* 10
    eval(charac_list[i])["Turnover"] = eval(charac_list[i])["Turnover"]* 4
    eval(charac_list[i])["LogIssues_1y"] = eval(charac_list[i])["LogIssues_1y"]* -10
    eval(charac_list[i])["Salesprice"] = eval(charac_list[i])["Salesprice"]* 4
    #eval(charac_list[i])["LogIssues_1y"] = eval(charac_list[i])["LogIssues_1y"]* -1
    

In [112]:
for i in range(3):
    eval(charac_list[i]).reset_index(drop = True).to_csv(charac_list[i]+str(".csv"))

In [113]:
print("model_1", "\n", np.mean(charac_1))
print("model_2", "\n", np.mean(charac_2))
print("model_3", "\n", np.mean(charac_3))

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


model_1 
 LogSize        2.924857
Return_2_12    0.109837
LogBM         -0.566101
Return         1.448294
dtype: float64
model_2 
 LogSize        2.953661
Return_2_12    0.119219
LogIssues      0.008978
LogBM         -0.544056
Accruals      -0.022621
Return         1.539230
LogAG          0.073053
dtype: float64
model_3 
 LogSize         2.945247
Return_2_12     0.122394
LogIssues       0.009517
LogBM          -0.544041
Accruals       -0.023088
Return          1.535691
LogAG           0.074002
DY              0.015682
Return_13_36    0.132834
LogIssues_1y    0.044893
Turnover        0.077575
Debtprice       0.524327
Salesprice      2.795593
dtype: float64


In [114]:
print("model_1", "\n", np.std(charac_1))
print("model_2", "\n", np.std(charac_2))
print("model_3", "\n", np.std(charac_3))

model_1 
 datadate       1037 days 08:26:24.118781936
LogSize                            1.134047
Return_2_12                        0.273968
LogBM                              0.092834
Return                             7.468952
dtype: object
model_2 
 datadate       1033 days 23:06:24.178351888
LogSize                            1.178848
Return_2_12                         0.29285
LogIssues                          0.106085
LogBM                              0.090647
Accruals                            0.09559
Return                             7.778895
LogAG                              0.184295
dtype: object
model_3 
 datadate        1034 days 08:34:29.560946208
LogSize                             1.169265
Return_2_12                         0.317915
LogIssues                           0.103556
LogBM                               0.089937
Accruals                            0.096644
Return                              7.795223
LogAG                               0.188044
DY        