In [4]:
##########################################
# Pricing/Mispricing Paper Table 2       #
# Andrew Lou                             #
# Date: June 28 2022                     #
##########################################

# package imports
from tkinter import Y
import pandas as pd
import numpy as np
import datetime as dt
import wrds
import matplotlib.pyplot as plt
from pandas.tseries.offsets import *
from scipy import stats

# connect to WRDS
# Opens the WRDS database session used by every query in this notebook.
# The saved output of this cell shows it prompting interactively for a
# username and password.
conn = wrds.Connection()

# Pull the monthly stock file (crsp.msf) from 1968-01-01 onwards and attach the
# share code / exchange code from the names history (crsp.msenames) whose
# [namedt, nameendt] interval contains the observation date.
# The WHERE clause filters on b.exchcd, so msf rows without a matching names
# record are discarded: the LEFT JOIN effectively behaves as an inner join.
# NOTE(review): exchcd 1-3 is presumably NYSE/AMEX/NASDAQ in CRSP's coding - confirm.
# date_cols=['date'] is passed so that `date` is returned as a datetime column.
crsp_m = conn.raw_sql("""
                      select a.permno, a.permco, a.date, b.shrcd, b.exchcd,
                      a.ret, a.retx, a.shrout, a.prc
                      from crsp.msf as a
                      left join crsp.msenames as b
                      on a.permno=b.permno
                      and b.namedt<=a.date
                      and a.date<=b.nameendt
                      where a.date >='01/01/1968'
                      and b.exchcd between 1 and 3
                      """, date_cols=['date'])

Enter your WRDS username [andrewlou]:alou6683
Enter your password:········
WRDS recommends setting up a .pgpass file.
Create .pgpass file now [y/n]?: y
Created .pgpass file successfully.
Loading library list...
Done


In [5]:
# Cast the identifier / code columns to integers and add `jdate`, the
# observation date rolled forward to its month end (dates already on a month
# end are left unchanged by MonthEnd(0)).
crsp_m = (
    crsp_m
    .astype({'permco': int, 'permno': int, 'shrcd': int, 'exchcd': int})
    .assign(jdate=lambda frame: frame['date'] + MonthEnd(0))
)

# Fetch delisting returns and delisting dates for every permno; they are
# merged into the monthly file in the next cell.
dlret = conn.raw_sql(
    """
                     select permno, dlret, dlstdt 
                     from crsp.msedelist
                     """,
    date_cols=['dlstdt'],
)

In [6]:
# Align the delisting table with the monthly file: integer permno and a
# month-end `jdate` derived from the delisting date.
dlret['permno'] = dlret['permno'].astype(int)
dlret['jdate'] = dlret['dlstdt'] + MonthEnd(0)

# Build the working CRSP frame:
#   1. left-join delisting returns onto the monthly file by permno and month end;
#   2. treat missing `dlret` and missing `ret` as zero;
#   3. retadj = compounded monthly and delisting return;
#   4. me     = |price| * shares outstanding (market equity);
#   5. drop the inputs that are no longer needed and sort by month, company, size.
crsp = (
    crsp_m
    .merge(dlret, how='left', on=['permno', 'jdate'])
    .assign(
        dlret=lambda frame: frame['dlret'].fillna(0),
        ret=lambda frame: frame['ret'].fillna(0),
    )
    .assign(
        retadj=lambda frame: (1 + frame['ret']) * (1 + frame['dlret']) - 1,
        me=lambda frame: frame['prc'].abs() * frame['shrout'],
    )
    .drop(columns=['dlret', 'dlstdt', 'prc', 'shrout'])
    .sort_values(by=['jdate', 'permco', 'me'])
)

In [7]:
# Preview the first five rows of the merged frame (head() defaults to five rows).
crsp.head()

Unnamed: 0,permno,permco,date,shrcd,exchcd,ret,retx,jdate,retadj,me
945326,28820,6,1968-01-31,10,2,-0.144385,-0.144385,1968-01-31,-0.144385,57000.0
953346,29161,64,1968-01-31,10,2,0.55102,0.55102,1968-01-31,0.55102,20900.0
572378,17670,74,1968-01-31,10,1,-0.017857,-0.017857,1968-01-31,-0.017857,34375.0
1268572,41515,80,1968-01-31,11,1,0.119266,0.119266,1968-01-31,0.119266,177876.0
620508,18702,267,1968-01-31,10,1,0.142857,0.142857,1968-01-31,0.142857,48992.0


In [8]:
### Aggregate Market Cap ###
# Collapse the data to one row per company (permco) and month: keep the row of
# the permno with the largest market equity, but carry the company-wide total
# market equity on it.

# Sum of me across the different permno belonging to the same permco on a
# given date. min_count=1 keeps the total missing (NaN) when every me in the
# group is missing; with the default (min_count=0) such a company-month would
# be reported with a market cap of exactly 0.
crsp_summe = crsp.groupby(['jdate','permco'])['me'].sum(min_count=1).reset_index()

# Largest me within each permco/date.
crsp_maxme = crsp.groupby(['jdate','permco'])['me'].max().reset_index()

# Join on jdate/permco/me so that only the row(s) whose me equals the group
# maximum survive; this identifies the permno to keep.
# NOTE(review): if two permno of one permco tie on the maximum me, both rows
# are kept and each later receives the full company total - confirm whether
# that can occur in this sample.
crsp1=pd.merge(crsp, crsp_maxme, how='inner', on=['jdate','permco','me'])

# Drop the permno-level me; it is replaced by the company total below.
crsp1=crsp1.drop(['me'], axis=1)

# Attach the company total as the new `me` column.
crsp2=pd.merge(crsp1, crsp_summe, how='inner', on=['jdate','permco'])

# Sort by permno and date and drop exact duplicate rows.
crsp2=crsp2.sort_values(by=['permno','jdate']).drop_duplicates()

# Calendar year and month of the month-end date.
crsp2['year']=crsp2['jdate'].dt.year
crsp2['month']=crsp2['jdate'].dt.month

crsp2.head(80)

In [20]:
### Date ten years (120 month ends) EARLIER than each observation
# MonthEnd(-120) moves jdate back by 120 month ends. Because jdate is itself a
# month end, ffmonth equals jdate's calendar month and ffyear equals jdate's
# calendar year minus ten.
crsp2['ffdate']=crsp2['jdate']+MonthEnd(-120)
crsp2['ffyear']=crsp2['ffdate'].dt.year
crsp2['ffmonth']=crsp2['ffdate'].dt.month
# Gross ex-dividend return, the building block for the cumulative product below.
crsp2['1+retx']=1+crsp2['retx']
# The groupby cumprod/shift/cumcount calls below all depend on this row order.
crsp2=crsp2.sort_values(by=['permno','date'])

# cumret by stock
# Running product of 1+retx that restarts for every (permno, ffyear) group,
# i.e. it accumulates within one ffyear of one stock.
crsp2['cumretx']=crsp2.groupby(['permno','ffyear'])['1+retx'].cumprod()

# lag cumret
# Value of cumretx 120 rows earlier within the same permno.
# NOTE(review): shift() counts rows, not calendar months; this is a ten-year
# lag only if the permno has no missing months - confirm.
crsp2['lcumretx']=crsp2.groupby(['permno'])['cumretx'].shift(120)

# lag market cap
# Same 120-row lag applied to me (same caveat about missing months).
crsp2['lme']=crsp2.groupby(['permno'])['me'].shift(120)

# if first permno then use me/(1+retx) to replace the missing value
# `count` is the 0-based row position within each permno, so count==0 marks the
# first observation of a stock.
# NOTE(review): me/(1+retx) deflates me by a single month's return, whereas
# every other row's lme is a 120-row lag - confirm this replacement is intended.
crsp2['count']=crsp2.groupby(['permno']).cumcount()
crsp2['lme']=np.where(crsp2['count']==0, crsp2['me']/crsp2['1+retx'], crsp2['lme'])

In [23]:
# Weight: rows whose ffmonth is 1 take lme as is; every other row takes me
# scaled by lcumretx.
crsp2['wt'] = np.where(
    crsp2['ffmonth'].eq(1),
    crsp2['lme'],
    crsp2['me'].mul(crsp2['lcumretx']),
)

# Narrow the frame to the listed columns, order it by stock and month end, and
# remove exact duplicate rows.
crsp2 = (
    crsp2
    .loc[:, ['permno', 'jdate', 'shrcd', 'exchcd', 'retadj', 'me', 'wt', 'cumretx', 'lme']]
    .sort_values(by=['permno', 'jdate'])
    .drop_duplicates()
)

In [26]:
# Inspect the first 10,000 rows of the weighted frame. The saved output below
# is elided in the middle (the "..." row), so only the start and end of this
# slice are actually shown.
crsp2.head(10000)

Unnamed: 0,permno,jdate,shrcd,exchcd,retadj,me,wt,cumretx,lme
989454,10000,1986-01-31,10,3,0.000000,16100.000000,,,
995656,10000,1986-02-28,10,3,-0.257143,11960.000000,,0.742857,
1001867,10000,1986-03-31,10,3,0.365385,16330.000000,,1.014286,
1008088,10000,1986-04-30,10,3,-0.098592,15172.000000,,0.914286,
1014319,10000,1986-05-31,10,3,-0.222656,11793.859375,,0.710714,
...,...,...,...,...,...,...,...,...,...
843969,10074,1984-02-29,11,3,0.093023,5146.500000,,1.205128,
850167,10074,1984-03-31,11,3,0.051064,5365.500000,,1.256410,
856404,10074,1984-04-30,11,3,0.000000,5365.500000,,1.256410,
862660,10074,1984-05-31,11,3,-0.020408,5256.000000,,1.230769,
