In [1]:
##########################################
# Pricing/Mispricing Paper Table 2       #
# Andrew Lou                             #
# Date: June 28 2022                     #
##########################################

# package imports
from tkinter import Y
import pandas as pd
import numpy as np
import datetime as dt
import wrds
import matplotlib.pyplot as plt
from pandas.tseries.offsets import *
from scipy import stats

# connect to WRDS
conn = wrds.Connection()

crsp_m = conn.raw_sql("""
                      select a.permno, a.permco, a.date, b.shrcd, b.exchcd,
                      a.ret, a.retx, a.shrout, a.prc
                      from crsp.msf as a
                      left join crsp.msenames as b
                      on a.permno=b.permno
                      and b.namedt<=a.date
                      and a.date<=b.nameendt
                      where a.date >='01/01/1968'
                      and b.exchcd between 1 and 3
                      """, date_cols=['date'])

Enter your WRDS username [andrewlou]: alou6683
Enter your password: ············


WRDS recommends setting up a .pgpass file.


Create .pgpass file now [y/n]?:  y


Created .pgpass file successfully.
Loading library list...
Done


In [2]:
# change variable format to int
crsp_m[['permco','permno','shrcd','exchcd']]=crsp_m[['permco','permno','shrcd','exchcd']].astype(int)

# Line up date to be end of month
crsp_m['jdate']=crsp_m['date']+MonthEnd(0)

# add delisting return
dlret = conn.raw_sql("""
                     select permno, dlret, dlstdt 
                     from crsp.msedelist
                     """, date_cols=['dlstdt'])

In [3]:
dlret.permno=dlret.permno.astype(int)
#dlret['dlstdt']=pd.to_datetime(dlret['dlstdt'])
dlret['jdate']=dlret['dlstdt']+MonthEnd(0)

crsp = pd.merge(crsp_m, dlret, how='left',on=['permno','jdate'])
crsp['dlret']=crsp['dlret'].fillna(0)
crsp['ret']=crsp['ret'].fillna(0)

# retadj factors in the delisting returns
crsp['retadj']=(1+crsp['ret'])*(1+crsp['dlret'])-1

# calculate market equity
crsp['me']=crsp['prc'].abs()*crsp['shrout'] 
crsp=crsp.drop(['dlret','dlstdt','prc','shrout'], axis=1)
crsp=crsp.sort_values(by=['jdate','permco','me'])

In [4]:
crsp.head(5)

Unnamed: 0,permno,permco,date,shrcd,exchcd,ret,retx,jdate,retadj,me
945326,28820,6,1968-01-31,10,2,-0.144385,-0.144385,1968-01-31,-0.144385,57000.0
953346,29161,64,1968-01-31,10,2,0.55102,0.55102,1968-01-31,0.55102,20900.0
572378,17670,74,1968-01-31,10,1,-0.017857,-0.017857,1968-01-31,-0.017857,34375.0
1268572,41515,80,1968-01-31,11,1,0.119266,0.119266,1968-01-31,0.119266,177876.0
620508,18702,267,1968-01-31,10,1,0.142857,0.142857,1968-01-31,0.142857,48992.0


In [5]:
### Aggregate Market Cap ###
# sum of me across different permno belonging to same permco a given date
crsp_summe = crsp.groupby(['jdate','permco'])['me'].sum().reset_index()

# largest mktcap within a permco/date
crsp_maxme = crsp.groupby(['jdate','permco'])['me'].max().reset_index()

# join by jdate/maxme to find the permno
crsp1=pd.merge(crsp, crsp_maxme, how='inner', on=['jdate','permco','me'])

# drop me column and replace with the sum me
crsp1=crsp1.drop(['me'], axis=1)

# join with sum of me to get the correct market cap info
crsp2=pd.merge(crsp1, crsp_summe, how='inner', on=['jdate','permco'])

# sort by permno and date and also drop duplicates
crsp2=crsp2.sort_values(by=['permno','jdate']).drop_duplicates()

crsp2['year']=crsp2['jdate'].dt.year
crsp2['month']=crsp2['jdate'].dt.month

crsp2.head(80)

Unnamed: 0,permno,permco,date,shrcd,exchcd,ret,retx,jdate,retadj,me,year,month
989454,10000,7952,1986-01-31,10,3,0.000000,,1986-01-31,0.000000,16100.000000,1986,1
995656,10000,7952,1986-02-28,10,3,-0.257143,-0.257143,1986-02-28,-0.257143,11960.000000,1986,2
1001867,10000,7952,1986-03-31,10,3,0.365385,0.365385,1986-03-31,0.365385,16330.000000,1986,3
1008088,10000,7952,1986-04-30,10,3,-0.098592,-0.098592,1986-04-30,-0.098592,15172.000000,1986,4
1014319,10000,7952,1986-05-30,10,3,-0.222656,-0.222656,1986-05-31,-0.222656,11793.859375,1986,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1378307,10001,7953,1990-11-30,11,3,0.000000,0.000000,1990-11-30,0.000000,10048.500000,1990,11
1384881,10001,7953,1990-12-31,11,3,0.001299,-0.012987,1990-12-31,0.001299,10013.000000,1990,12
1391439,10001,7953,1991-01-31,11,3,0.013158,0.013158,1991-01-31,0.013158,10144.750000,1991,1
1397973,10001,7953,1991-02-28,11,3,0.012987,0.012987,1991-02-28,0.012987,10276.500000,1991,2


In [6]:
anomaly = pd.read_parquet("anomaly.gzip")

In [10]:
crsp2.size()

TypeError: 'numpy.int64' object is not callable