In [1]:
##########################################
# Size Portfolio for CRSP Securities     #
##########################################

import pandas as pd
import numpy as np
import datetime as dt
import wrds
from dateutil.relativedelta import *

###################
# Connect to WRDS #
###################
conn = wrds.Connection(wrds_username='arossi')

################################################
# Get CRSP Monthly Stocks for Decile Formation #
################################################
# Monthly stock file rows dated on or after 1 December 1999; 'date' is parsed
# to datetime so the .dt accessors below work.
msf = conn.raw_sql("""
                      select a.permno, a.date, 
                      a.ret, a.shrout, a.prc 
                      from crsp.msf as a
                      where a.date >= '12/01/1999'
                      """, date_cols=['date'])

# Keep only records with non-missing ret, prc and shrout values.
# The explicit .copy() makes msf an independent frame, so the column
# assignments below cannot trigger SettingWithCopyWarning or write into a
# temporary slice of the query result.
msf = msf.dropna(subset=['prc', 'ret', 'shrout']).copy()

msf['permno'] = msf['permno'].astype(int)
# Size = shares outstanding times the absolute price. abs() is needed because
# prc can carry a negative sign (presumably CRSP's bid/ask-average marker --
# confirm against the CRSP data documentation).
msf['size'] = msf['shrout'] * msf['prc'].abs()
msf['year'] = msf['date'].dt.year
msf['month'] = msf['date'].dt.month

# msf_dec: the December observations only (date, permno, year, size)
is_december = msf['month'].eq(12)
msf_dec = msf.loc[is_december, ['date', 'permno', 'year', 'size']]

# msf_ls: the full monthly panel ordered by security and date, with the
# previous calendar year attached as a lookup key
msf_ls = msf.sort_values(['permno', 'date'])
msf_ls['year_prev'] = msf_ls['year'] - 1

# size_lag is the size on the preceding row of the same permno; where there is
# no preceding row it is backed out from the current size and return as
# size / (1 + ret)
prior_size = msf_ls.groupby('permno')['size'].shift(1)
implied_size = msf_ls['size'] / (1 + msf_ls['ret'])
msf_ls['size_lag'] = np.where(prior_size.notna(), prior_size, implied_size)

#################################
# Compute Deciles Each December #
#################################
msf_dec = msf_dec.sort_values(['year'])
# Within each year, qcut with labels=False returns bin codes 0..9 for ten
# equal-count size bins; adding 1 gives deciles numbered 1..10.
msf_dec['decile'] = 1 + msf_dec.groupby('year')['size'].transform(
    lambda x: pd.qcut(x, 10, labels=False))

###################################
# Assign Size Group to All Months #
###################################
# Each monthly row picks up the decile its permno had in the December of the
# previous calendar year (year_prev on the left matches year on the right).
msf_groups = pd.merge(msf_ls[['permno', 'date', 'ret', 'size_lag', 'year_prev']],
                      msf_dec[['permno', 'year', 'decile']], how='left',
                      left_on=['permno', 'year_prev'], right_on=['permno', 'year'])

# Rows with no prior-December match come back from the left merge with a NaN
# decile, which also upcasts the whole column to float. Drop those rows and
# restore an integer decile so it prints as 1..10 and has the same dtype as
# the integer decile key it is later joined against.
msf_groups = msf_groups.dropna(subset=['decile']).astype({'decile': 'int64'})

#################################
# Compute Size Weighted Returns #
#################################
msf_groups = msf_groups.sort_values(['decile', 'date'])

# function to calculate value weighted return
def wavg(group, avg_name, weight_name):
    """Return the weighted average of one column using another as weights.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to average over (e.g. one groupby group).
    avg_name : str
        Name of the column holding the values to average.
    weight_name : str
        Name of the column holding the weights.

    Returns
    -------
    float
        sum(value * weight) / sum(weight) over the rows where both the value
        and the weight are present, or NaN when no such rows exist or the
        usable weights sum to zero.
    """
    values = group[avg_name]
    weights = group[weight_name]

    # Use only rows where both pieces are present. Otherwise .sum() skips the
    # NaN product in the numerator but still counts the weight in the
    # denominator, which biases the average toward zero.
    usable = values.notna() & weights.notna()
    values = values[usable]
    weights = weights[usable]

    total_weight = weights.sum()
    # numpy division by zero yields inf/nan with a warning instead of raising
    # ZeroDivisionError, so the zero-weight case has to be tested explicitly.
    if total_weight == 0:
        return np.nan
    return (values * weights).sum() / total_weight

# value-weighted return: one row per (decile, date) holding the size_lag-weighted
# average of ret, in a column named 'vwret'
vwrets = (
    msf_groups
    .groupby(['decile', 'date'])
    .apply(wavg, 'ret', 'size_lag')
    .reset_index(name='vwret')
)

##################################
# Compare Results with CRSP MSIX #
##################################
# Pull the ten decile return columns (decret1..decret10) from crsp.msix
msix = conn.raw_sql("""
                      select caldt, decret1, decret2, decret3, decret4, decret5,
                      decret6, decret7, decret8, decret9, decret10
                      from crsp.msix where caldt >= '12/01/1999'
                      """, date_cols=['caldt'])

# reshape wide -> long: one row per (caldt, decretN) pair
decret_cols = ['decret' + str(k) for k in range(1, 11)]
msix1 = msix.melt(id_vars='caldt', value_vars=decret_cols)

# the decile number is whatever follows the 6-character 'decret' prefix
msix1['decile'] = msix1['variable'].str[6:].astype(int)

# discard the helper column and give the rest the names used by vwrets
msix1 = (
    msix1
    .drop(columns='variable')
    .rename(columns={'value': 'decret', 'caldt': 'date'})
)

# put the computed value-weighted returns next to the MSIX decile returns
decile_returns = vwrets.merge(msix1, how='left', on=['date', 'decile'])


print(decile_returns)

conn.close()
###################
# End of Program  #
###################

Loading library list...
Done
      decile       date     vwret    decret
0        1.0 2000-01-31  0.222626  0.207740
1        1.0 2000-02-29  0.246672  0.240418
2        1.0 2000-03-31 -0.024419 -0.027778
3        1.0 2000-04-28 -0.175907 -0.170390
4        1.0 2000-05-31 -0.093232 -0.087108
5        1.0 2000-06-30  0.051372  0.056428
6        1.0 2000-07-31 -0.031004 -0.023833
7        1.0 2000-08-31  0.054793  0.068825
8        1.0 2000-09-29 -0.041949 -0.030076
9        1.0 2000-10-31 -0.104683 -0.106276
10       1.0 2000-11-30 -0.127776 -0.122512
11       1.0 2000-12-29 -0.077421 -0.072644
12       1.0 2001-01-31  0.535034  0.542924
13       1.0 2001-02-28 -0.130583 -0.134064
14       1.0 2001-03-30 -0.115912 -0.112741
15       1.0 2001-04-30  0.037949  0.042498
16       1.0 2001-05-31  0.089661  0.080882
17       1.0 2001-06-29 -0.012690 -0.005509
18       1.0 2001-07-31 -0.026824 -0.020549
19       1.0 2001-08-31 -0.022282 -0.018399
20       1.0 2001-09-28 -0.118317 -0.118349
21 