# Using Python on the WRDS Platform

## Install WRDS

In [None]:
%pip install -wrds
import wrds

## Setup WRDS connection

Now that we've installed the `wrds` module, the next step is to set up a `pgpass` file on your workstation. The `pgpass` file includes your WRDS username and password so that you do not need to enter them each time you wish to connect to WRDS within Python. With the `wrds` module, creating this file is easy!

First, start Python on your workstation. Then follow these steps (`wrds_username` is your own WRDS username, the same as your login to the WRDS website); you will be prompted for your WRDS username and password on your first login:

In [None]:
# Open a WRDS session; replace 'valexeev' with your own WRDS username.
# The first login prompts for the WRDS username and password.
db = wrds.Connection(wrds_username='valexeev')
# Write the pgpass file so the credentials do not have to be entered on
# later connections.
db.create_pgpass_file()

You should be able to connect from then on without entering your credentials again. Test this by disconnecting and reconnecting, using the following:

In [None]:
# Close the current session, then open a new one to check that reconnecting
# works now that the pgpass file exists.
db.close()
db = wrds.Connection(wrds_username='valexeev')

In [10]:
db.close()  # close the WRDS connection opened by the reconnect test

# Applications

## Historical S&P 500 Index Constituents

Load required libraries and establish the connection:

In [11]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
from dateutil.relativedelta import *
from pandas.tseries.offsets import *
from scipy import stats


import wrds
# Open the WRDS session used by the queries in this section
# ('valexeev' is the WRDS username; replace it with your own).
db = wrds.Connection(wrds_username='valexeev')

Loading library list...
Done


In [12]:
# Pull the comp.funda columns that the following cells use to build preferred
# stock (pstkrv, pstkl, pstk) and book equity (seq, txditc), together with
# gvkey, datadate and at. Rows are restricted by the indfmt/datafmt/popsrc/
# consol filters and to datadate on or after 1 January 1959.
# date_cols asks raw_sql to parse `datadate` as a date column.
comp = db.raw_sql("""
                    select gvkey, datadate, at, pstkl, txditc,
                    pstkrv, seq, pstk
                    from comp.funda
                    where indfmt='INDL' 
                    and datafmt='STD'
                    and popsrc='D'
                    and consol='C'
                    and datadate >= '01/01/1959'
                    """, date_cols=['datadate'])

In [13]:
# Display the Compustat query result.
comp

Unnamed: 0,gvkey,datadate,at,pstkl,txditc,pstkrv,seq,pstk
0,001000,1961-12-31,,0.0,0.000,,,
1,001000,1962-12-31,,0.0,,,,0.0
2,001000,1963-12-31,,0.0,0.008,0.0,0.553,0.0
3,001000,1964-12-31,1.416,0.0,0.020,0.0,0.607,0.0
4,001000,1965-12-31,2.310,0.0,0.000,0.0,0.491,0.0
...,...,...,...,...,...,...,...,...
46670,351590,2019-12-31,62131.888,0.0,106.607,0.0,11054.622,0.0
46671,351590,2020-12-31,60256.041,0.0,119.333,0.0,9914.300,0.0
46672,351590,2021-12-31,62325.449,0.0,77.338,0.0,18106.225,0.0
46673,353444,2021-12-31,46650.000,0.0,4546.000,0.0,35687.000,0.0


In [15]:
# Add the calendar year of each record's datadate as a new `year` column.
comp = comp.assign(year=comp['datadate'].dt.year)
comp

Unnamed: 0,gvkey,datadate,at,pstkl,txditc,pstkrv,seq,pstk,year
0,001000,1961-12-31,,0.0,0.000,,,,1961
1,001000,1962-12-31,,0.0,,,,0.0,1962
2,001000,1963-12-31,,0.0,0.008,0.0,0.553,0.0,1963
3,001000,1964-12-31,1.416,0.0,0.020,0.0,0.607,0.0,1964
4,001000,1965-12-31,2.310,0.0,0.000,0.0,0.491,0.0,1965
...,...,...,...,...,...,...,...,...,...
46670,351590,2019-12-31,62131.888,0.0,106.607,0.0,11054.622,0.0,2019
46671,351590,2020-12-31,60256.041,0.0,119.333,0.0,9914.300,0.0,2020
46672,351590,2021-12-31,62325.449,0.0,77.338,0.0,18106.225,0.0,2021
46673,353444,2021-12-31,46650.000,0.0,4546.000,0.0,35687.000,0.0,2021


In [16]:
# Preferred stock: use pstkrv when present, otherwise fall back to pstkl,
# then to pstk, and finally to zero when all three are missing.
comp['ps'] = (
    comp['pstkrv']
    .fillna(comp['pstkl'])
    .fillna(comp['pstk'])
    .fillna(0)
)
# Missing txditc is treated as zero.
comp['txditc'] = comp['txditc'].fillna(0)
comp

Unnamed: 0,gvkey,datadate,at,pstkl,txditc,pstkrv,seq,pstk,year,ps
0,001000,1961-12-31,,0.0,0.000,,,,1961,0.0
1,001000,1962-12-31,,0.0,0.000,,,0.0,1962,0.0
2,001000,1963-12-31,,0.0,0.008,0.0,0.553,0.0,1963,0.0
3,001000,1964-12-31,1.416,0.0,0.020,0.0,0.607,0.0,1964,0.0
4,001000,1965-12-31,2.310,0.0,0.000,0.0,0.491,0.0,1965,0.0
...,...,...,...,...,...,...,...,...,...,...
46670,351590,2019-12-31,62131.888,0.0,106.607,0.0,11054.622,0.0,2019,0.0
46671,351590,2020-12-31,60256.041,0.0,119.333,0.0,9914.300,0.0,2020,0.0
46672,351590,2021-12-31,62325.449,0.0,77.338,0.0,18106.225,0.0,2021,0.0
46673,353444,2021-12-31,46650.000,0.0,4546.000,0.0,35687.000,0.0,2021,0.0


In [17]:
# Book equity = seq + txditc - ps.
# Values that are not strictly positive (or are missing) are set to NaN.
comp['be'] = (comp['seq'] + comp['txditc'] - comp['ps']).where(lambda be: be > 0)
comp

Unnamed: 0,gvkey,datadate,at,pstkl,txditc,pstkrv,seq,pstk,year,ps,be
0,001000,1961-12-31,,0.0,0.000,,,,1961,0.0,
1,001000,1962-12-31,,0.0,0.000,,,0.0,1962,0.0,
2,001000,1963-12-31,,0.0,0.008,0.0,0.553,0.0,1963,0.0,0.561
3,001000,1964-12-31,1.416,0.0,0.020,0.0,0.607,0.0,1964,0.0,0.627
4,001000,1965-12-31,2.310,0.0,0.000,0.0,0.491,0.0,1965,0.0,0.491
...,...,...,...,...,...,...,...,...,...,...,...
46670,351590,2019-12-31,62131.888,0.0,106.607,0.0,11054.622,0.0,2019,0.0,11161.229
46671,351590,2020-12-31,60256.041,0.0,119.333,0.0,9914.300,0.0,2020,0.0,10033.633
46672,351590,2021-12-31,62325.449,0.0,77.338,0.0,18106.225,0.0,2021,0.0,18183.563
46673,353444,2021-12-31,46650.000,0.0,4546.000,0.0,35687.000,0.0,2021,0.0,40233.000


In [18]:
# Number of years in Compustat: order each firm's records chronologically,
# then number them from 0 within gvkey.
comp = comp.sort_values(['gvkey', 'datadate'])
comp = comp.assign(count=comp.groupby('gvkey').cumcount())

# Keep only the columns needed downstream.
comp = comp[['gvkey', 'datadate', 'year', 'be', 'count']]
comp


Unnamed: 0,gvkey,datadate,year,be,count
0,001000,1961-12-31,1961,,0
1,001000,1962-12-31,1962,,1
2,001000,1963-12-31,1963,0.561,2
3,001000,1964-12-31,1964,0.627,3
4,001000,1965-12-31,1965,0.491,4
...,...,...,...,...,...
46670,351590,2019-12-31,2019,11161.229,0
46671,351590,2020-12-31,2020,10033.633,1
46672,351590,2021-12-31,2021,18183.563,2
46673,353444,2021-12-31,2021,40233.000,0


In [20]:
# ---------------------------------------------------------------
# CRSP block
# ---------------------------------------------------------------
# Monthly stock file (crsp.msf) joined to the names history
# (crsp.msenames) on permno, matching each monthly record to the name row
# whose [namedt, nameendt] range contains its date. The SQL is similar to
# the crspmerge macro.
crsp_m = db.raw_sql("""
                      select a.permno, a.permco, a.date, b.shrcd, b.exchcd,
                      a.ret, a.retx, a.shrout, a.prc
                      from crsp.msf as a
                      left join crsp.msenames as b
                      on a.permno=b.permno
                      and b.namedt<=a.date
                      and a.date<=b.nameendt
                      where a.date between '01/01/1959' and '12/31/2017'
                      and b.exchcd between 1 and 3
                      """, date_cols=['date'])

# Store the identifier and code columns as integers.
crsp_m = crsp_m.astype({'permco': int, 'permno': int, 'shrcd': int, 'exchcd': int})

# jdate is the record date rolled forward to the calendar month end
# (dates already on a month end are left unchanged).
crsp_m = crsp_m.assign(jdate=crsp_m['date'] + MonthEnd(0))
crsp_m


Unnamed: 0,permno,permco,date,shrcd,exchcd,ret,retx,shrout,prc,jdate
0,10000,7952,1986-01-31,10,3,,,3680.0,-4.375000,1986-01-31
1,10000,7952,1986-02-28,10,3,-0.257143,-0.257143,3680.0,-3.250000,1986-02-28
2,10000,7952,1986-03-31,10,3,0.365385,0.365385,3680.0,-4.437500,1986-03-31
3,10000,7952,1986-04-30,10,3,-0.098592,-0.098592,3793.0,-4.000000,1986-04-30
4,10000,7952,1986-05-30,10,3,-0.222656,-0.222656,3793.0,-3.109375,1986-05-31
...,...,...,...,...,...,...,...,...,...,...
338282,93436,53453,2017-08-31,11,3,0.100257,0.100257,166887.0,355.899994,2017-08-31
338283,93436,53453,2017-09-29,11,3,-0.041585,-0.041585,168017.0,341.100006,2017-09-30
338284,93436,53453,2017-10-31,11,3,-0.028056,-0.028056,168067.0,331.529999,2017-10-31
338285,93436,53453,2017-11-30,11,3,-0.068410,-0.068410,168067.0,308.850006,2017-11-30


Reference: [WRDS Fama-French factors replication in Python](https://wrds-www.wharton.upenn.edu/pages/wrds-research/applications/python-replications/fama-french-factors-python/)

## Compare with FF

In [22]:
# Load the ff.factors_monthly table and keep the date plus the SMB and HML
# columns.
_ff = db.get_table(library='ff', table='factors_monthly')
_ff = _ff.loc[:, ['date', 'smb', 'hml']]
# Roll each date forward to the calendar month end.
_ff = _ff.assign(date=_ff['date'] + MonthEnd(0))
_ff

Unnamed: 0,date,smb,hml
0,1926-07-31,-0.0256,-0.0243
1,1926-08-31,-0.0117,0.0382
2,1926-09-30,-0.0140,0.0013
3,1926-10-31,-0.0009,0.0070
4,1926-11-30,-0.0010,-0.0051
...,...,...,...
1153,2022-08-31,0.0139,0.0031
1154,2022-09-30,-0.0082,0.0003
1155,2022-10-31,0.0010,0.0805
1156,2022-11-30,-0.0340,0.0139
