# Using Jupyter to Query the WRDS Database

In [30]:
## This notebook provides an introduction to installing and importing WRDS and some sample queries to explore available data sets.
## Register for an account at https://wrds-www.wharton.upenn.edu/

In [31]:
## The following installs and imports WRDS 

In [None]:
!pip install wrds

In [33]:
import wrds

In [34]:
## The following creates your WRDS connection object and assigns it to the name "fx". You can replace "fx" with any valid variable name, as long as you use the same name in the cells below.
## Log in with your WRDS username and password when prompted. Creating a .pgpass file is recommended.

In [None]:
# Create the WRDS connection object. The cells below call its methods
# (list_libraries, list_tables, describe_table, raw_sql, get_table) through the name `fx`.
fx = wrds.Connection()

In [36]:
## This imports a few commonly used python libraries.

In [37]:
import numpy as np
import pandas as pd
import datetime as dt
import psycopg2 
import matplotlib.pyplot as plt

In [38]:
## The following lists all WRDS data sets or "libraries" we subscribe to. These can also be explored using the web interface.

In [None]:
# `fx` is the wrds.Connection object created earlier in this notebook.
fx.list_libraries()

In [40]:
## The following lists the tables in the djones (Historical Dow Jones Indexes) library. In this library there are only two tables.

In [None]:
# 'djones' is the library whose tables are listed; pass a different library name to list its tables instead.
fx.list_tables(library='djones')

In [None]:
## This describes the "djmonthly" (monthly) table of the djones library.
## For detailed descriptions of these variables see:
## https://wrds-www.wharton.upenn.edu/pages/get-data/dow-jones-averages-total-return-indexes/dow-jones-monthly/
fx.describe_table(library='djones', table='djmonthly')

In [None]:
## Pull every column of the Dow Jones monthly index table (djones.djmonthly) for rows
## dated after 2000-01-01 (the comparison is a strict `>`), using a raw_sql command.
monthly_query = "select * FROM djones.djmonthly WHERE date>'2000-01-01'"
djm_data = fx.raw_sql(monthly_query)
djm_data

In [None]:
## This retrieves the earliest 24 rows of the Dow Jones Monthly Industrial Average closing values (dji),
## i.e. the first two years of data assuming one row per month.
## ORDER BY date is required: without it, SQL does not guarantee which 24 rows LIMIT returns.
dow_jones_index = fx.raw_sql('SELECT date, dji FROM djones.djmonthly ORDER BY date LIMIT 24')
dow_jones_index

In [None]:
## This plot shows the history of the Dow Jones Industrials from 1930-01-01 to 1953-01-01
## (both bounds are included by SQL BETWEEN).
## Note that the data from this library was discontinued in 2008
datadaily = fx.raw_sql(
    # ORDER BY date: a line plot connects rows in the order received, so fetch them chronologically.
    "select date,dji from djones.djdaily "
    "WHERE date BETWEEN '1930-01-01' AND '1953-01-01' ORDER BY date",
    date_cols=['date'],
    index_col=['date'],
)

# Explicit figure/axes interface instead of the pyplot state machine.
fig, ax = plt.subplots()
ax.plot(datadaily)
ax.set_xlabel('Date')
ax.set_ylabel('Dow Jones Industrial Average')
ax.set_title('Dow Jones Industrial Average, 1930-1953')
# plt.show() renders the figure without echoing a Text(...) repr as the cell output.
plt.show()

In [None]:
## Use get_table to fetch the first 10 observations of the company table
## in the Compustat library (comp).
company1 = fx.get_table(
    library="comp",
    table="company",
    obs=10,
)
company1

In [None]:
## Same table as the previous query, restricted to specific columns and to the first 5 observations.
## The gvkey is a unique identifier used by Compustat.
selected_columns = ['conm', 'gvkey', 'busdesc', 'state', 'sic', 'cik']
company_narrow = fx.get_table(
    library="comp",
    table="company",
    columns=selected_columns,
    obs=5,
)
company_narrow

In [None]:
## The following lists the tables in the boardex_na (BoardEx North America) library.
## BoardEx has biographical data for executives and board members.
fx.list_tables(library='boardex_na')

In [None]:
## This gives the first 8 observations (obs=8) from the BoardEx library education association table
## (boardex_na.na_board_education_assoc).
Boards = fx.get_table(library='boardex_na', table='na_board_education_assoc',obs=8)
Boards

In [None]:
## This is an advanced join of two tables in the Compustat (comp) library:
## fundamentals (comp.funda, alias a) and monthly securities prices (comp.secm, alias b).
## Rows are matched on gvkey and datadate, and the fundamentals side is filtered to ticker 'AAPL'
## with datafmt = 'STD', consol = 'C' and indfmt = 'INDL'.
## Joining data sets is your economics data superpower.
fx.raw_sql("""
    SELECT a.gvkey, a.datadate, a.tic, a.conm, a.at, a.aco, a.capx, b.prccm, b.cshoq, b.trfm
    FROM comp.funda a
    JOIN comp.secm b ON a.gvkey = b.gvkey AND a.datadate = b.datadate
    WHERE a.tic = 'AAPL' AND a.datafmt = 'STD' AND a.consol = 'C' AND a.indfmt = 'INDL'
""")

In [51]:
## This notebook was derived in part from examples on the WRDS web site. For more info see:
## https://wrds-www.wharton.upenn.edu/pages/classroom/using-jupyter-at-wrds-notebook/
## https://wrds-www.wharton.upenn.edu/pages/support/programming-wrds/programming-python/querying-wrds-data-python/