# Pandas - Finance Data Reader

### Contents

In [3]:
from IPython.core.display import HTML
css = open('styles/style-table.css').read() + open('styles/style-notebook.css').read()
HTML('<style>{}</style>'.format(css))

In [4]:
# Usual stuff...
import numpy as np
import pandas as pd
from pandas.tseries.offsets import Day, MonthEnd
import pandas_datareader as pdr
import pandas_datareader.data as web
import seaborn as sns

import datetime
#pd.set_option('display.notebook_repr_html', False)

In [5]:
start = datetime.datetime(2007,01,01).date()
start

datetime.date(2007, 1, 1)

In [6]:
end = pd.datetime.now()  - 1* Day()
end = end.date()
end

datetime.date(2016, 6, 28)

### Caching

Needed so that we don't hit the server repeatedly and get IP banned.

In [7]:
import requests_cache
expire_after = datetime.timedelta(days=1)
session = requests_cache.CachedSession(cache_name='cache', backend='sqlite', expire_after=expire_after)

In [8]:
# quick test
vix = web.DataReader("VIXCLS", 'fred', session=session)
vix.dropna()
vix.tail()

Unnamed: 0_level_0,VIXCLS
DATE,Unnamed: 1_level_1
2016-06-21,18.48
2016-06-22,21.17
2016-06-23,17.25
2016-06-24,25.76
2016-06-27,23.85


In [7]:
spy = web.DataReader("SPY", 'yahoo', start, end, session=session)
spy.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2007-01-03,142.25,142.860001,140.570007,141.369995,94807600,116.069465
2007-01-04,141.229996,142.050003,140.610001,141.669998,69620600,116.315778
2007-01-05,141.330002,141.399994,140.380005,140.539993,76645300,115.388006
2007-01-08,140.820007,141.410004,140.25,141.190002,71655000,115.921685
2007-01-09,141.309998,141.600006,140.399994,141.070007,75680100,115.823165


### Yahoo Finance

#### Get stock data from Yahoo

In [8]:
aapl = web.DataReader("AAPL", 'yahoo', start, end, session=session)
aapl.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2007-01-03,86.289999,86.579999,81.899999,83.800002,309579900,11.01952
2007-01-04,84.050001,85.949998,83.820003,85.659998,211815100,11.264106
2007-01-05,85.77,86.199997,84.400002,85.049997,208685400,11.183892
2007-01-08,85.959998,86.529998,85.280003,85.47,199276700,11.239121
2007-01-09,86.450003,92.979999,85.15,92.570003,837324600,12.172756


#### Get All company actions from Yahoo

In [9]:
actions = web.DataReader("AAPL", "yahoo-actions", start, end, session=session)
actions.head(5)

Unnamed: 0,action,value
2016-05-05,DIVIDEND,0.57
2016-02-04,DIVIDEND,0.52
2015-11-05,DIVIDEND,0.52
2015-08-06,DIVIDEND,0.52
2015-05-07,DIVIDEND,0.52


#### Get historical dividends from Yahoo...

In [10]:
aapl_dividends = actions[actions.action == 'DIVIDEND']
aapl_dividends.head()

Unnamed: 0,action,value
2016-05-05,DIVIDEND,0.57
2016-02-04,DIVIDEND,0.52
2015-11-05,DIVIDEND,0.52
2015-08-06,DIVIDEND,0.52
2015-05-07,DIVIDEND,0.52


In [11]:
# Get Weekly Series
ohlc_dict = {                                                                                                             
    'Open':'first',
    'High':'max',
    'Low':'min',
    'Close': 'last',
    'Volume': 'sum',
    'Adj Close': 'last'
}

aapl_weekly = aapl.resample('W-FRI', closed='left').apply(ohlc_dict)
aapl_weekly = aapl_weekly[aapl_weekly.columns[::-1]]
aapl_weekly.tail()

Unnamed: 0_level_0,Open,Close,Low,High,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-06-03,99.440002,97.720001,96.629997,100.470001,97.720001,147431800
2016-06-10,97.790001,99.650002,97.449997,101.889999,99.650002,120903800
2016-06-17,98.529999,97.550003,96.07,99.349998,97.550003,161435900
2016-06-24,96.620003,96.099998,94.68,96.889999,96.099998,190601400
2016-07-01,92.910004,93.589996,91.5,94.660004,93.589996,157695100


#### Get Monthly Series

In [12]:
aapl_monthly = aapl.resample('BM', closed='left').apply(ohlc_dict)
aapl_monthly = aapl_monthly[aapl_monthly.columns[::-1]]
aapl_monthly.tail()

Unnamed: 0_level_0,Open,Close,Low,High,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-02-29,94.790001,96.910004,92.589996,98.889999,96.323543,840062800
2016-03-31,96.860001,109.559998,96.650002,110.419998,108.896984,755377500
2016-04-29,109.720001,94.830002,94.25,112.389999,94.256129,829740600
2016-05-31,93.989998,100.349998,89.470001,100.730003,100.349998,924290200
2016-06-30,99.599998,93.589996,91.5,101.889999,93.589996,741838500


#### On how to get data for multiple symbols... Example: SPDR Sectors & SPY

In [13]:
# Get data for S&P500 Sectors
sector_list = ['XLY','XLP','XLE','XLF','XLV','XLI', 'XLB', 'XLK', 'XLU' ]
lookback_years = 3 # in years

# Set up End and Start times for data grab
end = datetime.datetime.now()
start = datetime.datetime(end.year - lookback_years, end.month, end.day)

#For loop for grabing yahoo finance data and setting as a dataframe
for symbol in sector_list:   
    # Set DataFrame as the Stock Ticker
    globals()[symbol] = web.DataReader(symbol,'yahoo',start,end, session=session)
    
SPY = web.DataReader("SPY", 'yahoo', start, end)

In [14]:
SPY.describe().round(2)

Unnamed: 0,Open,High,Low,Close,Volume,Adj Close
count,755.0,755.0,755.0,755.0,755.0,755.0
mean,195.28,196.19,194.31,195.33,116197800.0,189.24
std,13.6,13.65,13.55,13.59,49960660.0,15.66
min,160.48,161.77,160.22,161.21,37317800.0,151.76
25%,186.23,187.24,184.94,186.29,82656500.0,178.08
50%,198.43,199.26,197.52,198.47,103968400.0,192.22
75%,207.07,208.16,205.85,207.11,135994700.0,203.29
max,213.24,213.78,212.91,213.5,507244300.0,211.27


In [15]:
XLV.describe().round(2)

Unnamed: 0,Open,High,Low,Close,Volume,Adj Close
count,755.0,755.0,755.0,755.0,755.0,755.0
mean,64.66,65.04,64.22,64.66,9780538.41,63.24
std,7.92,7.99,7.84,7.91,5925749.28,8.35
min,47.46,47.73,47.18,47.57,2366700.0,45.51
25%,58.26,58.54,57.87,58.18,5878600.0,56.3
50%,67.18,67.74,66.53,67.22,8325100.0,66.26
75%,71.01,71.53,70.73,71.1,11749700.0,70.34
max,77.2,77.4,76.99,77.22,66470200.0,76.04


### Google Finance

In [16]:
aapl = web.DataReader("AAPL", 'google', start, end, session=session)
aapl.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-06-23,95.94,96.29,95.25,96.1,32240187
2016-06-24,92.91,94.66,92.65,93.4,75311356
2016-06-27,93.0,93.05,91.5,92.04,46622188
2016-06-28,92.9,93.66,92.14,93.59,40444914
2016-06-29,93.97,94.55,93.63,94.4,36427827


### St Louis FRED

#### Get 30 yr mortgage rates

In [13]:
import pandas_datareader.fred as fred
mortgage_rate= fred.FredReader("MORTGAGE30US", start, end, session=session).read()
mortgage_rate.columns=["30 Yr Mortgae Rates"]
mortgage_rate
mortgage_rate.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x117b67e50>

#### Get GDP numbers

In [18]:
gdp = fred.FredReader("GDP", start, end, session=session).read()
gdp.tail()

Unnamed: 0_level_0,GDP
DATE,Unnamed: 1_level_1
2015-01-01,17649.3
2015-04-01,17913.7
2015-07-01,18060.2
2015-10-01,18164.8
2016-01-01,18230.1


#### Get Inflation numbers via alertnate approach

In [19]:
gdp = web.DataReader("GDP", "fred", start, end, session=session)
inflation = web.DataReader(["CPIAUCSL", "CPILFESL"], "fred", start, end, session=session)

In [20]:
inflation.tail()

Unnamed: 0_level_0,CPIAUCSL,CPILFESL
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01-01,238.107,245.232
2016-02-01,237.707,245.925
2016-03-01,237.92,246.095
2016-04-01,238.89,246.574
2016-05-01,239.41,247.074


In [21]:
gdp.tail()

Unnamed: 0_level_0,GDP
DATE,Unnamed: 1_level_1
2015-01-01,17649.3
2015-04-01,17913.7
2015-07-01,18060.2
2015-10-01,18164.8
2016-01-01,18230.1


### Fama/French

#### Get Available Datasets

In [22]:
import pandas_datareader.famafrench as famafrench
datasets = famafrench.get_available_datasets()
print("Total Datasets:{}".format(len(datasets)))
datasets[:5]

Total Datasets:196


['F-F_Research_Data_Factors',
 'F-F_Research_Data_Factors_weekly',
 'F-F_Research_Data_Factors_daily',
 'F-F_Research_Data_5_Factors_2x3',
 'F-F_Research_Data_5_Factors_2x3_daily']

#### Get Industry Portfolios

In [23]:
ind_folios = web.DataReader("5_Industry_Portfolios", "famafrench", session=session)

In [24]:
#ind_folios.keys()
print(ind_folios.get("DESCR"))

5 Industry Portfolios
---------------------

This file was created by CMPT_IND_RETS using the 201512 CRSP database. It contains value- and equal-weighted returns for 5 industry portfolios. The portfolios are constructed at the end of June. The annual returns are from January to December. Missing data are indicated by -99.99 or -999. Copyright 2015 Kenneth R. French

  0 : Average Value Weighted Returns -- Monthly (72 rows x 5 cols)
  1 : Average Equal Weighted Returns -- Monthly (72 rows x 5 cols)
  2 : Average Value Weighted Returns -- Annual (6 rows x 5 cols)
  3 : Average Equal Weighted Returns -- Annual (6 rows x 5 cols)
  4 : Number of Firms in Portfolios (72 rows x 5 cols)
  5 : Average Firm Size (72 rows x 5 cols)
  6 : Sum of BE / Sum of ME (6 rows x 5 cols)
  7 : Value-Weighted Average of BE/ME (6 rows x 5 cols)


### World Bank

In [25]:
import pandas_datareader.wb as wb

In [26]:
wb.get_indicators(session=session).head()

Unnamed: 0,id,name,source,sourceNote,sourceOrganization,topics
0,1.0.HCount.1.90usd,Poverty Headcount ($1.90 a day),LAC Equity Lab,The poverty headcount index measures the propo...,LAC Equity Lab tabulations of SEDLAC (CEDLAS a...,Poverty
1,1.0.HCount.2.5usd,Poverty Headcount ($2.50 a day),LAC Equity Lab,The poverty headcount index measures the propo...,LAC Equity Lab tabulations of SEDLAC (CEDLAS a...,Poverty
2,1.0.HCount.Mid10to50,Middle Class ($10-50 a day) Headcount,LAC Equity Lab,The poverty headcount index measures the propo...,LAC Equity Lab tabulations of SEDLAC (CEDLAS a...,Poverty
3,1.0.HCount.Ofcl,Official Moderate Poverty Rate-National,LAC Equity Lab,The poverty headcount index measures the propo...,LAC Equity Lab tabulations of data from Nation...,Poverty
4,1.0.HCount.Poor4uds,Poverty Headcount ($4 a day),LAC Equity Lab,The poverty headcount index measures the propo...,LAC Equity Lab tabulations of SEDLAC (CEDLAS a...,Poverty


In [27]:
# Get list of all indicators maintained by World Bank
wb.get_indicators(session=session).topics.unique()

array([u'Poverty ', '', u'Education ', u'Social Protection & Labor',
       u'Economy & Growth', u'Agriculture & Rural Development  ',
       u'Agriculture & Rural Development   ; Climate Change',
       u'Agriculture & Rural Development   ; Climate Change ; Environment ',
       u'Climate Change ; Urban Development  ; Environment ',
       u'Climate Change ; Environment ',
       u'Agriculture & Rural Development   ; Environment ',
       u'Urban Development  ; Environment ',
       u'Economy & Growth ; Private Sector ; Trade',
       u'Economy & Growth ; Trade',
       u'Economy & Growth ; Science & Technology ',
       u'Economy & Growth ; External Debt ; Trade',
       u'Economy & Growth ; Financial Sector ',
       u'Economy & Growth ; External Debt',
       u'Economy & Growth ; External Debt ; Aid Effectiveness ',
       u'Economy & Growth ; Infrastructure  ; Trade',
       u'Economy & Growth ; Financial Sector  ; External Debt',
       u'Economy & Growth ; Financial Sector  ; Cl

In [28]:
# Find datasets by patterns
wb.search('gdp.*capita.*const').iloc[:,:2]

Unnamed: 0,id,name
685,6.0.GDPpc_constant,"GDP per capita, PPP (constant 2011 internation..."
7452,NY.GDP.PCAP.KD,GDP per capita (constant 2005 US$)
7454,NY.GDP.PCAP.KN,GDP per capita (constant LCU)
7456,NY.GDP.PCAP.PP.KD,"GDP per capita, PPP (constant 2011 internation..."


In [29]:
# Get GDP of US, Canada & Mexico
dat = wb.download(indicator='NY.GDP.PCAP.KD', country=['US', 'CA', 'MX'], start=2005, end=2008, session=session)
dat.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,NY.GDP.PCAP.KD
country,year,Unnamed: 2_level_1
Canada,2008,37086.898159
Canada,2007,37054.878287
Canada,2006,36679.368924
Canada,2005,36028.23249
Mexico,2008,8275.465345


In [30]:
# Group GDP of the countries
dat.groupby(level=0).mean()

Unnamed: 0_level_0,NY.GDP.PCAP.KD
country,Unnamed: 1_level_1
Canada,36712.344465
Mexico,8156.805312
United States,44910.605024


### Quandl

In [31]:
import quandl
gdpq = quandl.get("FRED/GDP", start_date = start, end_date=end, session=session)
aaplq = quandl.get("WIKI/AAPL", start_date = start, end_date=end, session=session)

In [32]:
# See if gdp data from FRED is same as gdp data from Quandl
print("GDP(FRED):\n{}, \n GDP(Quandl):\n{}".format(gdp.index, gdpq.index))
is_same = (gdpq.iloc[:,0] == gdp.iloc[:,0]).all()
print("Same data: {}".format(is_same))

GDP(FRED):
DatetimeIndex(['2013-07-01', '2013-10-01', '2014-01-01', '2014-04-01',
               '2014-07-01', '2014-10-01', '2015-01-01', '2015-04-01',
               '2015-07-01', '2015-10-01', '2016-01-01'],
              dtype='datetime64[ns]', name=u'DATE', freq=None), 
 GDP(Quandl):
DatetimeIndex(['2013-07-01', '2013-10-01', '2014-01-01', '2014-04-01',
               '2014-07-01', '2014-10-01', '2015-01-01', '2015-04-01',
               '2015-07-01', '2015-10-01', '2016-01-01'],
              dtype='datetime64[ns]', name=u'DATE', freq=None)
Same data: True


In [33]:
print("AAPL(Quandl):",aaplq.columns)
print("AAPL(Yahoo):", aapl.columns)

('AAPL(Quandl):', Index([u'Open', u'High', u'Low', u'Close', u'Volume', u'Ex-Dividend',
       u'Split Ratio', u'Adj. Open', u'Adj. High', u'Adj. Low', u'Adj. Close',
       u'Adj. Volume'],
      dtype='object'))
('AAPL(Yahoo):', Index([u'Open', u'High', u'Low', u'Close', u'Volume'], dtype='object'))


In [34]:
# See if aapl data from YAHOO is same as aapl data from Quandl
df1 = aaplq.iloc[:,0:4].tail() # pick last four columns.
df2 = aapl.iloc[:,0:4].tail()

df1.name = "Quandl"
df2.name = "Yahoo"
print(df1)
print(df2)

is_same = (aaplq.iloc[:,0:4] == aapl.iloc[:,0:4]).any()
print("Same data: {}".format(is_same))

             Open    High    Low  Close
Date                                   
2016-06-23  95.94  96.290  95.25  96.10
2016-06-24  92.91  94.655  92.65  93.40
2016-06-27  93.00  93.050  91.50  92.08
2016-06-28  92.90  93.660  92.14  93.59
2016-06-29  93.97  94.550  93.63  94.40
             Open   High    Low  Close
Date                                  
2016-06-23  95.94  96.29  95.25  96.10
2016-06-24  92.91  94.66  92.65  93.40
2016-06-27  93.00  93.05  91.50  92.04
2016-06-28  92.90  93.66  92.14  93.59
2016-06-29  93.97  94.55  93.63  94.40
Same data: Open     True
High     True
Low      True
Close    True
dtype: bool


In [35]:
?web.DataReader