# Chapter 6. Accessing Data

In [1]:
import numpy as np
import pandas as pd

# pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 10)
pd.set_option('display.max_rows', 10)

In [3]:
!head -n 5 msft.csv

Date,Open,High,Low,Close,Volume,Adj Close
2016-02-23,52.34,52.369999,50.98,51.18,28390800,51.18
2016-02-22,52.279999,53.00,52.279999,52.650002,24854400,52.650002
2016-02-19,51.970001,52.279999,51.529999,51.82,33275400,51.82
2016-02-18,52.330002,52.950001,52.099998,52.189999,26244700,52.189999


In [4]:
msft = pd.read_csv('msft.csv')
msft.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close
0,2016-02-23,52.34,52.369999,50.98,51.18,28390800,51.18
1,2016-02-22,52.279999,53.0,52.279999,52.650002,24854400,52.650002
2,2016-02-19,51.970001,52.279999,51.529999,51.82,33275400,51.82
3,2016-02-18,52.330002,52.950001,52.099998,52.189999,26244700,52.189999
4,2016-02-17,51.490002,52.77,51.450001,52.419998,39670300,52.419998


In [7]:
# specify a column as index
msft = pd.read_csv('msft.csv', index_col=0)
msft.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-02-23,52.34,52.369999,50.98,51.18,28390800,51.18
2016-02-22,52.279999,53.0,52.279999,52.650002,24854400,52.650002
2016-02-19,51.970001,52.279999,51.529999,51.82,33275400,51.82
2016-02-18,52.330002,52.950001,52.099998,52.189999,26244700,52.189999
2016-02-17,51.490002,52.77,51.450001,52.419998,39670300,52.419998


In [6]:
msft.dtypes

Open         float64
High         float64
Low          float64
Close        float64
Volume         int64
Adj Close    float64
dtype: object

In [9]:
# specify column type
msft = pd.read_csv("msft.csv", 
                    dtype = { 'Volume' : np.float64})
msft.dtypes

Date          object
Open         float64
High         float64
Low          float64
Close        float64
Volume       float64
Adj Close    float64
dtype: object

In [12]:
# specify column names
df = pd.read_csv('msft.csv',
                   names = ['open', 'high', 'low',
                           'close', 'volume', 'adjclose'])
df.head()

Unnamed: 0,open,high,low,close,volume,adjclose
Date,Open,High,Low,Close,Volume,Adj Close
2016-02-23,52.34,52.369999,50.98,51.18,28390800,51.18
2016-02-22,52.279999,53.00,52.279999,52.650002,24854400,52.650002
2016-02-19,51.970001,52.279999,51.529999,51.82,33275400,51.82
2016-02-18,52.330002,52.950001,52.099998,52.189999,26244700,52.189999


In [16]:
df2 = pd.read_csv("msft.csv",
                  usecols=['Date', 'Close'],
                  index_col=['Date'])
df2.head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2016-02-23,51.18
2016-02-22,52.650002
2016-02-19,51.82
2016-02-18,52.189999
2016-02-17,52.419998


# Saving

In [17]:
df2.to_csv("msft_temp.csv", index_label='date')

In [19]:
!head msft_temp.csv

date,Close
2016-02-23,51.18
2016-02-22,52.650002
2016-02-19,51.82
2016-02-18,52.189999
2016-02-17,52.419998
2016-02-16,51.09
2016-02-12,50.5
2016-02-11,49.689999
2016-02-10,49.709998999999996


In [21]:
df = pd.read_table("msft.csv", sep=",")
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close
0,2016-02-23,52.34,52.369999,50.98,51.18,28390800,51.18
1,2016-02-22,52.279999,53.0,52.279999,52.650002,24854400,52.650002
2,2016-02-19,51.970001,52.279999,51.529999,51.82,33275400,51.82
3,2016-02-18,52.330002,52.950001,52.099998,52.189999,26244700,52.189999
4,2016-02-17,51.490002,52.77,51.450001,52.419998,39670300,52.419998


In [22]:
df.to_csv("msft_temp2.txt", sep="|")
! head -5 msft_temp2.txt

|Date|Open|High|Low|Close|Volume|Adj Close
0|2016-02-23|52.34|52.369999|50.98|51.18|28390800|51.18
1|2016-02-22|52.279999|53.0|52.279999|52.650002|24854400|52.650002
2|2016-02-19|51.970001|52.279999|51.529999|51.82|33275400|51.82
3|2016-02-18|52.330002|52.950001|52.099998|52.189999|26244700|52.189999


In [23]:
df = pd.read_csv("msft.csv", skiprows=[0, 2, 3])
df

Unnamed: 0,2016-02-23,52.34,52.369999,50.98,51.18,28390800,51.18.1
0,2016-02-18,52.330002,52.950001,52.099998,52.189999,26244700,52.189999
1,2016-02-17,51.490002,52.770000,51.450001,52.419998,39670300,52.419998
2,2016-02-16,50.900002,51.090000,50.130001,51.090000,35937100,51.090000
3,2016-02-12,50.250000,50.680000,49.750000,50.500000,34243300,50.140000
4,2016-02-11,48.680000,50.110001,48.509998,49.689999,48878600,49.335773
...,...,...,...,...,...,...,...
7542,1986-03-19,28.751040,28.998720,27.999361,28.249920,47894400,0.067943
7543,1986-03-18,29.499840,29.750399,28.500479,28.751040,67766400,0.069148
7544,1986-03-17,28.998720,29.750399,28.998720,29.499840,133171200,0.070949
7545,1986-03-14,27.999361,29.499840,27.999361,28.998720,308160000,0.069744


In [24]:
df = pd.read_csv("msft.csv", 
                skip_footer = 2,
                engine = 'python')
df

Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close
0,2016-02-23,52.340000,52.369999,50.980000,51.180000,28390800,51.180000
1,2016-02-22,52.279999,53.000000,52.279999,52.650002,24854400,52.650002
2,2016-02-19,51.970001,52.279999,51.529999,51.820000,33275400,51.820000
3,2016-02-18,52.330002,52.950001,52.099998,52.189999,26244700,52.189999
4,2016-02-17,51.490002,52.770000,51.450001,52.419998,39670300,52.419998
...,...,...,...,...,...,...,...
7543,1986-03-21,27.501120,27.999361,26.251200,26.749441,59990400,0.064334
7544,1986-03-20,28.249920,28.249920,27.250559,27.501120,58435200,0.066142
7545,1986-03-19,28.751040,28.998720,27.999361,28.249920,47894400,0.067943
7546,1986-03-18,29.499840,29.750399,28.500479,28.751040,67766400,0.069148


In [25]:
pd.read_csv("msft.csv", nrows=3)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close
0,2016-02-23,52.34,52.369999,50.98,51.18,28390800,51.18
1,2016-02-22,52.279999,53.0,52.279999,52.650002,24854400,52.650002
2,2016-02-19,51.970001,52.279999,51.529999,51.82,33275400,51.82


In [26]:
pd.read_csv("msft.csv", skiprows=100, nrows=5, header=0,
           names=['open', 'high', 'low', 'close', 'val', 'adjClose'])

Unnamed: 0,open,high,low,close,val,adjClose
2015-09-29,43.369999,43.57,43.049999,43.439999,32763600,42.841561
2015-09-28,43.830002,44.09,43.209999,43.290001,27613800,42.69363
2015-09-25,44.48,44.73,43.759998,43.939999,29384600,43.334673
2015-09-24,43.450001,44.130001,43.27,43.91,27905600,43.305088
2015-09-23,43.93,44.169998,43.509998,43.869999,17145200,43.265638


In [30]:
df = pd.read_excel("msft.xls")
# df = pd.read_excel("msft.xls", sheetname='aapl) #sheetname
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close
0,2016-02-23,52.34,52.369999,50.98,51.18,28390800,51.18
1,2016-02-22,52.279999,53.0,52.279999,52.650002,24854400,52.650002
2,2016-02-19,51.970001,52.279999,51.529999,51.82,33275400,51.82
3,2016-02-18,52.330002,52.950001,52.099998,52.189999,26244700,52.189999
4,2016-02-17,51.490002,52.77,51.450001,52.419998,39670300,52.419998


In [31]:
df.to_excel("msft.xls")

In [32]:
df.to_excel("msft.xls", sheet_name="msft")

*Multiple sheets*

In [35]:
from pandas import ExcelWriter

with ExcelWriter("multisheets.xls") as writer:
    df.to_excel(writer, sheet_name="sheet1")
    df.to_excel(writer, sheet_name="sheet2")

In [36]:
df.to_excel("msft_temp.xlsx")

# Reading and Writing JSON files

In [37]:
df.head().to_json("msft.json")
!cat msft.json

{"Date":{"0":"2016-02-23","1":"2016-02-22","2":"2016-02-19","3":"2016-02-18","4":"2016-02-17"},"Open":{"0":52.34,"1":52.279999,"2":51.970001,"3":52.330002,"4":51.490002},"High":{"0":52.369999,"1":53.0,"2":52.279999,"3":52.950001,"4":52.77},"Low":{"0":50.98,"1":52.279999,"2":51.529999,"3":52.099998,"4":51.450001},"Close":{"0":51.18,"1":52.650002,"2":51.82,"3":52.189999,"4":52.419998},"Volume":{"0":28390800,"1":24854400,"2":33275400,"3":26244700,"4":39670300},"Adj Close":{"0":51.18,"1":52.650002,"2":51.82,"3":52.189999,"4":52.419998}}

In [38]:
df_from_json = pd.read_json("msft.json")
df_from_json.head(5)

Unnamed: 0,Adj Close,Close,Date,High,Low,Open,Volume
0,51.18,51.18,2016-02-23,52.369999,50.98,52.34,28390800
1,52.650002,52.650002,2016-02-22,53.0,52.279999,52.279999,24854400
2,51.82,51.82,2016-02-19,52.279999,51.529999,51.970001,33275400
3,52.189999,52.189999,2016-02-18,52.950001,52.099998,52.330002,26244700
4,52.419998,52.419998,2016-02-17,52.77,51.450001,51.490002,39670300


# HTML

In [40]:
url = "https://www.fdic.gov/bank/individual/failed/banklist.html"
banks = pd.read_html(url)
banks[0][0:5].ix[:,0:4]

Unnamed: 0,Bank Name,City,ST,CERT
0,Hometown National Bank,Longview,WA,35156
1,The Bank of Georgia,Peachtree City,GA,35259
2,Premier Bank,Denver,CO,34112
3,Edgebrook Bank,Chicago,IL,57772
4,Doral BankEn Espanol,San Juan,PR,32102


In [42]:
banks[0]

Unnamed: 0,Bank Name,City,ST,CERT,Acquiring Institution,Closing Date,Updated Date,Loss Share Type,Agreement Terminated,Termination Date
0,Hometown National Bank,Longview,WA,35156,Twin City Bank,"October 2, 2015","February 12, 2016",,,
1,The Bank of Georgia,Peachtree City,GA,35259,Fidelity Bank,"October 2, 2015","February 12, 2016",,,
2,Premier Bank,Denver,CO,34112,"United Fidelity Bank, fsb","July 10, 2015","December 17, 2015",none,,
3,Edgebrook Bank,Chicago,IL,57772,Republic Bank of Chicago,"May 8, 2015","July 23, 2015",none,,
4,Doral BankEn Espanol,San Juan,PR,32102,Banco Popular de Puerto Rico,"February 27, 2015","May 13, 2015",none,,
...,...,...,...,...,...,...,...,...,...,...
537,"Superior Bank, FSB",Hinsdale,IL,32646,"Superior Federal, FSB","July 27, 2001","August 19, 2014",none,,
538,Malta National Bank,Malta,OH,6629,North Valley Bank,"May 3, 2001","November 18, 2002",none,,
539,First Alliance Bank & Trust Co.,Manchester,NH,34264,Southern New Hampshire Bank & Trust,"February 2, 2001","February 18, 2003",none,,
540,National State Bank of Metropolis,Metropolis,IL,3815,Banterra Bank of Marion,"December 14, 2000","March 17, 2005",none,,


In [45]:
banks[0].head(2).to_html("failed_banks.html")
! head -30 failed_banks.html

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Bank Name</th>
      <th>City</th>
      <th>ST</th>
      <th>CERT</th>
      <th>Acquiring Institution</th>
      <th>Closing Date</th>
      <th>Updated Date</th>
      <th>Loss Share Type</th>
      <th>Agreement Terminated</th>
      <th>Termination Date</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Hometown National Bank</td>
      <td>Longview</td>
      <td>WA</td>
      <td>35156</td>
      <td>Twin City Bank</td>
      <td>October 2, 2015</td>
      <td>February 12, 2016</td>
      <td>NaN</td>
      <td>NaN</td>
      <td>NaN</td>
    </tr>


# HDF5

In [51]:
np.random.seed(123456)
df = pd.DataFrame(np.random.randn(8, 3),
                 index=pd.date_range('1/1/2000', periods=8),
                 columns=['A', 'B', 'C'])

store = pd.HDFStore('store.h5')
store['df'] = df
store

<class 'pandas.io.pytables.HDFStore'>
File path: store.h5
/df            frame        (shape->[8,3])

In [53]:
store = pd.HDFStore('store.h5')
df = store['df']
df

Unnamed: 0,A,B,C
2000-01-01,0.469112,-0.282863,-1.509059
2000-01-02,-1.135632,1.212112,-0.173215
2000-01-03,0.119209,-1.044236,-0.861849
2000-01-04,-2.104569,-0.494929,1.071804
2000-01-05,0.721555,-0.706771,-1.039575
2000-01-06,0.27186,-0.424972,0.56702
2000-01-07,0.276232,-1.087401,-0.67369
2000-01-08,0.113648,-1.478427,0.524988


In [54]:
df.ix[0].A = 1

store['df'] = df

pd.HDFStore('store.h5')['df'].head(2)

Unnamed: 0,A,B,C
2000-01-01,1.0,-0.282863,-1.509059
2000-01-02,-1.135632,1.212112,-0.173215


# Accessing data on the web

In [64]:
df = pd.read_csv("http://ichart.yahoo.com/table.csv?s=MSFT&" +
           "a=5&b=1&c=2014&" +
           "d=5&e=30&f=2014&" +
           "g=d&ignore=.csv")
# df = pd.read_csv("http://ichart.yahoo.com/table.csv?s=MSFT&" +
#                 "a=5&b=1&c=2014&" +
#                 "d=5&e=30&f=2014&" +
#                 "g=d&ignore=.csv")
df[:5]

Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close
0,2014-06-30,42.169998,42.209999,41.700001,41.700001,30805500,39.804237
1,2014-06-27,41.610001,42.290001,41.509998,42.25,74640000,40.329232
2,2014-06-26,41.93,41.939999,41.43,41.720001,23604400,39.823328
3,2014-06-25,41.700001,42.049999,41.459999,42.029999,20049100,40.119233
4,2014-06-24,41.830002,41.939999,41.560001,41.75,26509100,39.851963


In [58]:
df.describe()

Unnamed: 0,A,B,C
count,8.0,8.0,8.0
mean,-0.092212,-0.538436,-0.261697
std,1.024613,0.811842,0.908894
min,-2.104569,-1.478427,-1.509059
25%,-0.198672,-1.055027,-0.90628
50%,0.195534,-0.60085,-0.423452
75%,0.387563,-0.389445,0.535496
max,1.0,1.212112,1.071804


# sqlite

In [66]:
import sqlite3

msft = pd.read_csv('msft.csv')
msft["Symbol"] = "MSFT"
aapl = pd.read_csv('aapl.csv')
aapl["Symbol"] = "AAPL"

connection = sqlite3.connect("stocks.sqlite")
msft.to_sql("STOCK_DATA", connection, if_exists="replace")
aapl.to_sql("STOCK_DATA", connection, if_exists="append")

connection.commit()
connection.close()



*Reading*

In [67]:
connection = sqlite3.connect("stocks.sqlite")

stocks = pd.io.sql.read_sql("SELECT * FROM STOCK_DATA;",
                             connection, index_col="index")

connection.close()

stocks.head()

Unnamed: 0_level_0,Date,Open,High,Low,Close,Volume,Adj Close,Symbol
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,2016-02-23,52.34,52.369999,50.98,51.18,28390800,51.18,MSFT
1,2016-02-22,52.279999,53.0,52.279999,52.650002,24854400,52.650002,MSFT
2,2016-02-19,51.970001,52.279999,51.529999,51.82,33275400,51.82,MSFT
3,2016-02-18,52.330002,52.950001,52.099998,52.189999,26244700,52.189999,MSFT
4,2016-02-17,51.490002,52.77,51.450001,52.419998,39670300,52.419998,MSFT


In [74]:
connection = sqlite3.connect("stocks.sqlite")

query = "SELECT * FROM STOCK_DATA WHERE Volume>29200100 AND Symbol='MSFT';"

items = pd.io.sql.read_sql(query, connection, index_col="index")
connection.close()

items

Unnamed: 0_level_0,Date,Open,High,Low,Close,Volume,Adj Close,Symbol
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,2016-02-19,51.970001,52.279999,51.529999,51.820000,33275400,51.820000,MSFT
4,2016-02-17,51.490002,52.770000,51.450001,52.419998,39670300,52.419998,MSFT
5,2016-02-16,50.900002,51.090000,50.130001,51.090000,35937100,51.090000,MSFT
6,2016-02-12,50.250000,50.680000,49.750000,50.500000,34243300,50.140000,MSFT
7,2016-02-11,48.680000,50.110001,48.509998,49.689999,48878600,49.335773,MSFT
...,...,...,...,...,...,...,...,...
7545,1986-03-19,28.751040,28.998720,27.999361,28.249920,47894400,0.067943,MSFT
7546,1986-03-18,29.499840,29.750399,28.500479,28.751040,67766400,0.069148,MSFT
7547,1986-03-17,28.998720,29.750399,28.998720,29.499840,133171200,0.070949,MSFT
7548,1986-03-14,27.999361,29.499840,27.999361,28.998720,308160000,0.069744,MSFT


# Remote data services

In [75]:
import pandas.io.data as web
import datetime

start = datetime.datetime(2012, 1, 1)
end = datetime.datetime(2014, 1, 27)

yahoo = web.DataReader('MSFT', 'yahoo', start, end)
yahoo.head()

The pandas.io.data module is moved to a separate package (pandas-datareader) and will be removed from pandas in a future version.
After installing the pandas-datareader package (https://github.com/pydata/pandas-datareader), you can change the import ``from pandas.io import data, wb`` to ``from pandas_datareader import data, wb``.


AttributeError: module 'pandas.io.data' has no attribute 'DAtaReader'

conda install pandas-datareader

In [76]:
import pandas_datareader.data as web
import datetime

start = datetime.datetime(2012, 1, 1)
end = datetime.datetime(2014, 1, 27)

yahoo = web.DataReader('MSFT', 'yahoo', start, end)
yahoo.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2012-01-03,26.549999,26.959999,26.389999,26.77,64731500,23.773104
2012-01-04,26.82,27.469999,26.780001,27.4,80516100,24.332575
2012-01-05,27.379999,27.73,27.290001,27.68,56081400,24.58123
2012-01-06,27.530001,28.190001,27.530001,28.110001,99455500,24.963092
2012-01-09,28.049999,28.1,27.719999,27.74,59706800,24.634512


In [77]:
goog = web.DataReader("MSFT", "google", start, end)
goog.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-01-03,26.55,26.96,26.39,26.76,64735391
2012-01-04,26.82,27.47,26.78,27.4,80519402
2012-01-05,27.38,27.73,27.29,27.68,56082205
2012-01-06,27.53,28.19,27.52,28.1,99459469
2012-01-09,28.05,28.1,27.72,27.74,59708266


In [79]:
aapl = pd.io.data.Options('AAPL', 'yahoo')
data = aapl.get_all_data()
data.iloc[0:6, 0:4]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Last,Bid,Ask,Chg
Strike,Expiry,Type,Symbol,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
45.0,2016-06-17,put,AAPL160617P00045000,0.04,0.01,0.06,0.0
47.5,2017-01-20,call,AAPL170120C00047500,49.75,52.85,53.35,0.0
47.5,2017-01-20,put,AAPL170120P00047500,0.47,0.45,0.48,-0.08
47.5,2017-06-16,put,AAPL170616P00047500,0.84,0.73,0.92,-0.1
47.5,2018-01-19,call,AAPL180119C00047500,48.0,51.65,53.85,0.0
47.5,2018-01-19,put,AAPL180119P00047500,1.5,1.34,1.6,0.0


The above took a looooong time

In [80]:
data.loc[(80, slice(None), 'put'), :].iloc[0:5, 0:4]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Last,Bid,Ask,Chg
Strike,Expiry,Type,Symbol,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
80,2016-03-04,put,AAPL160304P00080000,0.01,0.0,0.01,0.0
80,2016-03-11,put,AAPL160311P00080000,0.02,0.01,0.02,-0.01
80,2016-03-18,put,AAPL160318P00080000,0.02,0.02,0.03,-0.04
80,2016-03-24,put,AAPL160324P00080000,0.04,0.03,0.07,-0.06
80,2016-04-01,put,AAPL160401P00080000,0.11,0.06,0.09,-0.04


In [83]:
data.loc[(80, slice('20150117','20150417'), 
             'put'), :].iloc[:, 0:4]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Last,Bid,Ask,Chg
Strike,Expiry,Type,Symbol,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1


did the above work?

In [84]:
expiry = datetime.date(2015, 1, 5)
msft_calls = pd.io.data.Options('MSFT', 'yahoo').get_call_data(expiry=expiry)
msft_calls.iloc[0:5, 0:5]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Last,Bid,Ask,Chg,PctChg
Strike,Expiry,Type,Symbol,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
40.0,2016-03-04,call,MSFT160304C00040000,9.65,10.45,14.0,0,0.00%
45.0,2016-03-04,call,MSFT160304C00045000,6.2,5.8,7.65,0,0.00%
46.0,2016-03-04,call,MSFT160304C00046000,4.0,4.85,6.65,0,0.00%
46.5,2016-03-04,call,MSFT160304C00046500,2.82,4.25,6.15,0,0.00%
47.0,2016-03-04,call,MSFT160304C00047000,3.36,3.85,5.65,0,0.00%


In [85]:
expiry = datetime.date(2015, 1, 17)
aapl_calls = aapl.get_call_data(expiry=expiry)
aapl_calls.iloc[0:5, 0:4]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Last,Bid,Ask,Chg
Strike,Expiry,Type,Symbol,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
70,2016-03-04,call,AAPL160304C00070000,26.52,30.4,30.7,0.0
75,2016-03-04,call,AAPL160304C00075000,22.3,25.4,25.7,0.0
80,2016-03-04,call,AAPL160304C00080000,16.3,20.4,20.65,0.0
84,2016-03-04,call,AAPL160304C00084000,16.75,16.4,16.6,4.61
85,2016-03-04,call,AAPL160304C00085000,15.4,15.4,15.6,3.35


### FRED ###

In [86]:
gdp = web.DataReader('GDP', 'fred',
                    datetime.date(2012, 1, 1),
                    datetime.date(2014, 1, 27))
gdp

Unnamed: 0_level_0,GDP
DATE,Unnamed: 1_level_1
2012-01-01,15973.9
2012-04-01,16121.9
2012-07-01,16227.9
2012-10-01,16297.3
2013-01-01,16440.7
2013-04-01,16526.8
2013-07-01,16727.5
2013-10-01,16957.6
2014-01-01,16984.3


In [88]:
wages = web.DataReader("A576RC1A027NBEA",
              "fred",
              datetime.date(1929, 1, 1),
              datetime.date(2013, 1, 1))
wages

Unnamed: 0_level_0,A576RC1A027NBEA
DATE,Unnamed: 1_level_1
1929-01-01,50.5
1930-01-01,46.2
1931-01-01,39.2
1932-01-01,30.5
1933-01-01,29.0
...,...
2009-01-01,6251.4
2010-01-01,6377.5
2011-01-01,6633.2
2012-01-01,6930.3


In [89]:
factors = web.DataReader("Global_Factors", "famafrench")
factors

{0:          Mkt-RF   SMB   HML    WML    RF
 Date                                    
 1990-07    0.79  0.07  0.24 -99.99  0.68
 1990-08  -10.76 -1.56  0.42 -99.99  0.66
 1990-09  -12.24  1.68  0.34 -99.99  0.60
 1990-10    9.58 -8.11 -3.29 -99.99  0.68
 1990-11   -3.87  1.62  0.68  -0.32  0.57
 ...         ...   ...   ...    ...   ...
 2015-09   -3.91 -0.28 -0.89   3.47  0.00
 2015-10    7.30 -2.26  0.21  -2.62  0.00
 2015-11   -0.30  1.69 -1.78   2.11  0.00
 2015-12   -1.74  0.93 -1.79   3.28  0.01
 2016-01   -6.29 -2.10  0.97   0.51  0.01
 
 [307 rows x 5 columns], 1:       Mkt-RF    SMB    HML    WML    RF
 Date                                   
 1991   11.86   5.17  -7.48  11.61  5.60
 1992   -9.65   2.15   4.23   9.72  3.51
 1993   19.17  10.19  16.78   6.01  2.90
 1994    2.53  -0.24   6.89   2.27  3.90
 1995   12.97  -1.92  -4.44  10.31  5.60
 ...      ...    ...    ...    ...   ...
 2011   -6.79  -5.41  -4.76   6.30  0.04
 2012   16.87  -2.55   6.41   6.35  0.06
 2013   28.6

In [90]:
import pandas_datareader.wb as wb

all_indicators = wb.get_indicators()


  data = data.sort(columns='id')


In [91]:
all_indicators.ix[:, 0:1]

Unnamed: 0,id
0,1.0.HCount.1.25usd
1,1.0.HCount.10usd
2,1.0.HCount.2.5usd
3,1.0.HCount.Mid10to50
4,1.0.HCount.Ofcl
...,...
15711,per_sionl.overlap_pop_urb
15712,per_sionl.overlap_q1_preT_tot
15713,per_sionl.overlap_q1_rur
15714,per_sionl.overlap_q1_tot


In [92]:
le_indicators = wb.search("life expectancy")
le_indicators.iloc[:3, :2]

  data = data.sort(columns='id')


Unnamed: 0,id,name
8252,SE.SCH.LIFE,"School life expectancy, primary to tertiary, b..."
8253,SE.SCH.LIFE.FE,"School life expectancy, primary to tertiary, f..."
8254,SE.SCH.LIFE.MA,"School life expectancy, primary to tertiary, m..."


In [93]:
countries = wb.get_countries()
countries.iloc[0:10].ix[:,['name', 'capitalCity', 'iso2c']]

Unnamed: 0,name,capitalCity,iso2c
0,Aruba,Oranjestad,AW
1,Afghanistan,Kabul,AF
2,Africa,,A9
3,Angola,Luanda,AO
4,Albania,Tirane,AL
5,Andorra,Andorra la Vella,AD
6,Andean Region,,L5
7,Arab World,,1A
8,United Arab Emirates,Abu Dhabi,AE
9,Argentina,Buenos Aires,AR


In [95]:
le_data_all = wb.download(indicator="SP.DYN.LE00.IN",
                         start='1980',
                         end='2014')
le_data_all

  out = out.convert_objects(convert_numeric=True)


Unnamed: 0_level_0,Unnamed: 1_level_0,SP.DYN.LE00.IN
country,year,Unnamed: 2_level_1
Canada,2014,
Canada,2013,81.401122
Canada,2012,81.238049
Canada,2011,81.068317
Canada,2010,80.893488
...,...,...
United States,1984,74.563415
United States,1983,74.463415
United States,1982,74.360976
United States,1981,74.007317


In [98]:
le_data_all.index.levels[0]

Index(['Canada', 'Mexico', 'United States'], dtype='object', name='country')

In [101]:
le_data_all = wb.download(indicator="SP.DYN.LE00.IN",
                         country = countries['iso2c'],
                         start='1980',
                         end='2012')
le_data_all

  out = out.convert_objects(convert_numeric=True)


Unnamed: 0_level_0,Unnamed: 1_level_0,SP.DYN.LE00.IN
country,year,Unnamed: 2_level_1
Aruba,2012,75.205756
Aruba,2011,75.081390
Aruba,2010,74.953537
Aruba,2009,74.818146
Aruba,2008,74.675732
...,...,...
Zimbabwe,1984,61.583951
Zimbabwe,1983,61.148171
Zimbabwe,1982,60.605512
Zimbabwe,1981,60.004829


In [103]:
le_data = le_data_all.reset_index().pivot(index="country", 
                                          columns="year")
le_data.ix[:,0:3]
                                          

Unnamed: 0_level_0,SP.DYN.LE00.IN,SP.DYN.LE00.IN,SP.DYN.LE00.IN
year,1980,1981,1982
country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Afghanistan,41.867537,42.526927,43.230732
Albania,70.235976,70.454463,70.685122
Algeria,58.164024,59.486756,60.786341
American Samoa,,,
Andorra,,,
...,...,...,...
West Bank and Gaza,,,
World,62.913617,63.217319,63.518728
"Yemen, Rep.",50.559537,51.541341,52.492707
Zambia,51.499195,51.217146,50.786000


In [104]:
country_with_least_expectancy = le_data.idxmin(axis=0)
country_with_least_expectancy

                year
SP.DYN.LE00.IN  1980       Cambodia
                1981       Cambodia
                1982    Timor-Leste
                1983    South Sudan
                1984    South Sudan
                           ...     
                2008        Lesotho
                2009        Lesotho
                2010        Lesotho
                2011        Lesotho
                2012        Lesotho
dtype: object

In [None]:
expectancy_for_least_country = le_data.min(axis=0)
expectancy