# Reading from Public Data APIs

You may need to 'conda install pandas-datareader' if it isn't there for you.

Documentation and examples: https://pandas-datareader.readthedocs.io/en/latest/remote_data.html

In computer programming, an application programming interface (API) is a set of subroutine definitions, protocols, and tools for building application software. In general terms, it is a set of clearly defined methods of communication between various software components. 

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
import pandas_datareader.data as web
import datetime

In [3]:
type(web)

module

In [4]:
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2013, 1, 27)

f = web.DataReader("AAPL", 'yahoo', start, end)

f.loc['2010-01-04']

Open         3.049000e+01
High         3.064286e+01
Low          3.034000e+01
Close        3.057286e+01
Adj Close    2.740653e+01
Volume       1.234324e+08
Name: 2010-01-04 00:00:00, dtype: float64

In [6]:
f.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-12-31,30.447144,30.478571,30.08,30.104286,26.986492,88102700
2010-01-04,30.49,30.642857,30.34,30.572857,27.406532,123432400
2010-01-05,30.657143,30.798571,30.464285,30.625713,27.453915,150476200
2010-01-06,30.625713,30.747143,30.107143,30.138571,27.017223,138040000
2010-01-07,30.25,30.285715,29.864286,30.082857,26.967278,119282800


In [20]:
start = datetime.datetime(2016, 11, 25)
end = datetime.datetime(2017, 6, 27)
g = web.DataReader("F", 'google', start, end )

In [22]:
g.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-11-21,11.77,11.9,11.77,11.79,19508608
2016-11-22,11.76,11.91,11.73,11.89,24274734
2016-11-23,11.9,12.0,11.85,11.9,22418351
2016-11-25,11.95,12.06,11.93,12.04,11439553
2016-11-28,12.02,12.05,11.89,11.92,24407192


In [17]:
g.loc['2016-11-21':'2016-11-24']

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-11-21,11.77,11.9,11.77,11.79,19508608
2016-11-22,11.76,11.91,11.73,11.89,24274734
2016-11-23,11.9,12.0,11.85,11.9,22418351


In [9]:
df = web.DataReader("tran_sf_railac", 'eurostat')

In [10]:
df

ACCIDENT,"Collisions of trains, including collisions with obstacles within the clearance gauge","Collisions of trains, including collisions with obstacles within the clearance gauge","Collisions of trains, including collisions with obstacles within the clearance gauge","Collisions of trains, including collisions with obstacles within the clearance gauge","Collisions of trains, including collisions with obstacles within the clearance gauge","Collisions of trains, including collisions with obstacles within the clearance gauge","Collisions of trains, including collisions with obstacles within the clearance gauge","Collisions of trains, including collisions with obstacles within the clearance gauge","Collisions of trains, including collisions with obstacles within the clearance gauge","Collisions of trains, including collisions with obstacles within the clearance gauge",...,Total,Total,Total,Total,Total,Total,Total,Total,Total,Total
UNIT,Number,Number,Number,Number,Number,Number,Number,Number,Number,Number,...,Number,Number,Number,Number,Number,Number,Number,Number,Number,Number
GEO,Austria,Belgium,Bulgaria,Switzerland,Channel Tunnel,Czech Republic,Germany (until 1990 former territory of the FRG),Denmark,Estonia,Greece,...,Latvia,Netherlands,Norway,Poland,Portugal,Romania,Sweden,Slovenia,Slovakia,United Kingdom
FREQ,Annual,Annual,Annual,Annual,Annual,Annual,Annual,Annual,Annual,Annual,...,Annual,Annual,Annual,Annual,Annual,Annual,Annual,Annual,Annual,Annual
TIME_PERIOD,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4
2010-01-01,3.0,5.0,2.0,5.0,0.0,3.0,13.0,0.0,1.0,4.0,...,41.0,24.0,20.0,449.0,42.0,271.0,69.0,21.0,85.0,62.0
2011-01-01,2.0,0.0,0.0,4.0,0.0,6.0,18.0,1.0,0.0,1.0,...,35.0,29.0,36.0,488.0,27.0,217.0,54.0,11.0,84.0,78.0
2012-01-01,1.0,3.0,3.0,4.0,0.0,6.0,23.0,1.0,3.0,2.0,...,25.0,30.0,19.0,379.0,36.0,215.0,47.0,14.0,96.0,75.0
2013-01-01,4.0,1.0,2.0,6.0,0.0,5.0,29.0,0.0,0.0,2.0,...,26.0,36.0,30.0,328.0,48.0,180.0,43.0,13.0,94.0,84.0
2014-01-01,1.0,3.0,4.0,0.0,0.0,13.0,32.0,0.0,0.0,1.0,...,22.0,20.0,28.0,313.0,50.0,185.0,53.0,15.0,113.0,54.0
2015-01-01,7.0,0.0,3.0,3.0,0.0,14.0,40.0,3.0,0.0,1.0,...,25.0,31.0,19.0,307.0,23.0,141.0,40.0,14.0,87.0,40.0


In [None]:
df.columns

In [14]:
# sometimes it's easier to read... but un-nesting this would be a good challenge :)
df.transpose()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,TIME_PERIOD,2010-01-01 00:00:00,2011-01-01 00:00:00,2012-01-01 00:00:00,2013-01-01 00:00:00,2014-01-01 00:00:00
ACCIDENT,UNIT,GEO,FREQ,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"Collisions of trains, including collisions with obstacles within the clearance gauge",Number,Austria,Annual,3.0,2.0,1.0,4.0,1.0
"Collisions of trains, including collisions with obstacles within the clearance gauge",Number,Belgium,Annual,5.0,0.0,3.0,1.0,3.0
"Collisions of trains, including collisions with obstacles within the clearance gauge",Number,Bulgaria,Annual,2.0,0.0,3.0,2.0,4.0
"Collisions of trains, including collisions with obstacles within the clearance gauge",Number,Switzerland,Annual,5.0,4.0,4.0,6.0,0.0
"Collisions of trains, including collisions with obstacles within the clearance gauge",Number,Channel Tunnel,Annual,0.0,0.0,0.0,0.0,0.0
"Collisions of trains, including collisions with obstacles within the clearance gauge",Number,Czech Republic,Annual,3.0,6.0,6.0,5.0,13.0
"Collisions of trains, including collisions with obstacles within the clearance gauge",Number,Germany (until 1990 former territory of the FRG),Annual,13.0,18.0,23.0,29.0,32.0
"Collisions of trains, including collisions with obstacles within the clearance gauge",Number,Denmark,Annual,0.0,1.0,1.0,0.0,0.0
"Collisions of trains, including collisions with obstacles within the clearance gauge",Number,Estonia,Annual,1.0,0.0,3.0,0.0,0.0
"Collisions of trains, including collisions with obstacles within the clearance gauge",Number,Greece,Annual,4.0,1.0,2.0,2.0,1.0


## Import data from an URL 

In [6]:
import urllib.request, json 
import pandas as pd

with urllib.request.urlopen("https://opendata-renewables.engie.com/api/records/1.0/search/?dataset=la-haute-borne-data-2009-2012&facet=wind_turbine_name&facet=date_time") as url:
    data = json.loads(url.read())
    print(data)

{'nhits': 839100, 'parameters': {'dataset': ['la-haute-borne-data-2009-2012'], 'timezone': 'UTC', 'rows': 10, 'format': 'json', 'facet': ['wind_turbine_name', 'date_time']}, 'records': [{'datasetid': 'la-haute-borne-data-2009-2012', 'recordid': '7e8089ea75688eccb2c9924dcf403126e1bef7e3', 'fields': {'cosphi_avg': 1.0, 'rt_avg': 15.0, 'git_min': 54.400002, 'yt_avg': 22.940001, 'ws2_std': 0.67000002, 'gost_avg': 57.18, 'rs_min': 11.15, 'yt_max': 23.1, 'db1t_std': 0.34, 'rm_min': 1365.65, 'nf_min': 49.93, 'db1t_avg': 41.540001, 'nf_max': 49.98, 'rm_max': 2753.02, 'git_max': 55.650002, 'ws2_min': 2.8699999, 'rt_std': 0.0, 'dst_std': 0.36000001, 'ws2_max': 6.6500001, 'gb1t_std': 0.36000001, 'dcs_std': 70.029999, 'gb2t_avg': 64.900002, 'git_std': 0.25999999, 'ya_min': 234.98, 'git_avg': 54.959999, 'va1_avg': 1.5599999, 'p_avg': 340.48001, 'db2t_max': 37.799999, 's_std': 53.77, 'wa_max': 0.0, 'ba_std': 0.0, 'rbt_avg': 31.27, 'rm_std': 274.12, 'va1_std': 12.16, 'rm_avg': 2290.6001, 'db2t_std': 

In [7]:
data.keys()

dict_keys(['nhits', 'parameters', 'records', 'facet_groups'])

In [9]:
df = pd.DataFrame(data['records'])
df.head()

Unnamed: 0,datasetid,fields,record_timestamp,recordid
0,la-haute-borne-data-2009-2012,"{'cosphi_avg': 1.0, 'rt_avg': 15.0, 'git_min':...",2017-06-02T10:27:00+00:00,7e8089ea75688eccb2c9924dcf403126e1bef7e3
1,la-haute-borne-data-2009-2012,"{'cosphi_avg': -1.0, 'rt_avg': 14.82, 'git_min...",2017-06-02T10:27:00+00:00,e362ed16a598e83b2d35ff7adc1c7c7e09925e22
2,la-haute-borne-data-2009-2012,"{'cosphi_avg': 1.0, 'rt_avg': 24.0, 'git_min':...",2017-06-02T10:27:00+00:00,73f09bdaca4b316ccc109fd88f368032dfd39f75
3,la-haute-borne-data-2009-2012,"{'cosphi_avg': 1.0, 'rt_avg': 14.88, 'git_min'...",2017-06-02T10:27:00+00:00,1d310c07c6b0fbe4774203ba5dd6571311ff3a46
4,la-haute-borne-data-2009-2012,"{'cosphi_avg': 1.0, 'rt_avg': 23.99, 'git_min'...",2017-06-02T10:27:00+00:00,ec26480e7d20ae2b386b84f84b8b9e2a92ba1803


In [10]:
pd.DataFrame(list(df['fields']))

Unnamed: 0,ba_avg,ba_max,ba_min,ba_std,cm_avg,cm_max,cm_min,cm_std,cosphi_avg,cosphi_max,...,ws_min,ws_std,ya_avg,ya_max,ya_min,ya_std,yt_avg,yt_max,yt_min,yt_std
0,-1.0,-1.0,-1.0,0.0,2441.6899,2871.99,1620.39,238.91,1.0,0.0,...,3.15,0.64,234.98,234.98,234.98,0.0,22.940001,23.1,20.6,0.73
1,-1.0,-1.0,-1.0,0.0,2188.79,3494.3301,1233.5601,534.98999,-1.0,0.0,...,3.19,0.95,245.95,245.95,245.95,0.0,22.799999,23.0,21.1,0.6
2,-1.0,-1.0,-1.0,0.0,3078.9199,3253.8201,3008.98,54.099998,1.0,0.0,...,4.43,0.51,13.27,13.27,13.27,0.0,27.450001,29.6,27.200001,0.73
3,-1.0,-1.0,-1.0,0.0,2847.1599,4999.1401,1409.6801,847.92999,1.0,0.0,...,3.02,1.02,256.92999,256.92999,256.92999,0.0,21.440001,21.9,18.200001,1.13
4,44.990002,44.990002,44.990002,0.0,-4.71,-4.49,-4.94,0.07,1.0,0.0,...,1.51,0.26,53.880001,53.880001,53.880001,0.0,29.75,30.0,28.6,0.4
5,-1.0,-1.0,-1.0,0.0,595.33002,705.06,494.57001,44.52,-1.0,0.0,...,2.39,0.26,30.83,30.83,30.83,0.0,32.509998,32.599998,32.349998,0.07
6,-1.0,-0.58,-1.0,0.03,3898.26,5805.9302,1613.39,794.06,1.0,0.0,...,4.37,1.32,280.70001,290.95001,277.78,5.38,19.85,20.200001,17.9,0.61
7,-1.0,-1.0,-1.0,0.0,2912.2,3583.55,1479.9399,576.88,1.0,0.0,...,3.98,0.88,282.20999,286.56,265.70999,8.229999,21.940001,25.9,21.9,1.17
8,-1.0,-1.0,-1.0,0.0,1535.0601,1782.8199,1164.14,148.88,-0.99,0.0,...,2.67,0.48,265.70999,265.70999,265.70999,0.0,26.950001,27.0,25.9,0.31
9,44.990002,44.990002,44.990002,0.0,-4.16,-3.93,-4.37,0.07,1.0,0.0,...,0.0,0.0,312.89999,312.89999,312.89999,0.0,24.91,25.200001,24.9,0.06
