# Data Exploration

## Imports

In [1]:
import numpy as np
import pandas as pd
import requests

## USGS Wind Turbine Database
Example request response for wind turbine with 'case_id'=3038257. Response data contains turbine ID ('case_id'), rated capacity ('t_cap'), dimensions, coordinates ('xlong' and 'ylat'), and other relevant data. Calling the json() method on the response object gives a list of dictionaries, where each dictionary represents a turbine from the response.

In [9]:
base_path = 'https://eersc.usgs.gov/api/uswtdb/v1/'
resp = requests.get(f"{base_path}turbines?&case_id=eq.3038257")
resp.json()[0]

{'case_id': 3038257,
 'faa_ors': '19-022348',
 'faa_asn': '2011-WTE-22311-OE',
 'usgs_pr_id': 20977,
 't_state': 'IA',
 't_county': 'Story County',
 't_fips': '19169',
 'p_name': 'AG Land 4',
 'p_year': 2012,
 'p_tnum': 6,
 'p_cap': 9.6,
 't_manu': 'GE Wind',
 't_model': 'GE1.6-82.5',
 't_cap': 1600,
 't_hh': 80.0,
 't_rd': 82.5,
 't_rsa': 5345.62,
 't_ttlh': 121.3,
 't_conf_atr': 3,
 't_conf_loc': 3,
 't_img_date': '1/1/2013',
 't_img_srce': 'NAIP',
 'xlong': -93.3549,
 'ylat': 41.90419,
 'eia_id': None}

Example request for multiple turbines in USWTDB

In [22]:
params = {'offset': 0, 'limit': 5}
resp = requests.get(f"{base_path}turbines", params=params)
turbine_data = resp.json()
turbine_data

[{'case_id': 3005443,
  'faa_ors': None,
  'faa_asn': None,
  'usgs_pr_id': 5840,
  't_state': 'CA',
  't_county': 'Kern County',
  't_fips': '6029',
  'p_name': '251 Wind',
  'p_year': 1987,
  'p_tnum': 194,
  'p_cap': 18.43,
  't_manu': 'Vestas',
  't_model': None,
  't_cap': 95,
  't_hh': None,
  't_rd': None,
  't_rsa': None,
  't_ttlh': None,
  't_conf_atr': 2,
  't_conf_loc': 3,
  't_img_date': '5/8/2018',
  't_img_srce': 'Digital Globe',
  'xlong': -118.35109,
  'ylat': 35.0919,
  'eia_id': 52161},
 {'case_id': 3072704,
  'faa_ors': None,
  'faa_asn': None,
  'usgs_pr_id': 5146,
  't_state': 'CA',
  't_county': 'Kern County',
  't_fips': '6029',
  'p_name': '251 Wind',
  'p_year': 1987,
  'p_tnum': 194,
  'p_cap': 18.43,
  't_manu': 'Vestas',
  't_model': None,
  't_cap': 95,
  't_hh': None,
  't_rd': None,
  't_rsa': None,
  't_ttlh': None,
  't_conf_atr': 2,
  't_conf_loc': 3,
  't_img_date': '5/8/2018',
  't_img_srce': 'Digital Globe',
  'xlong': -118.3642,
  'ylat': 35.07764

Convert response to DataFrame

In [23]:
df = pd.DataFrame(turbine_data)
df.head()

Unnamed: 0,case_id,faa_ors,faa_asn,usgs_pr_id,t_state,t_county,t_fips,p_name,p_year,p_tnum,...,t_rd,t_rsa,t_ttlh,t_conf_atr,t_conf_loc,t_img_date,t_img_srce,xlong,ylat,eia_id
0,3005443,,,5840,CA,Kern County,6029,251 Wind,1987,194,...,,,,2,3,5/8/2018,Digital Globe,-118.35109,35.0919,52161
1,3072704,,,5146,CA,Kern County,6029,251 Wind,1987,194,...,,,,2,3,5/8/2018,Digital Globe,-118.3642,35.07764,52161
2,3072695,,,5143,CA,Kern County,6029,251 Wind,1987,194,...,,,,2,3,5/8/2018,Digital Globe,-118.36441,35.07744,52161
3,3072661,,,5149,CA,Kern County,6029,251 Wind,1987,194,...,,,,2,3,5/8/2018,Digital Globe,-118.36376,35.07791,52161
4,3005333,,,5109,CA,Kern County,6029,251 Wind,1987,194,...,,,,2,3,5/8/2018,Digital Globe,-118.36869,35.07529,52161


The API will overload if a request corresponds to too many records. We can work around this by calling the API in smaller batches.

In [15]:
# Initial call to API with response limit of 50 records
turbine_df = pd.DataFrame()
offset, limit = 0, 50
params = {'offset': offset, 'limit': limit}
resp = requests.get(f"{base_path}turbines", params=params)
turbine_data = resp.json()

# This loop calls the API and adds the response records to turbine_df
# until it has returned all turbines in the USWTDB
while len(turbine_data):
    df = pd.DataFrame(turbine_data)
    turbine_df = pd.concat([turbine_df, df])
    offset += limit
    resp = requests.get(f"{base_path}turbines", params=params)
    turbine_data = resp.json()

turbine_data.head()

<Response [200]>

For best practices, here's the above code as a function.

In [29]:
"""
Parameters:
    offset - record index from which API response should begin; default 0
    limit - max number of records to return in each API call; default 50
    params - dict of query parameters for API call; default None,
             initialized with default offset and limit values

Returns:
    turbine_df - pandas DataFrame object containing records for all turbines
                 corresponding to query
"""
def USWTDB_data(offset=0, limit=50, params=None):
    base_path = 'https://eersc.usgs.gov/api/uswtdb/v1/'
    if not params:
        params = {'offset': offset, 'limit': limit}
    else:
        params['offset'] = params.get('offset', offset)
        params['limit'] = params.get('limit', limit)

    turbine_df = pd.DataFrame()
    resp = requests.get(f"{base_path}turbines", params=params)
    turbine_data = resp.json()

    while len(turbine_data):
        df = pd.DataFrame(turbine_data)
        turbine_df = pd.concat([turbine_df, df])
        offset += limit
        resp = requests.get(f"{base_path}turbines", params=params)
        turbine_data = resp.json()

    return turbine_df