In [4]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests

import json
from pprint import pprint

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Call the USGS turbine database API

## Add the response to a DataFrame

In [5]:
# Turbines endpoint (v1) of the USGS wind turbine database API.
base_url = "https://eersc.usgs.gov/api/uswtdb/v1/turbines"

In [6]:
# Fetch the turbine records from the API and decode the JSON body.
# The timeout keeps the cell from hanging indefinitely on a stalled
# connection, and raise_for_status() surfaces HTTP errors right here instead
# of letting an error payload be parsed as if it were turbine data.
api_reply = requests.get(base_url, timeout=60)
api_reply.raise_for_status()
response = api_reply.json()

In [7]:
# Load the decoded JSON payload into a DataFrame and display it.
df = pd.DataFrame(data=response)
df

Unnamed: 0,case_id,faa_ors,faa_asn,usgs_pr_id,t_state,t_county,t_fips,p_name,p_year,p_tnum,...,t_ttlh,t_conf_atr,t_conf_loc,t_img_date,t_img_srce,xlong,ylat,eia_id,retrofit,retrofit_year
0,3001247,,,5115.0,CA,Kern County,6029,251 Wind,1987.0,194,...,,2,3,5/8/2018,Digital Globe,-118.36819,35.07579,52161.0,0,
1,3009410,,,5116.0,CA,Kern County,6029,251 Wind,1987.0,194,...,,2,3,5/8/2018,Digital Globe,-118.36809,35.07589,52161.0,0,
2,3072670,,,5112.0,CA,Kern County,6029,251 Wind,1987.0,194,...,,2,3,5/8/2018,Digital Globe,-118.36841,35.07554,52161.0,0,
3,3072663,,,5114.0,CA,Kern County,6029,251 Wind,1987.0,194,...,,2,3,5/8/2018,Digital Globe,-118.36820,35.07570,52161.0,0,
4,3072661,,,5149.0,CA,Kern County,6029,251 Wind,1987.0,194,...,,2,3,5/8/2018,Digital Globe,-118.36376,35.07791,52161.0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68787,3108971,,2020-WTW-3724-OE,,TX,Zapata County,48505,unknown Zapata County,2021.0,41,...,,1,3,2/6/2021,Digital Globe,-99.29768,27.09550,,0,
68788,3109386,,2020-WTW-3723-OE,,TX,Zapata County,48505,unknown Zapata County,2020.0,41,...,,1,3,2/6/2021,Digital Globe,-99.28698,27.09672,,0,
68789,3106411,48-174404,2019-WTW-10988-OE,,TX,Zapata County,48505,unknown Zapata County,2020.0,41,...,,1,3,2/6/2021,Digital Globe,-99.28335,27.15668,,0,
68790,3109758,,2019-WTW-10984-OE,,TX,Zapata County,48505,unknown Zapata County,2020.0,41,...,,1,3,2/6/2021,Digital Globe,-99.32201,27.12558,,0,


# Remove columns that are irrelevant to the analysis

In [8]:
# List every column label so the ones to discard can be picked out.
df.columns

Index(['case_id', 'faa_ors', 'faa_asn', 'usgs_pr_id', 't_state', 't_county',
       't_fips', 'p_name', 'p_year', 'p_tnum', 'p_cap', 't_manu', 't_model',
       't_cap', 't_hh', 't_rd', 't_rsa', 't_ttlh', 't_conf_atr', 't_conf_loc',
       't_img_date', 't_img_srce', 'xlong', 'ylat', 'eia_id', 'retrofit',
       'retrofit_year'],
      dtype='object')

In [9]:
# Column labels to discard before the analysis.
irrelevant_columns = [
    'case_id', 'faa_ors', 'faa_asn', 'usgs_pr_id', 't_fips',
    't_rsa', 't_cap', 't_ttlh', 't_conf_atr', 't_conf_loc',
    't_img_date', 't_img_srce', 'eia_id', 'retrofit', 'retrofit_year',
]
df = df.drop(labels=irrelevant_columns, axis=1)

df

Unnamed: 0,t_state,t_county,p_name,p_year,p_tnum,p_cap,t_manu,t_model,t_hh,t_rd,xlong,ylat
0,CA,Kern County,251 Wind,1987.0,194,18.43,Vestas,,,,-118.36819,35.07579
1,CA,Kern County,251 Wind,1987.0,194,18.43,Vestas,,,,-118.36809,35.07589
2,CA,Kern County,251 Wind,1987.0,194,18.43,Vestas,,,,-118.36841,35.07554
3,CA,Kern County,251 Wind,1987.0,194,18.43,Vestas,,,,-118.36820,35.07570
4,CA,Kern County,251 Wind,1987.0,194,18.43,Vestas,,,,-118.36376,35.07791
...,...,...,...,...,...,...,...,...,...,...,...,...
68787,TX,Zapata County,unknown Zapata County,2021.0,41,,,,,,-99.29768,27.09550
68788,TX,Zapata County,unknown Zapata County,2020.0,41,,,,,,-99.28698,27.09672
68789,TX,Zapata County,unknown Zapata County,2020.0,41,,,,,,-99.28335,27.15668
68790,TX,Zapata County,unknown Zapata County,2020.0,41,,,,,,-99.32201,27.12558


In [10]:
# Map the raw API field names to human-readable column labels.
readable_names = {
    't_state': 'State',
    't_county': 'County',
    'p_name': 'Wind Farm Name',
    'p_year': 'Year',
    'p_tnum': 'Turbine Count',
    'p_cap': 'WF Total MW',
    't_manu': 'Turbine Manufacturer',
    't_model': 'Turbine Model',
    't_hh': 'Turbine Hub Height',
    't_rd': 'Rotor Diameter',
    'xlong': 'lng',
    'ylat': 'lat',
}
df = df.rename(columns=readable_names)

In [11]:
# Display the frame to check its current column labels.
df

Unnamed: 0,State,County,Wind Farm Name,Year,Turbine Count,WF Total MW,Turbine Manufacturer,Turbine Model,Turbine Hub Height,Rotor Diameter,lng,lat
0,CA,Kern County,251 Wind,1987.0,194,18.43,Vestas,,,,-118.36819,35.07579
1,CA,Kern County,251 Wind,1987.0,194,18.43,Vestas,,,,-118.36809,35.07589
2,CA,Kern County,251 Wind,1987.0,194,18.43,Vestas,,,,-118.36841,35.07554
3,CA,Kern County,251 Wind,1987.0,194,18.43,Vestas,,,,-118.36820,35.07570
4,CA,Kern County,251 Wind,1987.0,194,18.43,Vestas,,,,-118.36376,35.07791
...,...,...,...,...,...,...,...,...,...,...,...,...
68787,TX,Zapata County,unknown Zapata County,2021.0,41,,,,,,-99.29768,27.09550
68788,TX,Zapata County,unknown Zapata County,2020.0,41,,,,,,-99.28698,27.09672
68789,TX,Zapata County,unknown Zapata County,2020.0,41,,,,,,-99.28335,27.15668
68790,TX,Zapata County,unknown Zapata County,2020.0,41,,,,,,-99.32201,27.12558


In [15]:
# Keep only the rows whose 'Year' value is 2000 or later.
built_since_2000 = df['Year'].ge(2000.0)
modern_WF = df.loc[built_since_2000]

In [16]:
# Clean unknown wind farms
#
# 'WF Total MW' values of modern_WF with the missing entries removed.
# This assignment has to run before `clean_modern_WF` is read; without it
# the cell raises NameError on a fresh kernel (Restart Kernel -> Run All).
clean_modern_WF = modern_WF['WF Total MW'].dropna()

# Index labels of the modern_WF rows whose farm name is exactly '251 Wind'.
index_value = modern_WF[modern_WF['Wind Farm Name']=='251 Wind'].index
print(clean_modern_WF)

# TODO(review): rows with placeholder farm names (e.g. 'unknown Zapata County')
# are still present in modern_WF. Removing them was sketched here but never
# finished -- presumably a filter on names starting with 'unknown'; confirm
# the intended rule before implementing it.



14       30.0
15       30.0
16       30.0
17       30.0
18       30.0
         ... 
68260    35.4
68261    35.4
68262    35.4
68263    35.4
68264    35.4
Name: WF Total MW, Length: 60884, dtype: float64


In [None]:
# Two-letter postal abbreviations: the 50 states plus DC (51 entries).
states = [
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL',
    'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME',
    'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH',
    'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI',
    'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI',
    'WY',
]
