In [2]:
# This notebook walks through how to fetch the wind turbine data we need
# for the project.

In [3]:
import datetime
import json
import os

import pandas as pd
import requests


In [4]:
# purpose is to create a variable that stores todays date
today = datetime.date.today()

# formatting date as MM_YYYY
todays_date = today.strftime('%m_%Y')

# checking out if we did this successfully
print(todays_date)

12_2023


In [5]:
def grab_data_by_state(state, pls_save=False):
    '''
    Purpose of this function is to grab wind turbing data by statem which is the
    input that the user will provide
    '''
    # purpose is to create a variable that stores todays date
    today = datetime.date.today()

    # formatting date as MM_YYYY
    todays_date = today.strftime('%m_%Y')

    # base path that will be the first part of all url requests
    base_path = 'https://eersc.usgs.gov/api/uswtdb/v1/'

    # url segement that allows us to grab data by state
    by_state = f'turbines?&t_state=eq.{state}'

    # purpose is to walk through how to convert json --> dataframe
    url = base_path + by_state
    response = requests.get(url=url)
    json_data = json.loads(response.content)

    # now we want to convert into a pandas df
    df = pd.DataFrame(data=json_data)

    # if a save == True, then we will create a save path and store the file as xlsx
    if pls_save == True:

        # outputting data as a csv file
        save_path = f'data_files/{state}_{todays_date}.xlsx'
        df.to_excel(save_path)

        print('File saved and processed!')

    else:

        print('File saved only.')

    return df


# fetch the records for state 'OK'; pls_save=True also writes them to an
# xlsx file under data_files/
df = grab_data_by_state(state='OK', pls_save=True)

# print the returned frame as plain text for a quick visual check
print(df)


File saved and processed!
      case_id    faa_ors           faa_asn  usgs_pr_id t_state      t_county  \
0     3123985  40-098167  2021-WTW-7918-OE         NaN      OK  Ellis County   
1     3123544  40-097504  2021-WTW-7909-OE         NaN      OK  Ellis County   
2     3123887  40-097762  2021-WTW-7895-OE         NaN      OK  Ellis County   
3     3123765  40-097546  2021-WTW-7863-OE         NaN      OK  Ellis County   
4     3123814  40-097528  2021-WTW-7897-OE         NaN      OK  Ellis County   
...       ...        ...               ...         ...     ...           ...   
5517  3124481  40-098727   2021-WTW-796-OE         NaN      OK  Woods County   
5518  3113244  40-088121  2021-WTW-2261-OE         NaN      OK  Woods County   
5519  3123934       None  2019-WTW-8528-OE         NaN      OK  Woods County   
5520  3113250  40-088131  2021-WTW-2408-OE         NaN      OK  Woods County   
5521  3124478  40-098732   2021-WTW-794-OE         NaN      OK  Woods County   

     t_fips  

Now, we want to create a data processing function

In [18]:
# creating a data processing function
def process_data(dataframe):

    '''
    Clean up a turbine dataframe by removing rows with an unknown project.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame that contains a ``p_name`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame in which ``p_name`` is cast to str and every row whose
        ``p_name`` contains 'unknown' (case-insensitive) has been removed.
        The original index labels of the remaining rows are kept and the
        input frame is left untouched.
    '''
    # work on a copy so the caller's frame is not modified as a side effect
    cleaned = dataframe.copy()
    cleaned['p_name'] = cleaned['p_name'].astype(str)

    # na=False: a missing name is not treated as a match
    is_unknown = cleaned['p_name'].str.contains('unknown', case=False, na=False)

    # DataFrame.drop returns a new object; the original discarded that result,
    # so nothing was ever removed. Return the filtered frame explicitly.
    return cleaned.loc[~is_unknown]


# fetch the records for state 'TX'; pls_save=True also writes the fetched
# frame to an xlsx file under data_files/
df_test = grab_data_by_state(state='TX', pls_save=True)

# run the fetched frame through process_data
processed_df = process_data(dataframe=df_test)

# print the returned frame as plain text for a quick visual check
print(processed_df)


File saved and processed!
       case_id    faa_ors           faa_asn  usgs_pr_id t_state  \
0      3107778  48-176902  2019-WTW-5128-OE         NaN      TX   
1      3107780  48-176802  2019-WTW-5126-OE         NaN      TX   
2      3108495  48-178030  2019-WTW-9764-OE         NaN      TX   
3      3107338  48-176903  2019-WTW-5156-OE         NaN      TX   
4      3107221  48-178001  2019-WTW-9765-OE         NaN      TX   
...        ...        ...               ...         ...     ...   
18691  3127906  48-192193  2019-WTW-5932-OE         NaN      TX   
18692  3126207  48-192199  2019-WTW-5924-OE         NaN      TX   
18693  3126250  48-192142  2019-WTW-5864-OE         NaN      TX   
18694  3126933  48-192143  2019-WTW-5860-OE         NaN      TX   
18695  3040944  48-024978   2011-WTW-352-OE     41364.0      TX   

               t_county t_fips                  p_name  p_year  p_tnum  ...  \
0           Kent County  48263                 Amadeus  2020.0      96  ...   
1          