In [171]:
import pandas as pd
import json
import requests
import time

# Turn on Copy-on-write mode to addreww view vs copy issues
pd.options.mode.copy_on_write = True 

## Get County for all US NOAA weather stations

In [172]:
# Read in NOAA Ground Stations for US and Canada
ground_stations_US_CA = pd.read_csv('result_files/US_CA_Station_locations.csv')
ground_stations_US_CA


Unnamed: 0,ID,Lat,Lon,Elevation,State,Name,GSN,HCN/CRN,WMO ID
0,CA001010066,48.8667,-123.2833,4.0,BC,ACTIVE PASS,,,0
1,CA001010235,48.4000,-123.4833,17.0,BC,ALBERT HEAD,,,0
2,CA001010595,48.5833,-123.5167,85.0,BC,BAMBERTON OCEAN CEMENT,,,0
3,CA001010720,48.5000,-124.0000,351.0,BC,BEAR CREEK,,,0
4,CA001010774,48.5000,-123.3500,61.0,BC,BEAVER LAKE,,,0
...,...,...,...,...,...,...,...,...,...
81472,USW00096405,60.4731,-145.3542,25.3,AK,CORDOVA 14 ESE,,CRN,0
81473,USW00096406,64.5014,-154.1297,78.9,AK,RUBY 44 ESE,,CRN,0
81474,USW00096407,66.5620,-159.0036,6.7,AK,SELAWIK 28 E,,CRN,0
81475,USW00096408,63.4519,-150.8747,678.2,AK,DENALI 27 N,,CRN,0


In [173]:
# Reduce to just US stations
ground_stations_US = ground_stations_US_CA[ground_stations_US_CA['ID'].str.contains("^US")]
ground_stations_US

Unnamed: 0,ID,Lat,Lon,Elevation,State,Name,GSN,HCN/CRN,WMO ID
9188,US009052008,43.7333,-96.6333,482.0,SD,SIOUX FALLS (ENVIRON. CANADA),,,0
9189,US10RMHS145,40.5268,-105.1113,1569.1,CO,RMHS 1.6 SSW,,,0
9190,US10adam001,40.5680,-98.5069,598.0,NE,JUNIATA 1.5 S,,,0
9191,US10adam002,40.5093,-98.5493,601.1,NE,JUNIATA 6.0 SSW,,,0
9192,US10adam003,40.4663,-98.6537,615.1,NE,HOLSTEIN 0.1 NW,,,0
...,...,...,...,...,...,...,...,...,...
81472,USW00096405,60.4731,-145.3542,25.3,AK,CORDOVA 14 ESE,,CRN,0
81473,USW00096406,64.5014,-154.1297,78.9,AK,RUBY 44 ESE,,CRN,0
81474,USW00096407,66.5620,-159.0036,6.7,AK,SELAWIK 28 E,,CRN,0
81475,USW00096408,63.4519,-150.8747,678.2,AK,DENALI 27 N,,CRN,0


In [174]:
# Fix Index
ground_stations_US = ground_stations_US.reset_index()
ground_stations_US.drop(ground_stations_US.columns[0], axis=1, inplace=True)
ground_stations_US

Unnamed: 0,ID,Lat,Lon,Elevation,State,Name,GSN,HCN/CRN,WMO ID
0,US009052008,43.7333,-96.6333,482.0,SD,SIOUX FALLS (ENVIRON. CANADA),,,0
1,US10RMHS145,40.5268,-105.1113,1569.1,CO,RMHS 1.6 SSW,,,0
2,US10adam001,40.5680,-98.5069,598.0,NE,JUNIATA 1.5 S,,,0
3,US10adam002,40.5093,-98.5493,601.1,NE,JUNIATA 6.0 SSW,,,0
4,US10adam003,40.4663,-98.6537,615.1,NE,HOLSTEIN 0.1 NW,,,0
...,...,...,...,...,...,...,...,...,...
72284,USW00096405,60.4731,-145.3542,25.3,AK,CORDOVA 14 ESE,,CRN,0
72285,USW00096406,64.5014,-154.1297,78.9,AK,RUBY 44 ESE,,CRN,0
72286,USW00096407,66.5620,-159.0036,6.7,AK,SELAWIK 28 E,,CRN,0
72287,USW00096408,63.4519,-150.8747,678.2,AK,DENALI 27 N,,CRN,0


In [175]:
# Add county and FIPS columns
ground_stations_US.loc[:,'County'] = ''
ground_stations_US.loc[:,'FIPS'] = ''
ground_stations_US.loc[:,'StateName'] = ''

ground_stations_US

Unnamed: 0,ID,Lat,Lon,Elevation,State,Name,GSN,HCN/CRN,WMO ID,County,FIPS,StateName
0,US009052008,43.7333,-96.6333,482.0,SD,SIOUX FALLS (ENVIRON. CANADA),,,0,,,
1,US10RMHS145,40.5268,-105.1113,1569.1,CO,RMHS 1.6 SSW,,,0,,,
2,US10adam001,40.5680,-98.5069,598.0,NE,JUNIATA 1.5 S,,,0,,,
3,US10adam002,40.5093,-98.5493,601.1,NE,JUNIATA 6.0 SSW,,,0,,,
4,US10adam003,40.4663,-98.6537,615.1,NE,HOLSTEIN 0.1 NW,,,0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
72284,USW00096405,60.4731,-145.3542,25.3,AK,CORDOVA 14 ESE,,CRN,0,,,
72285,USW00096406,64.5014,-154.1297,78.9,AK,RUBY 44 ESE,,CRN,0,,,
72286,USW00096407,66.5620,-159.0036,6.7,AK,SELAWIK 28 E,,CRN,0,,,
72287,USW00096408,63.4519,-150.8747,678.2,AK,DENALI 27 N,,CRN,0,,,


### Create an approach to save work in progress during lengthy API data gathering process (~9hrs anticipated)

In [197]:
# create a new DataFrame with one row and dummy values
# This is written to a csv file to initialize the csv file
#    This csv file is appended to for each call to the API in the next cell

county_csv_file = 'result_files/US_count_station_locations.csv'

data = { 'ID': ['DUMMY000000'],
         'Lat': [40.5268],
         'Lon': [-105.1113],
         'Elevation': [1569.1],
         'State': ['CO'],
         'Name':  ['Dummy Loc'],
         'GSN': ['GSN'],
         'HCN/CRN': ['HCN'],
         'WMO ID': [000000],
         'county': ['Dummy'],
         'FIPS': [80690011132024]
         }
df = pd.DataFrame(data)
print(df)
df.to_csv(county_csv_file, index=False)

saved_df = pd.read_csv(county_csv_file)
saved_df

            ID      Lat       Lon  Elevation State       Name  GSN HCN/CRN  \
0  DUMMY000000  40.5268 -105.1113     1569.1    CO  Dummy Loc  GSN     HCN   

   WMO ID county            FIPS  
0       0  Dummy  80690011132024  


Unnamed: 0,ID,Lat,Lon,Elevation,State,Name,GSN,HCN/CRN,WMO ID,county,FIPS
0,DUMMY000000,40.5268,-105.1113,1569.1,CO,Dummy Loc,GSN,HCN,0,Dummy,80690011132024


### Call FCC API to return county from lat lon and add to dataframe

In [198]:
# FCC API Terms of service: https://www.fcc.gov/reports-research/developers/api-terms-service
# API: https://geo.fcc.gov/api/census/block/find?latitude=39.6667&longitude=-104.0284&censusYear=2020&showall=true&format=json
# Response {"Block":{"FIPS":"080050071013020","bbox":[-104.042319,39.624051,-104.023144,39.682206]},"County":{"FIPS":"08005","name":"Arapahoe County"},"State":{"FIPS":"08","code":"CO","name":"Colorado"},"status":"OK","executionTime":"0"}

# Create the base url
base_url = 'https://geo.fcc.gov/api/census/block/find?'

# Determine the number of records to process
num_records = len(ground_stations_US)

# Loop through all the records and query the FCC for lat lon.
# Append the result to a CSV file to insure progress is recorded if any issues with the API over a long execution time
for i in range(2):

    # Get record i of dataframe
    record = ground_stations_US.iloc[i]
    print(f"{record['ID']}  {record['Lat']}  {record['Lon']}")

    record_dict = record.to_dict()
    print(f"record_dict {record_dict}")

    # update URL for lat lon
    url = f"{base_url}latitude={record['Lat']}&longitude={record['Lon']}&censusYear=2020&showall=true&format=json"

    # Get FIPS, county and state from FCC API
    response = requests.get(url)
    response_json = response.json()

#    print(json.dumps(response_json))

    # Assign fips, county and state from API response to the record
    # -- state is already in record.  However, StateName is the long name
    record.at['County']      = response_json['County']['name'].replace(' County', '')
    record.at['FIPS']        = response_json['Block']['FIPS']
    record.at['StateName']   = response_json['State']['name']

    print(f"type record: {type(record)}")
    df = pd.DataFrame(record)
    print(f"type df: {type(df)}")
    print(df)
#    print(df)


# NEED HELP HERE

    # Append the record to the csv file
    df.to_csv(county_csv_file, mode='a', header=False, index=False)

    # sleep 100ms for request throttling.  Any longer and the solution will not complete in a feasible time.
    # same as sleep 5 every 50 records.
    time.sleep(.1)
    
    if i % 50 == 0:
        print(f"processed {i} records")

# saved_df = pd.read_csv(county_csv_file)
# saved_df
    

US009052008  43.7333  -96.6333
record_dict {'ID': 'US009052008', 'Lat': 43.7333, 'Lon': -96.6333, 'Elevation': 482.0, 'State': 'SD', 'Name': 'SIOUX FALLS (ENVIRON. CANADA)', 'GSN': nan, 'HCN/CRN': nan, 'WMO ID': 0, 'County': '', 'FIPS': '', 'StateName': ''}
type record: <class 'pandas.core.series.Series'>
type df: <class 'pandas.core.frame.DataFrame'>
                                       0
ID                           US009052008
Lat                              43.7333
Lon                             -96.6333
Elevation                          482.0
State                                 SD
Name       SIOUX FALLS (ENVIRON. CANADA)
GSN                                  NaN
HCN/CRN                              NaN
WMO ID                                 0
County                         Minnehaha
FIPS                     460990101011112
StateName                   South Dakota
processed 0 records
US10RMHS145  40.5268  -105.1113
record_dict {'ID': 'US10RMHS145', 'Lat': 40.5268, 'Lon': -105

### Call FCC API to return county from lat lon and add to dataframe

In [88]:
# FCC API Terms of service: https://www.fcc.gov/reports-research/developers/api-terms-service
# API: https://geo.fcc.gov/api/census/block/find?latitude=39.6667&longitude=-104.0284&censusYear=2020&showall=true&format=json
# Response {"Block":{"FIPS":"080050071013020","bbox":[-104.042319,39.624051,-104.023144,39.682206]},"County":{"FIPS":"08005","name":"Arapahoe County"},"State":{"FIPS":"08","code":"CO","name":"Colorado"},"status":"OK","executionTime":"0"}

# Collect response info to help determine if errors.  These lists are provided since the data set will require 9 hours to run.
responses = []
new_data = []

base_url = 'https://geo.fcc.gov/api/census/block/find?'

num_records = len(ground_stations_US)


for i in range(2):
    # Get ID, Lat and Lon from record i of dataframe
    id = ground_stations_US.iloc[i,0]
    lat = ground_stations_US.iloc[i,1]
    lon = ground_stations_US.iloc[i,2]
    
    # update URL for lat lon
    url = f"{base_url}latitude={lat}&longitude={lon}&censusYear=2020&showall=true&format=json"

    # Get FIPS, county and state from FCC API
    response = requests.get(url)
    response_json = response.json()

    # Assign fips, county and state from API response
    fips = response_json['Block']['FIPS']
    county = response_json['County']['name'].replace(' County', '')
    state = response_json['State']['name']


    # add fips and county to record i
    # -- state is already in record.  So no need to add
    ground_stations_US.iloc[i,9] = county
    ground_stations_US.iloc[i,10] = fips

    # add response to responses list.  responses is maintained to provide insite into results if needed.
    # append info to new_data list.  new_data is maintained to provide insite into results
    # write 
    data = { 'ID': [id],
         'FIPS': [fips],
         'County': [county],
         'State': [state],
         'response': [response.text]
         }
    df = pd.DataFrame(data)
    df.to_csv('FCC_responses.csv', mode='a', index=False, header=False)


    # sleep 100ms for request throttling.  Any longer and the solution will not complete in a feasible time.
    # same as sleep 5 every 50 records.
    time.sleep(.1)
    
    if i % 50 == 0:
        print(f"processed {i} records")

processed 0 records


In [96]:
type(response.text)

str

In [109]:
#response_string = response.astype(str)
data = { 'ID': [id],
         'FIPS': [fips],
         'County': [county],
         'State': [state],
         'response': [response.text]
         }
df = pd.DataFrame(data)


df.to_csv('FCC_responses.csv', index=False)



data2 = pd.read_csv('FCC_responses.csv')
data2

Unnamed: 0,ID,FIPS,County,State,response
0,US10RMHS145,80690011132024,Larimer,Colorado,"{""Block"":{""FIPS"":""080690011132024"",""bbox"":[-10..."


In [75]:
ground_stations_US

Unnamed: 0,ID,Lat,Lon,Elevation,State,Name,GSN,HCN/CRN,WMO ID,county,FIPS
0,US009052008,43.7333,-96.6333,482.0,SD,SIOUX FALLS (ENVIRON. CANADA),,,0,Minnehaha,460990101011112
1,US10RMHS145,40.5268,-105.1113,1569.1,CO,RMHS 1.6 SSW,,,0,Larimer,080690011132024
2,US10adam001,40.5680,-98.5069,598.0,NE,JUNIATA 1.5 S,,,0,Adams,310019662002037
3,US10adam002,40.5093,-98.5493,601.1,NE,JUNIATA 6.0 SSW,,,0,Adams,310019662002130
4,US10adam003,40.4663,-98.6537,615.1,NE,HOLSTEIN 0.1 NW,,,0,Adams,310019662002292
...,...,...,...,...,...,...,...,...,...,...,...
72284,USW00096405,60.4731,-145.3542,25.3,AK,CORDOVA 14 ESE,,CRN,0,,
72285,USW00096406,64.5014,-154.1297,78.9,AK,RUBY 44 ESE,,CRN,0,,
72286,USW00096407,66.5620,-159.0036,6.7,AK,SELAWIK 28 E,,CRN,0,,
72287,USW00096408,63.4519,-150.8747,678.2,AK,DENALI 27 N,,CRN,0,,


In [76]:
# Write weather data to parquet file
ground_stations_US.to_parquet('result_files/us_stations_county.parquet.gzip', compression='gzip', engine="fastparquet")  

In [77]:
# Read weather paqquet file into a pandas dataframe to verify data integrity is maintained
ground_stations_US_rec = pd.read_parquet('result_files/us_stations_county.parquet.gzip', engine="fastparquet") 

In [78]:
ground_stations_US_rec

Unnamed: 0,ID,Lat,Lon,Elevation,State,Name,GSN,HCN/CRN,WMO ID,county,FIPS
0,US009052008,43.7333,-96.6333,482.0,SD,SIOUX FALLS (ENVIRON. CANADA),,,0,Minnehaha,460990101011112
1,US10RMHS145,40.5268,-105.1113,1569.1,CO,RMHS 1.6 SSW,,,0,Larimer,080690011132024
2,US10adam001,40.5680,-98.5069,598.0,NE,JUNIATA 1.5 S,,,0,Adams,310019662002037
3,US10adam002,40.5093,-98.5493,601.1,NE,JUNIATA 6.0 SSW,,,0,Adams,310019662002130
4,US10adam003,40.4663,-98.6537,615.1,NE,HOLSTEIN 0.1 NW,,,0,Adams,310019662002292
...,...,...,...,...,...,...,...,...,...,...,...
72284,USW00096405,60.4731,-145.3542,25.3,AK,CORDOVA 14 ESE,,CRN,0,,
72285,USW00096406,64.5014,-154.1297,78.9,AK,RUBY 44 ESE,,CRN,0,,
72286,USW00096407,66.5620,-159.0036,6.7,AK,SELAWIK 28 E,,CRN,0,,
72287,USW00096408,63.4519,-150.8747,678.2,AK,DENALI 27 N,,CRN,0,,
