NPR Stations DataFrame by County
=========
---------------
Create a DataFrame that includes the following columns:
* FIPS Code
* County Name
* State
* City
* Station Identifier
* AM/FM
* Frequency
* Twitter Account

## Import Libraries and configuration files


In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.offline as py
import requests

In [6]:
# Load the raw NPR station list from the project's Resources folder.
npr_stations_csv = "./Resources/nprstations.csv"
npr_stations_df = pd.read_csv(filepath_or_buffer=npr_stations_csv)

# Preview the first five rows to confirm the file parsed as expected.
npr_stations_df.head(5)

Unnamed: 0,City,State,Station Identifier,AM/FM Number,AM/FM Number.1
0,Birmingham,Alabama,WBHM,FM,90.3
1,Dothan,Alabama,WRWA,FM,88.7
2,Gadsden,Alabama,WSGN,FM,91.5
3,Huntsville,Alabama,WJAB,FM,90.9
4,Huntsville,Alabama,WLRH,FM,89.3


In [None]:
# Geocode every station's city with the Google Geocoding API and store the
# coordinates in 'Latitude' / 'Longitude' columns of npr_stations_df.
#
# NOTE(review): `gkey` (the Google API key) is not defined in any cell shown in
# this notebook -- it must be loaded before this cell runs (e.g. from an
# untracked config module or an environment variable). Do not hardcode it here.
GEOCODE_URL = "https://maps.googleapis.com/maps/api/geocode/json"
GEOCODE_TIMEOUT = 10  # seconds; keeps one stalled request from hanging the run

#Variables to be used during data collection
processed = 0
notfound = 0

# Create the target columns up front so they exist (as NaN) even when every
# lookup fails, and so re-running the cell starts from a clean slate.
npr_stations_df['Latitude'] = float('nan')
npr_stations_df['Longitude'] = float('nan')

#Loop to collect Lat/Long for each station's city
for (idx, row) in npr_stations_df.iterrows():
    #Store current row value for each column
    city = row.loc['City']
    state = row.loc['State']
    try:
        # Let requests build the query string so spaces and punctuation in
        # city/state names are URL-encoded correctly.
        reply = requests.get(
            GEOCODE_URL,
            params={"address": f"{city},{state}", "key": gkey},
            timeout=GEOCODE_TIMEOUT,
        )
        reply.raise_for_status()
        response = reply.json()
        # An empty "results" list raises IndexError and is handled below.
        location = response["results"][0]["geometry"]["location"]
        lat = location["lat"]
        lng = location["lng"]
    # Only failures that mean "this lookup did not work" are caught. Programming
    # errors (e.g. a missing `gkey`) and KeyboardInterrupt still surface instead
    # of being silently counted as "not found".
    except (requests.RequestException, LookupError, TypeError, ValueError) as err:
        # Print only the exception type: the full message can contain the
        # request URL, which includes the API key.
        print(f"City not found: {city},{state} ({type(err).__name__})")
        #Count to validate how many cities were not found
        notfound += 1
        continue
    #Store the coordinates in the current row
    npr_stations_df.at[idx, 'Latitude'] = lat
    npr_stations_df.at[idx, 'Longitude'] = lng
    #Print to verify data is processing as expected
    print(f"{city},{state}:{lat},{lng}")
    #Count to validate how many cities have been processed
    processed += 1

#Print to validate final count for both processed and not found cities
print(f"{processed} cities were processed. {notfound} cities were not found.")

In [None]:
#Save DataFrame to CSV File
# index=False keeps the row index out of the file. Otherwise it is written as an
# unnamed first column and comes back as an extra "Unnamed: 0"-style column when
# the file is re-read with a plain pd.read_csv().
npr_stations_df.to_csv('npr_stations_by_city.csv', index=False)

In [None]:
# Look up the county (FIPS code + name) and census block for every geocoded
# station via the FCC census block API, and store them in new columns.

#Read CSV and create Dataframe
npr_by_county_csv = 'npr_stations_by_city.csv'
npr_by_county_df = pd.read_csv(npr_by_county_csv)

#URL to get FIPS code (the query string is built by requests from `params`)
url = 'https://geo.fcc.gov/api/census/block/find'
FCC_TIMEOUT = 10  # seconds; keeps one stalled request from hanging the run

#Variables to be used during data collection
processed = 0
notfound = 0

# Create the target columns up front so they exist (as nulls) even when every
# lookup fails; the FIPS codes are strings, so start from object-typed nulls.
for column in ('FIPS', 'County Name', 'FIPS_block'):
    npr_by_county_df[column] = None

#Loop to add FIPS code and County name to Dataframe
for (idx, row) in npr_by_county_df.iterrows():
    #Store current row value for each column
    latitude = row.loc['Latitude']
    longitude = row.loc['Longitude']

    # Rows with missing coordinates cannot be resolved; skip the request.
    if pd.isna(latitude) or pd.isna(longitude):
        print(f"Location Not Found: {latitude},{longitude} (missing coordinates)")
        notfound += 1
        continue

    try:
        reply = requests.get(
            url,
            params={"latitude": latitude, "longitude": longitude, "format": "json"},
            timeout=FCC_TIMEOUT,
        )
        reply.raise_for_status()
        code = reply.json()
        county_fips = code["County"]["FIPS"]
        county_name = code["County"]["name"]
        block_fips = code["Block"]["FIPS"]
        # Guard against a JSON null: str(None).zfill(5) would store the bogus
        # code "0None", which a later notnull() filter cannot catch.
        if county_fips is None:
            raise LookupError("response contains no county FIPS")
    # Only failures that mean "this lookup did not work" are caught; programming
    # errors and KeyboardInterrupt still surface.
    except (requests.RequestException, LookupError, TypeError, ValueError) as err:
        #Print to validate if a location is not found
        print(f"Location Not Found: {latitude},{longitude} ({type(err).__name__})")
        #Count to validate how many records were not found
        notfound += 1
        continue

    #FIPS codes are stored as zero-padded strings to keep leading zeros
    county_fips = str(county_fips).zfill(5)
    block_fips = str(block_fips).zfill(15) if block_fips is not None else None
    #Values collected from JSON stored in current row
    npr_by_county_df.at[idx, 'FIPS'] = county_fips
    npr_by_county_df.at[idx, 'County Name'] = county_name
    npr_by_county_df.at[idx, 'FIPS_block'] = block_fips
    #Count to validate how many records were processed
    processed += 1
    #Print to verify that information is processed
    print(f"{county_name}:{county_fips}")

#Print to validate final count for both processed and not found locations
print(f"{processed} locations were processed. {notfound} locations were not found.")


In [None]:
# Final clean-up: keep only stations with a county FIPS code, normalise the
# FIPS strings, report how many stations share each county, then order the
# columns and sort by FIPS.

#Drop rows with no FIPS
# .copy() makes the filtered frame independent of the unfiltered one, so the
# column assignments below do not trigger SettingWithCopyWarning.
npr_by_county_df = npr_by_county_df[npr_by_county_df['FIPS'].notnull()].copy()

#Fill leading zeros on FIPS codes (vectorised instead of a row-by-row loop)
npr_by_county_df['FIPS'] = npr_by_county_df['FIPS'].astype(str).str.zfill(5)
fips_block = npr_by_county_df['FIPS_block']
# Pad only real block codes; a missing block stays null instead of becoming
# a padded "nan"/"None" string.
npr_by_county_df['FIPS_block'] = (
    fips_block.astype(str).str.zfill(15).where(fips_block.notnull())
)

#Count duplicate counties
# transform() returns one value per row (aligned with the frame's index): the
# number of stations sharing that row's FIPS code. Assigning the result of
# groupby().count() directly does not work: it is a multi-column frame indexed
# by FIPS, not a per-row Series.
npr_by_county_df['duplicate_county'] = (
    npr_by_county_df.groupby('FIPS')['FIPS'].transform('count')
)
print(npr_by_county_df['duplicate_county'])

#Reorder dataframe columns
# reindex() is used instead of [[...]] so a listed column that does not exist
# yet is added as all-NaN rather than raising KeyError.
# NOTE(review): no cell shown in this notebook creates "Twitter Account" --
# confirm where it is meant to be populated.
column_order = [
    "FIPS", "County Name", "City", "State", "Latitude", "Longitude",
    "Station Identifier", "AM/FM Number", "AM/FM Number.1",
    "Twitter Account", "FIPS_block",
]
npr_by_county_df = npr_by_county_df.reindex(columns=column_order)
#Sort records by FIPS code
npr_by_county_df = npr_by_county_df.sort_values(by=['FIPS'])

#Show dataframe
npr_by_county_df.head(10)