NPR Stations DataFrame by County
=========
---------------
Create a Dataframe including following columns:
* FIPS Code
* County Name
* State
* City
* Station Identifier
* AM/FM
* Frequency
* Twitter Account

## Import Libraries and configuration files


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.offline as py
import requests
import json
from config import gkey

In [7]:
#Read CSV and create dataframe
npr_stations_csv = "./Resources/nprstations_sample.csv"
npr_stations_df = pd.read_csv(npr_stations_csv)


City                   object
State                  object
Station Identifier     object
AM/FM Number           object
AM/FM Number.1        float64
dtype: object

In [8]:
#Variables to be used during data collection
processed = 0
notfound = 0

#Loop to collect Lat/Long for Cities with NPR Stations
for (idx, row) in npr_stations_df.iterrows():
    try:
        #Store current row value for each column
        city = (row.loc['City'])
        state = (row.loc['State'])
        #URL from Google APIs where Lat/Long values will be collected
        query_url = "https://maps.googleapis.com/maps/api/geocode/json?address="+str(city)+","+str(state)+"&key="+str(gkey)
        #JSON request
        response = requests.get(query_url).json()
        #Variables to store lat/long values 
        lat = response["results"][0]["geometry"]["location"]["lat"]
        lng = response["results"][0]["geometry"]["location"]["lng"]
        #Lat/Long columns created and respective values stored in current row
        npr_stations_df.at[idx, 'Latitude'] = lat
        npr_stations_df.at[idx, 'Longitude'] = lng
        #Print to verify data is processing as expected
        print(str(city)+","+str(state)+":"+str(lat)+","+str(lng))
        #Count to validate how many cities have been processed
        processed += 1
    #Error handling if some of the cities are not found
    except:
        #Print to verify when a city is not found
        print("City not found")
        #Count to validate how many cities were not found
        notfound += 1
        continue
#Print to validate final count for both processed and not found cities
print(str(processed)+" cities were processed. "+str(notfound)+" cities were not found.")

Birmingham,Alabama:33.5206608,-86.80248999999999
Dothan,Alabama:31.2232313,-85.3904888
Gadsden,Alabama:34.014264,-86.0066386
Huntsville,Alabama:34.7303688,-86.5861037
Huntsville,Alabama:34.7303688,-86.5861037
Jacksonville,Alabama:33.8137125,-85.76135359999999
Mobile,Alabama:30.6953657,-88.0398912
Montgomery,Alabama:32.3668052,-86.2999689
Montgomery/Troy,Alabama:32.3762523,-86.3108893
Muscle Shoals,Alabama:34.7448112,-87.66752919999999
Selma,Alabama:32.4073589,-87.02110069999999
Tuscaloosa,Alabama:33.2098407,-87.56917349999999
Anchorage,Alaska:61.2180556,-149.9002778
Barrow,Alaska:71.29055559999999,-156.788611
Barrow,Alaska:71.29055559999999,-156.788611
Bethel,Alaska:60.7922222,-161.7558334
Chevak,Alaska:61.5277778,-165.5863889
Dillingham,Alaska:59.03972219999999,-158.4575
Fairbanks,Alaska:64.8377778,-147.7163888
Galena,Alaska:64.7333333,-156.9275
Glennallen,Alaska:62.1097214,-145.5573019
21 cities were processed. 0 cities were not found.


In [9]:
#Save DataFrame to CSV File
npr_stations_df.to_csv('npr_stations_by_city.csv')

In [10]:
#Read CSV and create Dataframe
npr_by_county_csv = 'npr_stations_by_city.csv'
npr_by_county_df = pd.read_csv(npr_by_county_csv)

#URL to get FIPS code
url = 'https://geo.fcc.gov/api/census/block/find?'
#Variables to be used during data collection
processed = 0
notfound = 0

#Loop to add FIPS code and County name to Dataframe
for (idx, row) in npr_by_county_df.iterrows():
    try:
        #Store current row value for each column
        latitude = (row.loc['Latitude'])
        longitude = (row.loc['Longitude'])
        #URL to collect FIPS code and county name
        query_url = str(url)+"&latitude="+str(latitude)+"&longitude="+str(longitude)+"&format=json"
        #JSON Request
        code = requests.get(query_url).json()
        #Variables to store required values (FIPS codes are converted to strings to keep leading zeros in the code)
        county_fips = str(code["County"]["FIPS"]).zfill(5)
        county_name = code["County"]["name"]
        block_fips = str(code["Block"]["FIPS"]).zfill(15)
        #New columns created and values collected from JSON stored in current row 
        npr_by_county_df.at[idx, 'FIPS'] = str(county_fips)
        npr_by_county_df.at[idx, 'County Name'] = county_name
        npr_by_county_df.at[idx, 'FIPS_block'] = str(block_fips)         
        #Count to validate how many records were processed
        processed += 1
        #Print to verify that information is processed
        print(str(county_name)+":"+str(county_fips))
    #Error handling if a location is not found
    except:
        #Print to validate if a location is not found
        print("Location Not Found")
        #Count to validate how many records were not found
        notfound += 1
        pass
#Print to validate final count for both processed and not found locations
print(str(processed)+" locations were processed. "+str(notfound)+" locations were not found.")


Jefferson:01073
Houston:01069
Etowah:01055
Madison:01089
Madison:01089
Calhoun:01015
Mobile:01097
Montgomery:01101
Montgomery:01101
Colbert:01033
Dallas:01047
Tuscaloosa:01125
Anchorage:02020
North Slope:02185
North Slope:02185
Bethel:02050
Wade Hampton:02270
Dillingham:02070
Fairbanks North Star:02090
Yukon-Koyukuk:02290
Valdez-Cordova:02261


TypeError: unsupported operand type(s) for +: 'int' and 'str'

In [11]:
#Show dataframe
npr_by_county_df.head(10)

Unnamed: 0.1,Unnamed: 0,City,State,Station Identifier,AM/FM Number,AM/FM Number.1,Latitude,Longitude,FIPS,County Name,FIPS_block
0,0,Birmingham,Alabama,WBHM,FM,90.3,33.520661,-86.80249,1073,Jefferson,10730027001097
1,1,Dothan,Alabama,WRWA,FM,88.7,31.223231,-85.390489,1069,Houston,10690406001032
2,2,Gadsden,Alabama,WSGN,FM,91.5,34.014264,-86.006639,1055,Etowah,10550012001034
3,3,Huntsville,Alabama,WJAB,FM,90.9,34.730369,-86.586104,1089,Madison,10890031001024
4,4,Huntsville,Alabama,WLRH,FM,89.3,34.730369,-86.586104,1089,Madison,10890031001024
5,5,Jacksonville,Alabama,WLJS,FM,91.9,33.813713,-85.761354,1015,Calhoun,10150021012089
6,6,Mobile,Alabama,WHIL,FM,91.3,30.695366,-88.039891,1097,Mobile,10970012001266
7,7,Montgomery,Alabama,WVAS,FM,90.7,32.366805,-86.299969,1101,Montgomery,11010015003009
8,8,Montgomery/Troy,Alabama,WTSU,FM,89.9,32.376252,-86.310889,1101,Montgomery,11010001001109
9,9,Muscle Shoals,Alabama,WQPR,FM,88.7,34.744811,-87.667529,1033,Colbert,10330207042005


In [12]:
#Drop rows with no FIPS 
npr_by_county_df = npr_by_county_df[npr_by_county_df['FIPS'].notnull()]

#Loop to fill leading zeros on FIPS codes
for (idx, row) in npr_by_county_df.iterrows():
    fips_z = (row.loc['FIPS'])
    fips_block_z = (row.loc['FIPS_block'])
    npr_by_county_df.at[idx, 'FIPS'] = str(fips_z).zfill(5)
    npr_by_county_df.at[idx, 'FIPS_block'] = str(fips_block_z).zfill(15)

In [28]:
#Show row count group by FIPS code
count_fips = npr_by_county_df[['City','FIPS']]
count_fips = count_fips.groupby('FIPS').count()
count_fips = count_fips.rename(columns={'City':'Count'})
count_fips = count_fips.sort_values(by=['Count'], ascending=[False])
count_fips.head(10)


Unnamed: 0_level_0,Count
FIPS,Unnamed: 1_level_1
2185,2
1089,2
1101,2
1015,1
2020,1
2270,1
2261,1
2090,1
2070,1
2050,1


In [20]:
#Reorder dataframe columns
npr_by_county_df = npr_by_county_df[["FIPS", "County Name", "City",  "State", "Latitude", "Longitude", "Station Identifier", "AM/FM Number.1", "AM/FM Number","Twitter Account","Alternate Account","FIPS_block"]]
#Sort records by FIPS code
npr_by_county_df = npr_by_county_df.sort_values(by=['FIPS'])

#Show dataframe
npr_by_county_df.head(20)

Unnamed: 0,FIPS,County Name,City,State,Latitude,Longitude,Station Identifier,AM/FM Number.1,AM/FM Number,FIPS_block
5,1015,Calhoun,Jacksonville,Alabama,33.813713,-85.761354,WLJS,91.9,FM,10150021012089
9,1033,Colbert,Muscle Shoals,Alabama,34.744811,-87.667529,WQPR,88.7,FM,10330207042005
10,1047,Dallas,Selma,Alabama,32.407359,-87.021101,WAPR,88.3,FM,10479566003019
2,1055,Etowah,Gadsden,Alabama,34.014264,-86.006639,WSGN,91.5,FM,10550012001034
1,1069,Houston,Dothan,Alabama,31.223231,-85.390489,WRWA,88.7,FM,10690406001032
0,1073,Jefferson,Birmingham,Alabama,33.520661,-86.80249,WBHM,90.3,FM,10730027001097
3,1089,Madison,Huntsville,Alabama,34.730369,-86.586104,WJAB,90.9,FM,10890031001024
4,1089,Madison,Huntsville,Alabama,34.730369,-86.586104,WLRH,89.3,FM,10890031001024
6,1097,Mobile,Mobile,Alabama,30.695366,-88.039891,WHIL,91.3,FM,10970012001266
8,1101,Montgomery,Montgomery/Troy,Alabama,32.376252,-86.310889,WTSU,89.9,FM,11010001001109
