In [1]:
# import dependencies
import pandas as pd
import numpy as np
import os

# File name of the poverty CSV output; being relative, it resolves against the working directory.
povertyCSV = os.path.join('povertyCSV.csv')

In [2]:
# Input files, given as paths relative to the working directory.
# The county-level poverty CSV is the original export from FRED:
# CSV pulled from https://geofred.stlouisfed.org/map/?th=pubugn&cc=5&rc=false&im=fractile&sb&lng=-49.6&lat=45.5&zm=2&sl&sv&am=Average&at=Not%20Seasonally%20Adjusted,%20Annual,%20Percent&dt=2015-01-01&fq=Annual&rt=county&sti=150203&un=lin
povCSVPath = 'Poverty%ByUSCounty.csv'
stateCentroidPath = 'state_centroids.csv'

# Load each CSV into its own DataFrame.
povData = pd.read_csv(povCSVPath)
centroidData = pd.read_csv(stateCentroidPath)

In [3]:
# read_csv already returned DataFrames; here we only pick the columns of interest
# (and fix their order) from each loaded table.
povertyColumns = ['County', 'State', '2012', '2013', '2014', '2015', '2016', '2017']
centroidColumns = ['State', 'Latitude', 'Longitude']

povDF = pd.DataFrame(povData, columns=povertyColumns)
centroidDF = pd.DataFrame(centroidData, columns=centroidColumns)

In [4]:
# Lookup from state abbreviation to full state name (50 states plus DC), so the
# poverty data can be merged with the CDC data on full state names.
abbrevToFullName = {
    'AL': 'Alabama', 'MT': 'Montana',
    'AK': 'Alaska', 'NE': 'Nebraska',
    'AZ': 'Arizona', 'NV': 'Nevada',
    'AR': 'Arkansas', 'NH': 'New Hampshire',
    'CA': 'California', 'NJ': 'New Jersey',
    'CO': 'Colorado', 'NM': 'New Mexico',
    'CT': 'Connecticut', 'NY': 'New York',
    'DE': 'Delaware', 'NC': 'North Carolina',
    'FL': 'Florida', 'ND': 'North Dakota',
    'GA': 'Georgia', 'OH': 'Ohio',
    'HI': 'Hawaii', 'OK': 'Oklahoma',
    'ID': 'Idaho', 'OR': 'Oregon',
    'IL': 'Illinois', 'PA': 'Pennsylvania',
    'IN': 'Indiana', 'RI': 'Rhode Island',
    'IA': 'Iowa', 'SC': 'South Carolina',
    'KS': 'Kansas', 'SD': 'South Dakota',
    'KY': 'Kentucky', 'TN': 'Tennessee',
    'LA': 'Louisiana', 'TX': 'Texas',
    'ME': 'Maine', 'UT': 'Utah',
    'MD': 'Maryland', 'VT': 'Vermont',
    'MA': 'Massachusetts', 'VA': 'Virginia',
    'MI': 'Michigan', 'WA': 'Washington',
    'MN': 'Minnesota', 'WV': 'West Virginia',
    'MS': 'Mississippi', 'WI': 'Wisconsin',
    'MO': 'Missouri', 'WY': 'Wyoming',
    'DC': 'Washington D.C.',
}

# The original CSV has a SPACE before each abbreviation, so build a second
# lookup whose keys carry the same single leading space.
abbrevToFullNameWithSpaces = {
    ' ' + code: name
    for code, name in abbrevToFullName.items()
}

In [5]:
# Add a 'Full State Name' column by translating each state abbreviation.
# The abbreviations in the original CSV carry a leading space; stripping the
# surrounding whitespace before the lookup keeps the mapping working whether or
# not that padding is present, instead of silently yielding NaN for every row
# (rows with a NaN state name are then dropped by a later groupby).
# Abbreviations that are not keys of abbrevToFullName still map to NaN.
# The 'State' column itself is left untouched.
povDF['Full State Name'] = povDF['State'].str.strip().map(abbrevToFullName)

In [6]:
# Column layout for the revised dataframe: the full state name first,
# then 'State', 'County' and the yearly columns.
newOrder = [
    'Full State Name', 'State', 'County',
    '2012', '2013', '2014', '2015', '2016', '2017',
]
# Build a new frame with the columns in that order and preview it.
povertyRates = povDF.reindex(newOrder, axis='columns')
povertyRates.head()

Unnamed: 0,Full State Name,State,County,2012,2013,2014,2015,2016,2017
0,Alaska,AK,"Aleutians East Borough, AK",16.5,16.7,16.4,16.8,15.5,16.7
1,Alaska,AK,"Aleutians West CA, AK",10.5,9.2,8.9,9.1,7.7,7.5
2,Alaska,AK,"Anchorage Muny, AK",7.7,7.9,8.3,8.2,8.1,8.1
3,Alaska,AK,"Bethel CA, AK",21.8,22.8,23.7,25.2,26.6,27.0
4,Alaska,AK,"Bristol Bay Borough, AK",6.2,7.9,7.2,7.2,7.3,7.1


In [7]:
# Median poverty rate per state, computed separately for each year column.
yearColumns = ['2012', '2013', '2014', '2015', '2016', '2017']
groupedPovDF = povertyRates.groupby('Full State Name')
medianByYear = {year: groupedPovDF[year].median() for year in yearColumns}

# Keep the per-year Series available under their individual names as well.
(medianPovRate2012, medianPovRate2013, medianPovRate2014,
 medianPovRate2015, medianPovRate2016, medianPovRate2017) = medianByYear.values()

# One row per state, one column per year.
groupMedianDF = pd.DataFrame(medianByYear)

# Write the state-level medians to disk, then preview the first rows.
groupMedianDF.to_csv(os.path.join('medianStates.csv'))
groupMedianDF.head()


Unnamed: 0_level_0,2012,2013,2014,2015,2016,2017
Full State Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alabama,19.8,20.9,20.5,20.2,20.3,19.3
Alaska,10.5,10.25,10.2,11.1,10.35,10.4
Arizona,20.2,19.4,19.9,19.8,20.5,19.7
Arkansas,20.4,21.5,21.1,21.0,20.6,19.8
California,15.5,16.65,16.7,16.6,16.25,15.25


In [8]:
# Add the Latitude/Longitude columns from the state centroid table.
# The left merge keeps every row of groupMedianDF, matching its
# 'Full State Name' key against the centroid table's 'State' column.
# NOTE(review): 'State' in the output comes from the centroid side, so a state
# name with no match in centroidDF would end up with NaN in State, Latitude and
# Longitude — confirm every name (e.g. 'Washington D.C.') matches the centroid CSV.
outputColumns = ['State', 'Latitude', 'Longitude', '2012', '2013', '2014', '2015', '2016', '2017']
mergedMedians = groupMedianDF.merge(
    centroidDF,
    how='left',
    left_on='Full State Name',
    right_on='State',
)
povRateWithCentroids = mergedMedians[outputColumns]

# Save the updated dataframe as a csv, then preview the first rows.
povRateWithCentroids.to_csv(os.path.join('povertyRateWithCentroids.csv'))
povRateWithCentroids.head()

Unnamed: 0,State,Latitude,Longitude,2012,2013,2014,2015,2016,2017
0,Alabama,32.7794,-86.8287,19.8,20.9,20.5,20.2,20.3,19.3
1,Alaska,64.0685,-152.2782,10.5,10.25,10.2,11.1,10.35,10.4
2,Arizona,34.2744,-111.6602,20.2,19.4,19.9,19.8,20.5,19.7
3,Arkansas,34.8938,-92.4426,20.4,21.5,21.1,21.0,20.6,19.8
4,California,37.1841,-119.4696,15.5,16.65,16.7,16.6,16.25,15.25
