In [150]:
import numpy as np
import pandas as pd
import math
from geopy import Nominatim
import json

""" Encapsulates data into a mapping from county to the relevant data about it. The filePath must link to a file that
can be read by pandas. relevantStatistics must be a list whose elements are column names of the data table.
Each county's data is stored in a pandas DataFrame."""

def extractCountyDataAsDataFrame(filePath, relevantStatistics = ["Deaths"], countyStatisticMap = None):
    """Build a mapping from county name to a DataFrame of that county's statistics.

    Args:
        filePath: Path (or file-like object) readable by ``pd.read_table``.
            The table must contain a "County" column.
        relevantStatistics: List of column names to keep for each county.
        countyStatisticMap: Optional existing mapping to extend in place.
            When omitted, a fresh dict is created on every call.

    Returns:
        The mapping (the same object as ``countyStatisticMap`` when one is
        passed in), holding one DataFrame of the requested columns per county.
    """
    # A `{}` default would be shared by every call and silently accumulate
    # counties across unrelated files, so the dict is created per call.
    if countyStatisticMap is None:
        countyStatisticMap = {}

    myData = pd.read_table(filePath)

    # groupby skips rows whose County is missing and visits each county exactly
    # once, so a county spanning several rows is not appended repeatedly.
    # sort=False keeps the counties in file order.
    for county, countyRows in myData.groupby("County", sort=False):
        statsToAdd = countyRows[relevantStatistics]
        if county in countyStatisticMap:
            # pd.concat expects a single sequence of frames.
            countyStatisticMap[county] = pd.concat([countyStatisticMap[county], statsToAdd])
        else:
            countyStatisticMap[county] = statsToAdd

    return countyStatisticMap

""" Same as extractCountyDataAsDataFrame except the data is stored as a list of lists rather than a dataframe"""
def extractCountyDataAsList(filePath, relevantStatistics = ["Deaths"], countyStatisticMap = None):
    """Build a mapping from county name to a list of per-row statistic lists.

    Args:
        filePath: Path (or file-like object) readable by ``pd.read_table``.
            The table must contain a "County" column.
        relevantStatistics: List of column names to keep, in output order.
        countyStatisticMap: Optional existing mapping to extend in place.
            When omitted, a fresh dict is created on every call.

    Returns:
        The mapping; each value is a list with one inner list per table row,
        holding that row's values for ``relevantStatistics``.
    """
    # A `{}` default would be shared by every call and accumulate rows
    # across unrelated invocations, so the dict is created per call.
    if countyStatisticMap is None:
        countyStatisticMap = {}

    myData = pd.read_table(filePath)
    # Rows without a county name would otherwise end up under a NaN key.
    relevantData = myData[["County"] + relevantStatistics].dropna(subset=["County"])

    for infoList in relevantData.values.tolist():
        county, stats = infoList[0], infoList[1:]
        countyStatisticMap.setdefault(county, []).append(stats)
    return countyStatisticMap

""" Filters out keys and values from a dictionary"""
def filterDictionary(dictionary, keyFilter = lambda x: True, valueFilter = lambda x: True):
    """Return a new dict with only the entries accepted by both predicates.

    The value predicate is consulted first; the key predicate only runs for
    entries whose value was accepted. The input dictionary is not modified.
    """
    return {
        key: value
        for key, value in dictionary.items()
        if valueFilter(value) and keyFilter(key)
    }

""" Maps functions onto keys or values of a dictionary"""
def mapDictionary(dictionary, keyMap = (lambda x: x), valueMap = (lambda x: x)):
    """Return a new dict whose keys and values are the mapped originals.

    For each entry the value is transformed first, then the key. If two
    entries map to the same key, the later one wins.
    """
    result = {}
    for oldKey, oldValue in dictionary.items():
        newValue = valueMap(oldValue)
        newKey = keyMap(oldKey)
        result[newKey] = newValue
    return result

"""Returns the gpsCoordinates for a place as a tuple of latitude and longitude"""
def gpsCoordinates(placeName):
    """Geocode a place name with Nominatim and return ``(latitude, longitude)``.

    Args:
        placeName: Free-text query passed to the geocoder.

    Returns:
        A ``(latitude, longitude)`` tuple for the geocoder's result.

    Raises:
        ValueError: If the geocoder finds no match for ``placeName``.
    """
    # Nominatim's usage policy asks for an application-specific user agent,
    # and geopy 2.x refuses to build the geocoder without one.
    geolocator = Nominatim(user_agent="county-statistics-notebook")
    location = geolocator.geocode(placeName)
    if location is None:
        # geocode() returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on `.latitude`.
        raise ValueError("No geocoding result for {!r}".format(placeName))
    return (location.latitude, location.longitude)

"""Convert to json"""
def writeToJSON(fileName, dictionary):
    """Serialize ``dictionary`` as JSON into the file ``fileName + ".json"``.

    Args:
        fileName: Output path without the ".json" extension.
        dictionary: JSON-serializable object to write.
    """
    # The context manager closes the file even if json.dump raises
    # (for example on a non-serializable value).
    with open(fileName + ".json", 'w') as jsonFileToWrite:
        json.dump(dictionary, jsonFileToWrite)


# Load the raw table before deriving anything from it, so this cell also runs on
# a fresh kernel. An explicit empty dict keeps re-runs from accumulating rows.
# NOTE(review): this previously called `extractCountyData`, which is not defined
# in the visible notebook; the DataFrame variant is assumed because the values
# are indexed by column name below — confirm.
countyStatisticMap = extractCountyDataAsDataFrame("PoisoningDeaths2010_2015.txt", ["Deaths", "Crude Rate"], {});

# Reduce each county's DataFrame to its first "Deaths" entry.
niceCountyStatisticMap = mapDictionary(countyStatisticMap, valueMap = lambda v: v["Deaths"].tolist()[0])
# Drop counties whose death count is reported as "Suppressed".
niceCountyStatisticMap = filterDictionary(niceCountyStatisticMap, valueFilter = lambda v: v != "Suppressed")
niceCountyStatisticMap = mapDictionary(niceCountyStatisticMap, valueMap= lambda v: int(v))

#writeToJSON("DeathsByCountyUnsuppressed", niceCountyStatisticMap)
niceCountyStatisticMap;

In [123]:
# NOTE(review): this previously called `extractCountyData`, which is not defined
# in the visible notebook; the DataFrame variant is assumed — confirm.
# An explicit empty dict keeps re-runs of this cell from accumulating rows.
countyStatisticMap = extractCountyDataAsDataFrame("PoisoningDeaths2010_2015.txt", countyStatisticMap={});
#list(map(gpsCoordinates, list(countyStatisticMap.keys())[:10]));
import json

# Round-trip check: tuple-like keys written as strings survive a JSON dump/load.
dictionary = {
    "(2, 3, 5)": 0,
    "(3, 4, 5)": 1
          }
# Context managers close the file even if dump/load raises.
with open("jsonTestFile.json", 'w') as jsonTestFile:
    json.dump(dictionary, jsonTestFile)

with open("jsonTestFile.json", 'r') as jsonTestFile:
    dict2 = json.load(jsonTestFile)

dict2

{'(2, 3, 5)': 0, '(3, 4, 5)': 1}

In [126]:
"""Create mapping of countyName to gpsCoordinates"""
#countyGPSMap = {}
#countyStatisticMap = extractCountyData("PoisoningDeaths2010_2015.txt")
# Keep coordinates gathered by earlier runs of this cell (geocoding is a remote
# call per county), but make sure the cache exists on a fresh kernel.
try:
    countyGPSMap
except NameError:
    countyGPSMap = {}

countyNames = list(countyStatisticMap.keys())
for countyName in countyNames:
    if not countyName in countyGPSMap:
        try:
            countyGPSMap[countyName] = gpsCoordinates(countyName)
        except Exception:
            # Best effort: report the county that could not be geocoded and move
            # on. `Exception` (not a bare except) lets KeyboardInterrupt stop the loop.
            print(countyName)

Northwest Arctic Borough, AK
Petersburg Borough/Census Area, AK
Prince of Wales-Outer Ketchikan Census Area, AK
Southeast Fairbanks Census Area, AK
Wrangell-Petersburg Census Area, AK
De Kalb County, IN
Prince George's County, MD
Queen Anne's County, MD
St. Mary's County, MD
Somerset County, MD
Talbot County, MD
Washington County, MD
Wicomico County, MD
Worcester County, MD
Baltimore city, MD
Barnstable County, MA
Berkshire County, MA
Bristol County, MA
Dukes County, MA
Essex County, MA
Franklin County, MA
Hampden County, MA
Hampshire County, MA
Middlesex County, MA
Nantucket County, MA
Norfolk County, MA
Plymouth County, MA
Suffolk County, MA
Worcester County, MA
Alcona County, MI
Alger County, MI
Allegan County, MI
Alpena County, MI
Antrim County, MI
Arenac County, MI
Baraga County, MI
Barry County, MI
Bay County, MI
Benzie County, MI
Berrien County, MI
Branch County, MI
Calhoun County, MI
Cass County, MI
Charlevoix County, MI
Cheboygan County, MI
Chippewa County, MI
Clare County, MI

In [169]:
# Percentage of counties in countyStatisticMap that have an entry in countyGPSMap.
100.0 * len(countyGPSMap) / len(countyStatisticMap)
#geolocator = Nominatim()
#geolocator.geocode("Alabama")
#testArray = np.array([[3,4],[5,6]], np.int32)
#testArray

#myData = pd.read_table("PoisoningDeaths2010_2015.txt")
#myData['County Code']
#myData[myData.County_Code == "Autauga County, AL"][["Deaths", "Population", "Crude Rate"]]
#myData["County"].dropna()

#countyStatisticMap["Autauga County, AL"]["Deaths"].as_matrix()

def fixString(x):
    """Render ``x`` as an integer string, left-padded with "0" to at least five characters."""
    digits = str(int(x))
    return digits.rjust(5, "0")

#countyToCode = mapDictionary(countyToCode, valueMap=fixString)
# NOTE(review): no live assignment to `countyToCode` appears in this cell (the
# neighbouring assignments are commented out); presumably it persists from an
# earlier kernel session — confirm before a fresh run.
countyToCode

#countyToCode = extractCountyDataAsList("PoisoningDeaths2010_2015.txt", ["County Code"])

{'Autauga County, AL': '01001',
 'Baldwin County, AL': '01003',
 'Barbour County, AL': '01005',
 'Bibb County, AL': '01007',
 'Blount County, AL': '01009',
 'Bullock County, AL': '01011',
 'Butler County, AL': '01013',
 'Calhoun County, AL': '01015',
 'Chambers County, AL': '01017',
 'Cherokee County, AL': '01019',
 'Chilton County, AL': '01021',
 'Choctaw County, AL': '01023',
 'Clarke County, AL': '01025',
 'Clay County, AL': '01027',
 'Cleburne County, AL': '01029',
 'Coffee County, AL': '01031',
 'Colbert County, AL': '01033',
 'Conecuh County, AL': '01035',
 'Coosa County, AL': '01037',
 'Covington County, AL': '01039',
 'Crenshaw County, AL': '01041',
 'Cullman County, AL': '01043',
 'Dale County, AL': '01045',
 'Dallas County, AL': '01047',
 'DeKalb County, AL': '01049',
 'Elmore County, AL': '01051',
 'Escambia County, AL': '01053',
 'Etowah County, AL': '01055',
 'Fayette County, AL': '01057',
 'Franklin County, AL': '01059',
 'Geneva County, AL': '01061',
 'Greene County, AL'

In [171]:
# Writes countyToCode to "CountyNameToCode.json" (writeToJSON appends the ".json" suffix).
writeToJSON("CountyNameToCode", countyToCode)