In [1]:
# Dependencies
%matplotlib inline

import pandas as pd
import os
import matplotlib as plt
import numpy as np

In [2]:
# Load the grant CSV into a DataFrame and preview its first rows.
grants_csv = "./Data Sources/hrsa_grant_data.csv"
file = pd.read_csv(grants_csv)
file.head()

Unnamed: 0,DP_PRIMARY_ID,Grantee Name,Grant Number,HRSA Key Program,County Name,State Abbreviation,Grant Activity Code,Program Name,Financial Assistance,Award Year,...,Grant Project Period End Date,HHS Region,U.S. Senator Name (1),U.S. Senator Name (2),U.S. Congressional Representative Name,In U.S. - Mexico Border 100 Kilometer Area,In U.S. - Mexico Border County,Rural Status,Longitude,Latitude
0,A03HP15137,Florida International University,A03HP15137,Health Workforce,Miami-Dade County,FL,A03,Public Health Traineeship (A03),30918.0,2012,...,6/30/2013,Region IV,Bill Nelson,Marco Rubio,Carlos Curbelo,No,No,No,-80.376423,25.761029
1,A03HP15139,"REGENTS OF THE UNIVERSITY OF CALIFORNIA, THE",A03HP15139,Health Workforce,Alameda County,CA,A03,Public Health Traineeship (A03),25952.0,2012,...,6/30/2013,Region IX,Dianne Feinstein,Kamala D. Harris,Barbara Lee,No,No,No,-122.268031,37.870205
2,A03HP15140,"UNIVERSITY OF CALIFORNIA, LOS ANGELES",A03HP15140,Health Workforce,Los Angeles County,CA,A03,Public Health Traineeship (A03),70156.0,2012,...,6/30/2013,Region IX,Dianne Feinstein,Kamala D. Harris,Ted Lieu,No,No,No,-118.443371,34.058953
3,A03HP24225,UNIVERSITY OF FLORIDA,A03HP24225,Health Workforce,Alachua County,FL,A03,Public Health Traineeship (A03),12914.0,2012,...,6/30/2013,Region IV,Bill Nelson,Marco Rubio,Ted S. Yoho,No,No,No,-82.342084,29.649728
4,A03HP24226,Research Foundation Of The City University Of ...,A03HP24226,Health Workforce,New York County,NY,A03,Public Health Traineeship (A03),26076.0,2012,...,6/30/2013,Region II,Charles E. Schumer,Kirsten E. Gillibrand,Carolyn B. Maloney,No,No,No,-73.965305,40.768781


In [21]:
# Column labels of the loaded frame, in their original order.
column_names = file.columns.tolist()
column_names

['DP_PRIMARY_ID',
 'Grantee Name',
 'Grant Number',
 'HRSA Key Program',
 'County Name',
 'State Abbreviation',
 'Grant Activity Code',
 'Program Name',
 'Financial Assistance',
 'Award Year',
 'DUNS Number',
 'Address',
 'City',
 'ZIP Code',
 'Program Director Name',
 'Program Director Phone Number',
 'Program Director Email',
 'Abstract',
 'Grant Project Period End Date',
 'HHS Region',
 'U.S. Senator Name (1)',
 'U.S. Senator Name (2)',
 'U.S. Congressional Representative Name',
 'In U.S. - Mexico Border 100 Kilometer Area',
 'In U.S. - Mexico Border County',
 'Rural Status',
 'Longitude',
 'Latitude']

In [26]:
# Distinct values of the "HRSA Key Program" column, in order of first appearance.
keyprogram_lst = file["HRSA Key Program"].unique().tolist()
keyprogram_lst

['Health Workforce',
 'Maternal and Child Health',
 'Rural Health',
 'Healthcare Systems',
 'Primary Health Care',
 'HIV/AIDS',
 'Office of the Administrator']

In [24]:
# Distinct county names; only the first five are displayed.
county_lst = file["County Name"].unique().tolist()
county_lst[:5]

['Miami-Dade County',
 'Alameda County',
 'Los Angeles County',
 'Alachua County',
 'New York County']

In [25]:
# Distinct state abbreviations; only the first five are displayed.
state_lst = file["State Abbreviation"].unique().tolist()
state_lst[:5]

['FL', 'CA', 'NY', 'LA', 'GA']

In [20]:
# Distinct program names; only the first five are displayed.
program_lst = file["Program Name"].unique().tolist()
program_lst[:5]

['Public Health Traineeship (A03)',
 'Advanced Education Nursing Traineeship (A10)',
 'Maternal and Child Health Services (B04)',
 'Nurse Anesthetist Traineeships (A22)',
 'Health Careers Opportunity Program (D18)']

In [4]:
# Distinct award years as an ascending list of plain Python ints.
# `sorted` replaces the earlier list -> Series -> DataFrame -> Series -> list
# round trip, which relied on `reset_index()` auto-naming the value column `0`.
award_year_lst = sorted(int(year) for year in file["Award Year"].unique())
award_year_lst

[2012, 2013, 2014, 2015, 2016, 2017, 2018]

In [30]:
# Distinct values of the "HHS Region" column, in order of first appearance.
region_lst = file["HHS Region"].unique().tolist()
region_lst

['Region IV',
 'Region IX',
 'Region II',
 'Region VI',
 'Region I',
 'Region V',
 'Region III',
 'Region VIII',
 'Region VII',
 'Region X',
 'Not Determined']

In [33]:
# Distinct names from each of the two senator columns.
senator1_lst = list(file["U.S. Senator Name (1)"].unique())
senator2_lst = list(file["U.S. Senator Name (2)"].unique())

# Combine both columns into one list of distinct senators. Plain concatenation
# would repeat any name (or missing value) that occurs in both columns, so
# de-duplicate while keeping first-appearance order. dtype="object" keeps the
# values as-is and avoids dtype inference on an empty input.
senator_lst = pd.Series(senator1_lst + senator2_lst, dtype="object").unique().tolist()
senator_lst[0:5]

['Bill Nelson',
 'Dianne Feinstein',
 'Charles E. Schumer',
 'Bill Cassidy',
 'David Perdue']

In [34]:
# Distinct congressional representative names; only the first five are displayed.
congress_lst = file["U.S. Congressional Representative Name"].unique().tolist()
congress_lst[:5]

['Carlos Curbelo',
 'Barbara Lee',
 'Ted Lieu',
 'Ted S. Yoho',
 'Carolyn B. Maloney']

In [6]:
# Pull the coordinate columns out as plain Python lists.
lat = file["Latitude"].tolist()
lon = file["Longitude"].tolist()

# Despite its name, `latlon_dict` is a two-column DataFrame
# (latitude, longitude), one row per row of `file`.
latlon_dict = pd.DataFrame(dict(latitude=lat, longitude=lon))
latlon_dict.head()

Unnamed: 0,latitude,longitude
0,25.761029,-80.376423
1,37.870205,-122.268031
2,34.058953,-118.443371
3,29.649728,-82.342084
4,40.768781,-73.965305


In [7]:
# Award amount plus coordinates, each column coerced with pd.to_numeric
# (default errors="raise", so an unparsable value raises instead of becoming NaN).
numeric_cols = ["Financial Assistance", "Latitude", "Longitude"]
award_by_loc = file[numeric_cols].apply(pd.to_numeric)
award_by_loc.head()

Unnamed: 0,Financial Assistance,Latitude,Longitude
0,30918.0,25.761029,-80.376423
1,25952.0,37.870205,-122.268031
2,70156.0,34.058953,-118.443371
3,12914.0,29.649728,-82.342084
4,26076.0,40.768781,-73.965305


In [17]:
# Create awards by state for each available year

# Per-state aggregation: "Award Year" is averaged (constant within one year's
# rows, so it simply carries the year through) and "Financial Assistance" is summed.
summary_dict = {"Award Year" : "mean", "Financial Assistance" : "sum"}


def award_by_state(grants, year, aggregations):
    """Aggregate one award year's grants per state.

    Parameters
    ----------
    grants : pandas.DataFrame
        Must contain the columns "Award Year", "State Abbreviation" and
        "Financial Assistance".
    year : int
        Value of "Award Year" to keep.
    aggregations : dict
        Column -> aggregation mapping passed to ``GroupBy.agg``.

    Returns
    -------
    pandas.DataFrame
        One row per "State Abbreviation" (the index), with one column per key
        of ``aggregations``. Empty if no row matches ``year``.
    """
    columns = ["Award Year", "State Abbreviation", "Financial Assistance"]
    in_year = grants["Award Year"] == year
    # Single .loc selection instead of chained df[cols][mask] indexing.
    selected = grants.loc[in_year, columns]
    return selected.groupby(["State Abbreviation"]).agg(aggregations)


# One summary frame per year, keyed by year. The range is fixed at 2012-2018
# so that each of the named variables below is always defined.
award_by_state_by_year = {
    year: award_by_state(file, year, summary_dict) for year in range(2012, 2019)
}

# Keep the individual per-year names for code that refers to them directly.
award_by_state_2012 = award_by_state_by_year[2012]
award_by_state_2013 = award_by_state_by_year[2013]
award_by_state_2014 = award_by_state_by_year[2014]
award_by_state_2015 = award_by_state_by_year[2015]
award_by_state_2016 = award_by_state_by_year[2016]
award_by_state_2017 = award_by_state_by_year[2017]
award_by_state_2018 = award_by_state_by_year[2018]