In [1]:
import folium
import pandas as pd

%matplotlib inline

### Pull in data

In [2]:
# Load the discharge counts and lower-case every column label in one chained step.
discharges = (
    pd.read_csv('thcic-discharges-by-dischargeqtr_county_msdrg.csv')
      .rename(columns=str.lower)
)

In [3]:
# Show the last five rows to check the load and the lower-cased headers.
discharges.tail(n=5)

Unnamed: 0,discharge_qtr,county,hcfa_drg,count
487624,2010Q2,361,91,1
487625,2010Q2,53,340,3
487626,2010Q2,397,337,1
487627,2010Q2,91,862,1
487628,2010Q2,181,539,5


### Clean up dataframe

In [4]:
# pd.read_csv reads county as an int; convert it to the federal FIPS code
# (Texas format 48XXX): zero-pad the county number to three digits and prefix '48'.
# The integer-dtype guard makes the cell safe to re-run: once the column holds
# strings, running it again is a no-op instead of producing '4848XXX'.
if pd.api.types.is_integer_dtype(discharges['county']):
    discharges['county'] = '48' + discharges['county'].astype(str).str.zfill(3)

In [5]:
# The first four characters of the quarter label (e.g. '2010Q2') are the year.
# The result stays a string, which is why later queries compare against a quoted year.
discharges['year'] = discharges['discharge_qtr'].str[:4]

In [6]:
# Show the last five rows again to confirm the FIPS-formatted county and the new year column.
discharges.tail(n=5)

Unnamed: 0,discharge_qtr,county,hcfa_drg,count,year
487624,2010Q2,48361,91,1,2010
487625,2010Q2,48053,340,3,2010
487626,2010Q2,48397,337,1,2010
487627,2010Q2,48091,862,1,2010
487628,2010Q2,48181,539,5,2010


### Group by year and map discharges for each county

In [7]:
# Total discharges per (year, county). to_frame() turns the summed Series into a
# one-column DataFrame ('count') indexed by year and county.
discharges_by_yr_county = (
    discharges
    .groupby(['year', 'county'])['count']
    .sum()
    .to_frame()
)

In [8]:
# Show the first five (year, county) totals.
discharges_by_yr_county.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
year,county,Unnamed: 2_level_1
2008,48029,1
2008,48037,95
2008,48067,17
2008,48077,5
2008,48085,4


In [9]:
# Year to inspect in the exploratory cells; it is interpolated into the query string below.
year = 2009

In [10]:
# Keep only the selected year's rows, then move the (year, county) index levels
# back into ordinary columns. The year is quoted because the index holds strings.
discharges_to_map = discharges_by_yr_county.query(f'year == "{year}"').reset_index()

In [11]:
# Summary statistics of the per-county totals for the selected year.
# Bracket access is required here: `discharges_to_map.count` would resolve to
# the DataFrame.count method rather than the 'count' column.
discharges_to_map['count'].describe()

count       254.000000
mean      11416.724409
std       39506.583058
min          13.000000
25%         680.000000
50%        1882.500000
75%        6838.750000
max      449334.000000
Name: count, dtype: float64

In [12]:
# Path to the county-boundary GeoJSON file passed to the choropleth.
us_county_geo = 'us-counties.json'

In [13]:
def visualize_year(year, threshold_scale=(0, 5000, 20000, 250000, 500000)):
    """Draw a county choropleth of total discharges for a single year.

    Parameters
    ----------
    year : int or str
        Year to plot. It is interpolated into a quoted query string because
        the ``year`` index level of ``discharges_by_yr_county`` holds strings.
    threshold_scale : sequence of numbers, optional
        Bin edges for the colour scale. Defaults to the edges that were
        previously hard-coded in this function.

    Returns
    -------
    folium.Map
        Map centred on [31.8, -97.7431] at zoom level 6 with the choropleth
        layer added.
    """
    # Local name differs from the notebook-level `discharges_to_map` so the
    # function does not appear to depend on (or overwrite) that variable.
    counts_for_year = (discharges_by_yr_county
                           .query(f'year == "{year}"')
                           .reset_index())

    # Named `county_map` rather than `map` so the builtin is not shadowed.
    county_map = folium.Map(location=[31.8, -97.7431], zoom_start=6)

    # key_on='feature.id' joins each GeoJSON feature's id to the 'county' column
    # (presumably 5-digit FIPS ids in us-counties.json -- confirm against the file).
    # NOTE(review): newer folium releases appear to expose this as
    # folium.Choropleth(geo_data=..., bins=...) -- confirm against the installed
    # version before upgrading.
    county_map.choropleth(geo_path=us_county_geo,
                          data=counts_for_year,
                          columns=['county', 'count'],
                          key_on='feature.id',
                          fill_color='YlGnBu',
                          line_opacity=0.3,
                          highlight=True,
                          threshold_scale=list(threshold_scale))

    return county_map

In [14]:
# Render the 2009 map as this cell's output.
visualize_year(year=2009)

In [15]:
# Render the 2010 map as this cell's output.
visualize_year(year=2010)

---

### What can we conclude?

Nothing. This was a simple exercise in connecting the dots; we aren't trying to do any kind of analysis here. The maps show raw discharge counts, which are not normalized by population.

<img src="xkcd_population_heatmap.png" width=500>