#### Comparing HepVu death data vs. CDC WONDER death data

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import numpy as np
import itertools
from itertools import combinations
from scipy import spatial
import pickle as pickle
import gudhi
from pylab import *
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline

In [2]:
import warnings

# Ignore FutureWarnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
# Read the county shapefile that carries the CDC-derived death columns
# ('Deaths', 'od_deaths_') into a GeoDataFrame.
cdc_df = gpd.read_file(
    './data/processed data/SVI2020 WashingtonDC counties with death rate/SVI2020_WashingtonDC_counties_with_death_rate.shp'
)

In [4]:
# Read the county shapefile that carries the HepVu rate columns
# ('OP_Rate_20', 'NOD_Rate_2') into a GeoDataFrame.
# The path previously contained a doubled separator ('processed data//SVI2020...');
# it is written with a single '/' here so it matches the CDC path above.
hepvu_df = gpd.read_file(
    './data/processed data/SVI2020 WashingtonDC counties with death rate HepVu/SVI2020_WashingtonDC_counties_with_death_rate_HepVu.shp'
)

In [5]:
# Preview the first three rows of the CDC frame to check its columns.
cdc_df.head(n=3)

Unnamed: 0,ST,STATE,ST_ABBR,STCNTY,COUNTY,FIPS,LOCATION,AREA_SQMI,E_TOTPOP,M_TOTPOP,...,MP_NHPI,EP_TWOMORE,MP_TWOMORE,EP_OTHERRA,MP_OTHERRA,Shape_Leng,Shape_Area,Deaths,od_deaths_,geometry
0,11,District of Columbia,DC,11001,District of Columbia,11001,"District of Columbia, District of Columbia",61.126172,701974,0,...,0.1,2.9,0.2,0.5,0.1,0.645734,0.018374,432.0,0.615407,"POLYGON ((-77.11976 38.93434, -77.11253 38.940..."
1,24,Maryland,MD,24009,Calvert,24009,"Calvert County, Maryland",213.189517,92094,0,...,0.1,3.8,0.6,0.1,0.2,1.39213,0.06361,33.0,0.35833,"POLYGON ((-76.70121 38.71276, -76.69914 38.715..."
2,24,Maryland,MD,24017,Charles,24017,"Charles County, Maryland",457.823473,161448,0,...,0.1,4.7,0.7,0.4,0.2,2.164637,0.129562,60.0,0.371637,"POLYGON ((-77.27382 38.48356, -77.27240 38.489..."


In [6]:
# Preview the first three rows of the HepVu frame to check its columns.
hepvu_df.head(n=3)

Unnamed: 0,ST,STATE,ST_ABBR,STCNTY,COUNTY,FIPS,LOCATION,AREA_SQMI,E_TOTPOP,M_TOTPOP,...,EP_OTHERRA,MP_OTHERRA,Shape_Leng,Shape_Area,GEO ID,State Abbr,County Nam,OP_Rate_20,NOD_Rate_2,geometry
0,11,District of Columbia,DC,11001,District of Columbia,11001,"District of Columbia, District of Columbia",61.126172,701974,0,...,0.5,0.1,0.645734,0.018374,11001,DC,District of Col,32.6,58.3,"POLYGON ((-77.11976 38.93434, -77.11253 38.940..."
1,24,Maryland,MD,24009,Calvert,24009,"Calvert County, Maryland",213.189517,92094,0,...,0.1,0.2,1.39213,0.06361,24009,MD,Calvert County,45.4,34.5,"POLYGON ((-76.70121 38.71276, -76.69914 38.715..."
2,24,Maryland,MD,24017,Charles,24017,"Charles County, Maryland",457.823473,161448,0,...,0.4,0.2,2.164637,0.129562,24017,MD,Charles County,36.9,33.1,"POLYGON ((-77.27382 38.48356, -77.27240 38.489..."


In [7]:
# Attach the "Deaths" and "od_deaths_" columns from cdc_df to hepvu_df, matching rows
# on the county "FIPS" code. The left join keeps every hepvu_df row; a county with no
# match in cdc_df gets NaN in the two added columns.
# validate='many_to_one' makes pandas raise MergeError if a FIPS code is repeated in
# cdc_df, instead of silently duplicating hepvu_df rows.
hepvu_cdc_df = hepvu_df.merge(
    cdc_df[['FIPS', 'Deaths', 'od_deaths_']],
    on='FIPS',
    how='left',
    validate='many_to_one',
)

In [8]:
# Display the merged frame to check that 'Deaths' and 'od_deaths_' were appended as the last columns.
hepvu_cdc_df

Unnamed: 0,ST,STATE,ST_ABBR,STCNTY,COUNTY,FIPS,LOCATION,AREA_SQMI,E_TOTPOP,M_TOTPOP,...,Shape_Leng,Shape_Area,GEO ID,State Abbr,County Nam,OP_Rate_20,NOD_Rate_2,geometry,Deaths,od_deaths_
0,11,District of Columbia,DC,11001,District of Columbia,11001,"District of Columbia, District of Columbia",61.126172,701974,0,...,0.645734,0.018374,11001,DC,District of Col,32.6,58.3,"POLYGON ((-77.11976 38.93434, -77.11253 38.940...",432.0,0.615407
1,24,Maryland,MD,24009,Calvert,24009,"Calvert County, Maryland",213.189517,92094,0,...,1.39213,0.06361,24009,MD,Calvert County,45.4,34.5,"POLYGON ((-76.70121 38.71276, -76.69914 38.715...",33.0,0.35833
2,24,Maryland,MD,24017,Charles,24017,"Charles County, Maryland",457.823473,161448,0,...,2.164637,0.129562,24017,MD,Charles County,36.9,33.1,"POLYGON ((-77.27382 38.48356, -77.27240 38.489...",60.0,0.371637
3,24,Maryland,MD,24021,Frederick,24021,"Frederick County, Maryland",660.590349,255955,0,...,2.120478,0.180981,24021,MD,Frederick Count,56.6,29.2,"POLYGON ((-77.67716 39.32453, -77.67637 39.330...",80.0,0.312555
4,24,Maryland,MD,24031,Montgomery,24031,"Montgomery County, Maryland",493.109662,1047661,0,...,1.841432,0.136786,24031,MD,Montgomery Coun,25.0,13.8,"POLYGON ((-77.52728 39.14624, -77.52487 39.148...",149.0,0.142222
5,24,Maryland,MD,24033,Prince George's,24033,"Prince George's County, Maryland",482.64959,910551,0,...,2.146588,0.134026,24033,MD,Prince George's,24.8,23.7,"POLYGON ((-77.07995 38.70901, -77.08080 38.710...",227.0,0.2493
6,51,Virginia,VA,51013,Arlington,51013,"Arlington County, Virginia",25.997402,236434,0,...,0.403757,0.006995,51013,VA,Arlington Count,31.1,13.3,"POLYGON ((-77.17228 38.89325, -77.16811 38.896...",38.0,0.160721
7,51,Virginia,VA,51043,Clarke,51043,"Clarke County, Virginia",175.932838,14498,0,...,1.03018,0.048052,51043,VA,Clarke County,17.0,18.0,"POLYGON ((-78.15170 39.03667, -78.14240 39.061...",0.0,0.0
8,51,Virginia,VA,51047,Culpeper,51047,"Culpeper County, Virginia",379.186591,51935,0,...,2.099423,0.102349,51047,VA,Culpeper County,21.8,32.7,"POLYGON ((-78.22915 38.53328, -78.22896 38.534...",19.0,0.365842
9,51,Virginia,VA,51059,Fairfax,51059,"Fairfax County, Virginia",391.020748,1149439,0,...,2.138721,0.108028,51059,VA,Fairfax County,26.3,10.4,"MULTIPOLYGON (((-77.53698 38.84275, -77.53496 ...",121.0,0.105269


In [11]:
# Replace the placeholder count 5.0 in "Deaths" with the label 'suppressed'.
# NOTE(review): this assumes 5.0 is the code used for suppressed counts and never a
# real count -- confirm against the preprocessing that produced the shapefile.
# Only "Deaths" is relabelled in this cell; "od_deaths_" is handled in the next cell.
# Series.mask is vectorised and leaves the column's dtype untouched when no row matches.
hepvu_cdc_df['Deaths'] = hepvu_cdc_df['Deaths'].mask(
    hepvu_cdc_df['Deaths'] == 5.0, 'suppressed'
)


In [15]:
# Set 'od_deaths_' to 'suppressed' on every row where 'Deaths' is 'suppressed'.
# The column is rebuilt with Series.mask and assigned as a whole rather than written
# through `.loc[...] = 'suppressed'`: putting a string into a float64 column via .loc
# is an upcasting setitem, which recent pandas versions deprecate (as a FutureWarning,
# which this notebook silences) and which is slated to become an error.
hepvu_cdc_df['od_deaths_'] = hepvu_cdc_df['od_deaths_'].mask(
    hepvu_cdc_df['Deaths'] == 'suppressed', 'suppressed'
)

In [20]:
# Side-by-side view, per county, of the CDC-derived columns ('Deaths', 'od_deaths_')
# and the HepVu rate ('NOD_Rate_2').
# NOTE(review): 'od_deaths_' equals Deaths / E_TOTPOP * 1000 in the rows shown, while
# 'NOD_Rate_2' is presumably a rate per 100,000 -- confirm before comparing magnitudes.
hepvu_cdc_df[[
    'County Nam',
    'E_TOTPOP',
    'Deaths',
    'od_deaths_',
    'NOD_Rate_2',
]]

Unnamed: 0,County Nam,E_TOTPOP,Deaths,od_deaths_,NOD_Rate_2
0,District of Col,701974,432.0,0.615407,58.3
1,Calvert County,92094,33.0,0.35833,34.5
2,Charles County,161448,60.0,0.371637,33.1
3,Frederick Count,255955,80.0,0.312555,29.2
4,Montgomery Coun,1047661,149.0,0.142222,13.8
5,Prince George's,910551,227.0,0.2493,23.7
6,Arlington Count,236434,38.0,0.160721,13.3
7,Clarke County,14498,0.0,0.0,18.0
8,Culpeper County,51935,19.0,0.365842,32.7
9,Fairfax County,1149439,121.0,0.105269,10.4


In [21]:
# For reference: how the 'od_deaths_' column was calculated when the CDC shapefile was prepared.
# A new column 'od_deaths_per_1000' was created as the ratio of 'Deaths' to 'E_TOTPOP', multiplied by 1000:
# metro_svi_deaths['od_deaths_per_1000'] = (metro_svi_deaths['Deaths']/metro_svi_deaths['E_TOTPOP'])*1000