In [None]:
#group df census by county and state to figure out race group percentages
#figure out which columns add up to the total numbers
#Hispanic population seems to be handled separately
#determine percentages of cases for each race in df_case
#Compare the % of cases per race and the % of that race in the state to see if there are any discrepancies

#ultimate goal: create stacked bar charts to show % of total population vs % of total cases by race group by state

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from collections import Counter
from datetime import timedelta

pd.options.display.max_columns = None

#data dictionaries
import state_abbreviations
import statepop_2019est

In [4]:
url1 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vR_xmYt4ACPDZCDJcY12kCiMiH0ODyx3E1ZvgOHB8ae1tRcjXbs_yWBOA4j4uoCEADVfC1PS2jYO68B/pub?gid=43720681&single=true&output=csv'
df_case = pd.read_csv(url1,error_bad_lines = False,encoding='latin9')

url2 = 'https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/asrh/cc-est2019-alldata.csv'
df_census = pd.read_csv(url2,error_bad_lines=False,encoding='latin9')

In [35]:
df_census['UID'] = '840' + df_census['STATE'].astype('str').str.zfill(2) + df_census['COUNTY'].astype('str').str.zfill(3)

In [43]:
df_census.groupby(['STNAME','UID','CTYNAME']).sum()[['TOT_POP','TOT_MALE','TOT_FEMALE']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,TOT_POP,TOT_MALE,TOT_FEMALE
STNAME,UID,CTYNAME,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alabama,84001001,Autauga County,1321282,643614,677668
Alabama,84001003,Baldwin County,4765126,2322350,2442776
Alabama,84001005,Barbour County,634490,337034,297456
Alabama,84001007,Bibb County,543298,291330,251968
Alabama,84001009,Blount County,1381418,681562,699856
...,...,...,...,...,...
Wyoming,84056037,Sweetwater County,1055770,547734,508036
Wyoming,84056039,Teton County,536824,279524,257300
Wyoming,84056041,Uinta County,498774,252254,246520
Wyoming,84056043,Washakie County,198540,99852,98688


In [9]:
df_census.head()

Unnamed: 0,SUMLEV,STATE,COUNTY,STNAME,CTYNAME,YEAR,AGEGRP,TOT_POP,TOT_MALE,TOT_FEMALE,WA_MALE,WA_FEMALE,BA_MALE,BA_FEMALE,IA_MALE,IA_FEMALE,AA_MALE,AA_FEMALE,NA_MALE,NA_FEMALE,TOM_MALE,TOM_FEMALE,WAC_MALE,WAC_FEMALE,BAC_MALE,BAC_FEMALE,IAC_MALE,IAC_FEMALE,AAC_MALE,AAC_FEMALE,NAC_MALE,NAC_FEMALE,NH_MALE,NH_FEMALE,NHWA_MALE,NHWA_FEMALE,NHBA_MALE,NHBA_FEMALE,NHIA_MALE,NHIA_FEMALE,NHAA_MALE,NHAA_FEMALE,NHNA_MALE,NHNA_FEMALE,NHTOM_MALE,NHTOM_FEMALE,NHWAC_MALE,NHWAC_FEMALE,NHBAC_MALE,NHBAC_FEMALE,NHIAC_MALE,NHIAC_FEMALE,NHAAC_MALE,NHAAC_FEMALE,NHNAC_MALE,NHNAC_FEMALE,H_MALE,H_FEMALE,HWA_MALE,HWA_FEMALE,HBA_MALE,HBA_FEMALE,HIA_MALE,HIA_FEMALE,HAA_MALE,HAA_FEMALE,HNA_MALE,HNA_FEMALE,HTOM_MALE,HTOM_FEMALE,HWAC_MALE,HWAC_FEMALE,HBAC_MALE,HBAC_FEMALE,HIAC_MALE,HIAC_FEMALE,HAAC_MALE,HAAC_FEMALE,HNAC_MALE,HNAC_FEMALE,UID
0,50,1,1,Alabama,Autauga County,1,0,54571,26569,28002,21295,22002,4559,5130,119,139,200,284,29,18,367,429,21633,22391,4704,5306,277,314,300,409,42,37,25875,27386,20709,21485,4512,5091,103,115,194,280,13,9,344,406,21026,21853,4647,5258,251,282,291,398,23,27,694,616,586,517,47,39,16,24,6,4,16,9,23,23,607,538,57,48,26,32,9,11,19,10,8400 1\n1 1\n2 1\n3...
1,50,1,1,Alabama,Autauga County,1,1,3579,1866,1713,1411,1316,362,317,5,3,13,15,1,0,74,62,1479,1368,405,362,23,18,34,28,3,1,1778,1651,1337,1260,356,313,2,2,13,15,0,0,70,61,1402,1312,396,357,19,17,34,28,1,0,88,62,74,56,6,4,3,1,0,0,1,0,4,1,77,56,9,5,4,1,0,0,2,1,8400 1\n1 1\n2 1\n3...
2,50,1,1,Alabama,Autauga County,1,2,3991,2001,1990,1521,1526,399,374,14,8,17,21,1,3,49,58,1570,1583,425,403,27,19,32,42,3,4,1933,1916,1460,1465,398,372,12,2,17,21,0,3,46,53,1506,1517,423,400,25,12,30,39,1,4,68,74,61,61,1,2,2,6,0,0,1,0,3,5,64,66,2,3,2,7,2,3,2,0,8400 1\n1 1\n2 1\n3...
3,50,1,1,Alabama,Autauga County,1,3,4290,2171,2119,1658,1620,431,406,15,12,23,18,4,1,40,62,1694,1681,453,436,29,27,32,37,4,5,2105,2055,1613,1570,421,403,12,9,22,18,3,0,34,55,1643,1624,440,429,24,22,30,36,3,4,66,64,45,50,10,3,3,3,1,0,1,1,6,7,51,57,13,7,5,5,2,1,1,1,8400 1\n1 1\n2 1\n3...
4,50,1,1,Alabama,Autauga County,1,4,4290,2213,2077,1628,1585,502,424,12,7,25,14,4,2,42,45,1664,1624,525,444,23,20,39,31,6,5,2153,2026,1580,1543,495,420,12,5,23,14,1,1,42,43,1616,1580,518,439,23,18,37,30,3,4,60,51,48,42,7,4,0,2,2,0,3,1,0,2,48,44,7,5,0,2,2,1,3,1,8400 1\n1 1\n2 1\n3...


In [5]:
df_case.head()

Unnamed: 0,Date,State,Cases_Total,Cases_White,Cases_Black,Cases_LatinX,Cases_Asian,Cases_AIAN,Cases_NHPI,Cases_Multiracial,Cases_Other,Cases_Unknown,Cases_Ethnicity_Hispanic,Cases_Ethnicity_NonHispanic,Cases_Ethnicity_Unknown,Deaths_Total,Deaths_White,Deaths_Black,Deaths_LatinX,Deaths_Asian,Deaths_AIAN,Deaths_NHPI,Deaths_Multiracial,Deaths_Other,Deaths_Unknown,Deaths_Ethnicity_Hispanic,Deaths_Ethnicity_NonHispanic,Deaths_Ethnicity_Unknown
0,20200805,AK,4183.0,944.0,108.0,,98.0,461.0,137.0,101.0,73.0,2261.0,181.0,1411.0,2591.0,25.0,11.0,0.0,,2.0,9.0,3.0,0.0,0.0,0.0,0.0,25.0,0.0
1,20200805,AL,94654.0,28854.0,25330.0,,348.0,,,,4965.0,35155.0,6222.0,44538.0,43901.0,1695.0,862.0,682.0,,4.0,,,,35.0,112.0,50.0,1394.0,250.0
2,20200805,AR,46293.0,23573.0,9928.0,,659.0,129.0,2193.0,,5983.0,3828.0,10599.0,35694.0,0.0,508.0,302.0,136.0,,7.0,2.0,33.0,,30.0,0.0,45.0,465.0,0.0
3,20200805,AS,,,,,,,,,,,,,,,,,,,,,,,,,,
4,20200805,AZ,182203.0,32128.0,4282.0,44377.0,1493.0,10099.0,,,4328.0,85496.0,44377.0,52330.0,85496.0,3932.0,1538.0,121.0,1106.0,48.0,472.0,,,67.0,580.0,1106.0,2246.0,580.0
