In [1]:
import cenpy
import numpy as np
import pandas as pd
import geopandas as gpd

### Census data
Demographic data

In [2]:
# Pin the ACS vintage instead of asking for "latest": the tract-level table
# merged further down comes from the 2018 ACS (data/ACS/MD_2018ACS.csv), and
# "latest" resolves to whichever year the installed cenpy release treats as
# newest, so a re-run could silently combine two different survey years.
ACS_YEAR = 2018
acs = cenpy.products.ACS(year=ACS_YEAR)

In [3]:
# B02001_001E = Total population
# B02001_002E = White
# B02001_003E = Black or African American
# B02001_004E = American Indian and Alaska Native
# B02001_005E = Asian
# B02001_006E = Native Hawaiian and other Pacific Islander
# B02001_007E = Some other race

# S0101_C01_002E = Age under 5 years
# S0101_C01_003E = Age 5-9 years
# S0101_C01_004E = Age 10-14 years
# S0101_C01_005E = Age 15-19 years
# S0101_C01_006E = Age 20-24 years
# S0101_C01_007E = Age 25-29 years
# S0101_C01_008E = Age 30-34 years
# S0101_C01_009E = Age 35-39 years
# S0101_C01_010E = Age 40-44 years
# S0101_C01_011E = Age 45-49 years
# S0101_C01_012E = Age 50-54 years
# S0101_C01_013E = Age 55-59 years
# S0101_C01_014E = Age 60-64 years
# S0101_C01_015E = Age 65-69 years
# S0101_C01_016E = Age 70-74 years
# S0101_C01_017E = Age 75-79 years
# S0101_C01_018E = Age 80-84 years
# S0101_C01_019E = Age 85+ years

# S1501_C01_005E = Bachelor's degree or higher (18-24 years)
# S1501_C01_015E = Bachelor's degree or higher (25+ years)

# S1701_C02_001E = Population below poverty level
# S1901_C01_012E = Median household income
# S2301_C04_001E = Unemployment rate

# S2701_C04_001E = Uninsured

In [4]:
# Race table B02001: estimate columns _001E (total population) through _007E
# (some other race), generated from the field number rather than typed out.
cols = [f"B02001_{field:03d}E" for field in range(1, 8)]

In [5]:
# Fetch the requested B02001 columns for every Maryland census tract; the
# result also carries the tract geometries used later for area and export.
maryland = acs.from_state(
    "Maryland",
    level="tract",
    variables=cols,
)

  return _prepare_from_string(" ".join(pjargs))


In [6]:
# Parse the tract identifier as text while reading. Casting after the fact
# (int -> str) drops leading zeros from GEOIDs and would produce float-formatted
# ids if the column ever contained a missing value.
# header=1: the column names are taken from the second line of the file.
data = pd.read_csv("data/ACS/MD_2018ACS.csv", header=1, dtype={"id": str})
data

Unnamed: 0,id,Age0-5,Age5-9,Age10-14,Age15-19,Age20-24,Age25-29,Age30-34,Age35-39,Age40-44,...,Age70-74,Age75-79,Age80-84,Age85+,Bachelor+ (18-24),Bachelor+ (25+),Below poverty level,Median household income,Unemployment rate,Uninsured
0,24001000100,139,114,207,367,183,94,140,205,224,...,261,190,57,49,29,379,534,44083.0,7.4,266
1,24001000200,65,111,97,187,268,236,369,265,339,...,98,83,113,55,0,335,190,52961.0,5.3,136
2,24001000300,220,160,162,132,142,120,130,218,97,...,65,116,112,148,21,335,433,43194.0,16.3,146
3,24001000400,116,221,224,142,158,121,110,166,237,...,107,150,121,60,23,336,529,43750.0,1.5,97
4,24001000500,148,90,20,276,248,128,117,83,128,...,108,91,68,57,10,199,640,26462.0,20.8,248
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1401,24510280401,268,139,144,254,126,152,255,258,131,...,129,139,106,31,23,734,456,46139.0,6.2,183
1402,24510280402,70,64,81,121,80,98,175,107,103,...,31,22,62,58,0,218,219,48276.0,13.1,79
1403,24510280403,509,162,259,539,438,560,151,405,239,...,109,137,92,53,41,948,294,62945.0,6.8,47
1404,24510280404,268,201,170,55,195,396,263,120,81,...,119,63,44,70,20,182,737,37292.0,9.6,177


In [7]:
# Attach the ACS subject-table columns to the tract geometries. An inner join
# (the default, spelled out here) keeps only tracts present in both tables.
md = maryland.merge(
    data,
    how="inner",
    left_on="GEOID",
    right_on="id",
)

In [8]:
# Total population is the denominator for every share computed below.
md["TotalPop"] = md["B02001_001E"]
total_pop = md["TotalPop"]

# Single-race groups expressed as a fraction of the tract population.
race_columns = {
    "White": "B02001_002E",
    "Black": "B02001_003E",
    "Native": "B02001_004E",
    "Asian": "B02001_005E",
}
for share_name, census_code in race_columns.items():
    md[share_name] = md[census_code] / total_pop

# Pacific Islander and "some other race" are pooled into one category.
md["Other"] = (md["B02001_006E"] + md["B02001_007E"]) / total_pop

# Density is people per squared CRS unit, because .area is measured in the
# frame's current coordinate system.
# NOTE(review): confirm md is in a projected, equal-area CRS before relying on
# the absolute values of PopDen.
md["PopDen"] = total_pop / md.area

In [9]:
# Collapse the five-year ACS age brackets into broader bands and express each
# band as a share of the tract population.
# NOTE(review): "Age55-9" does not follow the "Age55-59" pattern of its
# neighbours; confirm it matches the CSV header exactly before renaming it.
age_bands = {
    "Age0-19": ["Age0-5", "Age5-9", "Age10-14", "Age15-19"],
    "Age20-39": ["Age20-24", "Age25-29", "Age30-34", "Age35-39"],
    "Age40-59": ["Age40-44", "Age45-49", "Age50-54", "Age55-9"],
    "Age60-79": ["Age60-64", "Age65-69", "Age70-74", "Age75-79"],
    "Age80+": ["Age80-84", "Age85+"],
}
for band, brackets in age_bands.items():
    first, *rest = brackets
    # Add the brackets left to right, exactly as a chained "+" would.
    band_total = md[first]
    for bracket in rest:
        band_total = band_total + md[bracket]
    md[band] = band_total / md["TotalPop"]

In [10]:
# Count variables become shares of the tract's total population.
md["Bachelor+"] = (md["Bachelor+ (18-24)"] + md["Bachelor+ (25+)"]) / md["TotalPop"]
md["BelowPov"] = md["Below poverty level"] / md["TotalPop"]

# Median household income and the unemployment rate are already per-tract
# summary statistics rather than head counts, so they are carried over as-is;
# dividing them by the population produced values with no interpretation.
md["MHI"] = md["Median household income"]
md["UnempRt"] = md["Unemployment rate"]

# Keep the raw uninsured count under its own name so that re-running this cell
# does not divide an already-normalised share by the population a second time.
if "UninsuredCount" not in md.columns:
    md["UninsuredCount"] = md["Uninsured"]
md["Uninsured"] = md["UninsuredCount"] / md["TotalPop"]

In [11]:
# Columns written to the tract-level shapefile. Every attribute name is at most
# 10 characters long, which is the field-name limit of the shapefile format.
export_columns = [
    "GEOID",
    "TotalPop",
    "White",
    "Black",
    "Native",
    "Asian",
    "Other",
    "PopDen",
    "Age0-19",
    "Age20-39",
    "Age40-59",
    "Age60-79",
    "Age80+",
    "Bachelor+",
    "BelowPov",
    "MHI",
    "UnempRt",
    "Uninsured",
    "geometry",
]
md[export_columns].to_file("data/shp/MD_tract_census.shp")

### Cases data

In [12]:
# Case counts per ZIP code (one "total<date>" column per reporting date);
# the first line of the file holds the column names.
cases = pd.read_csv(
    "data/COVID-19/MDCOVID19_MASTER_ZIP_CODE_CASES.csv",
    header=0,
)

In [13]:
# ZIP -> census tract crosswalk. TRACT is parsed as text up front: an
# int -> str round trip strips leading zeros from tract GEOIDs and yields
# float-formatted ids if any value in the column is missing.
cross = pd.read_csv("data/COVID-19/ZIP_TRACT.csv", header=0, dtype={"TRACT": str})

In [14]:
# Pair every ZIP/tract crosswalk row with that ZIP code's case totals; ZIP
# codes missing from either table are dropped by the inner join.
cases = cross[["ZIP", "TRACT", "TOT_RATIO"]].merge(
    cases[["ZIP_CODE", "total06_30_2020", "total08_31_2020"]],
    how="inner",
    left_on="ZIP",
    right_on="ZIP_CODE",
)

In [15]:
# Apportion each ZIP code's counts to its tracts using the crosswalk ratio.
zip_share = cases["TOT_RATIO"]
cases["case0630"] = zip_share * cases["total06_30_2020"]

# case0831 holds the 08/31 allocation minus the 06/30 allocation, i.e. only
# the difference between the two reporting dates.
through_0831 = zip_share * cases["total08_31_2020"]
cases["case0831"] = through_0831 - cases["case0630"]

In [16]:
# Add up the ZIP-level allocations that land in the same tract and turn the
# TRACT group key back into an ordinary column.
cases = (
    cases.groupby("TRACT")[["case0630", "case0831"]]
    .sum()
    .reset_index()
)

In [17]:
# Reload the tract-level census layer that was written to disk earlier in
# this notebook.
df = gpd.read_file(
    "data/shp/MD_tract_census.shp"
)

In [18]:
# Left join keeps every tract; tracts without a match in the case table get
# NaN counts. The redundant key column from the right-hand table is dropped.
merged = df.merge(cases, how="left", left_on="GEOID", right_on="TRACT")
df = merged.drop(columns=["TRACT"])

In [19]:
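# Optional step, currently disabled: convert the tract-level case counts into
# infection rates (cases per resident). Uncomment the two lines below to apply.
# df["case0630"] = df["case0630"] / df["TotalPop"]
# df["case0831"] = df["case0831"] / df["TotalPop"]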

In [20]:
# Write the combined census + case layer to disk as a shapefile.
df.to_file(
    "data/shp/MD_df.shp"
)