In [1]:
import altair as alt
import pandas as pd

# Data
Data sourced from the COVID Tracking Project's racial data tracker: https://covidtracking.com/race

In [2]:
# Load the per-state racial breakdown table. The file is tab-separated
# despite its .txt extension.
df = pd.read_csv(
    "./data/covid-racial-data.txt",
    delimiter="\t",
)

# Preview the first rows; percentages are stored as strings such as "26.80%".
df.head()

Unnamed: 0,state,black_population,black_positive_cases,black_deaths,hispanic_population,hispanic_positive_cases,hispanic_deaths,asian_population,asian_positive_cases,asian_deaths,...,indian_deaths,hawaiian_pop,hawaiian_positive_cases,hawaiian_deaths,multiple_population,multiple_positive cases,multiple_deaths,white_population,white_positive_cases,white_deaths
0,alabama,26.80%,43.51%,45.82%,4.40%,0.00%,0.00%,1.50%,0.85%,0.80%,...,0.00%,0.10%,0.00%,0.00%,1.70%,0.00%,0.00%,65.40%,52.38%,52.19%
1,alaska,3.80%,2.32%,-,7.20%,0.00%,-,6.60%,14.24%,-,...,-,1.40%,2.65%,-,7.40%,3.31%,-,60.30%,68.21%,-
2,arizona,5.10%,5.18%,3.83%,31.60%,26.04%,14.94%,3.70%,1.96%,1.53%,...,21.46%,0.30%,0.00%,0.00%,2.90%,0.00%,0.00%,54.40%,44.14%,56.70%
3,arkansas,15.70%,36.83%,34.48%,7.70%,4.49%,0.00%,1.70%,1.44%,0.00%,...,0.00%,0.40%,0.00%,1.72%,2.20%,0.47%,0.00%,72.20%,57.11%,62.07%
4,california,6.50%,6.51%,10.68%,39.30%,44.79%,33.81%,15.30%,12.25%,16.76%,...,0.40%,0.50%,0.23%,0.98%,3.90%,0.82%,0.29%,36.80%,27.45%,35.19%


In [3]:
# Racial groups, in the order used to sort the x-axis and the colour legend.
RACE = [
    "White",
    "Black",
    "Hispanic",
    "Asian",
    "Indian",
    "Hawaiian",
    "Multiple",
]

# One fill colour per entry of RACE, index-aligned: COLORS[i] paints RACE[i].
COLORS = [
    "#009E73",
    "#E69F00",
    "#CC79A7",
    "#0072B2",
    "#D55E00",
    "#F0E442",
    "#56B4E9",
]

# Single-colour palette holding only the first entry of COLORS.
COLOR = COLORS[:1]

In [4]:
# Reshape wide -> long: one row per (state, "<race>_<metric>" source column).
data = pd.melt(df, id_vars=["state"])

data["state"] = data["state"].str.upper()

# Percentages arrive as strings such as "26.80%", and "-" marks a value that
# was not reported. Strip the percent sign and convert to floats so the charts
# receive real numbers; anything non-numeric (the "-" placeholder) becomes NaN.
data["value"] = pd.to_numeric(
    data["value"].astype(str).str.replace("%", "", regex=False),
    errors="coerce",
)

# Source headers follow "<race>_<metric>[_...]", with two irregularities:
#   * "multiple_positive cases" uses a space instead of an underscore, which
#     would otherwise yield the metric "Positive cases" and be dropped by a
#     filter on "Positive";
#   * "hawaiian_pop" abbreviates "population", which would otherwise yield
#     "Pop" and be dropped by a filter on "Population".
# Normalise both so every race carries the same three metric labels.
name_parts = data["variable"].str.replace(" ", "_", regex=False).str.split("_")
data["race"] = name_parts.str[0].str.capitalize()
data["variable"] = (
    name_parts.str[1].str.capitalize().replace({"Pop": "Population"})
)

data.head()

Unnamed: 0,state,variable,value,race
0,ALABAMA,Population,26.8,Black
1,ALASKA,Population,3.8,Black
2,ARIZONA,Population,5.1,Black
3,ARKANSAS,Population,15.7,Black
4,CALIFORNIA,Population,6.5,Black


# Visualization

In [5]:
def theme(
    base,
    legend_orient="top-left"
):
    """Apply the notebook's shared styling to a chart.

    Each group of options below is handed to the matching ``configure_*``
    method of ``base``, in this order: view, title, axis, legend, concat,
    facet, header.

    Parameters
    ----------
    base
        Chart-like object exposing the ``configure_*`` methods used here.
    legend_orient : str, default "top-left"
        Forwarded as the legend's ``orient`` option.

    Returns
    -------
    The object returned by the final ``configure_header`` call.
    """
    # No stroke on the view, i.e. no outline around each plot area.
    view_options = {"stroke": None}

    title_options = {
        "fontSize": 20,
        "fontWeight": 500,
        "anchor": "start",
        "subtitlePadding": 0,
    }

    axis_options = {
        "labelFontSize": 12,
        "labelFontWeight": 300,
        "titleFontSize": 16,
        "titleFontWeight": 400,
        "titlePadding": 10,
    }

    legend_options = {
        "titleFontSize": 16,
        "titleFontWeight": 400,
        "labelFontSize": 14,
        "labelFontWeight": 300,
        "titleOrient": "left",
        "orient": legend_orient,
        "labelLimit": 300,
    }

    # Spacing between concatenated / faceted sub-charts.
    concat_options = {"spacing": 10}
    facet_options = {"spacing": 20}

    header_options = {
        "labelFontSize": 10,
        "labelFontWeight": 700,
    }

    styled = base.configure_view(**view_options)
    styled = styled.configure_title(**title_options)
    styled = styled.configure_axis(**axis_options)
    styled = styled.configure_legend(**legend_options)
    styled = styled.configure_concat(**concat_options)
    styled = styled.configure_facet(**facet_options)
    styled = styled.configure_header(**header_options)
    return styled

In [6]:
def plot(state, value):
    """Build the small panel for one state.

    The panel layers two marks over the rows of ``data`` for ``state``:
    bars for the rows whose ``variable`` equals ``value``, and black ticks
    for the rows whose ``variable`` equals "Population".

    Parameters
    ----------
    state
        Compared against the ``state`` field to select the rows to draw.
    value
        Compared against the ``variable`` field to select the bar rows
        (the notebook passes "Deaths" and "Positive").

    Returns
    -------
    The layered 90x90 chart, titled with ``state``.
    """

    def race_axis():
        # Races along x in RACE order; the axis itself is hidden.
        return alt.X("race:N", title=None, sort=RACE, axis=None)

    def percent_axis():
        # Fixed 0-100 scale so every panel is directly comparable.
        return alt.Y(
            "value:Q",
            title=None,
            scale=alt.Scale(domain=[0,100]),
            axis=alt.Axis(grid=True, labels=True, ticks=False, domain=False)
        )

    # The extra domain entry / "black" pair adds a legend item for the ticks.
    legend_colors = COLORS + ["black"]
    legend_labels = RACE + ["Entire Population of Races in State (%)"]

    state_rows = alt.Chart(data).transform_filter(
        alt.datum["state"] == state
    )

    metric_rows = state_rows.transform_filter(
        alt.datum["variable"] == value
    )
    bars = metric_rows.mark_bar(color=COLOR[0]).encode(
        x=race_axis(),
        y=percent_axis(),
        color=alt.Color(
            "race:N",
            scale=alt.Scale(range=legend_colors, domain=legend_labels),
            title=None,
            sort=RACE
        ),
    )

    population_rows = state_rows.transform_filter(
        alt.datum["variable"] == "Population"
    )
    ticks = population_rows.mark_tick(
        color="black", size=10, stroke="white", thickness=3
    ).encode(
        x=race_axis(),
        y=percent_axis()
    )

    panel_title = {
        "text": state,
        "fontSize": 10,
        "anchor": "middle",
        "fontWeight": 700
    }
    layered = bars + ticks
    return layered.properties(width=90, height=90, title=panel_title)

In [7]:
# Lay the per-state "Deaths" panels out as a grid with 8 panels per row.
states = data["state"].unique()
v = alt.vconcat()
for row_start in range(0, len(states), 8):
    # Each row is built from its own slice of states, so a state count that
    # is an exact multiple of 8 no longer appends an empty trailing row.
    h = alt.hconcat()
    for state in states[row_start:row_start + 8]:
        h |= plot(state, "Deaths")
    v &= h

# Apply the shared styling, then add the figure-level title and subtitle.
theme(v, legend_orient="top").properties(
    title={
        "text": "Distribution of Races (%) in COVID Deaths by State",
        "color": "#E2575A",
        "subtitle": "Data from https://covidtracking.com/race",
        "subtitleFontSize": 15,
        "subtitlePadding": 10,
        "subtitleColor": "#333",
        "dx": 20,
        "dy": -10
    }
)

In [8]:
# Lay the per-state "Positive" panels out as a grid with 8 panels per row.
states = data["state"].unique()
v = alt.vconcat()
for row_start in range(0, len(states), 8):
    # Each row is built from its own slice of states, so a state count that
    # is an exact multiple of 8 no longer appends an empty trailing row.
    h = alt.hconcat()
    for state in states[row_start:row_start + 8]:
        h |= plot(state, "Positive")
    v &= h

# Apply the shared styling, then add the figure-level title and subtitle.
theme(v, legend_orient="top").properties(
    title={
        "text": "Distribution of Races (%) in COVID Positive Cases by State",
        "color": "#F47E20",
        "subtitle": "Data from https://covidtracking.com/race",
        "subtitleFontSize": 15,
        "subtitlePadding": 10,
        "subtitleColor": "#333",
        "dx": 20,
        "dy": -10
    }
)