In [1]:
import pandas
import numpy
from functools import reduce

# Silence pandas' chained-assignment (SettingWithCopy) warning for this notebook.
pandas.options.mode.chained_assignment = None

# Load every input table, keyed by the short name that is later used as the
# column suffix. index_col=False stops pandas from using the first CSV column
# as the row index.
data = {
    key: pandas.read_csv(path, index_col=False)
    for key, path in {
        "circ": "input/Circulatory Mortality.csv",
        "resp": "input/Respiratory Mortality.csv",
        "all": "input/All Cause Mortality.csv",
        "circ_est": "input/Circulatory Mortality with estimates.csv",
        "resp_est": "input/Respiratory Mortality with estimates.csv",
        "all_est": "input/All Cause Mortality with estimates.csv",
    }.items()
}


def convert_ids_to_int(df):
    """
    Replace the "State ANSI" / "County ANSI" columns with numeric
    "STATEFP" / "COUNTYFP" columns.

    Rows whose "State ANSI" value is missing are dropped first. The remaining
    identifiers are parsed with ``pandas.to_numeric`` and downcast to the
    smallest integer dtype that can hold them (a column that still contains
    missing values cannot be downcast and stays floating point).

    The input frame is not modified; a new DataFrame is returned.
    """
    # Take an explicit copy of the filtered rows so the column assignments
    # below act on an independent frame and do not depend on the
    # chained-assignment warning being disabled globally.
    df = df[df["State ANSI"].notnull()].copy()
    df["STATEFP"] = pandas.to_numeric(df["State ANSI"], downcast="integer")
    df["COUNTYFP"] = pandas.to_numeric(df["County ANSI"], downcast="integer")
    return df.drop(columns=["State ANSI", "County ANSI"])


def calculate_mortality(df):
    """
    Add "mort_0-5", "mort_5-25" and "mort_25+" columns to the table.

    We calculate mortality in terms of deaths per 1,000 people.
    It's easier to read than deaths per person (because that's such a small number).

    Two input layouts are handled:

    * If all three "Percent Deaths <age>" columns are present, they are
      renamed to "mort_<age>" and multiplied by 1,000.
      NOTE(review): this treats the "Percent Deaths" values as per-person
      fractions rather than 0-100 percentages -- confirm against the inputs.
    * Otherwise mortality is computed as
      ``deaths_<age> * 1000 / Population_<age>``; a population of 0 is
      treated as missing so the result is NaN instead of a division by zero.

    The input frame is never modified; a new DataFrame is returned.
    """
    age_groups = ("0-5", "5-25", "25+")
    percent_columns = {
        f"Percent Deaths {age}": f"mort_{age}" for age in age_groups
    }

    if all(column in df for column in percent_columns):
        # rename() already returns a new frame, so scaling it is safe.
        df = df.rename(columns=percent_columns)
        for mort_column in percent_columns.values():
            df[mort_column] = df[mort_column] * 1000
        return df

    # Work on a copy so the caller's frame does not silently gain columns.
    df = df.copy()
    for age in age_groups:
        population = df[f"Population_{age}"].replace({0: numpy.nan})
        df[f"mort_{age}"] = df[f"deaths_{age}"] * 1_000 / population
    return df


def append_suffix(df, suffix):
    """
    Return a copy of ``df`` in which every death-count and mortality column
    ("deaths_<age>" / "mort_<age>") is renamed to "<column>_<suffix>".

    Columns that are absent are skipped; all other columns keep their names.
    """
    measures = ("deaths", "mort")
    age_groups = ("0-5", "25+", "5-25")
    base_names = [
        measure + "_" + age for measure in measures for age in age_groups
    ]
    renames = {name: name + "_" + suffix for name in base_names}
    return df.rename(columns=renames)


# Maps the capitalised "Deaths_<age>" headers onto the lowercase
# "deaths_<age>" names that the mortality calculation and suffixing expect.
rename_map = {
    "Deaths_" + age: "deaths_" + age for age in ("0-5", "25+", "5-25")
}


def clean(df, key):
    """
    Run one raw input table through the full cleaning pipeline.

    Steps, in order: convert the ANSI identifiers to STATEFP / COUNTYFP,
    normalise the death-count column names, compute the mortality columns,
    tag the death-count and mortality columns with ``key`` as a suffix,
    turn every 0 in the table into NaN, and round to 6 decimal places.
    """
    return (
        df.pipe(convert_ids_to_int)
        .rename(columns=rename_map)
        .pipe(calculate_mortality)
        .pipe(append_suffix, key)
        # Applies to the whole frame, not only the mortality columns.
        .replace({0: numpy.nan})
        .round(6)
    )


# Run every raw table through the same cleaning steps; the dict key doubles as
# the suffix for that table's death-count and mortality columns.
data = {key: clean(df, key) for key, df in data.items()}

# Fold the cleaned tables into one wide frame, joining on the county
# identifiers and the population columns, which must be present in every
# table. pandas.merge defaults to an inner join, so a row survives only if its
# key combination appears in all of the tables.
data_merged = reduce(
    lambda left, right: pandas.merge(
        left,
        right,
        on=[
            "STATEFP",
            "COUNTYFP",
            "Population_0-5",
            "Population_25+",
            "Population_5-25",
        ],
    ),
    data.values(),
)

# index=False is the documented way to leave the row index out of the CSV
# (index=None only worked because None is falsy).
data_merged.to_csv("output/mortality.csv", index=False)
data_merged


Unnamed: 0,deaths_0-5_circ,deaths_25+_circ,deaths_5-25_circ,Population_0-5,Population_25+,Population_5-25,mort_0-5_circ,mort_25+_circ,mort_5-25_circ,STATEFP,...,deaths_5-25_resp_est,mort_0-5_resp_est,mort_25+_resp_est,mort_5-25_resp_est,deaths_0-5_all_est,deaths_25+_all_est,deaths_5-25_all_est,mort_0-5_all_est,mort_25+_all_est,mort_5-25_all_est
0,,157.0,,3191.0,37332.0,14893.0,,4.21,,1,...,,0.05,1.23,0.01,,510.0,,2.37,13.66,0.71
1,,631.0,,11660.0,147731.0,49172.0,,4.27,,1,...,,0.05,1.17,0.01,11.0,1933.0,23.0,1.20,13.08,0.47
2,,81.0,,1352.0,18326.0,6287.0,,4.42,,1,...,,0.05,4.22,0.01,,235.0,,2.37,12.82,0.71
3,,73.0,,1274.0,16121.0,5248.0,,4.53,,1,...,,0.05,0.70,0.01,,224.0,,2.37,13.89,0.71
4,,184.0,,3353.0,39867.0,14484.0,,4.62,,1,...,,0.05,2.58,0.01,,675.0,,2.37,16.93,0.71
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3134,,83.0,,3107.0,28510.0,12548.0,,2.91,,56,...,,0.04,0.55,0.01,,298.0,,1.16,10.45,0.45
3135,,,,1209.0,17488.0,4494.0,,,,56,...,,0.04,1.64,0.01,,74.0,,1.16,4.23,0.45
3136,,25.0,,1634.0,13119.0,6020.0,,1.91,,56,...,,0.04,1.64,0.01,,137.0,,1.16,10.44,0.45
3137,,,,474.0,5722.0,2039.0,,,,56,...,,0.04,1.64,0.01,,78.0,,1.16,13.63,0.45
