In [81]:
import pandas
import numpy
from functools import reduce

# Disable pandas' chained-assignment (SettingWithCopy) warning for this session.
pandas.options.mode.chained_assignment = None

# Short key for each mortality table -> CSV file it is loaded from.
# The "_est" keys point at the "with estimates" variants of the same causes.
source_files = {
    "circ": "input/Circulatory Mortality.csv",
    "resp": "input/Respiratory Mortality.csv",
    "all": "input/All Cause Mortality.csv",
    "circ_est": "input/Circulatory Mortality with estimates.csv",
    "resp_est": "input/Respiratory Mortality with estimates.csv",
    "all_est": "input/All Cause Mortality with estimates.csv",
}

# Load every table; index_col=False stops pandas from using the first
# column as the index.
data = {
    key: pandas.read_csv(path, index_col=False)
    for key, path in source_files.items()
}


def convert_ids_to_int(df):
    """Replace the ANSI id columns with integer ``STATEFP`` / ``COUNTYFP``.

    Rows whose "State ANSI" is null are dropped. The remaining
    "State ANSI" and "County ANSI" values are converted with
    ``pandas.to_numeric(..., downcast="integer")`` and stored as
    ``STATEFP`` and ``COUNTYFP``; the two source columns are then removed.

    Args:
        df: DataFrame containing "State ANSI" and "County ANSI" columns.

    Returns:
        A new DataFrame; ``df`` itself is not modified.
    """
    # Take an explicit copy of the filtered rows so the column assignments
    # below never write into (or warn about) a selection of the caller's
    # frame, regardless of the global chained-assignment setting.
    with_state = df[df["State ANSI"].notnull()].copy()
    with_state["STATEFP"] = pandas.to_numeric(
        with_state["State ANSI"], downcast="integer"
    )
    with_state["COUNTYFP"] = pandas.to_numeric(
        with_state["County ANSI"], downcast="integer"
    )
    return with_state.drop(columns=["State ANSI", "County ANSI"])


def calculate_mortality(df):
    """Add per-age-band mortality columns to ``df`` and return it.

    For each age band (0-5, 5-25, 25+) the ``mort_<band>`` column is set to
    ``deaths_<band> / Population_<band>``. A population of 0 is treated as
    missing, so the resulting rate is NaN rather than a division by zero.

    Args:
        df: DataFrame with ``deaths_<band>`` and ``Population_<band>``
            columns for every band.

    Returns:
        The same DataFrame object, modified in place.
    """
    for band in ("0-5", "5-25", "25+"):
        death_counts = df["deaths_" + band]
        # Zero populations become NaN so the quotient is NaN.
        population = df["Population_" + band].replace({0: numpy.nan})
        df["mort_" + band] = death_counts / population
    return df


def remove_percent_deaths(df):
    """Drop the three "Percent Deaths" columns when all of them are present.

    If any one of the three columns is missing, ``df`` is returned
    unchanged (no partial drop is attempted).

    Args:
        df: DataFrame that may contain the "Percent Deaths ..." columns.

    Returns:
        A DataFrame without the three columns, or ``df`` itself.
    """
    percent_columns = [
        "Percent Deaths 0-5",
        "Percent Deaths 5-25",
        "Percent Deaths 25+",
    ]
    if not all(name in df for name in percent_columns):
        return df
    return df.drop(columns=percent_columns)

def remove_zeros(df):
    """Return ``df`` unchanged.

    Despite the name, this is currently a no-op: no zeros are removed here.
    """
    return df


def append_suffix(df, suffix):
    """Tag the death-count and mortality columns with ``_<suffix>``.

    Only the ``deaths_*`` and ``mort_*`` columns for the three age bands
    are renamed; every other column keeps its name.

    Args:
        df: DataFrame whose columns should be renamed.
        suffix: String appended (after an underscore) to each renamed column.

    Returns:
        A renamed copy of ``df``.
    """
    suffixed_columns = (
        "deaths_0-5",
        "deaths_25+",
        "deaths_5-25",
        "mort_0-5",
        "mort_5-25",
        "mort_25+",
    )
    renames = {name: name + "_" + suffix for name in suffixed_columns}
    return df.rename(columns=renames)


# Maps the capitalised ``Deaths_<band>`` column names to the lowercase
# ``deaths_<band>`` names used by the functions in this file.
rename_map = {
    "Deaths_" + band: "deaths_" + band for band in ("0-5", "25+", "5-25")
}


def clean(df, key):
    """Run one raw mortality table through the full cleaning pipeline.

    Steps, in order: convert the ANSI ids to integer ``STATEFP`` /
    ``COUNTYFP``, normalise the death-count column names via
    ``rename_map``, compute the mortality rates, drop the "Percent Deaths"
    columns, tag the deaths/mortality columns with ``key``, replace every
    0 in the frame with NaN, and round to 6 decimal places.

    Args:
        df: Raw table as loaded from CSV.
        key: Suffix identifying the table (passed to ``append_suffix``).

    Returns:
        The cleaned DataFrame.
    """
    prepared = (
        df.pipe(convert_ids_to_int)
        .rename(columns=rename_map)
        .pipe(calculate_mortality)
        .pipe(remove_percent_deaths)
        .pipe(append_suffix, key)
    )
    # The zero replacement applies to every column, not just the counts.
    return prepared.replace({0: numpy.nan}).round(6)


# Clean every loaded table, tagging its deaths/mortality columns with the
# table's key.
data = {key: clean(df, key) for key, df in data.items()}

# Columns each pair of tables is joined on.
merge_keys = [
    "STATEFP",
    "COUNTYFP",
    "Population_0-5",
    "Population_25+",
    "Population_5-25",
]


def merge_tables(left, right):
    """Join two cleaned tables on the county id and population columns.

    Uses ``pandas.merge`` with its default join type (inner), so only rows
    whose key values appear in both tables are kept.
    """
    return pandas.merge(left, right, on=merge_keys)


# Fold all cleaned tables into one wide table, left to right.
data_merged = reduce(merge_tables, data.values())

# ``index`` is documented as a bool; pass False explicitly (rather than
# relying on None being falsy) to leave the row index out of the CSV.
data_merged.to_csv("output/mortality.csv", index=False)
data_merged


Unnamed: 0,deaths_0-5_circ,deaths_25+_circ,deaths_5-25_circ,Population_0-5,Population_25+,Population_5-25,STATEFP,COUNTYFP,mort_0-5_circ,mort_5-25_circ,...,deaths_5-25_resp_est,mort_0-5_resp_est,mort_5-25_resp_est,mort_25+_resp_est,deaths_0-5_all_est,deaths_25+_all_est,deaths_5-25_all_est,mort_0-5_all_est,mort_5-25_all_est,mort_25+_all_est
0,,157.0,,3191.0,37332.0,14893.0,1,1,,,...,,,,0.001232,,510.0,,,,0.013661
1,,631.0,,11660.0,147731.0,49172.0,1,3,,,...,,,,0.001171,11.0,1933.0,23.0,0.000943,0.000468,0.013085
2,,81.0,,1352.0,18326.0,6287.0,1,5,,,...,,,,,,235.0,,,,0.012823
3,,73.0,,1274.0,16121.0,5248.0,1,7,,,...,,,,0.000806,,224.0,,,,0.013895
4,,184.0,,3353.0,39867.0,14484.0,1,9,,,...,,,,0.002584,,675.0,,,,0.016931
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3135,,83.0,,3107.0,28510.0,12548.0,56,37,,,...,,,,0.000421,,298.0,,,,0.010452
3136,,,,1209.0,17488.0,4494.0,56,39,,,...,,,,,,74.0,,,,0.004231
3137,,25.0,,1634.0,13119.0,6020.0,56,41,,,...,,,,,,137.0,,,,0.010443
3138,,,,474.0,5722.0,2039.0,56,43,,,...,,,,,,78.0,,,,0.013632
