# Localized Homicide Clearance Rates, 1995-present


In [2]:
import os
import sqlite3
import pandas as pd


## Data Import

In [3]:
def should_chdir():
    """checks whether the code is being run interactively or from command line"""
    # if both these directories exist, treat is as the correct directory
    dirs = ["input", "output"]
    if all(os.path.exists(d) for d in dirs):
        return False

    try:
        shell = get_ipython().__class__.__name__  # type: ignore
        if shell == "ZMQInteractiveShell":
            return True
        elif shell == "TerminalInteractiveShell":
            return False
        else:
            return False
    except NameError:
        return False


# step backwards if notebook is being run interactively
# or stay put if being run from command line
if should_chdir():
    os.chdir("..")


In [4]:
with sqlite3.connect("input/sqlite__temp.db") as con:
    agencies = pd.read_sql("SELECT * FROM data_agencies;", con)
    gb_cols = "ori_code, agency_name, card, year"
    reta = pd.read_sql(
        f"""
        SELECT {gb_cols}, SUM(value) as total
        FROM data_reta
        GROUP BY {gb_cols};
    """,
        con,
    )


### Merging geographies

In [5]:
df = reta.merge(
    agencies,
    how="left",
    left_on=["ori_code", "year"],
    right_on=["ori", "data_year"],
)


In [34]:
# FDLK24000 is for federal agencies, which all use the same ORI and won't be included here
df = df[df.ori_code != "FDLK24000"]

# TODO: delete this once you integrate it into the pipeline in transform task
# df[df.data_year == 2017].sort_values("total", ascending=False)
df = df[df.unique_id != "8735674e78d06fe3b1ea599b5d2ef22a018a6fb1"]

There are a number of records in which there are multiple agencies associated with the same ORI. This includes larger agencies like the Las Vegas Police Department, which I want to include in this analysis. 

In [35]:
def get_dupe_oris(d):
    """
    returns a dataframe with the ori codes and years for
    which there are multiple agency names with the same ORI code
    """
    return (
        d.groupby(["ori_code", "year"])
        .agency_name.nunique()
        .to_frame("n")
        .query("n > 1")
        .sort_values("n", ascending=False)
    )


dupe_oris = get_dupe_oris(df)
dupe_oris


Unnamed: 0_level_0,Unnamed: 1_level_0,n
ori_code,year,Unnamed: 2_level_1
CA0340200,2017,2
NJ0040700,2019,2
OK0160100,2017,2
NY0300100,2020,2
NY0300100,2019,2
...,...,...
KY0340100,2017,2
KS0890400,2020,2
KS0890400,2019,2
KS0890400,2018,2


## Analysis

In [37]:
def get_clearance_rate(crosstab_df):
    for year in df.year.unique().tolist():
        crosstab_df[(year, "clearance_rate")] = (
            crosstab_df[(year, "cleared_arrest")] / crosstab_df[(year, "actual")]
        )
    crosstab_df = crosstab_df.sort_index(axis=1, level="year", sort_remaining=False)
    return crosstab_df


def get5yravg_vs_2020(clearance_rate_df):
    cr_5yravg = clearance_rate_df[
        [(yr, "clearance_rate") for yr in range(2015, 2021)]
    ].droplevel(1, axis=1)
    cr_5yravg["5_year_avg"] = cr_5yravg.iloc[:, :-1].mean(axis=1)
    cr_5yravg = cr_5yravg[["5_year_avg", 2020]]
    cr_5yravg["change"] = (cr_5yravg[2020] - cr_5yravg["5_year_avg"]) / cr_5yravg[
        "5_year_avg"
    ]
    return cr_5yravg


### National

In [89]:
pd.crosstab(
    index=df.year,
    columns=df.card,
    values=df.total,
    aggfunc=sum,
).assign(clearance_rate=lambda df: df.cleared_arrest / df.actual)


card,actual,cleared_arrest,clearance_rate
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1995,20365,11766,0.577756
1996,17977,10531,0.585804
1997,16930,10286,0.607561
1998,15749,9866,0.626452
1999,14361,8991,0.626071
2000,14728,8598,0.583786
2001,15316,8746,0.571037
2002,15617,9099,0.582634
2003,15814,8849,0.559567
2004,15607,8774,0.562184


### By state

In [93]:
(
    pd.crosstab(
        index=[df.year, df.state_abbr],
        columns=df.card,
        values=df.total,
        aggfunc=sum,
    )
    .assign(clearance_rate=lambda df: df.cleared_arrest / df.actual)
    .stack()
    .to_frame("year")
    .unstack("state_abbr")
    .transpose()
    .droplevel(0)
)


year,1995,1995,1995,1996,1996,1996,1997,1997,1997,1998,...,2017,2018,2018,2018,2019,2019,2019,2020,2020,2020
card,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate,actual,...,clearance_rate,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate
state_abbr,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AK,53.0,39.0,0.735849,43.0,33.0,0.767442,48.0,43.0,0.895833,36.0,...,0.887097,47.0,36.0,0.765957,69.0,51.0,0.73913,48.0,42.0,0.875
AL,466.0,127.0,0.272532,438.0,106.0,0.242009,413.0,155.0,0.375303,344.0,...,0.539043,361.0,175.0,0.484765,128.0,75.0,0.585938,218.0,107.0,0.490826
AR,259.0,203.0,0.783784,218.0,181.0,0.830275,247.0,204.0,0.825911,201.0,...,0.574899,227.0,137.0,0.603524,231.0,171.0,0.74026,308.0,176.0,0.571429
AS,,,,,,,,,,,...,,0.0,0.0,,0.0,0.0,,0.0,0.0,
AZ,429.0,254.0,0.592075,368.0,217.0,0.589674,361.0,213.0,0.590028,364.0,...,0.63357,378.0,262.0,0.693122,347.0,218.0,0.628242,470.0,335.0,0.712766
CA,3324.0,1819.0,0.547232,2851.0,1695.0,0.594528,2525.0,1453.0,0.575446,2115.0,...,0.619639,1684.0,1077.0,0.639549,1602.0,1038.0,0.64794,2123.0,1242.0,0.585021
CO,212.0,160.0,0.754717,176.0,121.0,0.6875,181.0,83.0,0.458564,172.0,...,0.721461,213.0,164.0,0.769953,212.0,141.0,0.665094,293.0,195.0,0.665529
CT,150.0,121.0,0.806667,158.0,115.0,0.727848,124.0,97.0,0.782258,135.0,...,0.628571,86.0,52.0,0.604651,104.0,62.0,0.596154,140.0,80.0,0.571429
CZ,0.0,0.0,,0.0,0.0,,0.0,0.0,,0.0,...,,0.0,0.0,,0.0,0.0,,0.0,0.0,
DC,361.0,137.0,0.379501,397.0,95.0,0.239295,301.0,76.0,0.252492,260.0,...,0.706897,160.0,106.0,0.6625,166.0,113.0,0.680723,201.0,138.0,0.686567


### By MSA

In [91]:
msa = get_clearance_rate(
    pd.crosstab(
        index=[df.msa_name],
        columns=[df.year, df.card],
        values=df.total,
        aggfunc=sum,
    )
)

msa


year,1995,1995,1995,1996,1996,1996,1997,1997,1997,1998,...,2017,2018,2018,2018,2019,2019,2019,2020,2020,2020
card,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate,actual,...,clearance_rate,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate
msa_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
"Abilene, TX",5.0,2.0,0.400000,8.0,6.0,0.750000,2.0,2.0,1.000000,5.0,...,0.857143,9.0,7.0,0.777778,6.0,5.0,0.833333,6.0,5.0,0.833333
"Aguadilla-Isabela, PR",,,,,,,,,,,...,,0.0,0.0,,0.0,0.0,,0.0,0.0,
"Akron, OH",22.0,18.0,0.818182,20.0,3.0,0.150000,4.0,3.0,0.750000,1.0,...,0.660377,44.0,28.0,0.636364,33.0,23.0,0.696970,55.0,40.0,0.727273
"Albany, GA",16.0,15.0,0.937500,3.0,3.0,1.000000,10.0,8.0,0.800000,10.0,...,0.913043,17.0,14.0,0.823529,12.0,9.0,0.750000,23.0,13.0,0.565217
"Albany-Lebanon, OR",,,,,,,,,,,...,0.500000,1.0,2.0,2.000000,5.0,3.0,0.600000,5.0,2.0,0.400000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Yakima, WA",13.0,8.0,0.615385,16.0,12.0,0.750000,15.0,10.0,0.666667,10.0,...,0.380952,25.0,15.0,0.600000,19.0,10.0,0.526316,21.0,16.0,0.761905
"York-Hanover, PA",9.0,8.0,0.888889,5.0,4.0,0.800000,4.0,3.0,0.750000,11.0,...,0.521739,21.0,7.0,0.333333,19.0,7.0,0.368421,4.0,0.0,0.000000
"Youngstown-Warren-Boardman, OH-PA",72.0,0.0,0.000000,62.0,16.0,0.258065,46.0,0.0,0.000000,50.0,...,0.292683,33.0,6.0,0.181818,18.0,4.0,0.222222,18.0,6.0,0.333333
"Yuba City, CA",6.0,5.0,0.833333,7.0,3.0,0.428571,10.0,9.0,0.900000,7.0,...,1.000000,8.0,6.0,0.750000,11.0,10.0,0.909091,7.0,6.0,0.857143


In [92]:
msa_5yravg = get5yravg_vs_2020(msa)
msa_5yravg


year,5_year_avg,2020,change
msa_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Abilene, TX",0.911833,0.833333,-0.086090
"Aguadilla-Isabela, PR",,,
"Akron, OH",0.616586,0.727273,0.179516
"Albany, GA",0.845799,0.565217,-0.331736
"Albany-Lebanon, OR",1.025000,0.400000,-0.609756
...,...,...,...
"Yakima, WA",0.549073,0.761905,0.387621
"York-Hanover, PA",0.509012,0.000000,-1.000000
"Youngstown-Warren-Boardman, OH-PA",0.232576,0.333333,0.433224
"Yuba City, CA",0.770280,0.857143,0.112768


### By agency

In [40]:
agency = get_clearance_rate(
    pd.crosstab(
        index=[df.ori_code, df.agency_name],
        columns=[df.year, df.card],
        values=df.total,
        aggfunc=sum,
    )
)
agency


Unnamed: 0_level_0,year,1995,1995,1995,1996,1996,1996,1997,1997,1997,1998,...,2017,2018,2018,2018,2019,2019,2019,2020,2020,2020
Unnamed: 0_level_1,card,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate,actual,...,clearance_rate,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate
ori_code,agency_name,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
AK0010100,ANCHORAGE,29.0,19.0,0.655172,25.0,16.0,0.64,23.0,21.0,0.913043,19.0,...,1.037037,26.0,14.0,0.538462,32.0,21.0,0.656250,18.0,15.0,0.833333
AK0010200,FAIRBANKS,2.0,2.0,1.000000,3.0,3.0,1.00,6.0,5.0,0.833333,4.0,...,0.600000,4.0,8.0,2.000000,3.0,2.0,0.666667,3.0,3.0,1.000000
AK0010300,JUNEAU,0.0,0.0,,0.0,0.0,,0.0,0.0,,0.0,...,3.000000,1.0,1.0,1.000000,3.0,2.0,0.666667,1.0,2.0,2.000000
AK0010400,KETCHIKAN,0.0,0.0,,0.0,0.0,,0.0,0.0,,0.0,...,1.000000,1.0,1.0,1.000000,0.0,0.0,,1.0,1.0,1.000000
AK0010500,KODIAK,0.0,0.0,,0.0,0.0,,0.0,0.0,,0.0,...,,0.0,0.0,,0.0,0.0,,0.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WYDI00600,BIGHORN CANYON NRA,,,,,,,,,,,...,,0.0,0.0,,0.0,0.0,,0.0,0.0,
WYDI05000,WIND RIVER AGENCY,,,,,,,,,,,...,1.000000,1.0,0.0,0.000000,1.0,1.0,1.000000,0.0,0.0,
WYDI05000,WIND RIVER TRIBAL,,,,,,,,,,,...,,,,,,,,,,
WYWHP0000,WYOMING HIGHWAY PATROL,0.0,0.0,,0.0,0.0,,0.0,0.0,,0.0,...,,0.0,0.0,,0.0,0.0,,0.0,0.0,


In [41]:
agency_5yravg = get5yravg_vs_2020(agency)
agency_5yravg


Unnamed: 0_level_0,year,5_year_avg,2020,change
ori_code,agency_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AK0010100,ANCHORAGE,0.750745,0.833333,0.110008
AK0010200,FAIRBANKS,0.910476,1.000000,0.098326
AK0010300,JUNEAU,inf,2.000000,
AK0010400,KETCHIKAN,1.000000,1.000000,0.000000
AK0010500,KODIAK,1.000000,,
...,...,...,...,...
WYDI00600,BIGHORN CANYON NRA,,,
WYDI05000,WIND RIVER AGENCY,0.750000,,
WYDI05000,WIND RIVER TRIBAL,,,
WYWHP0000,WYOMING HIGHWAY PATROL,,,


### Selecting only "core" agencies

In [42]:
AGENCIES = [
    "CA0194200",
    "CA0380100",
    "CA0340400",
    "TXDPD0000",
    "TX2201200",
    "TXHPD0000",
    "FL0130600",
    "CODPD0000",
    "PAPEP0000",
    "PAPPD0000",
    "NY0303000",
    "MN0271100",
    "MI8234900",
    "MA0130100",
    "MDBPD0000",
]


In [43]:
agency_subset = agency[agency.index.get_level_values("ori_code").isin(AGENCIES)].copy()
agency_subset


Unnamed: 0_level_0,year,1995,1995,1995,1996,1996,1996,1997,1997,1997,1998,...,2017,2018,2018,2018,2019,2019,2019,2020,2020,2020
Unnamed: 0_level_1,card,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate,actual,...,clearance_rate,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate,actual,cleared_arrest,clearance_rate
ori_code,agency_name,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
CA0194200,LOS ANGELES,849.0,482.0,0.567727,711.0,408.0,0.57384,576.0,341.0,0.592014,426.0,...,0.740214,258.0,197.0,0.763566,258.0,197.0,0.763566,351.0,193.0,0.549858
CA0340400,SACRAMENTO,57.0,41.0,0.719298,43.0,46.0,1.069767,41.0,30.0,0.731707,31.0,...,0.538462,36.0,21.0,0.583333,34.0,30.0,0.882353,42.0,30.0,0.714286
CA0380100,SAN FRANCISCO,99.0,25.0,0.252525,82.0,47.0,0.573171,59.0,46.0,0.779661,58.0,...,0.482143,46.0,44.0,0.956522,40.0,26.0,0.65,48.0,36.0,0.75
CODPD0000,DENVER,81.0,63.0,0.777778,64.0,40.0,0.625,69.0,36.0,0.521739,51.0,...,0.684211,65.0,49.0,0.753846,67.0,50.0,0.746269,97.0,63.0,0.649485
FL0130600,MIAMI,110.0,40.0,0.363636,124.0,0.0,0.0,98.0,0.0,0.0,86.0,...,0.403846,27.0,19.0,0.703704,43.0,28.0,0.651163,61.0,27.0,0.442623
MA0130100,BOSTON,96.0,46.0,0.479167,59.0,37.0,0.627119,43.0,20.0,0.465116,34.0,...,0.438596,56.0,28.0,0.5,42.0,13.0,0.309524,58.0,17.0,0.293103
MDBPD0000,BALTIMORE,325.0,149.0,0.458462,330.0,205.0,0.621212,312.0,170.0,0.544872,313.0,...,0.517544,309.0,131.0,0.423948,348.0,108.0,0.310345,299.0,141.0,0.471572
MI8234900,DETROIT,475.0,0.0,0.0,428.0,251.0,0.586449,469.0,204.0,0.434968,430.0,...,0.518797,263.0,136.0,0.51711,275.0,141.0,0.512727,328.0,139.0,0.42378
MN0271100,MINNEAPOLIS,96.0,0.0,0.0,83.0,0.0,0.0,58.0,11.0,0.189655,58.0,...,0.666667,31.0,13.0,0.419355,46.0,19.0,0.413043,79.0,30.0,0.379747
NY0303000,NEW YORK,1177.0,344.0,0.292268,983.0,634.0,0.644964,770.0,676.0,0.877922,633.0,...,0.917808,295.0,256.0,0.867797,319.0,275.0,0.862069,468.0,125.0,0.267094


In [84]:
agency_5yravg[agency_5yravg.index.get_level_values("ori_code").isin(AGENCIES)]


Unnamed: 0_level_0,year,5_year_avg,2020,change
ori_code,agency_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA0194200,LOS ANGELES,0.736397,0.549858,-0.253314
CA0340400,SACRAMENTO,0.58075,0.714286,0.229936
CA0380100,SAN FRANCISCO,0.656727,0.75,0.142028
CODPD0000,DENVER,0.692525,0.649485,-0.062151
FL0130600,MIAMI,0.600712,0.442623,-0.26317
MA0130100,BOSTON,0.494952,0.293103,-0.407814
MDBPD0000,BALTIMORE,0.387028,0.471572,0.218443
MI8234900,DETROIT,0.409912,0.42378,0.033834
MN0271100,MINNEAPOLIS,0.510512,0.379747,-0.256145
NY0303000,NEW YORK,0.803269,0.267094,-0.667491
