# US Murder Rate Analysis


In [1]:
import os
import sqlite3
import pandas as pd
from utils import nb_run_from_command


In [2]:
# Later cells use relative "input/..." and "output/..." paths, which resolve
# against the current working directory; step up one level when the helper
# says so.
# NOTE(review): nb_run_from_command comes from the local utils module;
# presumably it is True when the notebook is executed from the command line
# rather than interactively -- confirm against utils.
launched_from_cli = nb_run_from_command()
if launched_from_cli:
    os.chdir("..")


In [3]:
# Single reporting year shared by both queries so they cannot drift apart.
DATA_YEAR = 2020

# sqlite3's connection context manager only commits / rolls back the open
# transaction; it does NOT close the connection. Close it explicitly so the
# file handle is released even when a query fails.
con = sqlite3.connect("input/sqlite__temp.db")
try:
    # One row per agency for the reporting year, keyed by ORI so it can be
    # joined against the per-agency totals below.
    agencies = pd.read_sql(
        """
            SELECT ori, ucr_agency_name, state_abbr, population
            FROM data_agencies
            WHERE data_year = ?;
        """,
        con,
        params=(DATA_YEAR,),
        index_col=["ori"],
    )
    gb_cols = "ori_code, card, year"
    # Sum `value` per agency / card for the reporting year, then pivot so each
    # card becomes a column. The year filter lives in WHERE (not HAVING) so
    # rows from other years are discarded before grouping instead of being
    # aggregated first and thrown away afterwards.
    reta = (
        pd.read_sql(
            f"""
            SELECT {gb_cols}, sum(value) as total
            FROM data_reta
            WHERE year = ?
            GROUP BY {gb_cols};
        """,
            con,
            params=(DATA_YEAR,),
        )
        .pivot(index=["ori_code"], columns="card", values="total")
        # The clearance totals are not used by this analysis.
        .drop(columns="cleared_arrest")
    )
finally:
    con.close()


In [4]:
# Attach agency metadata to the per-agency totals, derive the rate both as a
# raw fraction and per 100,000 residents, keep only agencies with at least one
# recorded incident and a positive population, and rank from highest to
# lowest rate.
df = (
    reta.join(agencies)
    .assign(
        murder_rate=lambda joined: joined["actual"] / joined["population"],
        # Later keyword arguments to assign may reference columns created by
        # earlier ones in the same call.
        per_100k=lambda joined: joined["murder_rate"] * 100_000,
    )
    .loc[lambda joined: (joined["actual"] > 0) & (joined["population"] > 0)]
    .sort_values("murder_rate", ascending=False)
)
df


Unnamed: 0_level_0,actual,ucr_agency_name,state_abbr,population,murder_rate,per_100k
ori_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
IL0822200,1,SAUGET,IL,167.0,0.005988,598.802395
NC0800200,6,EAST SPENCER,NC,1552.0,0.003866,386.597938
SC0450100,1,HEMINGWAY,SC,388.0,0.002577,257.731959
NC0350400,1,BUNN,NC,390.0,0.002564,256.410256
NJ0171200,11,SALEM,NJ,4661.0,0.002360,236.000858
...,...,...,...,...,...,...
OR0030000,1,CLACKAMAS,OR,242246.0,0.000004,0.412804
OR0340000,1,WASHINGTON,OR,243892.0,0.000004,0.410018
CA0302600,1,IRVINE,CA,297069.0,0.000003,0.336622
VA0530000,1,LOUDOUN,VA,359411.0,0.000003,0.278233


In [5]:
# Make sure the destination directory exists so the export does not fail on a
# fresh checkout; exist_ok keeps re-runs of this cell harmless.
os.makedirs("output", exist_ok=True)
# Persist the ranked per-agency table (index included) for downstream use.
df.to_csv("output/murder_rate_by_agency.csv")
