In [1]:
import numpy as np
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

from scipy import stats
import matplotlib.pyplot as plt

rng = np.random.default_rng(1)

In [2]:
data = pd.read_excel("homicide.xlsx").fillna(0).replace(" ", 0)
data["Year"], data["Victim Age"], data["Perpetrator Age"] = data["Year"].astype(int), data["Victim Age"].astype(int), data["Perpetrator Age"].astype(int)

In [3]:
data = data[~(data["Victim Age"] > 99) & ~(data["Victim Age"] < 1) & ~(data["Perpetrator Age"] > 99) & ~(data["Perpetrator Age"] < 1)]

Question 1

In [4]:
q1 = data[((data["Year"] == 2010) | (data["Year"] == 2011) | (data["Year"] == 2012)) & (data["State"] == "Maryland")]
q1

Unnamed: 0,Record ID,Agency Code,Agency Name,Agency Type,City,State,Year,Month,Incident,Crime Type,...,Victim Ethnicity,Perpetrator Sex,Perpetrator Age,Perpetrator Race,Perpetrator Ethnicity,Relationship,Weapon,Victim Count,Perpetrator Count,Record Source
571510,571511,MD00102,Cumberland,Municipal Police,Allegany,Maryland,2010,August,1,Murder or Manslaughter,...,Unknown,Male,46,Black,Unknown,Acquaintance,Blunt Object,0,0,FBI
571511,571512,MD00103,Frostburg,Municipal Police,Allegany,Maryland,2010,April,1,Murder or Manslaughter,...,Unknown,Male,21,Black,Unknown,Unknown,Shotgun,0,0,FBI
571512,571513,MD00110,Unknown,Municipal Police,Allegany,Maryland,2010,August,1,Murder or Manslaughter,...,Unknown,Male,42,White,Unknown,Brother,Knife,0,0,FBI
571513,571514,MD00201,Annapolis,Municipal Police,Anne Arundel,Maryland,2010,April,1,Murder or Manslaughter,...,Unknown,Female,38,Black,Unknown,Acquaintance,Knife,0,0,FBI
571514,571515,MD00201,Annapolis,Municipal Police,Anne Arundel,Maryland,2010,June,1,Murder or Manslaughter,...,Unknown,Male,17,Black,Unknown,Unknown,Blunt Object,0,0,FBI
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
616109,616110,MDBPD00,Baltimore,Municipal Police,Baltimore city,Maryland,2012,December,2,Murder or Manslaughter,...,Unknown,Female,32,Black,Unknown,Son,Knife,0,0,FBI
616110,616111,MDBPD00,Baltimore,Municipal Police,Baltimore city,Maryland,2012,December,3,Murder or Manslaughter,...,Unknown,Male,24,Black,Unknown,Acquaintance,Knife,0,0,FBI
616111,616112,MDBPD00,Baltimore,Municipal Police,Baltimore city,Maryland,2012,December,4,Murder or Manslaughter,...,Unknown,Female,19,Black,Unknown,Unknown,Unknown,0,0,FBI
616116,616117,MDBPD00,Baltimore,Municipal Police,Baltimore city,Maryland,2012,December,9,Murder or Manslaughter,...,Unknown,Male,31,Black,Unknown,Unknown,Handgun,0,1,FBI


In [12]:
fig = make_subplots(rows=1, cols=1)
fig.add_trace(go.Histogram(x=q1["Victim Age"], name = "Victim Age"), row=1, col=1)

fig.update_xaxes(title="Victim Age", row=1, col=1)
fig.update_yaxes(title="Number of Deaths", row=1, col=1)
fig.update_layout(title_text="Distribution of Victim Ages in Maryland from 2010-2012", width = 700, height = 400)

fig.show()

In [39]:
mean = np.mean(q1["Victim Age"])
se = np.std(q1["Victim Age"])/np.sqrt(len(q1["Victim Age"]))

print("Mean: " + str(np.round(mean, 3)) + ", Standard Error: " + str(np.round(se, 3)))
print("95% Confidence Interval: [" + str(np.round(mean - 1.984 * se, 3)) + ", " + str(np.round(mean + 1.984 * se, 3)) + "]")

Mean: 35.985, Standard Error: 0.688
95% Confidence Interval: [34.62, 37.351]


Question 2

In [26]:
q2 = data[(data["Year"] == 2010) & (data["State"] == "Maryland")]
q2p1 = q2["Victim Age"]
q2p2 = q2["Perpetrator Age"]
q2

Unnamed: 0,Record ID,Agency Code,Agency Name,Agency Type,City,State,Year,Month,Incident,Crime Type,...,Victim Ethnicity,Perpetrator Sex,Perpetrator Age,Perpetrator Race,Perpetrator Ethnicity,Relationship,Weapon,Victim Count,Perpetrator Count,Record Source
571510,571511,MD00102,Cumberland,Municipal Police,Allegany,Maryland,2010,August,1,Murder or Manslaughter,...,Unknown,Male,46,Black,Unknown,Acquaintance,Blunt Object,0,0,FBI
571511,571512,MD00103,Frostburg,Municipal Police,Allegany,Maryland,2010,April,1,Murder or Manslaughter,...,Unknown,Male,21,Black,Unknown,Unknown,Shotgun,0,0,FBI
571512,571513,MD00110,Unknown,Municipal Police,Allegany,Maryland,2010,August,1,Murder or Manslaughter,...,Unknown,Male,42,White,Unknown,Brother,Knife,0,0,FBI
571513,571514,MD00201,Annapolis,Municipal Police,Anne Arundel,Maryland,2010,April,1,Murder or Manslaughter,...,Unknown,Female,38,Black,Unknown,Acquaintance,Knife,0,0,FBI
571514,571515,MD00201,Annapolis,Municipal Police,Anne Arundel,Maryland,2010,June,1,Murder or Manslaughter,...,Unknown,Male,17,Black,Unknown,Unknown,Blunt Object,0,0,FBI
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
571934,571935,MDBPD00,Baltimore,Municipal Police,Baltimore city,Maryland,2010,December,1,Murder or Manslaughter,...,Unknown,Male,49,Black,Unknown,Acquaintance,Blunt Object,0,0,FBI
571942,571943,MDBPD00,Baltimore,Municipal Police,Baltimore city,Maryland,2010,December,9,Murder or Manslaughter,...,Unknown,Male,42,Black,Unknown,Acquaintance,Handgun,0,0,FBI
571946,571947,MDBPD00,Baltimore,Municipal Police,Baltimore city,Maryland,2010,December,13,Murder or Manslaughter,...,Unknown,Male,19,Black,Unknown,Unknown,Handgun,0,2,FBI
571947,571948,MDBPD00,Baltimore,Municipal Police,Baltimore city,Maryland,2010,December,14,Murder or Manslaughter,...,Unknown,Male,21,Black,Unknown,Stranger,Handgun,0,1,FBI


In [29]:
fig = make_subplots(rows=1, cols=2)
fig.add_trace(go.Histogram(x=q2p1, name = "Victim Age"), row=1, col=1)
fig.add_trace(go.Histogram(x=q2p2, name = "Perpetrator Age"), row=1, col=2)

fig.update_xaxes(title="Age", row=1, col=1)
fig.update_yaxes(title="Count", row=1, col=1)
fig.update_layout(title_text="Distribution of Victim and Perpetrator Ages in Maryland in 2010", width = 1200, height = 400)

fig.show()

In [38]:
mean1 = np.mean(q2p1)
mean2 = np.mean(q2p2)

se = np.sqrt(np.std(q2p1) ** 2/(len(q2p1)) + np.std(q2p2) ** 2/(len(q2p2)))

print("Victim Age Mean: " + str(np.round(mean1, 3)) + ", Perpetrator Age Mean: " + str(np.round(mean2, 3)) + ", Standard Error: " + str(np.round(se, 3)))
print("T-value: " + str(np.round((mean1-mean2)/se, 3)))

Victim Age Mean: 35.599, Perpetrator Age Mean: 30.387, Standard Error: 1.431
T-value: 3.642


Question 3

In [46]:
q3p1 = data[(data["Year"] == 2000) & (data["State"] == "Maryland")]
q3p2 = data[(data["Year"] == 2002) & (data["State"] == "Maryland")]

In [47]:
prop1 = len(q3p1[(q3p1["Perpetrator Race"] == "White") & (q3p1["Perpetrator Sex"] == "Male")])/len(q3p1)
prop2 = len(q3p2[(q3p2["Perpetrator Race"] == "White") & (q3p2["Perpetrator Sex"] == "Male")])/len(q3p2)

pprop  = (len(q3p1[(q3p1["Perpetrator Race"] == "White") & (q3p1["Perpetrator Sex"] == "Male")]) + len(q3p1[(q3p1["Perpetrator Race"] == "White") & (q3p1["Perpetrator Sex"] == "Male")]))/(len(q3p1) + len(q3p2))
se = (np.sqrt(pprop*(1-pprop)/len(q3p1)) + np.sqrt(pprop*(1-pprop)/len(q3p2)))

print("Before Proportion: " + str(np.round(prop1, 3)) + ", After Proportion: " + str(np.round(prop2, 3)) + ", Standard Error: " + str(np.round(se, 3)))
print("Z-value: " + str(np.round((prop1-prop2)/se, 3)))

Before Proportion: 0.214, After Proportion: 0.265, Standard Error: 0.053
Z-value: -0.962
