## Independent t-tests (Welch's, unequal variances)

In [1]:
# Import Dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

# Read the combined crime CSV and discard the "Unnamed: 0" and
# "Classification" columns, which are not used below
combined = pd.read_csv("Resources/total_df.csv").drop(
    columns=["Unnamed: 0", "Classification"]
)

# Keep only the rows whose County value is "state"; ending the cell with
# the frame displays it
state_filtered = combined.loc[combined["County"] == "state"]
state_filtered

Unnamed: 0,Year,County,Total Population,Total Crime,Orig_DF
160,2012,state,6817770,301233,Violence + Murder
161,2013,state,6882400,324057,Violence + Murder
162,2014,state,6968170,349880,Violence + Murder
163,2015,state,7061410,362040,Violence + Murder
164,2016,state,7183700,415368,Violence + Murder
360,2012,state,6817770,148982,Robbery
361,2013,state,6882400,166443,Robbery
362,2014,state,6968170,186992,Robbery
363,2015,state,7061410,190404,Robbery
364,2016,state,7183700,220813,Robbery


## Overall Crimes

In [2]:
# Select the state-level rows for overall crimes and index them by year so
# the windows below can be sliced by year label.
# The frame returned by set_index is used instead of inplace=True so that a
# filtered selection of state_filtered is never mutated in place (in-place
# changes to a filtered frame may emit SettingWithCopyWarning).
overall_df = (
    state_filtered[state_filtered["Orig_DF"] == "Overall"]
    .set_index("Year")
)

# Welch's t test (equal_var=False) on overall crimes.
# .loc label slices include both endpoints: the "pre" window is 2012-2014
# and the "after" window is 2014-2016.
# NOTE(review): 2014 therefore appears in both samples, which conflicts with
# the independence assumption of ttest_ind -- confirm which window 2014
# belongs to before relying on this p-value.
overall_pre = overall_df["Total Crime"].loc[2012:2014]
overall_after = overall_df["Total Crime"].loc[2014:2016]

stats.ttest_ind(overall_pre, overall_after, equal_var=False)

Ttest_indResult(statistic=-2.042338232262239, pvalue=0.1172629749169466)

## Violence + Murder Related Crimes

In [3]:
# Select the state-level rows for violence + murder crimes and index them by
# year so the windows below can be sliced by year label.
# The frame returned by set_index is used instead of inplace=True so that a
# filtered selection of state_filtered is never mutated in place (in-place
# changes to a filtered frame may emit SettingWithCopyWarning).
viol_df = (
    state_filtered[state_filtered["Orig_DF"] == "Violence + Murder"]
    .set_index("Year")
)

# Welch's t test (equal_var=False) on violence + murder related crimes.
# .loc label slices include both endpoints: the "pre" window is 2012-2014
# and the "after" window is 2014-2016.
# NOTE(review): 2014 therefore appears in both samples, which conflicts with
# the independence assumption of ttest_ind -- confirm which window 2014
# belongs to before relying on this p-value.
viol_pre = viol_df["Total Crime"].loc[2012:2014]
viol_after = viol_df["Total Crime"].loc[2014:2016]

stats.ttest_ind(viol_pre, viol_after, equal_var=False)

Ttest_indResult(statistic=-2.0667450870816424, pvalue=0.11585096393346576)

## Robbery Related Crimes

In [4]:
# Select the state-level rows for robbery related crimes and index them by
# year so the windows below can be sliced by year label.
# The frame returned by set_index is used instead of inplace=True so that a
# filtered selection of state_filtered is never mutated in place (in-place
# changes to a filtered frame may emit SettingWithCopyWarning).
rob_df = (
    state_filtered[state_filtered["Orig_DF"] == "Robbery"]
    .set_index("Year")
)

# Welch's t test (equal_var=False) on robbery related crimes.
# .loc label slices include both endpoints: the "pre" window is 2012-2014
# and the "after" window is 2014-2016.
# NOTE(review): 2014 therefore appears in both samples, which conflicts with
# the independence assumption of ttest_ind -- confirm which window 2014
# belongs to before relying on this p-value.
rob_pre = rob_df["Total Crime"].loc[2012:2014]
rob_after = rob_df["Total Crime"].loc[2014:2016]

stats.ttest_ind(rob_pre, rob_after, equal_var=False)

Ttest_indResult(statistic=-2.0775028640323834, pvalue=0.10635536485330326)

## Property Related Crimes

In [5]:
# Select the state-level rows for property related crimes and index them by
# year so the windows below can be sliced by year label.
# The frame returned by set_index is used instead of inplace=True so that a
# filtered selection of state_filtered is never mutated in place (in-place
# changes to a filtered frame may emit SettingWithCopyWarning).
prop_df = (
    state_filtered[state_filtered["Orig_DF"] == "Properties"]
    .set_index("Year")
)

# Welch's t test (equal_var=False) on property related crimes.
# .loc label slices include both endpoints: the "pre" window is 2012-2014
# and the "after" window is 2014-2016.
# NOTE(review): 2014 therefore appears in both samples, which conflicts with
# the independence assumption of ttest_ind -- confirm which window 2014
# belongs to before relying on this p-value.
prop_pre = prop_df["Total Crime"].loc[2012:2014]
prop_after = prop_df["Total Crime"].loc[2014:2016]

stats.ttest_ind(prop_pre, prop_after, equal_var=False)

Ttest_indResult(statistic=-2.014294752520673, pvalue=0.12357132482811234)

## Drug Related Crimes

In [6]:
# Select the state-level rows for drug related crimes and index them by
# year so the windows below can be sliced by year label.
# The frame returned by set_index is used instead of inplace=True so that a
# filtered selection of state_filtered is never mutated in place (in-place
# changes to a filtered frame may emit SettingWithCopyWarning).
drug_df = (
    state_filtered[state_filtered["Orig_DF"] == "Drug"]
    .set_index("Year")
)

# Welch's t test (equal_var=False) on drug related crimes.
# .loc label slices include both endpoints: the "pre" window is 2012-2014
# and the "after" window is 2014-2016.
# NOTE(review): 2014 therefore appears in both samples, which conflicts with
# the independence assumption of ttest_ind -- confirm which window 2014
# belongs to before relying on this p-value.
drug_pre = drug_df["Total Crime"].loc[2012:2014]
drug_after = drug_df["Total Crime"].loc[2014:2016]

stats.ttest_ind(drug_pre, drug_after, equal_var=False)

Ttest_indResult(statistic=-1.104626934986356, pvalue=0.35841707947389007)

## Sexual Related Crimes

In [7]:
# Select the state-level rows for sexual related crimes and index them by
# year so the windows below can be sliced by year label.
# The frame returned by set_index is used instead of inplace=True so that a
# filtered selection of state_filtered is never mutated in place (in-place
# changes to a filtered frame may emit SettingWithCopyWarning).
sex_df = (
    state_filtered[state_filtered["Orig_DF"] == "Sexual"]
    .set_index("Year")
)

# Welch's t test (equal_var=False) on sexual related crimes.
# .loc label slices include both endpoints: the "pre" window is 2012-2014
# and the "after" window is 2014-2016.
# NOTE(review): 2014 therefore appears in both samples, which conflicts with
# the independence assumption of ttest_ind -- confirm which window 2014
# belongs to before relying on this p-value.
sex_pre = sex_df["Total Crime"].loc[2012:2014]
sex_after = sex_df["Total Crime"].loc[2014:2016]

stats.ttest_ind(sex_pre, sex_after, equal_var=False)

Ttest_indResult(statistic=-2.0018060269651468, pvalue=0.14351855153342058)