### Crime in the time of Corona - Modesto Data analysis of Domestic Violence data

In [33]:
## Add dependencies: Pandas
import pandas as pd
import os # needed to use the os.path.join method to load the files
from sqlalchemy import create_engine # for integrating with PostgreSQL
from config import db_password
import numpy as np
from scipy import stats
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt # for graphing with Matplotlib

#### Incidents: Load cleaned data and explore.

In [2]:
# Load the CSV Files into a dataframes.
all_incidents_df = pd.read_csv("../Resources/CleanedData/Cleaned_Incidents.csv")
all_incidents_df.head()

Unnamed: 0,CID,Number,dateReported,startDate,offenseCode,offenseDescription,streetAddress,cityDescription,stateDescription,zipCode,longitude,latitude,Booked,DAComplaint,Cited,burglaryFactor,felonyMisdemeanor,dateIncident,weekNumber
0,MP20000019 - 10851 VC,MP20000019,"Jan 1, 2020, 8:52:18 AM","Jan 1, 2020, 8:52:18 AM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,920 PINE TREE LN,MODESTO,CALIFORNIA,95351.0,-121.0225978,37.61682379,0,0,0,,FELONY,2020-01-01,7
1,MP20000040 - 10851 VC,MP20000040,"Jan 1, 2020, 2:11:00 PM","Jan 1, 2020, 11:25:00 AM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,901 N CARPENTER RD,MO,,,-121.0309447,37.64774644,0,0,0,,FELONY,2020-01-01,7
2,MP20000062 - 10851 VC,MP20000062,"Jan 1, 2020, 3:28:00 PM","Jan 1, 2020, 3:20:00 PM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,1956 EL SERENO ST,MO,,95358.0,-121.0224092,37.60376174,0,0,0,,FELONY,2020-01-01,7
3,MP20000069 - 10851 VC,MP20000069,"Jan 1, 2020, 6:57:00 PM","Jan 1, 2020, 4:30:00 PM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,3401 DALE RD,MO,,,-121.0529289,37.68949614,0,0,0,,FELONY,2020-01-01,7
4,MP20000084 - 10851 VC,MP20000084,"Jan 1, 2020, 10:20:00 PM","Jan 1, 2020, 7:00:00 PM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,1808 USTICK RD,MO,,95358.0,-121.0114855,37.602862,0,0,0,,FELONY,2020-01-01,7


In [3]:
# Review 
all_incidents_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2186 entries, 0 to 2185
Data columns (total 19 columns):
CID                   2186 non-null object
Number                2186 non-null object
dateReported          2186 non-null object
startDate             2186 non-null object
offenseCode           2186 non-null object
offenseDescription    2186 non-null object
streetAddress         2182 non-null object
cityDescription       2138 non-null object
stateDescription      1532 non-null object
zipCode               1875 non-null object
longitude             2168 non-null object
latitude              2168 non-null object
Booked                2186 non-null int64
DAComplaint           2186 non-null int64
Cited                 2186 non-null int64
burglaryFactor        395 non-null object
felonyMisdemeanor     2186 non-null object
dateIncident          2186 non-null object
weekNumber            2186 non-null int64
dtypes: int64(4), object(15)
memory usage: 324.6+ KB


In [4]:
# convert the dateReported field date format 
all_incidents_df['dateReported']= pd.to_datetime(all_incidents_df['dateReported'])
all_incidents_df['dateReported'] = all_incidents_df['dateReported'].dt.normalize()
all_incidents_df.head()

Unnamed: 0,CID,Number,dateReported,startDate,offenseCode,offenseDescription,streetAddress,cityDescription,stateDescription,zipCode,longitude,latitude,Booked,DAComplaint,Cited,burglaryFactor,felonyMisdemeanor,dateIncident,weekNumber
0,MP20000019 - 10851 VC,MP20000019,2020-01-01,"Jan 1, 2020, 8:52:18 AM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,920 PINE TREE LN,MODESTO,CALIFORNIA,95351.0,-121.0225978,37.61682379,0,0,0,,FELONY,2020-01-01,7
1,MP20000040 - 10851 VC,MP20000040,2020-01-01,"Jan 1, 2020, 11:25:00 AM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,901 N CARPENTER RD,MO,,,-121.0309447,37.64774644,0,0,0,,FELONY,2020-01-01,7
2,MP20000062 - 10851 VC,MP20000062,2020-01-01,"Jan 1, 2020, 3:20:00 PM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,1956 EL SERENO ST,MO,,95358.0,-121.0224092,37.60376174,0,0,0,,FELONY,2020-01-01,7
3,MP20000069 - 10851 VC,MP20000069,2020-01-01,"Jan 1, 2020, 4:30:00 PM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,3401 DALE RD,MO,,,-121.0529289,37.68949614,0,0,0,,FELONY,2020-01-01,7
4,MP20000084 - 10851 VC,MP20000084,2020-01-01,"Jan 1, 2020, 7:00:00 PM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,1808 USTICK RD,MO,,95358.0,-121.0114855,37.602862,0,0,0,,FELONY,2020-01-01,7


In [5]:
# Create a Shutdown marker around March 19th field which is week 18.
all_incidents_df["pre_Mar19"]=all_incidents_df["weekNumber"] <= 18
all_incidents_df.info() 
all_incidents_df.head()    

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2186 entries, 0 to 2185
Data columns (total 20 columns):
CID                   2186 non-null object
Number                2186 non-null object
dateReported          2186 non-null datetime64[ns]
startDate             2186 non-null object
offenseCode           2186 non-null object
offenseDescription    2186 non-null object
streetAddress         2182 non-null object
cityDescription       2138 non-null object
stateDescription      1532 non-null object
zipCode               1875 non-null object
longitude             2168 non-null object
latitude              2168 non-null object
Booked                2186 non-null int64
DAComplaint           2186 non-null int64
Cited                 2186 non-null int64
burglaryFactor        395 non-null object
felonyMisdemeanor     2186 non-null object
dateIncident          2186 non-null object
weekNumber            2186 non-null int64
pre_Mar19             2186 non-null bool
dtypes: bool(1), datetime64[ns](

Unnamed: 0,CID,Number,dateReported,startDate,offenseCode,offenseDescription,streetAddress,cityDescription,stateDescription,zipCode,longitude,latitude,Booked,DAComplaint,Cited,burglaryFactor,felonyMisdemeanor,dateIncident,weekNumber,pre_Mar19
0,MP20000019 - 10851 VC,MP20000019,2020-01-01,"Jan 1, 2020, 8:52:18 AM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,920 PINE TREE LN,MODESTO,CALIFORNIA,95351.0,-121.0225978,37.61682379,0,0,0,,FELONY,2020-01-01,7,True
1,MP20000040 - 10851 VC,MP20000040,2020-01-01,"Jan 1, 2020, 11:25:00 AM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,901 N CARPENTER RD,MO,,,-121.0309447,37.64774644,0,0,0,,FELONY,2020-01-01,7,True
2,MP20000062 - 10851 VC,MP20000062,2020-01-01,"Jan 1, 2020, 3:20:00 PM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,1956 EL SERENO ST,MO,,95358.0,-121.0224092,37.60376174,0,0,0,,FELONY,2020-01-01,7,True
3,MP20000069 - 10851 VC,MP20000069,2020-01-01,"Jan 1, 2020, 4:30:00 PM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,3401 DALE RD,MO,,,-121.0529289,37.68949614,0,0,0,,FELONY,2020-01-01,7,True
4,MP20000084 - 10851 VC,MP20000084,2020-01-01,"Jan 1, 2020, 7:00:00 PM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,1808 USTICK RD,MO,,95358.0,-121.0114855,37.602862,0,0,0,,FELONY,2020-01-01,7,True


In [6]:
# Drop weeks before week 7.  Summary view shows those weeks have backdated incidents which could skew data and are very small.
all_incidents_df = all_incidents_df[all_incidents_df["weekNumber"] > 7]
all_incidents_df

Unnamed: 0,CID,Number,dateReported,startDate,offenseCode,offenseDescription,streetAddress,cityDescription,stateDescription,zipCode,longitude,latitude,Booked,DAComplaint,Cited,burglaryFactor,felonyMisdemeanor,dateIncident,weekNumber,pre_Mar19
18,MP20000452 - 10851 VC,MP20000452,2020-01-06,"Jan 5, 2020, 8:00:00 AM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,2332 DORA ST,MO,,95354,-120.9531072,37.6406858,0,0,0,,FELONY,2020-01-06,8,True
19,MP20000508 - 10851 VC,MP20000508,2020-01-06,"Jan 6, 2020, 3:00:00 AM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,3005 DEGAS LN,MO,,95355,-120.9384432,37.67011616,0,0,0,,FELONY,2020-01-06,8,True
20,MP20000655 - 10851 VC,MP20000655,2020-01-08,"Dec 23, 2019, 9:00:00 AM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,1229 BRIGHTON AVE,MODESTO,CALIFORNIA,95355,-120.9735367,37.66048877,0,0,0,,FELONY,2020-01-08,8,True
21,MP20000677 - 10851 VC,MP20000677,2020-01-08,"Jan 8, 2020, 2:53:00 AM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,225 EL PASO AVE,MO,,95351,-120.9909686,37.61430697,0,0,0,,FELONY,2020-01-08,8,True
22,MP20000689 - 10851 VC,MP20000689,2020-01-09,"Jan 8, 2020, 11:30:00 PM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,1714 MONTEREY AVE,MO,,95354,-120.9678945,37.62702549,0,0,0,,FELONY,2020-01-09,8,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2181,MP20011015 - 664 /460 (B) PC,MP20011015,2020-04-20,"Apr 20, 2020, 8:27:42 AM",664 /460 (B) PC,664 /460 (B) PC ATTEMPTED BURGLARY:SECOND DEGR...,805 ROSEMORE AVE,MODESTO,CALIFORNIA,95358,-121.0403569,37.64626935,1,0,0,,FELONY,2020-04-20,23,False
2182,MP20011115 - 664 /460 (B) PC,MP20011115,2020-04-21,"Apr 21, 2020, 4:30:00 AM",664 /460 (B) PC,664 /460 (B) PC ATTEMPTED BURGLARY:SECOND DEGR...,1507 9TH ST,MODESTO,CALIFORNIA,95354,-121.0071811,37.64462689,0,0,0,,FELONY,2020-04-21,23,False
2183,MP20011642 - 664 /460 (B) PC,MP20011642,2020-04-26,"Apr 25, 2020, 11:00:00 PM",664 /460 (B) PC,664 /460 (B) PC ATTEMPTED BURGLARY:SECOND DEGR...,901 N CARPENTER RD,MODESTO,CALIFORNIA,,-121.0313248,37.64626722,0,0,0,NON-RESIDENCE - DAY (6AM - 6PM),FELONY,2020-04-26,23,False
2184,MP20011846 - 664 /460 (B) PC,MP20011846,2020-04-28,"Apr 27, 2020, 6:25:00 AM",664 /460 (B) PC,664 /460 (B) PC ATTEMPTED BURGLARY:SECOND DEGR...,101 E GLENN AVE,MODESTO,CALIFORNIA,95358,-120.9928975,37.60282055,0,0,0,,FELONY,2020-04-28,24,False


In [7]:
# Drop week 24 due to incomplete data.
all_incidents_df = all_incidents_df[all_incidents_df["weekNumber"] < 24]
all_incidents_df

Unnamed: 0,CID,Number,dateReported,startDate,offenseCode,offenseDescription,streetAddress,cityDescription,stateDescription,zipCode,longitude,latitude,Booked,DAComplaint,Cited,burglaryFactor,felonyMisdemeanor,dateIncident,weekNumber,pre_Mar19
18,MP20000452 - 10851 VC,MP20000452,2020-01-06,"Jan 5, 2020, 8:00:00 AM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,2332 DORA ST,MO,,95354,-120.9531072,37.6406858,0,0,0,,FELONY,2020-01-06,8,True
19,MP20000508 - 10851 VC,MP20000508,2020-01-06,"Jan 6, 2020, 3:00:00 AM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,3005 DEGAS LN,MO,,95355,-120.9384432,37.67011616,0,0,0,,FELONY,2020-01-06,8,True
20,MP20000655 - 10851 VC,MP20000655,2020-01-08,"Dec 23, 2019, 9:00:00 AM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,1229 BRIGHTON AVE,MODESTO,CALIFORNIA,95355,-120.9735367,37.66048877,0,0,0,,FELONY,2020-01-08,8,True
21,MP20000677 - 10851 VC,MP20000677,2020-01-08,"Jan 8, 2020, 2:53:00 AM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,225 EL PASO AVE,MO,,95351,-120.9909686,37.61430697,0,0,0,,FELONY,2020-01-08,8,True
22,MP20000689 - 10851 VC,MP20000689,2020-01-09,"Jan 8, 2020, 11:30:00 PM",10851 VC,10851 VC AUTO THEFT 07 Motor Vehicle Theft 44,1714 MONTEREY AVE,MO,,95354,-120.9678945,37.62702549,0,0,0,,FELONY,2020-01-09,8,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2180,MP20010680 - 664 /460 (B) PC,MP20010680,2020-04-16,"Apr 16, 2020, 5:48:05 PM",664 /460 (B) PC,664 /460 (B) PC ATTEMPTED BURGLARY:SECOND DEGR...,145 CAMELLIA WAY,MODESTO,CALIFORNIA,95354,-120.9711297,37.6393954,0,0,1,RESIDENCE - DAY (6AM - 6PM),FELONY,2020-04-16,22,False
2181,MP20011015 - 664 /460 (B) PC,MP20011015,2020-04-20,"Apr 20, 2020, 8:27:42 AM",664 /460 (B) PC,664 /460 (B) PC ATTEMPTED BURGLARY:SECOND DEGR...,805 ROSEMORE AVE,MODESTO,CALIFORNIA,95358,-121.0403569,37.64626935,1,0,0,,FELONY,2020-04-20,23,False
2182,MP20011115 - 664 /460 (B) PC,MP20011115,2020-04-21,"Apr 21, 2020, 4:30:00 AM",664 /460 (B) PC,664 /460 (B) PC ATTEMPTED BURGLARY:SECOND DEGR...,1507 9TH ST,MODESTO,CALIFORNIA,95354,-121.0071811,37.64462689,0,0,0,,FELONY,2020-04-21,23,False
2183,MP20011642 - 664 /460 (B) PC,MP20011642,2020-04-26,"Apr 25, 2020, 11:00:00 PM",664 /460 (B) PC,664 /460 (B) PC ATTEMPTED BURGLARY:SECOND DEGR...,901 N CARPENTER RD,MODESTO,CALIFORNIA,,-121.0313248,37.64626722,0,0,0,NON-RESIDENCE - DAY (6AM - 6PM),FELONY,2020-04-26,23,False


In [8]:
# How Many weeks are we working with
all_incidents_df['weekNumber'].value_counts()
#all_incidents_df.sort_values(by=['weekNumber'])
#sort_values(by=['col1'])
#sort_values(by='col1', ascending=False)

23    157
19    150
10    144
18    143
22    137
14    132
16    130
17    128
15    128
9     128
21    116
13    112
11    112
20    110
12    103
8      99
Name: weekNumber, dtype: int64

In [9]:
# Generate a  Aggravated Assault field. Codes include 243,244, 245,273 
all_incidents_df["agg_assault"]=all_incidents_df["offenseCode"].str.contains('243|244|245|273')
all_incidents_df['agg_assault'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


False    1480
True      549
Name: agg_assault, dtype: int64

In [10]:
# Generate a Battery field. Code 243
all_incidents_df["battery"]=all_incidents_df["offenseCode"].str.contains('243')
all_incidents_df['battery'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


False    1798
True      231
Name: battery, dtype: int64

In [11]:
# Generate an Assault with Deadly Weapons field. Code 245
all_incidents_df["deadly"]=all_incidents_df["offenseCode"].str.contains('245')
all_incidents_df['deadly'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


False    1879
True      150
Name: deadly, dtype: int64

In [12]:
# Generate a Domestice Violence field. Code 273
all_incidents_df["dom_viol"]=all_incidents_df["offenseCode"].str.contains('273')
all_incidents_df['dom_viol'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


False    1862
True      167
Name: dom_viol, dtype: int64

In [13]:
# Filter the dataframe
filtered_violence_df = all_incidents_df[["CID","weekNumber","pre_Mar19","agg_assault","dom_viol","battery","deadly"]].copy() 
filtered_violence_df.head()                                  

Unnamed: 0,CID,weekNumber,pre_Mar19,agg_assault,dom_viol,battery,deadly
18,MP20000452 - 10851 VC,8,True,False,False,False,False
19,MP20000508 - 10851 VC,8,True,False,False,False,False
20,MP20000655 - 10851 VC,8,True,False,False,False,False
21,MP20000677 - 10851 VC,8,True,False,False,False,False
22,MP20000689 - 10851 VC,8,True,False,False,False,False


#### Compare overall crime before and after the shutdown.

In [14]:
#Quick data review
all_incidents_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2029 entries, 18 to 2185
Data columns (total 24 columns):
CID                   2029 non-null object
Number                2029 non-null object
dateReported          2029 non-null datetime64[ns]
startDate             2029 non-null object
offenseCode           2029 non-null object
offenseDescription    2029 non-null object
streetAddress         2026 non-null object
cityDescription       1983 non-null object
stateDescription      1418 non-null object
zipCode               1742 non-null object
longitude             2013 non-null object
latitude              2013 non-null object
Booked                2029 non-null int64
DAComplaint           2029 non-null int64
Cited                 2029 non-null int64
burglaryFactor        359 non-null object
felonyMisdemeanor     2029 non-null object
dateIncident          2029 non-null object
weekNumber            2029 non-null int64
pre_Mar19             2029 non-null bool
agg_assault           2029 non-

In [15]:
# Get the means for reporting
mx =all_incidents_df[all_incidents_df.pre_Mar19 == True].groupby("weekNumber")["CID"].count().mean()
my =all_incidents_df[all_incidents_df.pre_Mar19 == False].groupby("weekNumber")["CID"].count().mean()
print(mx,my)

123.54545454545455 134.0


In [16]:
# Generate the x and y for t-test. Run t-test
x =all_incidents_df[all_incidents_df.pre_Mar19 == True].groupby("weekNumber")["CID"].count()
y =all_incidents_df[all_incidents_df.pre_Mar19 == False].groupby("weekNumber")["CID"].count()
stats.ttest_ind(x,y)

Ttest_indResult(statistic=-1.1532733515844291, pvalue=0.26810856349369866)

#### Compare aggravated assualt before and after the shutdown.

In [17]:
# Reduce overall DF to only aggravated assualt
agg_only_df = filtered_violence_df[(filtered_violence_df['agg_assault']==True)] 
agg_only_df.info()
agg_only_df.head()
agg_only_df

<class 'pandas.core.frame.DataFrame'>
Int64Index: 549 entries, 514 to 1103
Data columns (total 7 columns):
CID            549 non-null object
weekNumber     549 non-null int64
pre_Mar19      549 non-null bool
agg_assault    549 non-null bool
dom_viol       549 non-null bool
battery        549 non-null bool
deadly         549 non-null bool
dtypes: bool(5), int64(1), object(1)
memory usage: 15.5+ KB


Unnamed: 0,CID,weekNumber,pre_Mar19,agg_assault,dom_viol,battery,deadly
514,MP20000780 - 243 (D) PC,8,True,True,False,True,False
515,MP20000949 - 243 (D) PC,8,True,True,False,True,False
516,MP20002392 - 243 (D) PC,10,True,True,False,True,False
517,MP20002639 - 243 (D) PC,11,True,True,False,True,False
518,MP20003162 - 243 (D) PC,11,True,True,False,True,False
...,...,...,...,...,...,...,...
1099,MP20011524 - 273.5 (A) PC,23,False,True,True,False,False
1100,MP20011584 - 273.5 (A) PC,23,False,True,True,False,False
1101,MP20011609 - 273.5 (A) PC,23,False,True,True,False,False
1102,MP20011629 - 273.5 (A) PC,23,False,True,True,False,False


In [18]:
# Get the means for reporting
mx =agg_only_df[agg_only_df.pre_Mar19 == True].groupby(["weekNumber"])["CID"].count().mean()
my =agg_only_df[agg_only_df.pre_Mar19 == False].groupby(["weekNumber"])["CID"].count().mean()
print(mx,my)

31.545454545454547 40.4


In [19]:
# Generate the x and y for t-test. Run t-test
x =agg_only_df[agg_only_df.pre_Mar19 == True].groupby(["weekNumber"])["CID"].count()
y =agg_only_df[agg_only_df.pre_Mar19 == False].groupby(["weekNumber"])["CID"].count()
stats.ttest_ind(x,y)

Ttest_indResult(statistic=-3.4129339672020262, pvalue=0.004203348009218367)

#### Compare domestic violence before and after the shutdown.

In [20]:
# Create a Domestice Violence only dataframe
dom_only_df = filtered_violence_df[(filtered_violence_df['dom_viol']==True)] 
dom_only_df.info()
dom_only_df.head()
dom_only_df
# Leaves 167 cases

<class 'pandas.core.frame.DataFrame'>
Int64Index: 167 entries, 937 to 1103
Data columns (total 7 columns):
CID            167 non-null object
weekNumber     167 non-null int64
pre_Mar19      167 non-null bool
agg_assault    167 non-null bool
dom_viol       167 non-null bool
battery        167 non-null bool
deadly         167 non-null bool
dtypes: bool(5), int64(1), object(1)
memory usage: 4.7+ KB


Unnamed: 0,CID,weekNumber,pre_Mar19,agg_assault,dom_viol,battery,deadly
937,MP20000624 - 273.5 (A) PC,8,True,True,True,False,False
938,MP20000649 - 273.5 (A) PC,8,True,True,True,False,False
939,MP20000688 - 273.5 (A) PC,8,True,True,True,False,False
940,MP20000769 - 273.5 (A) PC,8,True,True,True,False,False
941,MP20000947 - 273.5 (A) PC,8,True,True,True,False,False
...,...,...,...,...,...,...,...
1099,MP20011524 - 273.5 (A) PC,23,False,True,True,False,False
1100,MP20011584 - 273.5 (A) PC,23,False,True,True,False,False
1101,MP20011609 - 273.5 (A) PC,23,False,True,True,False,False
1102,MP20011629 - 273.5 (A) PC,23,False,True,True,False,False


In [21]:
# Get the means for reporting
x =dom_only_df[dom_only_df.pre_Mar19 == True].groupby(["weekNumber", "pre_Mar19"])["CID"].count().mean()
y =dom_only_df[dom_only_df.pre_Mar19 == False].groupby(["weekNumber", "pre_Mar19"])["CID"].count().mean()
print(x,y)

9.818181818181818 11.8


In [22]:
# Generate the x and y for t-test. Run t-test
x =dom_only_df[dom_only_df.pre_Mar19 == True].groupby(["weekNumber", "pre_Mar19"])["CID"].count()
y =dom_only_df[dom_only_df.pre_Mar19 == False].groupby(["weekNumber", "pre_Mar19"])["CID"].count()
stats.ttest_ind(x,y)

Ttest_indResult(statistic=-0.9345102416780008, pvalue=0.365880251385987)

#### Compare battery before and after the shutdown.

In [23]:
# Create Battery only dataframe
bat_only_df = filtered_violence_df[(filtered_violence_df['battery']==True)] 
bat_only_df.info()
bat_only_df.head()
bat_only_df
# Leaves 231 cases

<class 'pandas.core.frame.DataFrame'>
Int64Index: 231 entries, 514 to 767
Data columns (total 7 columns):
CID            231 non-null object
weekNumber     231 non-null int64
pre_Mar19      231 non-null bool
agg_assault    231 non-null bool
dom_viol       231 non-null bool
battery        231 non-null bool
deadly         231 non-null bool
dtypes: bool(5), int64(1), object(1)
memory usage: 6.5+ KB


Unnamed: 0,CID,weekNumber,pre_Mar19,agg_assault,dom_viol,battery,deadly
514,MP20000780 - 243 (D) PC,8,True,True,False,True,False
515,MP20000949 - 243 (D) PC,8,True,True,False,True,False
516,MP20002392 - 243 (D) PC,10,True,True,False,True,False
517,MP20002639 - 243 (D) PC,11,True,True,False,True,False
518,MP20003162 - 243 (D) PC,11,True,True,False,True,False
...,...,...,...,...,...,...,...
763,MP20003729 - 243.4 (A) PC,12,True,True,False,True,False
764,MP20006567 - 243.4 (A) PC,16,True,True,False,True,False
765,MP20007605 - 243.4 (A) PC,17,True,True,False,True,False
766,MP20010489 - 243.4 (A) PC,22,False,True,False,True,False


In [24]:
# Get the means for reporting
x =bat_only_df[bat_only_df.pre_Mar19 == True].groupby(["weekNumber", "pre_Mar19"])["CID"].count().mean()
y =bat_only_df[bat_only_df.pre_Mar19 == False].groupby(["weekNumber", "pre_Mar19"])["CID"].count().mean()
print(x,y)

13.272727272727273 17.0


In [25]:
# Generate the x and y for t-test. Run t-test
x =bat_only_df[bat_only_df.pre_Mar19 == True].groupby(["weekNumber", "pre_Mar19"])["CID"].count()
y =bat_only_df[bat_only_df.pre_Mar19 == False].groupby(["weekNumber", "pre_Mar19"])["CID"].count()
stats.ttest_ind(x,y)

Ttest_indResult(statistic=-2.4859901566224125, pvalue=0.026164959739710298)

#### Compare assault with deadly weapon before and after the shutdown.

In [26]:
# Create Deadly Weapon only dataframe
dw_only_df = filtered_violence_df[(filtered_violence_df['deadly']==True)] 
dw_only_df.info()
dw_only_df.head()
dw_only_df
# Leaves 150 cases

<class 'pandas.core.frame.DataFrame'>
Int64Index: 150 entries, 774 to 927
Data columns (total 7 columns):
CID            150 non-null object
weekNumber     150 non-null int64
pre_Mar19      150 non-null bool
agg_assault    150 non-null bool
dom_viol       150 non-null bool
battery        150 non-null bool
deadly         150 non-null bool
dtypes: bool(5), int64(1), object(1)
memory usage: 4.2+ KB


Unnamed: 0,CID,weekNumber,pre_Mar19,agg_assault,dom_viol,battery,deadly
774,MP20000690 - 245 (A)(1) PC,8,True,True,False,False,True
775,MP20000795 - 245 (A)(1) PC,8,True,True,False,False,True
776,MP20000964 - 245 (A)(1) PC,8,True,True,False,False,True
777,MP20001034 - 245 (A)(1) PC,8,True,True,False,False,True
778,MP20001556 - 245 (A)(1) PC,9,True,True,False,False,True
...,...,...,...,...,...,...,...
923,MP20010905 - 245 (A)(4) PC,22,False,True,False,False,True
924,MP20011486 - 245 (A)(4) PC,23,False,True,False,False,True
925,MP20011524 - 245 (A)(4) PC,23,False,True,False,False,True
926,MP20011584 - 245 (A)(4) PC,23,False,True,False,False,True


In [75]:
# Get the means for reporting
x =dw_only_df[dw_only_df.pre_Mar19 == True].groupby(["weekNumber", "pre_Mar19"])["CID"].count().mean()
y =dw_only_df[dw_only_df.pre_Mar19 == False].groupby(["weekNumber", "pre_Mar19"])["CID"].count().mean()
print(x,y)

8.363636363636363 11.6


In [76]:
# Generate the x and y for t-test. Run t-test
x =dw_only_df[dw_only_df.pre_Mar19 == True].groupby(["weekNumber", "pre_Mar19"])["CID"].count()
y =dw_only_df[dw_only_df.pre_Mar19 == False].groupby(["weekNumber", "pre_Mar19"])["CID"].count()
stats.ttest_ind(x,y)

Ttest_indResult(statistic=-1.822572607118714, pvalue=0.08979251697417426)