In [25]:
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [26]:
df = pd.read_csv('Cleaned_Crime.csv', low_memory=False)
df.head()

Unnamed: 0.1,Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,FBI Code,Year,Location
0,0,11556037,JC103643,01/03/2019 07:20:00 PM,0000X W RWY 27R,2890,PUBLIC PEACE VIOLATION,OTHER VIOLATION,AIRCRAFT,False,False,1654,16,26,2019,"(42.002816387, -87.90609433)"
1,1,11626027,JC188126,03/16/2019 05:58:00 PM,001XX N WELLS ST,460,BATTERY,SIMPLE,STREET,False,False,122,1,08B,2019,"(41.88336939, -87.633860272)"
2,2,11622422,JC183696,03/12/2019 10:00:00 PM,008XX E 38TH PL,820,THEFT,$500 AND UNDER,RESIDENTIAL YARD (FRONT/BACK),False,False,212,2,06,2019,"(41.825346902, -87.606780575)"
3,3,11625922,JC185669,03/14/2019 06:42:00 PM,074XX N PAULINA ST,460,BATTERY,SIMPLE,RESIDENCE,False,False,2422,24,08B,2019,"(42.016541612, -87.672499325)"
4,4,11622907,JC185406,03/14/2019 04:03:00 PM,008XX E 38TH PL,5002,OTHER OFFENSE,OTHER VEHICLE OFFENSE,STREET,False,True,212,2,26,2019,"(41.825298645, -87.6069609)"


In [27]:
df = df.drop(['Unnamed: 0','ID','Date','Block','IUCR','Primary Type','Description','Beat','Location','Case Number'], axis=1)
df.head()

Unnamed: 0,Location Description,Arrest,Domestic,District,FBI Code,Year
0,AIRCRAFT,False,False,16,26,2019
1,STREET,False,False,1,08B,2019
2,RESIDENTIAL YARD (FRONT/BACK),False,False,2,06,2019
3,RESIDENCE,False,False,24,08B,2019
4,STREET,False,True,2,26,2019


In [28]:
def crime_type(x):
    if x in ['01A','02','03','04A','04B']:
        return 'Violent crime'
    elif x in ['05','06','07','09']:
        return 'Property crime'
    elif x in ['01B','08A','08B','10','11','12','13','14','15','16','17','18','19','20','22','24','26']:
        return 'Less serious offense'

def arrest(x):
    if x:
        return 'Arrest'
    else:
        return 'No Arrest'

def domestic(x):
    if x:
        return 'Domestic'
    else:
        return 'Not Domestic'
    
def location_description(x):
    if 'VEHICLE' in x or x == 'AUTO' or 'TAXI' in x or x == 'TRUCK':
        return 'VEHICLE'
    elif 'CHA' in x and x != 'CURRENCY EXCHANGE':
        return 'CHICAGO HOUSING AUTHORITY'
    elif 'CTA' in x:
        return 'CHICAGO TRANSIT AUTHORITY'
    elif 'COLLEGE' in x:
        return 'COLLEGE'
    elif 'RESIDEN' in x or x == 'APARTMENT' or x == 'BASEMENT' or x == 'COACH HOUSE':
        return 'RESIDENTIAL'
    elif 'AIRPORT' in x:
        return 'AIRPORT'
    elif 'BARBERSHOP' in x or 'BARBER SHOP' in x:
        return 'BARBERSHOP'
    elif 'CHURCH' in x:
        return 'CHURCH'
    elif 'DRIVEWAY' in x:
        return 'DRIVEWAY'
    elif 'FACTORY' in x:
        return 'FACTORY'
    elif 'GARAGE' in x or 'PARKING LOT' in x:
        return 'GARAGE/PARKING LOT'
    elif 'GAS STATION' in x or 'CONVENIENCE STORE' in x:
        return 'GAS STATION/CONVENIENCE STORE'
    elif 'HIGHWAY' in x:
        return 'HIGHWAY'
    elif 'HOSPITAL' in x:
        return 'HOSPITAL'
    elif 'HOTEL' in x or 'MOTEL' in x:
        return 'HOTEL/MOTEL'
    elif 'LAKE' in x or 'RIVER' in x or x == 'LAGOON':
        return 'LAKEFRONT/WATERFRONT/RIVERBANK'
    elif 'MEDICAL' in x:
        return 'MEDICAL/DENTAL OFFICE'
    elif 'THEATER' in x:
        return 'THEATER'
    elif 'NURSING' in x:
        return 'NURSING HOME'
    elif 'OFFICE' in x:
        return 'OFFICE'
    elif 'JAIL' in x:
        return 'JAIL'
    elif 'POLICE' in x:
        return 'POLICE FACILITY/VEH PARKING LOT'
    elif 'POOL' in x:
        return 'POOLROOM'
    elif 'SCHOOL' in x:
        return 'SCHOOL'
    elif 'RAILROAD' in x:
        return 'RAILROAD PROPERTY'
    elif 'STADIUM' in x:
        return 'SPORTS ARENA/STADIUM'
    elif 'TAVERN' in x:
        return 'TAVERN/LIQUOR STORE'
    elif 'VACANT' in x:
        return 'VACANT LOT/LAND'
    elif 'WATERCRAFT' in x:
        return 'BOAT/WATERCRAFT'
    elif x == 'CREDIT UNION' or x == 'BANK':
        return 'BANK/CREDIT UNION'
    elif 'GOVERNMENT BUILDING' in x:
        return 'GOVERNMENT BUILDING'
    else:
        return x

In [29]:
df['FBI Code'] = df['FBI Code'].apply(crime_type)
df['Arrest'] = df['Arrest'].apply(arrest)
df['Domestic'] = df['Domestic'].apply(domestic)
df['Location Description'] = df['Location Description'].apply(location_description)

In [30]:
Domestic = pd.get_dummies(df['Domestic'],drop_first=False)
District = pd.get_dummies(df['District'],drop_first=True)
FBI_Code = pd.get_dummies(df['FBI Code'],drop_first=True)
Year = pd.get_dummies(df['Year'],drop_first=True)
Arrest = pd.get_dummies(df['Arrest'],drop_first=False)
Location = pd.get_dummies(df['Location Description'],drop_first=True)

In [31]:
df = pd.concat([Arrest,Domestic,District,FBI_Code,Location,Year],axis=1)

In [32]:
df.head()

Unnamed: 0,Arrest,No Arrest,Domestic,Not Domestic,2,3,4,5,6,7,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,0,1,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,0,1,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,0,1,0,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,0,1,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,1,1,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [33]:
frequent_itemsets = apriori(df, min_support=0.02, use_colnames=True, low_memory=True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.274664,(Arrest)
1,0.725336,(No Arrest)
2,0.133542,(Domestic)
3,0.866458,(Not Domestic)
4,0.047203,(2)
...,...,...
233,0.020577,"(Not Domestic, 2003, Property crime)"
234,0.020044,"(Not Domestic, 2004, Property crime)"
235,0.022864,"(STREET, Not Domestic, Violent crime)"
236,0.083629,"(No Arrest, RESIDENTIAL, Not Domestic, Propert..."


In [34]:
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Arrest),(Not Domestic),0.274664,0.866458,0.248425,0.904470,1.043871,0.010441,1.397904
1,(Domestic),(No Arrest),0.133542,0.725336,0.107304,0.803517,1.107786,0.010441,1.397904
2,(No Arrest),(Not Domestic),0.725336,0.866458,0.618032,0.852064,0.983387,-0.010441,0.902701
3,(Not Domestic),(No Arrest),0.866458,0.725336,0.618032,0.713286,0.983387,-0.010441,0.957973
4,(2),(No Arrest),0.047203,0.725336,0.034057,0.721510,0.994725,-0.000181,0.986262
...,...,...,...,...,...,...,...,...,...
236,"(RESIDENTIAL, Not Domestic, Property crime)",(No Arrest),0.087016,0.725336,0.083629,0.961067,1.324996,0.020512,7.054863
237,"(RESIDENTIAL, Property crime)","(No Arrest, Not Domestic)",0.091413,0.618032,0.083629,0.914841,1.480248,0.027132,4.485361
238,"(No Arrest, STREET, Property crime)",(Not Domestic),0.089434,0.866458,0.088796,0.992872,1.145898,0.011306,18.734673
239,"(STREET, Not Domestic, Property crime)",(No Arrest),0.093264,0.725336,0.088796,0.952102,1.312636,0.021149,5.734347


In [47]:
pd.set_option("display.max_rows", None, "display.max_columns", None)
rules[rules['lift'] >= 1.2].sort_values(by=['lift'])

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
222,"(8, Property crime)",(No Arrest),0.022534,0.725336,0.020001,0.887596,1.223703,0.003656,2.443544
152,"(Not Domestic, Property crime)",(No Arrest),0.309823,0.725336,0.277516,0.895725,1.23491,0.05279,2.634032
22,(Property crime),(No Arrest),0.315753,0.725336,0.282956,0.896133,1.235473,0.05393,2.644375
162,(OTHER),"(No Arrest, Not Domestic)",0.037795,0.618032,0.029156,0.771439,1.248218,0.005798,1.671184
225,"(STREET, Property crime)",(No Arrest),0.093959,0.725336,0.089434,0.951837,1.312271,0.021282,5.702839
239,"(STREET, Not Domestic, Property crime)",(No Arrest),0.093264,0.725336,0.088796,0.952102,1.312636,0.021149,5.734347
224,"(RESIDENTIAL, Property crime)",(No Arrest),0.091413,0.725336,0.087673,0.959085,1.322263,0.021368,6.713055
236,"(RESIDENTIAL, Not Domestic, Property crime)",(No Arrest),0.087016,0.725336,0.083629,0.961067,1.324996,0.020512,7.054863
153,(Property crime),"(No Arrest, Not Domestic)",0.315753,0.618032,0.277516,0.878904,1.4221,0.082371,3.154247
237,"(RESIDENTIAL, Property crime)","(No Arrest, Not Domestic)",0.091413,0.618032,0.083629,0.914841,1.480248,0.027132,4.485361
