In [1]:
'''
# Example 2 for genrules
# Using NIBRS dummy data
# See https://github.com/apwheele/apwheele.github.io/tree/master/MathPosts/association_rules
# For explanation behind NIBRS data
# If you want to read the original NIBRS data
# use https://dl.dropbox.com/sh/puws33uebzt9ckd/AACL3wBhZDr3P_ZbsbUxltERa/NIBRS_2012.csv?dl=0
'''

import pandas as pd
from src import genrules

# Read the NIBRS dummy data from a local copy when one exists; otherwise
# download it once and cache it in the working directory. Dropbox will cut
# you/me off eventually if every run re-downloads the file.
DATA_URL = 'https://dl.dropbox.com/sh/puws33uebzt9ckd/AADVM86qPJVqP4RHWkWfGBzpa/NIBRS_DummyDat.csv?dl=0'
DATA_FILE = 'NIBRS_DummyDat.csv'  # relative to the current working directory

try:
    ndum = pd.read_csv(DATA_FILE)
except FileNotFoundError:
    # No local copy yet: fetch from Dropbox, then try to keep a copy for next time
    ndum = pd.read_csv(DATA_URL)
    try:
        # index=False so no extra index column is written into the cached file
        ndum.to_csv(DATA_FILE, index=False)
    except OSError as err:
        # Caching is best-effort: the data is already in memory, so carry on
        print('Could not cache', DATA_FILE, '-', err)
ndum.head()

Unnamed: 0,ass_Argument,ass_OtherFelony,ass_LoversQuarrelAssault,ass_LEO_Assault,ass_DrugGangAssault,ass_NegManslaughter,ass_JustHomicide,rel_Family,rel_Known,ucr_Larceny,...,drug_AlcoholUse,drug_DrugUse,drug_ComputerUse,weap_Fists,weap_OtherWeap,weap_Firearm,weap_MVWeap,weap_Knife,weap_NoWeap,weap_BluntObject
0,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [2]:
# Aggregating (genrules algo can take weights)
# Identical rows are collapsed into one row per unique combination of the
# dummy variables; 'weight' ends up holding how many rows shared that combination.
#
# 'weight' is excluded from the grouping columns explicitly so that re-running
# this cell (after ndum has already gained the column) still groups on the
# original variables only and keeps summing the weights.
group_vars = [col for col in ndum.columns if col != 'weight']
ndum['weight'] = 1
ndum_agg = ndum.groupby(group_vars, as_index=False).sum() # sums the weight variable

print(ndum_agg.shape)
ndum_agg.describe().T

(7099, 35)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
ass_Argument,7099.0,0.211297,0.408258,0.0,0.0,0.0,0.0,1.0
ass_OtherFelony,7099.0,0.200873,0.400682,0.0,0.0,0.0,0.0,1.0
ass_LoversQuarrelAssault,7099.0,0.091421,0.288228,0.0,0.0,0.0,0.0,1.0
ass_LEO_Assault,7099.0,0.071419,0.257541,0.0,0.0,0.0,0.0,1.0
ass_DrugGangAssault,7099.0,0.03902,0.193655,0.0,0.0,0.0,0.0,1.0
ass_NegManslaughter,7099.0,0.005071,0.071036,0.0,0.0,0.0,0.0,1.0
ass_JustHomicide,7099.0,0.001127,0.033553,0.0,0.0,0.0,0.0,1.0
rel_Family,7099.0,0.268066,0.442983,0.0,0.0,0.0,1.0,1.0
rel_Known,7099.0,0.454148,0.497928,0.0,0.0,0.0,1.0,1.0
ucr_Larceny,7099.0,0.121989,0.327296,0.0,0.0,0.0,0.0,1.0


In [3]:
# Recode the 0's in the dummy variables to None:
# genrules will not include None values in the rules it builds.

y_var = 'ass_LEO_Assault'

# Candidate predictors: every grouping column except the outcome itself.
# (original note: "[0:7]" -- a leftover slice, presumably for trying a subset)
x_vars = list(group_vars)
del x_vars[x_vars.index(y_var)]

# Outcome column before the recode ...
print(ndum_agg[[y_var]].head())

# ... and after it; the replacement modifies ndum_agg in place
ndum_agg.replace(to_replace=[0], value=[None], inplace=True)
ndum_agg[[y_var]].head()

   ass_LEO_Assault
0                0
1                0
2                0
3                0
4                0


Unnamed: 0,ass_LEO_Assault
0,
1,
2,
3,
4,


In [4]:
# Now run the genrules search

# Build the search object on the aggregated data, weighting rows by 'weight'.
# k=3: the original note says this searches "through all 3 pairs" --
# presumably rules of up to three conditions; confirm against genrules.
ge = genrules.genrules(
    data=ndum_agg,
    y_var=y_var,
    x_vars=x_vars,
    w_var='weight',
    k=3,
)

# rep=0 means no evolutions: only the initial population is created and
# checked against the leaderboard (takes about 2 minutes)
ge.evolve(rep=0)

Creating initial pop, starting at 2021-12-13 14:16:30.784037
Total N of initial population 40989 (finished @ 2021-12-13 14:16:50.715576)

Creating initial leaderboard @ 2021-12-13 14:16:50.715576
Initial candidates added to leaderboard 15
Finished Initial leaderboard @ 2021-12-13 14:18:52.677938


In [5]:
# Inspect the top rules currently held in the leaderboard,
# limited to the summary columns of interest
tb = ge.leaderboard
tb[[
    'relrisk',
    'pval',
    'tot_n',
    'out_n',
    'label',
]]

Unnamed: 0,relrisk,pval,tot_n,out_n,label
0,5.883202,0.0,288,101,"{'ucr_Drug': 1, 'weap_MVWeap': 1}"
1,6.058401,0.0,280,101,"{'ucr_Assault': 1, 'ucr_Drug': 1, 'weap_MVWeap..."
2,5.248131,0.0,114,40,"{'ucr_Fraud_StolenProp': 1, 'weap_MVWeap': 1}"
3,5.392287,0.0,111,40,"{'ucr_Fraud_StolenProp': 1, 'ucr_Assault': 1, ..."
4,3.180358,1.31446e-07,72,16,"{'weap_Firearm': 1, 'weap_MVWeap': 1}"
5,3.015784,4.316636e-10,125,26,"{'ucr_MVTheft': 1, 'weap_MVWeap': 1}"
6,3.22561,8.885315e-08,71,16,"{'ucr_Assault': 1, 'weap_Firearm': 1, 'weap_MV..."
7,3.162738,4.124483e-06,54,12,"{'ucr_WeaponViol': 1, 'weap_Firearm': 1, 'weap..."
8,3.143694,7.944445e-11,120,26,"{'ucr_MVTheft': 1, 'ucr_Assault': 1, 'weap_MVW..."
9,3.127846,9.066082e-09,92,20,"{'rel_Known': 1, 'ucr_Drug': 1, 'weap_MVWeap': 1}"
