Import Libraries / Read Data from GitHub



In [14]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Load the cleaned NYPD arrest records for 2020 from the project's GitHub repository.
# NOTE(review): the column listing displayed further down includes 'Unnamed: 0',
# which looks like a saved index column; consider index_col=0 — confirm.
url = 'https://raw.githubusercontent.com/duketran1996/NYC-Crime/main/clean-dataset/nypd_arrest_data_clean_2020.csv'
df = pd.read_csv(url)

In [15]:
# Load the NYC population census figures (2019 file) from the same GitHub repository.

url1 = 'https://raw.githubusercontent.com/duketran1996/NYC-Crime/main/association_rule/nyc_population_census_2019.csv'
df_pop = pd.read_csv(url1)

In [None]:
 df.columns  # list the arrest dataset's column names to see which fields are available
# Alternative check (disabled): df.shape

Index(['Unnamed: 0', 'ARREST_KEY', 'ARREST_DATE', 'PD_CD', 'PD_DESC', 'KY_CD',
       'OFNS_DESC', 'LAW_CODE', 'LAW_CAT_CD', 'ARREST_BORO', 'ARREST_PRECINCT',
       'JURISDICTION_CODE', 'AGE_GROUP', 'PERP_SEX', 'PERP_RACE', 'Latitude',
       'Longitude', 'New Georeferenced Column'],
      dtype='object')

Group Datasets by Borough and Race

In [16]:
# Count arrests for every (borough, perpetrator race) pair and keep the result
# as a one-column DataFrame indexed by (ARREST_BORO, PERP_RACE).
df_crime_race_dist = (
    df.groupby(['ARREST_BORO', 'PERP_RACE'])['ARREST_KEY']
    .count()
    .to_frame()
)

In [17]:
# Total population for every (borough, race) pair, kept as a one-column
# DataFrame indexed by (BOROUGH, RACE).
df_pop_race_dist = (
    df_pop.groupby(['BOROUGH', 'RACE'])['POPULATION']
    .sum()
    .to_frame()
)

Join the arrest and population datasets to find the population-normalised arrest rate by race in every borough

In [18]:
# Place arrest counts and population side by side, aligned on the (borough, race) index.
# join="inner" keeps only index pairs that exist in BOTH frames.
# NOTE(review): the normalised result displayed below lists only three race labels per
# borough, so labels spelled differently in the two datasets appear to be dropped
# silently here — verify the category names match before relying on the rates.
df_joined = pd.concat([df_crime_race_dist, df_pop_race_dist], axis=1, join="inner")


### **Normalise Arrest Rate of Each Race by Population for Each Borough**

---



---



In [19]:
# Arrests per 100 residents for each (borough, race) pair:
# arrest count divided by population, scaled to a percentage.
normalise_race_of_crime = df_joined['ARREST_KEY'].div(df_joined['POPULATION']).mul(100)

In [20]:
normalise_race_of_crime  # arrests per 100 residents, indexed by (borough, race)

Bronx          AMERICAN INDIAN/ALASKAN NATIVE    0.145886
               BLACK                             2.498957
               WHITE                             0.181405
Brooklyn       AMERICAN INDIAN/ALASKAN NATIVE    0.408004
               BLACK                             2.613589
               WHITE                             0.395582
Manhattan      AMERICAN INDIAN/ALASKAN NATIVE    0.332583
               BLACK                             5.791108
               WHITE                             0.386982
Queens         AMERICAN INDIAN/ALASKAN NATIVE    0.477816
               BLACK                             2.555573
               WHITE                             0.319768
Staten Island  AMERICAN INDIAN/ALASKAN NATIVE    0.510051
               BLACK                             4.394112
               WHITE                             0.578191
dtype: float64

**Observation:**
The normalised data shows that Black individuals have a much higher arrest rate relative to population in every borough.

But in my opinion this does not tell us much, as the data can be skewed.

For example, there could be 100 Black individuals arrested for 11923 crimes committed in Queens, while there could be 3445 white individuals arrested for 3445 crimes in Queens.

### **ASSOCIATION RULES**

---



---



Based on suggestions from: https://pbpython.com/market-basket-analysis.html

Function to one-hot encode the occurrence count of an offense.

In [None]:
def encode_units(x):
    """Collapse an occurrence count into a 0/1 presence flag.

    Parameters
    ----------
    x : int or float
        Number of times an offense was recorded (one cell of a basket table).

    Returns
    -------
    int
        1 when ``x`` is strictly positive, otherwise 0.

    Notes
    -----
    A single comparison is used so that every numeric input maps to 0 or 1.
    Fractional values in (0, 1) count as present, and NaN counts as absent
    because ``NaN > 0`` is False; nothing falls through to an implicit
    ``None``.
    """
    return 1 if x > 0 else 0

Finding association of offenses likely to occur together in **Manhattan** on a given day.

In [None]:
# Build the Manhattan "basket" table: one row per arrest date, one column per
# offense description, each cell holding the number of arrests that day.
basket_man = (
    df.loc[df['ARREST_BORO'].eq("Manhattan")]
    .groupby(['ARREST_DATE', 'OFNS_DESC'])['ARREST_KEY']
    .count()
    .unstack()          # offenses become columns; missing date/offense pairs are NaN
    .reset_index()
    .fillna(0)          # no arrests recorded for that offense on that date
    .set_index('ARREST_DATE')
)
basket_man

OFNS_DESC,ADMINISTRATIVE CODE,AGRICULTURE & MRKTS LAW-UNCLASSIFIED,ALCOHOLIC BEVERAGE CONTROL LAW,ANTICIPATORY OFFENSES,ARSON,ASSAULT 3 & RELATED OFFENSES,BURGLAR'S TOOLS,BURGLARY,CHILD ABANDONMENT/NON SUPPORT,CRIMINAL MISCHIEF & RELATED OFFENSES,CRIMINAL TRESPASS,DANGEROUS DRUGS,DANGEROUS WEAPONS,DISORDERLY CONDUCT,ENDAN WELFARE INCOMP,ESCAPE 3,FELONY ASSAULT,FOR OTHER AUTHORITIES,FORGERY,FRAUDS,FRAUDULENT ACCOSTING,GAMBLING,GRAND LARCENY,GRAND LARCENY OF MOTOR VEHICLE,HARRASSMENT 2,"HOMICIDE-NEGLIGENT,UNCLASSIFIE",HOMICIDE-NEGLIGENT-VEHICLE,INTOXICATED AND IMPAIRED DRIVING,JOSTLING,KIDNAPPING AND RELATED OFFENSES,"LOITERING/GAMBLING (CARDS, DIC",MISCELLANEOUS PENAL LAW,MOVING INFRACTIONS,MURDER & NON-NEGL. MANSLAUGHTE,NEW YORK CITY HEALTH CODE,NYS LAWS-UNCLASSIFIED FELONY,OFF. AGNST PUB ORD SENSBLTY &,OFFENSES AGAINST PUBLIC ADMINISTRATION,OFFENSES AGAINST PUBLIC SAFETY,OFFENSES AGAINST THE PERSON,OFFENSES INVOLVING FRAUD,OFFENSES RELATED TO CHILDREN,OTHER OFFENSES RELATED TO THEF,OTHER STATE LAWS,OTHER STATE LAWS (NON PENAL LAW),OTHER TRAFFIC INFRACTION,PETIT LARCENY,POSSESSION OF STOLEN PROPERTY,PROSTITUTION & RELATED OFFENSES,RAPE,ROBBERY,SEX CRIMES,THEFT OF SERVICES,THEFT-FRAUD,UNAUTHORIZED USE OF A VEHICLE,VEHICLE AND TRAFFIC LAWS
ARREST_DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1
01/01/2020,0.0,0.0,0.0,0.0,0.0,22.0,0.0,4.0,0.0,10.0,2.0,4.0,4.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,3.0,0.0,1.0,0.0,2.0,1.0,8.0,0.0,1.0,2.0,0.0,0.0,0.0,1.0,2.0,5.0,1.0,0.0,0.0,3.0,1.0,1.0,0.0,0.0,2.0
01/02/2020,0.0,0.0,0.0,2.0,0.0,23.0,2.0,2.0,0.0,36.0,9.0,17.0,4.0,0.0,0.0,0.0,6.0,3.0,1.0,1.0,0.0,0.0,6.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,5.0,0.0,1.0,0.0,1.0,3.0,8.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,2.0,17.0,2.0,0.0,0.0,10.0,1.0,4.0,0.0,0.0,0.0
01/03/2020,0.0,0.0,0.0,0.0,0.0,11.0,0.0,3.0,0.0,2.0,4.0,8.0,3.0,0.0,0.0,0.0,4.0,0.0,2.0,1.0,3.0,0.0,6.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,5.0,0.0,1.0,0.0,0.0,1.0,7.0,0.0,0.0,0.0,0.0,4.0,1.0,0.0,1.0,11.0,0.0,4.0,0.0,2.0,1.0,1.0,0.0,0.0,5.0
01/04/2020,0.0,0.0,3.0,0.0,0.0,13.0,0.0,7.0,0.0,5.0,5.0,8.0,1.0,0.0,0.0,0.0,7.0,0.0,1.0,0.0,0.0,0.0,9.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,5.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,26.0,2.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0
01/05/2020,0.0,0.0,0.0,0.0,0.0,11.0,3.0,2.0,1.0,1.0,4.0,7.0,1.0,0.0,0.0,0.0,7.0,0.0,1.0,1.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,8.0,0.0,1.0,2.0,0.0,4.0,0.0,0.0,1.0,13.0,0.0,0.0,0.0,9.0,1.0,0.0,0.0,0.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12/27/2020,0.0,0.0,0.0,0.0,0.0,10.0,0.0,13.0,0.0,13.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,2.0,0.0,1.0,0.0,1.0,1.0,4.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,3.0
12/28/2020,0.0,0.0,0.0,0.0,0.0,8.0,0.0,6.0,0.0,4.0,1.0,10.0,1.0,0.0,0.0,0.0,13.0,3.0,0.0,1.0,0.0,0.0,22.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0,1.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,5.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0
12/29/2020,0.0,0.0,0.0,0.0,0.0,8.0,1.0,41.0,0.0,2.0,1.0,13.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,1.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,9.0,0.0,2.0,0.0,0.0,1.0,7.0,0.0,1.0,1.0,0.0,2.0,0.0,0.0,1.0,8.0,1.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,2.0
12/30/2020,0.0,0.0,0.0,0.0,0.0,15.0,0.0,5.0,0.0,5.0,1.0,13.0,4.0,0.0,0.0,0.0,5.0,2.0,5.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,3.0,4.0,0.0,1.0,4.0,0.0,0.0,0.0,0.0,1.0,19.0,1.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,2.0


There are a lot of zeros in the data, but we also need to make sure any positive value is converted to 1 and anything less than or equal to 0 is set to 0. This step completes the one-hot encoding of the data.

In [None]:
# One-hot encode the Manhattan basket: encode_units is applied to every cell so each
# entry becomes a 0/1 flag for "offense recorded on that date".
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in favour of
# DataFrame.map — confirm against the pandas version this notebook targets.
basket_sets_man = basket_man.applymap(encode_units)
basket_sets_man 

OFNS_DESC,ADMINISTRATIVE CODE,AGRICULTURE & MRKTS LAW-UNCLASSIFIED,ALCOHOLIC BEVERAGE CONTROL LAW,ANTICIPATORY OFFENSES,ARSON,ASSAULT 3 & RELATED OFFENSES,BURGLAR'S TOOLS,BURGLARY,CHILD ABANDONMENT/NON SUPPORT,CRIMINAL MISCHIEF & RELATED OFFENSES,CRIMINAL TRESPASS,DANGEROUS DRUGS,DANGEROUS WEAPONS,DISORDERLY CONDUCT,ENDAN WELFARE INCOMP,ESCAPE 3,FELONY ASSAULT,FOR OTHER AUTHORITIES,FORGERY,FRAUDS,FRAUDULENT ACCOSTING,GAMBLING,GRAND LARCENY,GRAND LARCENY OF MOTOR VEHICLE,HARRASSMENT 2,"HOMICIDE-NEGLIGENT,UNCLASSIFIE",HOMICIDE-NEGLIGENT-VEHICLE,INTOXICATED AND IMPAIRED DRIVING,JOSTLING,KIDNAPPING AND RELATED OFFENSES,"LOITERING/GAMBLING (CARDS, DIC",MISCELLANEOUS PENAL LAW,MOVING INFRACTIONS,MURDER & NON-NEGL. MANSLAUGHTE,NEW YORK CITY HEALTH CODE,NYS LAWS-UNCLASSIFIED FELONY,OFF. AGNST PUB ORD SENSBLTY &,OFFENSES AGAINST PUBLIC ADMINISTRATION,OFFENSES AGAINST PUBLIC SAFETY,OFFENSES AGAINST THE PERSON,OFFENSES INVOLVING FRAUD,OFFENSES RELATED TO CHILDREN,OTHER OFFENSES RELATED TO THEF,OTHER STATE LAWS,OTHER STATE LAWS (NON PENAL LAW),OTHER TRAFFIC INFRACTION,PETIT LARCENY,POSSESSION OF STOLEN PROPERTY,PROSTITUTION & RELATED OFFENSES,RAPE,ROBBERY,SEX CRIMES,THEFT OF SERVICES,THEFT-FRAUD,UNAUTHORIZED USE OF A VEHICLE,VEHICLE AND TRAFFIC LAWS
ARREST_DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1
01/01/2020,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,1,1,0,0,0,1,1,1,1,0,0,1,1,1,0,0,1
01/02/2020,0,0,0,1,0,1,1,1,0,1,1,1,1,0,0,0,1,1,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,1,1,1,0,0,0,0,1,1,0,1,1,1,0,0,1,1,1,0,0,0
01/03/2020,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,1,1,0,1,0,0,0,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,1
01/04/2020,0,0,1,0,0,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,1
01/05/2020,0,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12/27/2020,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,0,0,1
12/28/2020,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1
12/29/2020,0,0,0,0,0,1,1,1,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,0,1,0,0,1,1,1,0,0,1,1,0,0,0,1
12/30/2020,0,0,0,0,0,1,0,1,0,1,1,1,1,0,0,0,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1


In [None]:
# Mine sets of offenses that occur together on at least 40% of the dates
# (min_support=0.4; each row of the basket is one arrest date).
# use_colnames=True reports itemsets by offense name, as seen in the rules output.
frequent_itemsets_man = apriori(basket_sets_man, min_support=0.4, use_colnames=True)

In [None]:
# Derive association rules from the Manhattan itemsets, filtered on lift with threshold 1.
# NOTE(review): the five rows displayed all have lift 1.0 and leverage 0.0, and
# ASSAULT 3 & RELATED OFFENSES has support 1.0 (present on every date), so these rules
# indicate independence rather than association. head() shows the first rows, not the
# strongest ones — consider sorting by lift before inspecting.
rules_man = association_rules(frequent_itemsets_man, metric="lift", min_threshold=1)
rules_man.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(ASSAULT 3 & RELATED OFFENSES),(BURGLARY),1.0,0.934426,0.934426,0.934426,1.0,0.0,1.0
1,(BURGLARY),(ASSAULT 3 & RELATED OFFENSES),0.934426,1.0,0.934426,1.0,1.0,0.0,inf
2,(ASSAULT 3 & RELATED OFFENSES),(CRIMINAL MISCHIEF & RELATED OFFENSES),1.0,0.969945,0.969945,0.969945,1.0,0.0,1.0
3,(CRIMINAL MISCHIEF & RELATED OFFENSES),(ASSAULT 3 & RELATED OFFENSES),0.969945,1.0,0.969945,1.0,1.0,0.0,inf
4,(ASSAULT 3 & RELATED OFFENSES),(CRIMINAL TRESPASS),1.0,0.655738,0.655738,0.655738,1.0,0.0,1.0


### The same process is repeated for each borough

---



Finding association of offenses likely to occur together in **Bronx** on a given day.

In [None]:
# Build the Bronx "basket" table: one row per arrest date, one column per
# offense description, each cell holding the number of arrests that day.
basket_brx = (
    df.loc[df['ARREST_BORO'].eq("Bronx")]
    .groupby(['ARREST_DATE', 'OFNS_DESC'])['ARREST_KEY']
    .count()
    .unstack()          # offenses become columns; missing date/offense pairs are NaN
    .reset_index()
    .fillna(0)          # no arrests recorded for that offense on that date
    .set_index('ARREST_DATE')
)

In [None]:
# One-hot encode the Bronx basket: every cell becomes a 0/1 flag via encode_units.
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in favour of
# DataFrame.map — confirm against the pandas version this notebook targets.
basket_sets_brx = basket_brx.applymap(encode_units)

In [None]:
# Mine sets of offenses that occur together on at least 40% of the Bronx dates
# (min_support=0.4); itemsets are reported by offense name.
frequent_itemsets_brx = apriori(basket_sets_brx, min_support=0.4, use_colnames=True)

In [None]:
# Derive association rules from the Bronx itemsets, filtered on lift with threshold 1.
# NOTE(review): the five rows displayed all have lift 1.0 and leverage 0.0
# (ASSAULT 3 & RELATED OFFENSES has support 1.0), i.e. they show independence;
# consider sorting by lift before taking head().
rules_brx = association_rules(frequent_itemsets_brx, metric="lift", min_threshold=1)
rules_brx.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(BURGLARY),(ASSAULT 3 & RELATED OFFENSES),0.745902,1.0,0.745902,1.0,1.0,0.0,inf
1,(ASSAULT 3 & RELATED OFFENSES),(BURGLARY),1.0,0.745902,0.745902,0.745902,1.0,0.0,1.0
2,(ASSAULT 3 & RELATED OFFENSES),(CRIMINAL MISCHIEF & RELATED OFFENSES),1.0,0.989071,0.989071,0.989071,1.0,0.0,1.0
3,(CRIMINAL MISCHIEF & RELATED OFFENSES),(ASSAULT 3 & RELATED OFFENSES),0.989071,1.0,0.989071,1.0,1.0,0.0,inf
4,(DANGEROUS DRUGS),(ASSAULT 3 & RELATED OFFENSES),0.931694,1.0,0.931694,1.0,1.0,0.0,inf


Finding association of offenses likely to occur together in **Queens** on a given day.

In [None]:
# Build the Queens "basket" table: one row per arrest date, one column per
# offense description, each cell holding the number of arrests that day.
basket_qns = (
    df.loc[df['ARREST_BORO'].eq("Queens")]
    .groupby(['ARREST_DATE', 'OFNS_DESC'])['ARREST_KEY']
    .count()
    .unstack()          # offenses become columns; missing date/offense pairs are NaN
    .reset_index()
    .fillna(0)          # no arrests recorded for that offense on that date
    .set_index('ARREST_DATE')
)

In [None]:
# One-hot encode the Queens basket (0/1 flag per date and offense), then mine sets of
# offenses that occur together on at least 40% of the dates (min_support=0.4).
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in favour of
# DataFrame.map — confirm against the pandas version this notebook targets.
basket_sets_qns = basket_qns.applymap(encode_units)
frequent_itemsets_qns = apriori(basket_sets_qns, min_support=0.4, use_colnames=True)

In [None]:
# Derive association rules from the Queens itemsets, filtered on lift with threshold 1.
# NOTE(review): the five rows displayed all have lift 1.0 and leverage 0.0
# (ASSAULT 3 & RELATED OFFENSES has support 1.0), i.e. they show independence;
# consider sorting by lift before taking head().
rules_qns = association_rules(frequent_itemsets_qns, metric="lift", min_threshold=1)
rules_qns.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(BURGLARY),(ASSAULT 3 & RELATED OFFENSES),0.781421,1.0,0.781421,1.0,1.0,0.0,inf
1,(ASSAULT 3 & RELATED OFFENSES),(BURGLARY),1.0,0.781421,0.781421,0.781421,1.0,0.0,1.0
2,(ASSAULT 3 & RELATED OFFENSES),(CRIMINAL MISCHIEF & RELATED OFFENSES),1.0,0.978142,0.978142,0.978142,1.0,0.0,1.0
3,(CRIMINAL MISCHIEF & RELATED OFFENSES),(ASSAULT 3 & RELATED OFFENSES),0.978142,1.0,0.978142,1.0,1.0,0.0,inf
4,(ASSAULT 3 & RELATED OFFENSES),(CRIMINAL TRESPASS),1.0,0.418033,0.418033,0.418033,1.0,0.0,1.0


Finding association of offenses likely to occur together in **Brooklyn** on a given day.

In [None]:
# Build the Brooklyn "basket" table: one row per arrest date, one column per
# offense description, each cell holding the number of arrests that day.
basket_brk = (
    df.loc[df['ARREST_BORO'].eq("Brooklyn")]
    .groupby(['ARREST_DATE', 'OFNS_DESC'])['ARREST_KEY']
    .count()
    .unstack()          # offenses become columns; missing date/offense pairs are NaN
    .reset_index()
    .fillna(0)          # no arrests recorded for that offense on that date
    .set_index('ARREST_DATE')
)

In [None]:
# One-hot encode the Brooklyn basket (0/1 flag per date and offense), then mine sets of
# offenses that occur together on at least 50% of the dates (min_support=0.5).
# NOTE(review): min_support is 0.5 here but 0.4 for Manhattan, Bronx and Queens —
# confirm the difference is intentional, since it changes which rules are comparable.
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in favour of
# DataFrame.map — confirm against the pandas version this notebook targets.
basket_sets_brk = basket_brk.applymap(encode_units)
frequent_itemsets_brk = apriori(basket_sets_brk, min_support=0.5, use_colnames=True)

In [None]:
# Derive association rules from the Brooklyn itemsets, filtered on lift with threshold 1.
# NOTE(review): the five rows displayed all have lift 1.0 and leverage 0.0
# (ASSAULT 3 & RELATED OFFENSES has support 1.0), i.e. they show independence;
# consider sorting by lift before taking head().
rules_brk = association_rules(frequent_itemsets_brk, metric="lift", min_threshold=1)
rules_brk.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(ASSAULT 3 & RELATED OFFENSES),(BURGLARY),1.0,0.907104,0.907104,0.907104,1.0,0.0,1.0
1,(BURGLARY),(ASSAULT 3 & RELATED OFFENSES),0.907104,1.0,0.907104,1.0,1.0,0.0,inf
2,(ASSAULT 3 & RELATED OFFENSES),(CRIMINAL MISCHIEF & RELATED OFFENSES),1.0,0.994536,0.994536,0.994536,1.0,0.0,1.0
3,(CRIMINAL MISCHIEF & RELATED OFFENSES),(ASSAULT 3 & RELATED OFFENSES),0.994536,1.0,0.994536,1.0,1.0,0.0,inf
4,(CRIMINAL TRESPASS),(ASSAULT 3 & RELATED OFFENSES),0.546448,1.0,0.546448,1.0,1.0,0.0,inf


Finding association of offenses likely to occur together in **Staten Island** on a given day.

In [None]:
# Build the Staten Island "basket" table: one row per arrest date, one column per
# offense description, each cell holding the number of arrests that day.
basket_si = (
    df.loc[df['ARREST_BORO'].eq("Staten Island")]
    .groupby(['ARREST_DATE', 'OFNS_DESC'])['ARREST_KEY']
    .count()
    .unstack()          # offenses become columns; missing date/offense pairs are NaN
    .reset_index()
    .fillna(0)          # no arrests recorded for that offense on that date
    .set_index('ARREST_DATE')
)

# Turn the counts into 0/1 presence flags.
basket_sets_si = basket_si.applymap(encode_units)

# Keep offense sets seen together on at least half of the dates.
frequent_itemsets_si = apriori(
    basket_sets_si,
    min_support=0.5,
    use_colnames=True,
)

In [None]:
# Derive association rules from the Staten Island itemsets, filtered on lift with threshold 1.
# NOTE(review): the displayed lifts are only slightly above 1 (about 1.01–1.03), so the
# associations shown are weak; head() shows the first rows, not the strongest ones.
rules_si = association_rules(frequent_itemsets_si, metric="lift", min_threshold=1)
rules_si.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(ASSAULT 3 & RELATED OFFENSES),(CRIMINAL MISCHIEF & RELATED OFFENSES),0.870879,0.651099,0.57967,0.665615,1.022295,0.012642,1.043412
1,(CRIMINAL MISCHIEF & RELATED OFFENSES),(ASSAULT 3 & RELATED OFFENSES),0.651099,0.870879,0.57967,0.890295,1.022295,0.012642,1.176986
2,(ASSAULT 3 & RELATED OFFENSES),(DANGEROUS DRUGS),0.870879,0.57967,0.521978,0.599369,1.033983,0.017155,1.049169
3,(DANGEROUS DRUGS),(ASSAULT 3 & RELATED OFFENSES),0.57967,0.870879,0.521978,0.900474,1.033983,0.017155,1.297357
4,(ASSAULT 3 & RELATED OFFENSES),(FELONY ASSAULT),0.870879,0.708791,0.620879,0.712934,1.005845,0.003608,1.014431
