In [3]:
!pip install conda



In [5]:
!pip install mlxtend

Collecting mlxtend
  Downloading mlxtend-0.18.0-py2.py3-none-any.whl (1.3 MB)
Installing collected packages: mlxtend
Successfully installed mlxtend-0.18.0


In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder

In [2]:
tit = pd.read_csv("G:/data sceince/Python/Association Rules/Titanic.csv")

In [3]:
tit.head()

Unnamed: 0,Class,Gender,Age,Survived
0,3rd,Male,Child,No
1,3rd,Male,Child,No
2,3rd,Male,Child,No
3,3rd,Male,Child,No
4,3rd,Male,Child,No


In [4]:
tit.shape

(2201, 4)

In [None]:
# Creating dummy variables as our data is categorical in nature

In [9]:
titanic = pd.get_dummies(tit)

In [10]:
titanic.head()

Unnamed: 0,Class_1st,Class_2nd,Class_3rd,Class_Crew,Gender_Female,Gender_Male,Age_Adult,Age_Child,Survived_No,Survived_Yes
0,0,0,1,0,0,1,0,1,1,0
1,0,0,1,0,0,1,0,1,1,0
2,0,0,1,0,0,1,0,1,1,0
3,0,0,1,0,0,1,0,1,1,0
4,0,0,1,0,0,1,0,1,1,0


In [None]:
# Now in case of this data set so many combinations are possible so, very large number of rules will be created and it is not desired.
# In order to get less number of rules we will calculate frequent sets using Apriori Algorithm. 

In [31]:
frequent_sets = apriori(titanic, min_support=0.1, use_colnames=True)
frequent_sets

Unnamed: 0,support,itemsets
0,0.14766,(Class_1st)
1,0.129487,(Class_2nd)
2,0.320763,(Class_3rd)
3,0.40209,(Class_Crew)
4,0.213539,(Gender_Female)
5,0.786461,(Gender_Male)
6,0.950477,(Age_Adult)
7,0.676965,(Survived_No)
8,0.323035,(Survived_Yes)
9,0.144934,"(Class_1st, Age_Adult)"


In [12]:
# Creating rules by using association rules function

In [32]:
rules = association_rules(frequent_sets, metric = 'lift', min_threshold = 0.9)  # Min_threshold is the value of lift ratio we are assuming
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Class_1st),(Age_Adult),0.147660,0.950477,0.144934,0.981538,1.032680,0.004587,2.682493
1,(Age_Adult),(Class_1st),0.950477,0.147660,0.144934,0.152486,1.032680,0.004587,1.005694
2,(Class_2nd),(Age_Adult),0.129487,0.950477,0.118582,0.915789,0.963505,-0.004492,0.588085
3,(Age_Adult),(Class_2nd),0.950477,0.129487,0.118582,0.124761,0.963505,-0.004492,0.994601
4,(Class_3rd),(Gender_Male),0.320763,0.786461,0.231713,0.722380,0.918520,-0.020555,0.769177
...,...,...,...,...,...,...,...,...,...
99,"(Survived_No, Gender_Male)","(Class_Crew, Age_Adult)",0.619718,0.402090,0.304407,0.491202,1.221623,0.055225,1.175143
100,(Age_Adult),"(Class_Crew, Survived_No, Gender_Male)",0.950477,0.304407,0.304407,0.320268,1.052103,0.015075,1.023334
101,(Class_Crew),"(Survived_No, Age_Adult, Gender_Male)",0.402090,0.603816,0.304407,0.757062,1.253795,0.061619,1.630802
102,(Survived_No),"(Class_Crew, Gender_Male, Age_Adult)",0.676965,0.391640,0.304407,0.449664,1.148157,0.039280,1.105434


In [None]:
# Based on the lift ratio calculated we will decide which rule is more important for finding the consequent items. If the lift ratio is greater than 1 that means the rule is very important. 
# Now in order to see which rule has the highest lift ratio we will sort the data in descending order using sort_values function

In [33]:
rules.sort_values('lift',ascending=False)[0:20]       # [0:20] means we are viewing only 1st 21 rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
64,"(Gender_Female, Age_Adult)",(Survived_Yes),0.193094,0.323035,0.143571,0.743529,2.301699,0.081195,2.639542
65,(Survived_Yes),"(Gender_Female, Age_Adult)",0.323035,0.193094,0.143571,0.444444,2.301699,0.081195,1.452431
19,(Gender_Female),(Survived_Yes),0.213539,0.323035,0.156293,0.731915,2.265745,0.087312,2.525187
18,(Survived_Yes),(Gender_Female),0.323035,0.213539,0.156293,0.483826,2.265745,0.087312,1.523634
66,(Gender_Female),"(Survived_Yes, Age_Adult)",0.213539,0.297138,0.143571,0.67234,2.262724,0.080121,2.145099
63,"(Survived_Yes, Age_Adult)",(Gender_Female),0.297138,0.213539,0.143571,0.48318,2.262724,0.080121,1.521732
97,"(Class_Crew, Survived_No)","(Gender_Male, Age_Adult)",0.30577,0.757383,0.304407,0.995542,1.31445,0.072822,54.427079
96,"(Gender_Male, Age_Adult)","(Class_Crew, Survived_No)",0.757383,0.30577,0.304407,0.40192,1.31445,0.072822,1.160764
46,"(Gender_Male, Age_Adult)",(Class_Crew),0.757383,0.40209,0.39164,0.517097,1.286022,0.087104,1.238157
47,(Class_Crew),"(Gender_Male, Age_Adult)",0.40209,0.757383,0.39164,0.974011,1.286022,0.087104,9.33548


In [None]:
# Final step

In [21]:
# We will only extract the rules where the lift ratio is greater than 1

In [34]:
[rules.lift>1]
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Class_1st),(Age_Adult),0.147660,0.950477,0.144934,0.981538,1.032680,0.004587,2.682493
1,(Age_Adult),(Class_1st),0.950477,0.147660,0.144934,0.152486,1.032680,0.004587,1.005694
2,(Class_2nd),(Age_Adult),0.129487,0.950477,0.118582,0.915789,0.963505,-0.004492,0.588085
3,(Age_Adult),(Class_2nd),0.950477,0.129487,0.118582,0.124761,0.963505,-0.004492,0.994601
4,(Class_3rd),(Gender_Male),0.320763,0.786461,0.231713,0.722380,0.918520,-0.020555,0.769177
...,...,...,...,...,...,...,...,...,...
99,"(Survived_No, Gender_Male)","(Class_Crew, Age_Adult)",0.619718,0.402090,0.304407,0.491202,1.221623,0.055225,1.175143
100,(Age_Adult),"(Class_Crew, Survived_No, Gender_Male)",0.950477,0.304407,0.304407,0.320268,1.052103,0.015075,1.023334
101,(Class_Crew),"(Survived_No, Age_Adult, Gender_Male)",0.402090,0.603816,0.304407,0.757062,1.253795,0.061619,1.630802
102,(Survived_No),"(Class_Crew, Gender_Male, Age_Adult)",0.676965,0.391640,0.304407,0.449664,1.148157,0.039280,1.105434
