In [9]:
#Here we want to understand which features are strongly associated to survived = yes. eg. if 1st class, Male->Yes OR female->yes
#Install 'mlxtend' Library if not installed already
!pip install mlxtend

In [1]:
import mlxtend

In [2]:
import pandas as pd 
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder

In [10]:
# Load the Titanic records into a DataFrame and display it.
# NOTE(review): the path is relative, so the CSV must be in the notebook's working directory.
df = pd.read_csv('Titanic (1).csv')
df

Unnamed: 0,Class,Gender,Age,Survived
0,3rd,Male,Child,No
1,3rd,Male,Child,No
2,3rd,Male,Child,No
3,3rd,Male,Child,No
4,3rd,Male,Child,No
...,...,...,...,...
2196,Crew,Female,Adult,Yes
2197,Crew,Female,Adult,Yes
2198,Crew,Female,Adult,Yes
2199,Crew,Female,Adult,Yes


In [11]:
# Dimensions of the loaded data as (rows, columns).
df.shape

(2201, 4)

In [12]:
# Number of records in each Class category.
df['Class'].value_counts()

Class
Crew    885
3rd     706
1st     325
2nd     285
Name: count, dtype: int64

In [13]:
# Number of records in each Gender category.
df['Gender'].value_counts()

Gender
Male      1731
Female     470
Name: count, dtype: int64

In [14]:
# Number of records in each Age category.
df['Age'].value_counts()

Age
Adult    2092
Child     109
Name: count, dtype: int64

In [15]:
# Number of records in each Survived category (the outcome the rules should explain).
df['Survived'].value_counts()

Survived
No     1490
Yes     711
Name: count, dtype: int64

## Pre-Processing

The data is not in transaction format, so each categorical column is one-hot encoded into boolean item columns before running Apriori.

In [16]:
# One-hot encode every categorical column into one indicator column per category
# (e.g. Class -> Class_1st, Class_2nd, ...), which is the item/transaction layout
# that apriori consumes.
# dtype=bool is passed explicitly: the default indicator dtype depends on the pandas
# version (uint8 before 2.0, bool from 2.0), and mlxtend expects boolean input.
# Note: this re-binds `df`; re-run the loading cell to get the raw frame back.
df = pd.get_dummies(df, dtype=bool)
df

Unnamed: 0,Class_1st,Class_2nd,Class_3rd,Class_Crew,Gender_Female,Gender_Male,Age_Adult,Age_Child,Survived_No,Survived_Yes
0,False,False,True,False,False,True,False,True,True,False
1,False,False,True,False,False,True,False,True,True,False
2,False,False,True,False,False,True,False,True,True,False
3,False,False,True,False,False,True,False,True,True,False
4,False,False,True,False,False,True,False,True,True,False
...,...,...,...,...,...,...,...,...,...,...
2196,False,False,False,True,True,False,True,False,False,True
2197,False,False,False,True,True,False,True,False,False,True
2198,False,False,False,True,True,False,True,False,False,True
2199,False,False,False,True,True,False,True,False,False,True


In [20]:
# Dimensions as (rows, columns); reflects the one-hot encoded frame when cells run in order.
df.shape

(2201, 10)

## Apriori Algorithm

In [23]:
# Mine the frequent itemsets (1-item itemsets, 2-item itemsets, etc.) that meet
# the minimum support criterion.
#   df           -- the DataFrame of item columns
#   min_support  -- float between 0 and 1: minimum support of the itemsets returned
#                   (library default is 0.5), where
#                   support = transactions_where_item(s)_occur / total_transactions
#   use_colnames -- report itemsets using column names
frequent_itemsets = apriori(
    df,
    min_support=0.1,
    use_colnames=True,
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.14766,(Class_1st)
1,0.129487,(Class_2nd)
2,0.320763,(Class_3rd)
3,0.40209,(Class_Crew)
4,0.213539,(Gender_Female)
5,0.786461,(Gender_Male)
6,0.950477,(Age_Adult)
7,0.676965,(Survived_No)
8,0.323035,(Survived_Yes)
9,0.144934,"(Age_Adult, Class_1st)"


In [24]:
# Derive association rules from the frequent itemsets.
# `min_threshold` applies to whichever `metric` is selected: here it keeps rules
# with lift >= 0.7. It is NOT a confidence threshold, and it still admits rules
# with lift < 1 (negatively associated itemsets).
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=0.7)
# Show the 20 rules with the highest lift ratio.
rules.sort_values('lift', ascending=False).head(20)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
65,"(Age_Adult, Gender_Female)",(Survived_Yes),0.193094,0.323035,0.143571,0.743529,2.301699,1.0,0.081195,2.639542,0.700873,0.385366,0.621146,0.593987
68,(Survived_Yes),"(Age_Adult, Gender_Female)",0.323035,0.193094,0.143571,0.444444,2.301699,1.0,0.081195,1.452431,0.835403,0.385366,0.311499,0.593987
19,(Gender_Female),(Survived_Yes),0.213539,0.323035,0.156293,0.731915,2.265745,1.0,0.087312,2.525187,0.710327,0.410992,0.60399,0.60787
18,(Survived_Yes),(Gender_Female),0.323035,0.213539,0.156293,0.483826,2.265745,1.0,0.087312,1.523634,0.825219,0.410992,0.343674,0.60787
69,(Gender_Female),"(Age_Adult, Survived_Yes)",0.213539,0.297138,0.143571,0.67234,2.262724,1.0,0.080121,2.145099,0.709577,0.391089,0.533821,0.57776
64,"(Age_Adult, Survived_Yes)",(Gender_Female),0.297138,0.213539,0.143571,0.48318,2.262724,1.0,0.080121,1.521732,0.793974,0.391089,0.342854,0.57776
100,"(Age_Adult, Gender_Male)","(Survived_No, Class_Crew)",0.757383,0.30577,0.304407,0.40192,1.31445,1.0,0.072822,1.160764,0.986022,0.401198,0.138498,0.698731
97,"(Survived_No, Class_Crew)","(Age_Adult, Gender_Male)",0.30577,0.757383,0.304407,0.995542,1.31445,1.0,0.072822,54.427079,0.344592,0.401198,0.981627,0.698731
48,"(Age_Adult, Gender_Male)",(Class_Crew),0.757383,0.40209,0.39164,0.517097,1.286022,1.0,0.087104,1.238157,0.916706,0.510059,0.192348,0.745554
49,(Class_Crew),"(Age_Adult, Gender_Male)",0.40209,0.757383,0.39164,0.974011,1.286022,1.0,0.087104,9.33548,0.371976,0.510059,0.892882,0.745554


In [26]:
# Keep only the rules whose lift exceeds 1.
rules.loc[rules['lift'] > 1]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Age_Adult),(Class_1st),0.950477,0.147660,0.144934,0.152486,1.032680,1.0,0.004587,1.005694,0.639010,0.152050,0.005661,0.567012
1,(Class_1st),(Age_Adult),0.147660,0.950477,0.144934,0.981538,1.032680,1.0,0.004587,2.682493,0.037128,0.152050,0.627212,0.567012
8,(Survived_No),(Class_3rd),0.676965,0.320763,0.239891,0.354362,1.104747,1.0,0.022745,1.052040,0.293515,0.316547,0.049466,0.551119
9,(Class_3rd),(Survived_No),0.320763,0.676965,0.239891,0.747875,1.104747,1.0,0.022745,1.281251,0.139592,0.316547,0.219513,0.551119
10,(Class_Crew),(Gender_Male),0.402090,0.786461,0.391640,0.974011,1.238474,1.0,0.075412,8.216621,0.322047,0.491448,0.878295,0.735995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101,"(Class_Crew, Gender_Male)","(Survived_No, Age_Adult)",0.391640,0.653339,0.304407,0.777262,1.189676,1.0,0.048533,1.556362,0.262074,0.411043,0.357476,0.621594
102,(Survived_No),"(Class_Crew, Age_Adult, Gender_Male)",0.676965,0.391640,0.304407,0.449664,1.148157,1.0,0.039280,1.105434,0.399458,0.398335,0.095378,0.613463
103,(Age_Adult),"(Survived_No, Class_Crew, Gender_Male)",0.950477,0.304407,0.304407,0.320268,1.052103,1.0,0.015075,1.023334,1.000000,0.320268,0.022802,0.660134
104,(Class_Crew),"(Survived_No, Age_Adult, Gender_Male)",0.402090,0.603816,0.304407,0.757062,1.253795,1.0,0.061619,1.630802,0.338549,0.433938,0.386805,0.630600
