# Import libraries 

In [4]:
!pip install mlxtend
import pandas as pd
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder


Collecting mlxtend
  Downloading mlxtend-0.21.0-py2.py3-none-any.whl (1.3 MB)
     ---------------------------------------- 1.3/1.3 MB 1.4 MB/s eta 0:00:00
Installing collected packages: mlxtend
Successfully installed mlxtend-0.21.0


In [5]:
# Load the Titanic records from a CSV file located in the working directory.
titanic = pd.read_csv("Titanic.csv")
# Preview the first rows to check which columns were read.
titanic.head()

Unnamed: 0,Class,Gender,Age,Survived
0,3rd,Male,Child,No
1,3rd,Male,Child,No
2,3rd,Male,Child,No
3,3rd,Male,Child,No
4,3rd,Male,Child,No


# Pre-Processing
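The data is not in transaction format (one indicator column per item).
`TransactionEncoder` expects lists of items, so for this tabular data we one-hot encode the columns with `pd.get_dummies` instead.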

In [6]:
# One-hot encode every categorical column so that each category value
# becomes its own indicator column (e.g. Class_3rd, Survived_No).
# dtype=bool is passed explicitly: mlxtend recommends boolean input for
# apriori, and the default dtype of get_dummies differs between pandas
# versions (uint8 before 2.0, bool from 2.0 on).
df = pd.get_dummies(titanic, dtype=bool)
df.head()

Unnamed: 0,Class_1st,Class_2nd,Class_3rd,Class_Crew,Gender_Female,Gender_Male,Age_Adult,Age_Child,Survived_No,Survived_Yes
0,0,0,1,0,0,1,0,1,1,0
1,0,0,1,0,0,1,0,1,1,0
2,0,0,1,0,0,1,0,1,1,0
3,0,0,1,0,0,1,0,1,1,0
4,0,0,1,0,0,1,0,1,1,0


# Apriori Algorithm 

In [15]:
# Mine frequent itemsets from the one-hot encoded frame `df`.
# min_support=0.2 is the minimum support an itemset needs to be reported
# (i.e. it must occur in at least 20% of the rows); use_colnames=True labels
# the itemsets with the column names of `df` instead of column indices.
frequent_itemsets = apriori(df, min_support=0.2, use_colnames=True)
frequent_itemsets



Unnamed: 0,support,itemsets
0,0.320763,(Class_3rd)
1,0.40209,(Class_Crew)
2,0.213539,(Gender_Female)
3,0.786461,(Gender_Male)
4,0.950477,(Age_Adult)
5,0.676965,(Survived_No)
6,0.323035,(Survived_Yes)
7,0.231713,"(Gender_Male, Class_3rd)"
8,0.284871,"(Age_Adult, Class_3rd)"
9,0.239891,"(Survived_No, Class_3rd)"


In [16]:
# Derive association rules from the frequent itemsets, keeping the rules
# whose lift is at least 0.7. The threshold is below 1, so rules with
# lift < 1 (antecedent and consequent co-occur less often than expected
# under independence) are kept as well.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=0.7)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Gender_Male),(Class_3rd),0.786461,0.320763,0.231713,0.294627,0.918520,-0.020555,0.962947
1,(Class_3rd),(Gender_Male),0.320763,0.786461,0.231713,0.722380,0.918520,-0.020555,0.769177
2,(Age_Adult),(Class_3rd),0.950477,0.320763,0.284871,0.299713,0.934375,-0.020008,0.969941
3,(Class_3rd),(Age_Adult),0.320763,0.950477,0.284871,0.888102,0.934375,-0.020008,0.442572
4,(Survived_No),(Class_3rd),0.676965,0.320763,0.239891,0.354362,1.104747,0.022745,1.052040
...,...,...,...,...,...,...,...,...,...
65,"(Age_Adult, Class_Crew)","(Gender_Male, Survived_No)",0.402090,0.619718,0.304407,0.757062,1.221623,0.055225,1.565346
66,(Gender_Male),"(Age_Adult, Survived_No, Class_Crew)",0.786461,0.305770,0.304407,0.387060,1.265851,0.063931,1.132622
67,(Survived_No),"(Gender_Male, Age_Adult, Class_Crew)",0.676965,0.391640,0.304407,0.449664,1.148157,0.039280,1.105434
68,(Class_Crew),"(Gender_Male, Survived_No, Age_Adult)",0.402090,0.603816,0.304407,0.757062,1.253795,0.061619,1.630802


##### A leverage value of 0 indicates independence; its range is [-1, 1].
A high conviction value means that the consequent is highly dependent on the antecedent; its range is [0, inf].

In [17]:
# Show the 20 rules with the highest lift, strongest first.
rules.sort_values(by="lift", ascending=False).head(20)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
63,"(Survived_No, Class_Crew)","(Gender_Male, Age_Adult)",0.30577,0.757383,0.304407,0.995542,1.31445,0.072822,54.427079
62,"(Gender_Male, Age_Adult)","(Survived_No, Class_Crew)",0.757383,0.30577,0.304407,0.40192,1.31445,0.072822,1.160764
37,(Class_Crew),"(Gender_Male, Age_Adult)",0.40209,0.757383,0.39164,0.974011,1.286022,0.087104,9.33548
32,"(Gender_Male, Age_Adult)",(Class_Crew),0.757383,0.40209,0.39164,0.517097,1.286022,0.087104,1.238157
41,(Gender_Male),"(Survived_No, Class_Crew)",0.786461,0.30577,0.304407,0.38706,1.265851,0.063931,1.132622
40,"(Survived_No, Class_Crew)",(Gender_Male),0.30577,0.786461,0.304407,0.995542,1.265851,0.063931,47.903983
66,(Gender_Male),"(Age_Adult, Survived_No, Class_Crew)",0.786461,0.30577,0.304407,0.38706,1.265851,0.063931,1.132622
59,"(Age_Adult, Survived_No, Class_Crew)",(Gender_Male),0.30577,0.786461,0.304407,0.995542,1.265851,0.063931,47.903983
68,(Class_Crew),"(Gender_Male, Survived_No, Age_Adult)",0.40209,0.603816,0.304407,0.757062,1.253795,0.061619,1.630802
57,"(Gender_Male, Survived_No, Age_Adult)",(Class_Crew),0.603816,0.40209,0.304407,0.504138,1.253795,0.061619,1.2058


In [18]:
# Keep only the rules whose lift is strictly greater than 1.
rules.loc[rules["lift"] > 1]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
4,(Survived_No),(Class_3rd),0.676965,0.320763,0.239891,0.354362,1.104747,0.022745,1.05204
5,(Class_3rd),(Survived_No),0.320763,0.676965,0.239891,0.747875,1.104747,0.022745,1.281251
6,(Gender_Male),(Class_Crew),0.786461,0.40209,0.39164,0.497978,1.238474,0.075412,1.191004
7,(Class_Crew),(Gender_Male),0.40209,0.786461,0.39164,0.974011,1.238474,0.075412,8.216621
8,(Age_Adult),(Class_Crew),0.950477,0.40209,0.40209,0.42304,1.052103,0.019913,1.036311
9,(Class_Crew),(Age_Adult),0.40209,0.950477,0.40209,1.0,1.052103,0.019913,inf
10,(Survived_No),(Class_Crew),0.676965,0.40209,0.30577,0.451678,1.123325,0.033569,1.090436
11,(Class_Crew),(Survived_No),0.40209,0.676965,0.30577,0.760452,1.123325,0.033569,1.348519
12,(Gender_Male),(Age_Adult),0.786461,0.950477,0.757383,0.963027,1.013204,0.00987,1.339441
13,(Age_Adult),(Gender_Male),0.950477,0.786461,0.757383,0.796845,1.013204,0.00987,1.051116


In [19]:
# Print the docstring of association_rules (its parameters and the formulas
# of the supported metrics).
help(association_rules)

Help on function association_rules in module mlxtend.frequent_patterns.association_rules:

association_rules(df, metric='confidence', min_threshold=0.8, support_only=False)
    Generates a DataFrame of association rules including the
    metrics 'score', 'confidence', and 'lift'
    
    Parameters
    -----------
    df : pandas DataFrame
      pandas DataFrame of frequent itemsets
      with columns ['support', 'itemsets']
    
    metric : string (default: 'confidence')
      Metric to evaluate if a rule is of interest.
      **Automatically set to 'support' if `support_only=True`.**
      Otherwise, supported metrics are 'support', 'confidence', 'lift',
      'leverage', and 'conviction'
      These metrics are computed as follows:
    
      - support(A->C) = support(A+C) [aka 'support'], range: [0, 1]
    
      - confidence(A->C) = support(A+C) / support(A), range: [0, 1]
    
      - lift(A->C) = confidence(A->C) / support(C), range: [0, inf]
    
      - leverage(A->C) = suppo