# Tutorial 5: Association Rules Mining


## Dataset definition and preprocessing

### As the apriori algorithm does not accept numerical values, the income is discretized

In [103]:
# Bank dataset preprocessing: turn every record of the CSV into a list of
# "column=value" strings (one list per row), collected in `dataset`.
import pandas as pd

# Load the data and discard the "id" column.
df = pd.read_csv("./bank-data.csv").drop(columns="id")

# Replace the numeric income by one of 10 interval labels.
df["income"] = pd.cut(df["income"], bins=10)

# One inner list per row; each entry is "<column name>=<value as string>".
dataset = [
    ["=".join((name, str(record[name]))) for name in df.columns]
    for _, record in df.iterrows()
]

### The Apriori algorithm finds the frequent itemsets in a dataset so that they can be used to generate association rules

In [104]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

# Encode the transactions in `dataset` into an array with one column per
# item; `te.columns_` supplies the item labels used as column names.
te = TransactionEncoder()
te_ary = te.fit_transform(dataset)

# NOTE: this rebinds `df` -- from here on it is the encoded transaction
# table, no longer the DataFrame loaded from the CSV.
df = pd.DataFrame(te_ary, columns=te.columns_)

# Mine itemsets with a minimum support of 0.3; use_colnames=True makes the
# result show item labels (e.g. "car=NO").
frequent_itemsets = apriori(df, min_support=0.3, use_colnames=True)

frequent_itemsets

Unnamed: 0,support,itemsets
0,0.506667,(car=NO)
1,0.493333,(car=YES)
2,0.438333,(children=0)
3,0.758333,(current_act=YES)
4,0.34,(married=NO)
5,0.66,(married=YES)
6,0.651667,(mortgage=NO)
7,0.348333,(mortgage=YES)
8,0.543333,(pep=NO)
9,0.456667,(pep=YES)


### Finally, the association rules are generated from this list

#### Rules filtered by a minimum confidence of 0.7

In [110]:
from mlxtend.frequent_patterns import association_rules

# Generate rules from the frequent itemsets, selecting on the confidence
# metric with a minimum threshold of 0.7.
rules_confidence = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.7,
)
rules_confidence

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(car=NO),(current_act=YES),0.506667,0.758333,0.391667,0.773026,1.019375,0.007444,1.064734
1,(car=YES),(current_act=YES),0.493333,0.758333,0.366667,0.743243,0.980101,-0.007444,0.941228
2,(car=YES),(save_act=YES),0.493333,0.69,0.348333,0.706081,1.023306,0.007933,1.054713
3,(children=0),(current_act=YES),0.438333,0.758333,0.331667,0.756654,0.997785,-0.000736,0.993099
4,(married=YES),(current_act=YES),0.66,0.758333,0.488333,0.739899,0.975691,-0.012167,0.929126
5,(mortgage=NO),(current_act=YES),0.651667,0.758333,0.501667,0.769821,1.015149,0.007486,1.049907
6,(pep=NO),(current_act=YES),0.543333,0.758333,0.406667,0.748466,0.986988,-0.005361,0.960772
7,(pep=YES),(current_act=YES),0.456667,0.758333,0.351667,0.770073,1.015481,0.005361,1.051058
8,(region=INNER_CITY),(current_act=YES),0.448333,0.758333,0.341667,0.762082,1.004943,0.001681,1.015755
9,(current_act=YES),(save_act=YES),0.758333,0.69,0.531667,0.701099,1.016085,0.008417,1.037132


#### Rules filtered by a minimum support of 0.4

In [120]:
# Generate rules from the frequent itemsets, selecting on the support
# metric with a minimum threshold of 0.4.
# Relies on `association_rules` having been imported in the confidence cell.
rules_support = association_rules(
    frequent_itemsets,
    metric="support",
    min_threshold=0.4,
)
rules_support

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(current_act=YES),(married=YES),0.758333,0.66,0.488333,0.643956,0.975691,-0.012167,0.954938
1,(married=YES),(current_act=YES),0.66,0.758333,0.488333,0.739899,0.975691,-0.012167,0.929126
2,(current_act=YES),(mortgage=NO),0.758333,0.651667,0.501667,0.661538,1.015149,0.007486,1.029167
3,(mortgage=NO),(current_act=YES),0.651667,0.758333,0.501667,0.769821,1.015149,0.007486,1.049907
4,(current_act=YES),(pep=NO),0.758333,0.543333,0.406667,0.536264,0.986988,-0.005361,0.984755
5,(pep=NO),(current_act=YES),0.543333,0.758333,0.406667,0.748466,0.986988,-0.005361,0.960772
6,(current_act=YES),(save_act=YES),0.758333,0.69,0.531667,0.701099,1.016085,0.008417,1.037132
7,(save_act=YES),(current_act=YES),0.69,0.758333,0.531667,0.770531,1.016085,0.008417,1.053158
8,(married=YES),(mortgage=NO),0.66,0.651667,0.435,0.659091,1.011393,0.0049,1.021778
9,(mortgage=NO),(married=YES),0.651667,0.66,0.435,0.667519,1.011393,0.0049,1.022615


#### Rules filtered by a minimum lift of 1

In [125]:
# Generate rules from the frequent itemsets, selecting on the lift metric
# with a minimum threshold of 1.
# Relies on `association_rules` having been imported in the confidence cell.
rules_lift = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1,
)
rules_lift

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(car=NO),(current_act=YES),0.506667,0.758333,0.391667,0.773026,1.019375,0.007444,1.064734
1,(current_act=YES),(car=NO),0.758333,0.506667,0.391667,0.516484,1.019375,0.007444,1.020303
2,(car=NO),(married=YES),0.506667,0.66,0.336667,0.664474,1.006778,0.002267,1.013333
3,(married=YES),(car=NO),0.66,0.506667,0.336667,0.510101,1.006778,0.002267,1.00701
4,(car=YES),(mortgage=NO),0.493333,0.651667,0.323333,0.655405,1.005737,0.001844,1.01085
5,(mortgage=NO),(car=YES),0.651667,0.493333,0.323333,0.496164,1.005737,0.001844,1.005618
6,(car=YES),(save_act=YES),0.493333,0.69,0.348333,0.706081,1.023306,0.007933,1.054713
7,(save_act=YES),(car=YES),0.69,0.493333,0.348333,0.504831,1.023306,0.007933,1.02322
8,(married=YES),(children=0),0.66,0.438333,0.3,0.454545,1.036986,0.0107,1.029722
9,(children=0),(married=YES),0.438333,0.66,0.3,0.684411,1.036986,0.0107,1.077349
