In [1]:
import pandas as pd
import warnings
import csv
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
# Ignore every warning raised after this point (presumably to keep the cell outputs uncluttered).
warnings.filterwarnings("ignore") 

# 1. Data Preprocessing

In [2]:
# Toy market-basket dataset: every inner list is one transaction,
# i.e. the items that were bought together.
data = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yougurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yougurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yougurt'],
    ['Corn', 'Onion', 'Kidney Beans', 'Ice Cream', 'Eggs'],
]

data

[['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yougurt'],
 ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yougurt'],
 ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
 ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yougurt'],
 ['Corn', 'Onion', 'Kidney Beans', 'Ice Cream', 'Eggs']]

In [3]:
# The transactions have to be converted into a DataFrame:
# each column is one unique item, and each row flags whether
# that item is present in the corresponding transaction.

trf = TransactionEncoder()
trf.fit(data)                    # learn the set of unique items
data_trf = trf.transform(data)   # encode each transaction against that item set

df = pd.DataFrame(data=data_trf, columns=trf.columns_)
df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice Cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yougurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


# 2. Modeling

## 2.1 Menghitung nilai support

In [4]:
# A support of 1 for Kidney Beans means the item appears in every transaction.
# Eggs appear in almost every transaction.
frequent_itemsets = (
    apriori(df, min_support=0.047, use_colnames=True)
    .sort_values(by='support', ascending=False)  # highest support first
)
frequent_itemsets

Unnamed: 0,support,itemsets
5,1.0,(Kidney Beans)
3,0.8,(Eggs)
27,0.8,"(Eggs, Kidney Beans)"
30,0.6,"(Onion, Eggs)"
8,0.6,(Onion)
...,...,...
56,0.2,"(Onion, Corn, Kidney Beans)"
55,0.2,"(Milk, Corn, Kidney Beans)"
54,0.2,"(Onion, Corn, Ice Cream)"
53,0.2,"(Kidney Beans, Corn, Ice Cream)"


## 2.2 Pembentukan aturan
- Jika rules yang terbentuk adalah A -> B, maka A disebut antecedents, sedangkan B disebut consequents
- Tidak hanya itemlist, setiap rules juga akan memiliki nilai support dan confidence
- Lift menunjukkan kekuatan aturan yang dibuat. Semakin besar nilai lift (di atas 1), semakin kuat aturan tersebut.
- Leverage mirip dengan lift, namun yang dihitung adalah selisihnya, yaitu support(A, B) − support(A) × support(B), bukan rasionya.
- nilai conviction yang tinggi berarti consequents sangat tergantung pada antecedents.

In [5]:
# Rule interpretation
# 1. People who buy (Kidney Beans) will buy (Eggs)
# 2. People who buy (Eggs) will buy (Kidney Beans)
rules1 = (
    association_rules(frequent_itemsets, metric="lift", min_threshold=1)
    .sort_values(by='lift', ascending=False)  # highest lift first
)
rules1

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
1181,(Ice Cream),"(Onion, Corn)",0.2,0.2,0.2,1.0,5.0,0.16,inf,1.0
701,"(Milk, Corn, Kidney Beans)","(Unicorn, Yougurt)",0.2,0.2,0.2,1.0,5.0,0.16,inf,1.0
563,(Unicorn),"(Milk, Yougurt, Corn)",0.2,0.2,0.2,1.0,5.0,0.16,inf,1.0
1048,"(Corn, Eggs)",(Ice Cream),0.2,0.2,0.2,1.0,5.0,0.16,inf,1.0
266,"(Ice Cream, Kidney Beans)","(Onion, Corn)",0.2,0.2,0.2,1.0,5.0,0.16,inf,1.0
...,...,...,...,...,...,...,...,...,...,...
382,"(Milk, Nutmeg, Yougurt, Eggs)",(Kidney Beans),0.2,1.0,0.2,1.0,1.0,0.00,inf,0.0
131,(Kidney Beans),"(Nutmeg, Yougurt)",1.0,0.4,0.4,0.4,1.0,0.00,1.0,0.0
1084,"(Eggs, Ice Cream)",(Kidney Beans),0.2,1.0,0.2,1.0,1.0,0.00,inf,0.0
1085,(Kidney Beans),"(Eggs, Ice Cream)",1.0,0.2,0.2,0.2,1.0,0.00,1.0,0.0


In [6]:
# Suppose we want a narrower set of rules that satisfy:
# 1. at least three items in the antecedent
# 2. confidence > 0.8
# 3. lift > 2.5

# Store the antecedent size on rules1 itself so it can be used as a filter
# (the column therefore also shows up wherever rules1 is displayed afterwards).
rules1['total_antecedents'] = rules1['antecedents'].apply(len)

rules2 = rules1.loc[
    rules1['total_antecedents'].ge(3)
    & rules1['confidence'].gt(0.8)
    & rules1['lift'].gt(2.5)
]

rules2

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,total_antecedents
701,"(Milk, Corn, Kidney Beans)","(Unicorn, Yougurt)",0.2,0.2,0.2,1.0,5.0,0.16,inf,1.0,3
707,"(Milk, Yougurt, Corn)","(Unicorn, Kidney Beans)",0.2,0.2,0.2,1.0,5.0,0.16,inf,1.0,3
706,"(Unicorn, Yougurt, Kidney Beans)","(Milk, Corn)",0.2,0.2,0.2,1.0,5.0,0.16,inf,1.0,3
704,"(Yougurt, Corn, Kidney Beans)","(Milk, Unicorn)",0.2,0.2,0.2,1.0,5.0,0.16,inf,1.0,3
260,"(Onion, Corn, Kidney Beans)",(Ice Cream),0.2,0.2,0.2,1.0,5.0,0.16,inf,1.0,3
205,"(Yougurt, Corn, Kidney Beans)",(Unicorn),0.2,0.2,0.2,1.0,5.0,0.16,inf,1.0,3
218,"(Milk, Corn, Kidney Beans)",(Unicorn),0.2,0.2,0.2,1.0,5.0,0.16,inf,1.0,3
554,"(Milk, Yougurt, Corn)",(Unicorn),0.2,0.2,0.2,1.0,5.0,0.16,inf,1.0,3
703,"(Milk, Unicorn, Kidney Beans)","(Yougurt, Corn)",0.2,0.2,0.2,1.0,5.0,0.16,inf,1.0,3
666,"(Corn, Onion, Eggs, Kidney Beans)",(Ice Cream),0.2,0.2,0.2,1.0,5.0,0.16,inf,1.0,4


In [8]:
# Keep only the rules whose support and confidence are both at least 0.80.
rules1.loc[rules1['support'].ge(0.80) & rules1['confidence'].ge(0.80)]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,total_antecedents
1,(Kidney Beans),(Eggs),1.0,0.8,0.8,0.8,1.0,0.0,1.0,0.0,1
0,(Eggs),(Kidney Beans),0.8,1.0,0.8,1.0,1.0,0.0,inf,0.0,1
