## Percobaan dengan 15 Transaksi

In [7]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [8]:
# Load the raw transaction log (columns: Transaction, Item) and preview it.
data = pd.read_excel(io="../datasets/percobaan.xlsx")
data.head(n=100)

Unnamed: 0,Transaction,Item
0,1,Medialuna
1,1,Pastry
2,1,Coffe
3,1,Tea
4,2,Hot Chocolate
5,2,Jam
6,2,Cookies
7,3,Coffe
8,3,Pastry
9,3,Bread


## Grouping berdasarkan Transaksi dan Item

In [9]:
# Count the occurrences of each item inside each transaction.
item_count = (
    data
    .groupby(["Transaction", "Item"])["Item"]
    .count()
    .reset_index(name="Count")
)

In [10]:
# Preview the first ten (Transaction, Item, Count) rows.
item_count.head(n=10)

Unnamed: 0,Transaction,Item,Count
0,1,Coffe,1
1,1,Medialuna,1
2,1,Pastry,1
3,1,Tea,1
4,2,Cookies,1
5,2,Hot Chocolate,1
6,2,Jam,1
7,3,Bread,1
8,3,Coffe,1
9,3,Pastry,1


## Membuat Pivot Table

In [11]:
# Reshape into a transaction x item matrix; combinations that never occur
# come out of the pivot as NaN and are replaced by 0.
item_count_pivot = (
    item_count
    .pivot_table(index="Transaction", columns="Item", values="Count", aggfunc="sum")
    .fillna(0)
)
item_count_pivot.head(n=100)

Item,Basket,Bread,Coffe,Cookies,Ellen,Hot Chocolate,Jam,Juice,Medialuna,Muffin,Pastry,Tartine,Tea
Transaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
2,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0
5,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
6,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0
7,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
9,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [18]:
# The pivot + fillna(0) step leaves float counts (1.0 / 0.0). Cast them to
# integers explicitly so the integer table shown below is reproducible on a
# fresh kernel instead of relying on a no-op self-assignment.
item_count_pivot = item_count_pivot.astype(int)
item_count_pivot.head()

Item,Basket,Bread,Coffe,Cookies,Ellen,Hot Chocolate,Jam,Juice,Medialuna,Muffin,Pastry,Tartine,Tea
Transaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,0,0,1,0,0,0,0,0,1,0,1,0,1
2,0,0,0,1,0,1,1,0,0,0,0,0,0
3,0,1,1,0,0,0,0,0,0,0,1,0,0
4,0,0,0,0,0,0,0,0,1,1,1,0,0
5,0,1,1,0,0,0,0,0,1,0,0,0,0


## Mengubah data kuantitas menjadi 1 / 0

In [19]:
def encode(x: float) -> int:
    """Map a per-transaction item count to a 0/1 presence flag.

    Args:
        x: Number of times an item occurs in a transaction.

    Returns:
        1 when ``x`` is at least 1, otherwise 0. NaN and values strictly
        between 0 and 1 also map to 0, so the function never falls through
        to an implicit ``None``.
    """
    # A single two-way branch guarantees every input produces 0 or 1.
    return 1 if x >= 1 else 0

# Binarize every cell with `encode`. DataFrame.applymap is deprecated since
# pandas 2.1 in favour of DataFrame.map, so use `map` when this pandas version
# provides it and fall back to `applymap` on older releases.
if hasattr(pd.DataFrame, "map"):
    item_count_pivot = item_count_pivot.map(encode)
else:
    item_count_pivot = item_count_pivot.applymap(encode)
item_count_pivot.head()

Item,Basket,Bread,Coffe,Cookies,Ellen,Hot Chocolate,Jam,Juice,Medialuna,Muffin,Pastry,Tartine,Tea
Transaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,0,0,1,0,0,0,0,0,1,0,1,0,1
2,0,0,0,1,0,1,1,0,0,0,0,0,0
3,0,1,1,0,0,0,0,0,0,0,1,0,0
4,0,0,0,0,0,0,0,0,1,1,1,0,0
5,0,1,1,0,0,0,0,0,1,0,0,0,0


## Mencari Frequent Itemsets

In [20]:
# Mine itemsets whose support is at least `support`, using the basket matrix
# cast to booleans, then list them from most to least frequent.
support = 0.3
frequent_items = apriori(
    item_count_pivot.astype('bool'),
    min_support=support,
    use_colnames=True,
)
frequent_items.sort_values(by="support", ascending=False).head(n=100)

Unnamed: 0,support,itemsets
1,0.733333,(Coffe)
0,0.466667,(Bread)
2,0.466667,(Medialuna)
3,0.333333,(Pastry)
4,0.333333,"(Coffe, Bread)"
5,0.333333,"(Medialuna, Coffe)"


## Mencari Association Rule

In [15]:
# Derive association rules from the frequent itemsets, keep the key columns,
# and order the rules from highest to lowest confidence.
metric = "confidence"
min_threshold = 0.6

rules = (
    association_rules(frequent_items, metric=metric, min_threshold=min_threshold)
    [["antecedents", "consequents", "support", "confidence", "lift"]]
    .sort_values("confidence", ascending=False)
)
rules.head(n=100)

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(Bread),(Coffe),0.333333,0.714286,0.974026
1,(Medialuna),(Coffe),0.333333,0.714286,0.974026


In [16]:
from mlxtend.frequent_patterns import fpgrowth

In [17]:
# Repeat the mining step with FP-Growth using the same thresholds as the
# Apriori run. The basket matrix is cast to booleans, matching the Apriori
# cell, because mlxtend warns when given non-boolean frames.
support = 0.3
frequent_items = fpgrowth(
    item_count_pivot.astype('bool'),
    min_support=support,
    use_colnames=True,
)

metric = "confidence"
min_threshold = 0.6

# Keep the key rule columns and order by confidence without in-place mutation,
# so re-running the cell is idempotent.
rules = (
    association_rules(frequent_items, metric=metric, min_threshold=min_threshold)
    [["antecedents", "consequents", "support", "confidence", "lift"]]
    .sort_values("confidence", ascending=False)
)
rules.head(100)




Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(Medialuna),(Coffe),0.333333,0.714286,0.974026
1,(Bread),(Coffe),0.333333,0.714286,0.974026
