In [2]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Lift pandas' display truncation so every column and row is rendered.
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, None)

In [3]:
# Load both CSV files, using each table's key column as the DataFrame index:
#   items        -> indexed by 'inisial'
#   transactions -> indexed by 'id_transaksi'
items = pd.read_csv(
    'items.csv',
    index_col='inisial',
)
transactions = pd.read_csv(
    'transactions.csv',
    index_col='id_transaksi',
)

In [4]:
# Display the item lookup table (index: 'inisial', column: 'jenis_obat').
items

Unnamed: 0_level_0,jenis_obat
inisial,Unnamed: 1_level_1
A,Antibiotik
B,Ekspektoran
C,Vitamin&Mineral
D,Hemostatik
E,Psikofarmaka


In [5]:
# Display the raw transactions; 'item_yang_dibeli' holds the purchased item
# initials concatenated into a single string (e.g. 'ABC').
transactions

Unnamed: 0_level_0,item_yang_dibeli
id_transaksi,Unnamed: 1_level_1
1,AB
2,ABCDE
3,BCD
4,ABCD
5,ABC
6,ABE
7,D
8,ACD
9,ABC
10,BCE


In [6]:
# Build a one-hot encoded DataFrame so the data is compatible with the
# apriori computation.


def build_basket(transactions, item_codes):
    """Return a one-hot purchase matrix for ``transactions``.

    Parameters
    ----------
    transactions : pandas.DataFrame
        Frame whose first column holds, for each transaction, the codes of the
        purchased items concatenated into one string (e.g. ``'ABD'``).
    item_codes : iterable of str
        Item codes to encode. Each code becomes one column, in iteration order.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``transactions``. A cell is 1 when the item code occurs
        in that transaction's string and 0 otherwise.

    Notes
    -----
    Codes are matched as literal substrings, so no code may be a substring of
    another code. A missing purchase string counts as "nothing bought".
    """
    purchased = transactions.iloc[:, 0]
    return pd.DataFrame(
        {
            # regex=False: treat the code literally (a code such as '.' or '+'
            # must not be interpreted as a pattern).
            # na=False: a missing value must not be read as "contains the item".
            code: purchased.str.contains(code, regex=False, na=False).astype('int64')
            for code in item_codes
        },
        # Reuse the real transaction ids instead of assuming they are 1..n.
        index=transactions.index,
    )


# Columns come from the item table itself, so new items need no code change.
basket = build_basket(transactions, items.index)
basket


Unnamed: 0_level_0,A,B,C,D,E
id_transaksi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1,1,0,0,0
2,1,1,1,1,1
3,0,1,1,1,0
4,1,1,1,1,0
5,1,1,1,0,0
6,1,1,0,0,1
7,0,0,0,1,0
8,1,0,1,1,0
9,1,1,1,0,0
10,0,1,1,0,1


In [7]:
# Find the itemsets whose support is at least 0.2.
# apriori expects a one-hot frame of booleans; casting the 0/1 integers here
# avoids mlxtend's DeprecationWarning for non-bool input and does not change
# the computed supports.
frequent_itemsets = apriori(basket.astype(bool), min_support=0.2, use_colnames=True)
print(frequent_itemsets)

     support   itemsets
0   0.666667        (A)
1   0.750000        (B)
2   0.666667        (C)
3   0.583333        (D)
4   0.250000        (E)
5   0.583333     (B, A)
6   0.416667     (C, A)
7   0.333333     (D, A)
8   0.500000     (B, C)
9   0.333333     (B, D)
10  0.250000     (B, E)
11  0.416667     (C, D)
12  0.333333  (B, A, C)
13  0.250000  (B, A, D)
14  0.250000  (C, A, D)
15  0.250000  (B, D, C)


In [8]:
# Derive the association rules whose confidence is at least 0.6, then show
# only the antecedents, consequents and confidence of each rule.
rules = association_rules(
    frequent_itemsets,
    metric='confidence',
    min_threshold=0.6,
)
rules.loc[:, ['antecedents', 'consequents', 'confidence']]

Unnamed: 0,antecedents,consequents,confidence
0,(B),(A),0.777778
1,(A),(B),0.875
2,(C),(A),0.625
3,(A),(C),0.625
4,(B),(C),0.666667
5,(C),(B),0.75
6,(E),(B),1.0
7,(C),(D),0.625
8,(D),(C),0.714286
9,"(B, C)",(A),0.666667
