In [1]:
# Instal library
%pip install mlxtend
import pandas as pd
import pickle
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth




In [2]:
# Load dataset dari file Excel
data = pd.read_excel("C:\\Users\\Republic Of Gamers\\Downloads\\BulanMei2022.xlsx")

In [3]:
# Preprocessing data
# Mengubah atribut yang dibutuhkan (order no, item name, qty) menjadi format transaksi
transactions = data.groupby('order no').apply(lambda x: list(x['item name'])).tolist()

In [4]:
# Encode transaksi menggunakan TransactionEncoder
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

In [5]:
# Langkah 4: Pemodelan
# Penerapan algoritma FP-Growth untuk mendapatkan frequent itemsets
frequent_itemsets = fpgrowth(df_encoded, min_support=0.01, use_colnames=True)

In [6]:
# Sortir frequent itemsets berdasarkan support
frequent_itemsets = frequent_itemsets.sort_values(by='support', ascending=False)

In [7]:
# Membuat FP-Tree berdasarkan frequent itemsets
fp_tree = {}
for index, row in frequent_itemsets.iterrows():
    current_node = fp_tree
    for item in row['itemsets']:
        if item not in current_node:
            current_node[item] = {'count': row['support'], 'children': {}}
        else:
            current_node[item]['count'] += row['support']
        current_node = current_node[item]['children']

In [8]:
# Fungsi untuk pembangkitan Conditional Pattern Base
def generate_conditional_pattern_base(tree, prefix):
    conditional_pattern_base = {}

    for item, node in tree.items():
        new_prefix = prefix.copy()
        new_prefix.append(item)
        support = node['count']

        # Tambahkan ke conditional pattern base
        conditional_pattern_base[tuple(new_prefix)] = support

        # Rekursif untuk anak-anak node
        conditional_pattern_base.update(generate_conditional_pattern_base(node['children'], new_prefix))

    return conditional_pattern_base

In [9]:
# Menggunakan FP-Tree untuk pembangkitan Conditional Pattern Base
prefix_example = []
conditional_pattern_base_result = generate_conditional_pattern_base(fp_tree, prefix_example)

In [10]:
# Tampilan hasil
print("4. Pembangkitan Conditional Pattern Base:")
for pattern, support in conditional_pattern_base_result.items():
    print(f"{pattern}: {support}")

4. Pembangkitan Conditional Pattern Base:
('Medan Jaya 2000',): 0.0949845895208742
('Kerupuk Putih Jaring 2.000',): 0.10927430652843934
('Kerupuk Putih Jaring 2.000', 'Medan Jaya 2000'): 0.017091622303166153
('Roti Ikan Hiu Kacang',): 0.130568786775007
('Roti Ikan Hiu Kacang', 'Medan Jaya 2000'): 0.017371812832726253
('Roti Ikan Hiu Kacang', 'Kerupuk Putih Jaring 2.000'): 0.012608573830204538
('Roti Ikan Hiu Kacang', 'Pok-Pok Chicken'): 0.011768002241524236
('Kerupuk Kuning',): 0.13617259736620901
('Kerupuk Kuning', 'Kerupuk Putih Jaring 2.000'): 0.022135051835247967
('Kerupuk Kuning', 'Medan Jaya 2000'): 0.014569907537125245
('Kerupuk Kuning', 'Kerupuk Kulit'): 0.010647240123283833
('Pok-Pok Chicken',): 0.09106192210703279
('Pok-Pok Chicken', 'Medan Jaya 2000'): 0.017932193891846457
('Nabati Wafer Coklat 46 gr',): 0.11403754553096104
('Nabati Wafer Coklat 46 gr', 'Nabati Wafer Keju 46 gr'): 0.016531241244045952
('Nabati Wafer Coklat 46 gr', 'Roti Ikan Hiu Kacang'): 0.01569066965536564

In [11]:
# Fungsi untuk pembangkitan Conditional FP-tree
def build_conditional_fp_tree(conditional_pattern_base):
    conditional_fp_tree = {}

    for pattern, support in conditional_pattern_base.items():
        current_node = conditional_fp_tree

        for item in pattern:
            if item not in current_node:
                current_node[item] = {'count': support, 'parent': None, 'children': {}}
            else:
                current_node[item]['count'] += support

            if 'parent' in current_node[item]:
                current_node[item]['parent'] = current_node

            current_node = current_node[item]['children']

    return conditional_fp_tree

In [12]:
# Menggunakan Conditional Pattern Base untuk membangun Conditional FP-tree
conditional_fp_tree_result = build_conditional_fp_tree(conditional_pattern_base_result)

In [13]:
# Tampilan hasil
print("\n5. Pembangkitan Conditional FP-tree:")
print(conditional_fp_tree_result)


5. Pembangkitan Conditional FP-tree:
{'Medan Jaya 2000': {'count': 0.0949845895208742, 'parent': {...}, 'children': {}}, 'Kerupuk Putih Jaring 2.000': {'count': 0.1263659288316055, 'parent': {...}, 'children': {'Medan Jaya 2000': {'count': 0.017091622303166153, 'parent': {...}, 'children': {}}}}, 'Roti Ikan Hiu Kacang': {'count': 0.17231717567946203, 'parent': {...}, 'children': {'Medan Jaya 2000': {'count': 0.017371812832726253, 'parent': {...}, 'children': {}}, 'Kerupuk Putih Jaring 2.000': {'count': 0.012608573830204538, 'parent': {...}, 'children': {}}, 'Pok-Pok Chicken': {'count': 0.011768002241524236, 'parent': {...}, 'children': {}}}}, 'Kerupuk Kuning': {'count': 0.18352479686186607, 'parent': {...}, 'children': {'Kerupuk Putih Jaring 2.000': {'count': 0.022135051835247967, 'parent': {...}, 'children': {}}, 'Medan Jaya 2000': {'count': 0.014569907537125245, 'parent': {...}, 'children': {}}, 'Kerupuk Kulit': {'count': 0.010647240123283833, 'parent': {...}, 'children': {}}}}, 'Po

In [14]:
# Fungsi untuk pembangkitan Frequent Pattern
def generate_frequent_patterns(tree, min_support, prefix, frequent_patterns):
    for item, node in tree.items():
        new_prefix = prefix.copy()
        new_prefix.append(item)
        support = node['count']

        # Tambahkan ke frequent patterns jika support mencukupi
        if support >= min_support:
            frequent_patterns[tuple(new_prefix)] = support

        # Rekursif untuk anak-anak node
        generate_frequent_patterns(node['children'], min_support, new_prefix, frequent_patterns)

In [15]:
# Menggunakan Conditional FP-tree untuk pembangkitan Frequent Patterns
min_support_threshold = 0.01
frequent_patterns_result = {}
generate_frequent_patterns(conditional_fp_tree_result, min_support_threshold, [], frequent_patterns_result)

In [16]:
# Tampilan hasil
print("\n6. Pembangkitan Frequent Patterns:")
for pattern, support in frequent_patterns_result.items():
    print(f"{pattern}: {support}")


6. Pembangkitan Frequent Patterns:
('Medan Jaya 2000',): 0.0949845895208742
('Kerupuk Putih Jaring 2.000',): 0.1263659288316055
('Kerupuk Putih Jaring 2.000', 'Medan Jaya 2000'): 0.017091622303166153
('Roti Ikan Hiu Kacang',): 0.17231717567946203
('Roti Ikan Hiu Kacang', 'Medan Jaya 2000'): 0.017371812832726253
('Roti Ikan Hiu Kacang', 'Kerupuk Putih Jaring 2.000'): 0.012608573830204538
('Roti Ikan Hiu Kacang', 'Pok-Pok Chicken'): 0.011768002241524236
('Kerupuk Kuning',): 0.18352479686186607
('Kerupuk Kuning', 'Kerupuk Putih Jaring 2.000'): 0.022135051835247967
('Kerupuk Kuning', 'Medan Jaya 2000'): 0.014569907537125245
('Kerupuk Kuning', 'Kerupuk Kulit'): 0.010647240123283833
('Pok-Pok Chicken',): 0.10899411599887925
('Pok-Pok Chicken', 'Medan Jaya 2000'): 0.017932193891846457
('Nabati Wafer Coklat 46 gr',): 0.16195012608573828
('Nabati Wafer Coklat 46 gr', 'Nabati Wafer Keju 46 gr'): 0.016531241244045952
('Nabati Wafer Coklat 46 gr', 'Roti Ikan Hiu Kacang'): 0.015690669655365648
('N

In [17]:
# Fungsi untuk menghasilkan Frequent 2-itemsets
def generate_frequent_2_itemsets(tree, min_support, frequent_2_itemsets):
    # Iterasi melalui setiap item pada Conditional FP-tree
    for item, node in tree.items():
        support_item = node['count']
        # Cek apakah item tersebut memenuhi syarat support
        if support_item >= min_support:
            # Jika memenuhi, iterasi lagi untuk mencari item yang memiliki support cukup
            for child_item, child_node in node['children'].items():
                support_2_itemset = child_node['count']
                # Cek apakah pasangan item memenuhi syarat support
                if support_2_itemset >= min_support:
                    # Tambahkan ke frequent 2-itemsets
                    frequent_2_itemsets[(item, child_item)] = support_2_itemset

In [18]:
# Menggunakan fungsi untuk menghasilkan Frequent 2-itemsets
min_support_2_itemsets = 0.01
frequent_2_itemsets_result = {}
generate_frequent_2_itemsets(conditional_fp_tree_result, min_support_2_itemsets, frequent_2_itemsets_result)

In [19]:
# Tampilan hasil Frequent 2-itemsets
print("\n7. Pembangkitan Frequent 2-itemsets:")
for itemset, support in frequent_2_itemsets_result.items():
    print(f"{itemset}: {support}")


7. Pembangkitan Frequent 2-itemsets:
('Kerupuk Putih Jaring 2.000', 'Medan Jaya 2000'): 0.017091622303166153
('Roti Ikan Hiu Kacang', 'Medan Jaya 2000'): 0.017371812832726253
('Roti Ikan Hiu Kacang', 'Kerupuk Putih Jaring 2.000'): 0.012608573830204538
('Roti Ikan Hiu Kacang', 'Pok-Pok Chicken'): 0.011768002241524236
('Kerupuk Kuning', 'Kerupuk Putih Jaring 2.000'): 0.022135051835247967
('Kerupuk Kuning', 'Medan Jaya 2000'): 0.014569907537125245
('Kerupuk Kuning', 'Kerupuk Kulit'): 0.010647240123283833
('Pok-Pok Chicken', 'Medan Jaya 2000'): 0.017932193891846457
('Nabati Wafer Coklat 46 gr', 'Nabati Wafer Keju 46 gr'): 0.016531241244045952
('Nabati Wafer Coklat 46 gr', 'Roti Ikan Hiu Kacang'): 0.015690669655365648
('Nabati Wafer Coklat 46 gr', 'Medan Jaya 2000'): 0.015690669655365648
('HoHo Rasa Balado', 'Medan Jaya 2000'): 0.01288876435976464
('HoHo Rasa Balado', 'Pok-Pok Chicken'): 0.011207621182404036
('Nabati Wafer Keju 46 gr', 'Medan Jaya 2000'): 0.01316895488932474
('Tricks Kimci

In [20]:
# Mencari Support 2 Itemset
support_2_itemset_result = {}
total_transactions = len(transactions)

for itemset, support in frequent_2_itemsets_result.items():
    support_2_itemset_result[itemset] = support / total_transactions

In [21]:
# Tampilan hasil Support 2 Itemset
print("\n8. Mencari Support 2 Itemset:")
for itemset, support in support_2_itemset_result.items():
    print(f"{itemset}: {support}")


8. Mencari Support 2 Itemset:
('Kerupuk Putih Jaring 2.000', 'Medan Jaya 2000'): 4.788910704165355e-06
('Roti Ikan Hiu Kacang', 'Medan Jaya 2000'): 4.867417437020525e-06
('Roti Ikan Hiu Kacang', 'Kerupuk Putih Jaring 2.000'): 3.532802978482639e-06
('Roti Ikan Hiu Kacang', 'Pok-Pok Chicken'): 3.2972827799171297e-06
('Kerupuk Kuning', 'Kerupuk Putih Jaring 2.000'): 6.202031895558411e-06
('Kerupuk Kuning', 'Medan Jaya 2000'): 4.082350108468828e-06
('Kerupuk Kuning', 'Kerupuk Kulit'): 2.983255848496451e-06
('Pok-Pok Chicken', 'Medan Jaya 2000'): 5.024430902730865e-06
('Nabati Wafer Coklat 46 gr', 'Nabati Wafer Keju 46 gr'): 4.631897238455016e-06
('Nabati Wafer Coklat 46 gr', 'Roti Ikan Hiu Kacang'): 4.396377039889506e-06
('Nabati Wafer Coklat 46 gr', 'Medan Jaya 2000'): 4.396377039889506e-06
('HoHo Rasa Balado', 'Medan Jaya 2000'): 3.611309711337809e-06
('HoHo Rasa Balado', 'Pok-Pok Chicken'): 3.1402693142067905e-06
('Nabati Wafer Keju 46 gr', 'Medan Jaya 2000'): 3.6898164441929784e-06
('

In [22]:
# Mencari Confidence 2 Itemset
confidence_2_itemset_result = {}

for itemset, support in frequent_2_itemsets_result.items():
    item_A, item_B = itemset
    support_A = frequent_patterns_result.get((item_A,), 0)
    confidence = support / support_A
    confidence_2_itemset_result[itemset] = confidence

In [23]:
# Tampilan hasil Confidence 2 Itemset
print("\n9. Mencari Confidence 2 Itemset:")
for itemset, confidence in confidence_2_itemset_result.items():
    print(f"{itemset}: {confidence}")


9. Mencari Confidence 2 Itemset:
('Kerupuk Putih Jaring 2.000', 'Medan Jaya 2000'): 0.1352549889135255
('Roti Ikan Hiu Kacang', 'Medan Jaya 2000'): 0.1008130081300813
('Roti Ikan Hiu Kacang', 'Kerupuk Putih Jaring 2.000'): 0.07317073170731707
('Roti Ikan Hiu Kacang', 'Pok-Pok Chicken'): 0.06829268292682927
('Kerupuk Kuning', 'Kerupuk Putih Jaring 2.000'): 0.12061068702290076
('Kerupuk Kuning', 'Medan Jaya 2000'): 0.07938931297709924
('Kerupuk Kuning', 'Kerupuk Kulit'): 0.058015267175572524
('Pok-Pok Chicken', 'Medan Jaya 2000'): 0.16452442159383032
('Nabati Wafer Coklat 46 gr', 'Nabati Wafer Keju 46 gr'): 0.10207612456747407
('Nabati Wafer Coklat 46 gr', 'Roti Ikan Hiu Kacang'): 0.09688581314878894
('Nabati Wafer Coklat 46 gr', 'Medan Jaya 2000'): 0.09688581314878894
('HoHo Rasa Balado', 'Medan Jaya 2000'): 0.1229946524064171
('HoHo Rasa Balado', 'Pok-Pok Chicken'): 0.10695187165775401
('Nabati Wafer Keju 46 gr', 'Medan Jaya 2000'): 0.16666666666666669
('Tricks Kimci', 'Tricks Rendang

In [24]:
# Langkah 5: Evaluasi
# Evaluasi kualitas aturan asosiasi dapat dilakukan dengan menghitung metrik seperti confidence, lift, dan support.
# Fungsi untuk menghitung lift
def calculate_lift(itemset, support_AB, support_A, support_B):
    return support_AB / (support_A * support_B)

In [25]:
# Fungsi untuk mengevaluasi kualitas aturan asosiasi
def evaluate_association_rules(association_rules, frequent_patterns):
    evaluation_results = {}
    for itemset, support_AB in association_rules.items():
        item_A, item_B = itemset
        support_A = frequent_patterns.get((item_A,), 0)
        support_B = frequent_patterns.get((item_B,), 0)
        confidence = support_AB / support_A
        lift = calculate_lift(itemset, support_AB, support_A, support_B)
        evaluation_results[itemset] = {'support_AB': support_AB, 'support_A': support_A, 'support_B': support_B, 'confidence': confidence, 'lift': lift}
    return evaluation_results

In [26]:
# Evaluasi aturan asosiasi
evaluation_results = evaluate_association_rules(frequent_2_itemsets_result, frequent_patterns_result)

In [27]:
# Tampilkan hasil evaluasi
print("\n10. Evaluasi Aturan Asosiasi:")
for itemset, metrics in evaluation_results.items():
    print(f"{itemset}: {metrics}")


10. Evaluasi Aturan Asosiasi:
('Kerupuk Putih Jaring 2.000', 'Medan Jaya 2000'): {'support_AB': 0.017091622303166153, 'support_A': 0.1263659288316055, 'support_B': 0.0949845895208742, 'confidence': 0.1352549889135255, 'lift': 1.4239677151397416}
('Roti Ikan Hiu Kacang', 'Medan Jaya 2000'): {'support_AB': 0.017371812832726253, 'support_A': 0.17231717567946203, 'support_B': 0.0949845895208742, 'confidence': 0.1008130081300813, 'lift': 1.0613617286615344}
('Roti Ikan Hiu Kacang', 'Kerupuk Putih Jaring 2.000'): {'support_AB': 0.012608573830204538, 'support_A': 0.17231717567946203, 'support_B': 0.1263659288316055, 'confidence': 0.07317073170731707, 'lift': 0.5790384511383916}
('Roti Ikan Hiu Kacang', 'Pok-Pok Chicken'): {'support_AB': 0.011768002241524236, 'support_A': 0.17231717567946203, 'support_B': 0.10899411599887925, 'confidence': 0.06829268292682927, 'lift': 0.6265721988839424}
('Kerupuk Kuning', 'Kerupuk Putih Jaring 2.000'): {'support_AB': 0.022135051835247967, 'support_A': 0.1835

In [28]:
# Simpan model menggunakan pickle
with open('Model.pkl', 'wb') as model_file:
    pickle.dump(frequent_patterns_result, model_file)