# Load Data

In [125]:
import pandas as pd

df = pd.read_csv('transaction_data.csv',index_col=0)
# ubah menjadi bentuk boolean
df = df.astype(bool)
# ubah kembali menjadi bentuk integer
df = df.astype(int)
df

Unnamed: 0_level_0,A,C,D,E,I,K,M,N,O,U,Y
Transaction ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1000,0,0,0,1,0,1,1,1,1,0,1
2000,0,0,1,1,0,1,0,0,1,0,1
4000,1,0,0,1,0,1,1,0,0,0,0
5000,0,1,0,0,0,1,1,0,0,1,1
6000,0,1,0,1,1,1,0,0,1,0,0
7000,0,0,1,0,0,0,1,1,1,0,0


# Hitung jumlah transaksi setiap barang dan supportnya

In [126]:
# hitung jumlah transaksi barang
item_counts = df.sum(axis=0)

# hitung jumlah support barang
support_counts = item_counts / len(df)

# bentuk dataframe
support_df = pd.DataFrame({
    'item': item_counts.index,
    'support': support_counts.values
})

# tampikkan hasil
support_df

Unnamed: 0,item,support
0,A,0.166667
1,C,0.333333
2,D,0.333333
3,E,0.666667
4,I,0.166667
5,K,0.833333
6,M,0.666667
7,N,0.333333
8,O,0.666667
9,U,0.166667


In [127]:
# filter barang untuk minimum support >= 20% (minimum 2 barang dari 10 transaksi)
support_df = support_df[support_df['support'] >= 0.33]

# tampilkan hasil akhir
support_df = support_df.sort_values(by='support', ascending=False) # urutkan berdasarkan support
support_df = support_df.reset_index(drop=True)
support_df

Unnamed: 0,item,support
0,K,0.833333
1,E,0.666667
2,O,0.666667
3,M,0.666667
4,Y,0.5
5,D,0.333333
6,C,0.333333
7,N,0.333333


In [128]:
# Ambil daftar item yang lolos minimum support
filtered_items = support_df['item'].tolist()

# Filter df hanya kolom item yang lolos support
df_filtered = df[filtered_items]

# Tampilkan df_filtered
df_filtered

Unnamed: 0_level_0,K,E,O,M,Y,D,C,N
Transaction ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1000,1,1,1,1,1,0,0,1
2000,1,1,1,0,1,1,0,0
4000,1,1,0,1,0,0,0,0
5000,1,0,0,1,1,0,1,0
6000,1,1,1,0,0,0,1,0
7000,0,0,1,1,0,1,0,1


# Pembentukan FP Tree

In [129]:
# item yang diurutkan sesuai support dimasukkan ke dalam list
ordered_items = support_df['item'].tolist()

# Ubah setiap transaksi menjadi list item yang dibeli, urutkan sesuai ordered_items
transactions = []
for _, row in df_filtered.iterrows():
    items = [item for item in ordered_items if row[item] == 1]
    transactions.append(items)

# Hasil
for i, t in enumerate(transactions, 1):
    print(f"{i}: {t}")

1: ['K', 'E', 'O', 'M', 'Y', 'N']
2: ['K', 'E', 'O', 'Y', 'D']
3: ['K', 'E', 'M']
4: ['K', 'M', 'Y', 'C']
5: ['K', 'E', 'O', 'C']
6: ['O', 'M', 'D', 'N']


In [130]:
class FPTreeNode:
    def __init__(self, item, count=1):
        self.item = item
        self.count = count
        self.children = {}
        self.parent = None

    def add_child(self, item):
        if item not in self.children:
            self.children[item] = FPTreeNode(item)
            self.children[item].parent = self
        else:
            self.children[item].count += 1
        return self.children[item]

# Membuat root
root = FPTreeNode(None, count=0)

# Menambahkan transaksi ke FP-Tree
for transaction in transactions:
    current_node = root
    for item in transaction:
        current_node = current_node.add_child(item)

In [131]:
# visualisasi FP-Tree
def print_fp_tree(node, prefix=''):
    for i, child in enumerate(node.children.values()):
        connector = '└── ' if i == len(node.children) - 1 else '├── '
        print(prefix + connector + f"{child.item} ({child.count})")
        next_prefix = prefix + ('    ' if i == len(node.children) - 1 else '│   ')
        print_fp_tree(child, next_prefix)

print_fp_tree(root)

├── K (5)
│   ├── E (4)
│   │   ├── O (3)
│   │   │   ├── M (1)
│   │   │   │   └── Y (1)
│   │   │   │       └── N (1)
│   │   │   ├── Y (1)
│   │   │   │   └── D (1)
│   │   │   └── C (1)
│   │   └── M (1)
│   └── M (1)
│       └── Y (1)
│           └── C (1)
└── O (1)
    └── M (1)
        └── D (1)
            └── N (1)


# Pembentukan Conditional Pattern

hitung jumlah count support untuk setiap item

In [132]:
# hitung jumlah support count untuk setiap item
def count_support(node):
    if node is None:
        return {}
    support_count = {node.item: node.count}
    for child in node.children.values():
        child_counts = count_support(child)
        for item, count in child_counts.items():
            if item in support_count:
                support_count[item] += count
            else:
                support_count[item] = count
    return support_count
support_counts = count_support(root)
# ubah ke DataFrame
support_counts_df = pd.DataFrame(list(support_counts.items()), columns=['item', 'support_count'])

# tampilkan DataFrame support counts
support_counts_df = support_counts_df.sort_values(by='support_count', ascending=True)
# tampilkan support counts >=2 
support_counts_df = support_counts_df[support_counts_df['support_count'] >= 2]
support_counts_df = support_counts_df.reset_index(drop=True)
support_counts_df


Unnamed: 0,item,support_count
0,D,2
1,N,2
2,C,2
3,Y,3
4,E,4
5,M,4
6,O,4
7,K,5


In [133]:
# Fungsi untuk mencari semua lintasan dari root ke node yang berisi item target
def find_paths(node, target_item, path=None, paths=None):
    if path is None:
        path = []
    if paths is None:
        paths = []
    if node.item == target_item:
        paths.append(path.copy())
    for child in node.children.values():
        find_paths(child, target_item, path + [node.item] if node.item else path, paths)
    return paths

# Tampilkan conditional pattern base untuk setiap item
for item in support_counts_df['item']:
    paths = find_paths(root, item)
    print(f"Conditional pattern base for {item}:")
    if not paths:
        print("  (tidak ada)")
    for p in paths:
        print("  " + " -> ".join(str(x) for x in p if x))
    print("="*30)

Conditional pattern base for D:
  K -> E -> O -> Y
  O -> M
Conditional pattern base for N:
  K -> E -> O -> M -> Y
  O -> M -> D
Conditional pattern base for C:
  K -> E -> O
  K -> M -> Y
Conditional pattern base for Y:
  K -> E -> O -> M
  K -> E -> O
  K -> M
Conditional pattern base for E:
  K
Conditional pattern base for M:
  K -> E -> O
  K -> E
  K
  O
Conditional pattern base for O:
  K -> E
  
Conditional pattern base for K:
  


In [134]:
# Fungsi untuk mencari semua lintasan dari root ke node yang berisi item target, beserta count-nya
def find_paths_with_count(node, target_item, path=None, paths=None):
    if path is None:
        path = []
    if paths is None:
        paths = []
    if node.item == target_item:
        # Simpan lintasan dan count node target
        paths.append((path.copy(), node.count))
    for child in node.children.values():
        find_paths_with_count(child, target_item, path + [node.item] if node.item else path, paths)
    return paths

# Fungsi membangun FP-Tree dari pattern base
def build_conditional_fptree(pattern_base):
    class Node:
        def __init__(self, item):
            self.item = item
            self.count = 0
            self.children = {}
        def add(self, items, count):
            if not items:
                return
            first = items[0]
            if first not in self.children:
                self.children[first] = Node(first)
            self.children[first].count += count
            self.children[first].add(items[1:], count)
    root = Node(None)
    for path, count in pattern_base:
        root.add(path, count)
    return root

# Fungsi untuk print conditional FP-Tree
def print_conditional_tree(node, prefix=''):
    for i, child in enumerate(node.children.values()):
        connector = '└── ' if i == len(node.children) - 1 else '├── '
        print(prefix + connector + f"{child.item} ({child.count})")
        next_prefix = prefix + ('    ' if i == len(node.children) - 1 else '│   ')
        print_conditional_tree(child, next_prefix)

# Bangun dan tampilkan conditional FP-Tree untuk setiap item
for item in support_counts_df['item']:
    pattern_base = find_paths_with_count(root, item)
    # Hapus node target dari setiap path (hanya ambil prefix-nya)
    pattern_base = [(path, count) for path, count in pattern_base if path]
    print(f"Conditional FP-Tree for {item}:")
    if not pattern_base:
        print("  (tidak ada)")
    else:
        cond_tree = build_conditional_fptree(pattern_base)
        print_conditional_tree(cond_tree)
    print("="*40)

Conditional FP-Tree for D:
├── K (1)
│   └── E (1)
│       └── O (1)
│           └── Y (1)
└── O (1)
    └── M (1)
Conditional FP-Tree for N:
├── K (1)
│   └── E (1)
│       └── O (1)
│           └── M (1)
│               └── Y (1)
└── O (1)
    └── M (1)
        └── D (1)
Conditional FP-Tree for C:
└── K (2)
    ├── E (1)
    │   └── O (1)
    └── M (1)
        └── Y (1)
Conditional FP-Tree for Y:
└── K (3)
    ├── E (2)
    │   └── O (2)
    │       └── M (1)
    └── M (1)
Conditional FP-Tree for E:
└── K (4)
Conditional FP-Tree for M:
├── K (3)
│   └── E (2)
│       └── O (1)
└── O (1)
Conditional FP-Tree for O:
└── K (3)
    └── E (3)
Conditional FP-Tree for K:
  (tidak ada)


In [135]:
min_support = 2  # ganti sesuai kebutuhan

def filter_pattern_base(pattern_base, min_support):
    """
    Filter pattern base untuk hanya menyertakan item yang memiliki support >= min_support.
    Args:
        pattern_base (list of tuples): List berisi tuple (path, count) dari pattern base.
        min_support (int): Nilai minimum support yang harus dipenuhi.
    Returns:
        list of tuples: Pattern base yang sudah difilter.
    """
    # Hitung support setiap item di pattern base
    item_counts = {}
    for path, count in pattern_base:
        for item in path:
            item_counts[item] = item_counts.get(item, 0) + count
    # Ambil item yang lolos min_support
    frequent_items = {item for item, cnt in item_counts.items() if cnt >= min_support}
    # Filter path: hanya item yang frequent
    filtered_base = []
    for path, count in pattern_base:
        filtered_path = [item for item in path if item in frequent_items]
        if filtered_path:  # hanya simpan path yang tidak kosong
            filtered_base.append((filtered_path, count))
    return filtered_base

# Bangun dan tampilkan conditional FP-Tree untuk setiap item dengan filtering min_support
for item in support_counts_df['item']:
    pattern_base = find_paths_with_count(root, item)
    # Hapus node target dari setiap path (hanya ambil prefix-nya)
    pattern_base = [(path, count) for path, count in pattern_base if path]
    # Filter pattern base sesuai min_support
    filtered_base = filter_pattern_base(pattern_base, min_support)
    print(f"Conditional FP-Tree for {item}:")
    if not filtered_base:
        print("  (tidak ada)")
    else:
        cond_tree = build_conditional_fptree(filtered_base)
        print_conditional_tree(cond_tree)
    print("="*40)

Conditional FP-Tree for D:
└── O (2)
Conditional FP-Tree for N:
└── O (2)
    └── M (2)
Conditional FP-Tree for C:
└── K (2)
Conditional FP-Tree for Y:
└── K (3)
    ├── E (2)
    │   └── O (2)
    │       └── M (1)
    └── M (1)
Conditional FP-Tree for E:
└── K (4)
Conditional FP-Tree for M:
├── K (3)
│   └── E (2)
│       └── O (1)
└── O (1)
Conditional FP-Tree for O:
└── K (3)
    └── E (3)
Conditional FP-Tree for K:
  (tidak ada)


In [136]:
from itertools import combinations, chain

def generate_frequent_itemsets_from_pattern_base(pattern_base, min_support, suffix):
    """
    Generate frequent itemsets dari pattern base
    pattern_base: list of (path, count)
    suffix: item yang sedang dicari conditional-nya, misal 'e'
    """
    itemset_counts = {}
    for path, count in pattern_base:
        # Untuk setiap kombinasi item di path (ukuran 1 sampai len(path))
        for r in range(1, len(path)+1):
            for combo in combinations(path, r):
                # Itemset selalu diurutkan + tambahkan suffix (misal 'e')
                itemset = tuple(sorted(combo + (suffix,)))
                itemset_counts[itemset] = itemset_counts.get(itemset, 0) + count
    # Filter hanya yang support >= min_support
    frequent_itemsets = {itemset: sup for itemset, sup in itemset_counts.items() if sup >= min_support}
    return frequent_itemsets

# Tampilkan frequent itemset untuk setiap item dari pattern base yang sudah difilter
for item in support_counts_df['item']:
    pattern_base = find_paths_with_count(root, item)
    # Ambil prefix path saja (tanpa node target)
    pattern_base = [(path, count) for path, count in pattern_base if path]
    # Filter pattern base sesuai min_support
    filtered_base = filter_pattern_base(pattern_base, min_support)
    # Generate frequent itemset dari pattern base
    frequent_itemsets = generate_frequent_itemsets_from_pattern_base(filtered_base, min_support, item)
    print(f"Frequent itemsets yang mengandung '{item}':")
    if not frequent_itemsets:
        print("  (tidak ada)")
    else:
        for itemset, sup in frequent_itemsets.items():
            print(f"  {set(itemset)} : support = {sup}")
    print("="*40)

Frequent itemsets yang mengandung 'D':
  {'O', 'D'} : support = 2
Frequent itemsets yang mengandung 'N':
  {'N', 'O'} : support = 2
  {'N', 'M'} : support = 2
  {'N', 'O', 'M'} : support = 2
Frequent itemsets yang mengandung 'C':
  {'C', 'K'} : support = 2
Frequent itemsets yang mengandung 'Y':
  {'K', 'Y'} : support = 3
  {'Y', 'E'} : support = 2
  {'O', 'Y'} : support = 2
  {'Y', 'M'} : support = 2
  {'K', 'Y', 'E'} : support = 2
  {'O', 'K', 'Y'} : support = 2
  {'K', 'Y', 'M'} : support = 2
  {'O', 'Y', 'E'} : support = 2
  {'O', 'K', 'Y', 'E'} : support = 2
Frequent itemsets yang mengandung 'E':
  {'K', 'E'} : support = 4
Frequent itemsets yang mengandung 'M':
  {'K', 'M'} : support = 3
  {'E', 'M'} : support = 2
  {'O', 'M'} : support = 2
  {'K', 'E', 'M'} : support = 2
Frequent itemsets yang mengandung 'O':
  {'O', 'K'} : support = 3
  {'O', 'E'} : support = 3
  {'O', 'K', 'E'} : support = 3
Frequent itemsets yang mengandung 'K':
  (tidak ada)


In [137]:
def generate_frequent_itemsets_df(support_counts_df, root, min_support):
    """
    Generate DataFrame berisi frequent itemsets dari FP-Tree.
    support_counts_df: DataFrame berisi item dan support count-nya
    root: root node dari FP-Tree
    min_support: nilai minimum support untuk itemset
    """
    # Inisialisasi list untuk menyimpan semua itemset
    all_itemsets = []
    # Tambahkan frequent 1-itemset
    for _, row in support_counts_df.iterrows():
        if row['item'] is not None:
            all_itemsets.append({'itemset': (row['item'],), 'support': row['support_count'] / len(df)})
    # Tambahkan frequent itemset dari pattern base (ukuran > 1)
    for item in support_counts_df['item']:
        pattern_base = find_paths_with_count(root, item)
        pattern_base = [(path, count) for path, count in pattern_base if path]
        filtered_base = filter_pattern_base(pattern_base, min_support)
        frequent_itemsets = generate_frequent_itemsets_from_pattern_base(filtered_base, min_support, item)
        for itemset, sup in frequent_itemsets.items():
            all_itemsets.append({'itemset': tuple(sorted(itemset)), 'support': sup / len(df)})
    # Gabungkan dan hilangkan duplikat (jika ada)
    df_itemsets = pd.DataFrame(all_itemsets)
    df_itemsets = df_itemsets.drop_duplicates(subset=['itemset'])
    df_itemsets = df_itemsets.sort_values(by='support', ascending=False).reset_index(drop=True)
    return df_itemsets

# Contoh penggunaan:
frequent_itemsets_df = generate_frequent_itemsets_df(support_counts_df, root, min_support)
frequent_itemsets_df

Unnamed: 0,itemset,support
0,"(K,)",0.833333
1,"(E,)",0.666667
2,"(O,)",0.666667
3,"(M,)",0.666667
4,"(E, K)",0.666667
5,"(Y,)",0.5
6,"(E, O)",0.5
7,"(K, M)",0.5
8,"(K, O)",0.5
9,"(E, K, O)",0.5


In [138]:
from itertools import combinations

def generate_association_rules(frequent_itemsets_df, min_confidence=0.7):
    rules = []
    # Buat dict untuk lookup support
    support_dict = {frozenset(itemset): support for itemset, support in zip(frequent_itemsets_df['itemset'], frequent_itemsets_df['support'])}
    for itemset in frequent_itemsets_df['itemset']:
        if len(itemset) < 2:
            continue  # hanya untuk itemset > 1
        itemset_support = support_dict[frozenset(itemset)]
        # Cek semua kemungkinan pemisahan antecedent dan consequent
        for i in range(1, len(itemset)):
            for antecedent in combinations(itemset, i):
                antecedent = frozenset(antecedent)
                consequent = frozenset(itemset) - antecedent
                if not consequent:
                    continue
                antecedent_support = support_dict.get(antecedent, 0)
                if antecedent_support == 0:
                    continue
                confidence = itemset_support / antecedent_support
                if confidence >= min_confidence:
                    rules.append({
                        'antecedents': tuple(sorted(antecedent)),
                        'consequents': tuple(sorted(consequent)),
                        'support': itemset_support,
                        'confidence': confidence
                    })
    return pd.DataFrame(rules)

# Contoh penggunaan:
association_rules_df = generate_association_rules(frequent_itemsets_df, min_confidence=0.8)
association_rules_df

Unnamed: 0,antecedents,consequents,support,confidence
0,"(E,)","(K,)",0.666667,1.0
1,"(E, O)","(K,)",0.5,1.0
2,"(K, O)","(E,)",0.5,1.0
3,"(Y,)","(K,)",0.5,1.0
4,"(C,)","(K,)",0.333333,1.0
5,"(N,)","(M, O)",0.333333,1.0
6,"(M, N)","(O,)",0.333333,1.0
7,"(M, O)","(N,)",0.333333,1.0
8,"(N, O)","(M,)",0.333333,1.0
9,"(N,)","(M,)",0.333333,1.0
