#### Step 1: Import the dataset and set the minSupport and minConfidence

[REFERENCE](https://hands-on.cloud/implementation-of-fp-growth-algorithm-using-python/)

In [164]:
import pandas as pd
import numpy as np
from itertools import combinations
from csv import reader
from collections import defaultdict
from mlxtend.preprocessing import TransactionEncoder

min_support = 0.4
min_confidence = 1

# Step 0 Preprocessing
df = pd.read_csv('data.csv')
transactions = []
for i in range(len(df)):
    transaction = []
    for col in df.columns:
        if df.loc[i, col] == True:
            transaction.append(col)
    transactions.append(transaction)

print(transactions)

# Convert the transactions into a one-hot encoded DataFrame
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)
df.head()

[['apt', 'poor'], ['car', 'apt', 'avg'], ['car', 'villa', 'rich'], ['car', 'villa', 'rich'], ['car', 'apt', 'avg']]


Unnamed: 0,apt,avg,car,poor,rich,villa
0,True,False,False,True,False,False
1,True,True,True,False,False,False
2,False,False,True,False,True,True
3,False,False,True,False,True,True
4,True,True,True,False,False,False


In [165]:
# Step 1: Calculate the support for each item
support_counts = {}
for transaction in transactions:
    for item in transaction:
        if item in support_counts:
            support_counts[item] += 1
        else:
            support_counts[item] = 1

print(support_counts)

{'apt': 3, 'poor': 1, 'car': 4, 'avg': 2, 'villa': 2, 'rich': 2}


In [166]:
# Step 2: Calculate F-list
# Remove items that do not meet the minimum support threshold
f_list = []
for item, support in support_counts.items():
    if support >= min_support:
        f_list.append(item)

# Sort by support
f_list = sorted(f_list, key=lambda x: support_counts[x], reverse=True)

print(f_list)

['car', 'apt', 'avg', 'villa', 'rich', 'poor']


In [167]:
# Step 3: Create Conditional Base
conditional_base = {}
for item in f_list:
    conditional_base[item] = []
    for transaction in transactions:
        if item in transaction:
            conditional_base[item].append(transaction)

print(conditional_base)

{'car': [['car', 'apt', 'avg'], ['car', 'villa', 'rich'], ['car', 'villa', 'rich'], ['car', 'apt', 'avg']], 'apt': [['apt', 'poor'], ['car', 'apt', 'avg'], ['car', 'apt', 'avg']], 'avg': [['car', 'apt', 'avg'], ['car', 'apt', 'avg']], 'villa': [['car', 'villa', 'rich'], ['car', 'villa', 'rich']], 'rich': [['car', 'villa', 'rich'], ['car', 'villa', 'rich']], 'poor': [['apt', 'poor']]}


In [168]:
# Step 4: Create Fp-Tree
class FPNode:
    def __init__(self, name, count, parent):
        self.name = name
        self.count = count
        self.parent = parent
        self.children = {}
        self.next = None

def build_FPTree(transactions, f_list, conditional_base, min_support):
    root = FPNode('root', 0, None)
    for transaction in transactions:
        transaction = [item for item in transaction if item in f_list]
        transaction.sort(key=lambda x: f_list.index(x))
        curr_node = root
        for item in transaction:
            if item in curr_node.children:
                curr_node.children[item].count += 1
            else:
                new_node = FPNode(item, 1, curr_node)
                curr_node.children[item] = new_node
                if item in conditional_base:
                    conditional_base[item].append(new_node)
                else:
                    conditional_base[item] = [new_node]
            curr_node = curr_node.children[item]
    return root, conditional_base           

In [169]:
# Step 5: Gerenate Frequent Patterns
def generate_frequent_patterns(conditional_base, min_support):
    frequent_patterns = {}
    for item in conditional_base:
        if len(conditional_base[item]) >= min_support:
            frequent_patterns[item] = conditional_base[item]
    return frequent_patterns

print(generate_frequent_patterns(conditional_base, min_support))

{'car': [['car', 'apt', 'avg'], ['car', 'villa', 'rich'], ['car', 'villa', 'rich'], ['car', 'apt', 'avg']], 'apt': [['apt', 'poor'], ['car', 'apt', 'avg'], ['car', 'apt', 'avg']], 'avg': [['car', 'apt', 'avg'], ['car', 'apt', 'avg']], 'villa': [['car', 'villa', 'rich'], ['car', 'villa', 'rich']], 'rich': [['car', 'villa', 'rich'], ['car', 'villa', 'rich']], 'poor': [['apt', 'poor']]}


In [170]:
# Step 6: Generate Association Rules
def generate_association_rules(frequent_patterns, min_confidence):
    association_rules = []
    for item in frequent_patterns:
        for i in range(1, len(frequent_patterns[item])):
            for subset in combinations(frequent_patterns[item], i):
                confidence = len(subset) / len(frequent_patterns[item])
                if confidence >= min_confidence:
                    association_rules.append((subset, item))
    return association_rules

print(generate_association_rules(generate_frequent_patterns(conditional_base, min_support), min_confidence))

[]
