In [None]:
import pandas as pd
# Print the installed pandas version so the environment used for the cells
# below is recorded alongside their output.
print(pd.__version__)


2.0.3


# **Key Concepts**:
## Frequent Itemsets:

An itemset is a collection of one or more items.

A frequent itemset is an itemset that appears in the dataset with a frequency greater than or equal to a specified threshold (minimum support).

## Association Rules:

Association rules describe relationships between items based on their co-occurrence in transactions.

The rules are typically of the form "If itemset X is present, then itemset Y is also likely to be present."

# **How Apriori Works:**
The Apriori algorithm uses a level-wise search strategy to discover frequent itemsets. The key idea is based on the Apriori property:

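Apriori Property: If an itemset is frequent, then all of its subsets must also be frequent. Equivalently, if an itemset is infrequent, every superset of it is infrequent as well, which is what allows candidates to be pruned without counting them.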

The algorithm iteratively generates candidate itemsets of increasing size, prunes infrequent candidates, and continues until no more frequent itemsets can be found.

# **Steps in Apriori Algorithm**:
### Generate Candidate Itemsets:

Start with frequent itemsets of size 1.
Generate candidates of size k by joining frequent itemsets of size k-1.
### Scan the Dataset:

Count the support (frequency) of each candidate itemset in the dataset.
### Prune Infrequent Itemsets:

Remove candidate itemsets that do not meet the minimum support threshold.
### Repeat:

Repeat the process with the remaining frequent itemsets until no more frequent itemsets can be found.
# **Uses of Apriori Algorithm**:
### Market Basket Analysis:

Apriori is commonly used in retail for market basket analysis, where the goal is to discover associations between products that are frequently purchased together. This information can be used for product placement, promotions, and inventory management.
### Cross-Selling:

It is used in e-commerce and online platforms to identify items that are often bought together, helping to suggest additional products to customers during their shopping experience.
### Healthcare:

In healthcare, Apriori can be applied to analyze patient records and identify co-occurring medical conditions or patterns of treatment.

### Network Security:

The algorithm can be used in network security to identify patterns of activities or behaviors that may indicate security threats.

### Web Usage Mining:

Apriori can be applied to analyze user navigation patterns on websites, helping to improve website design and user experience.

In [None]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import pandas as pd


# Toy market-basket data: one row per transaction, with the purchased items
# stored as a list in the `Items` column.
data = {'Transaction_ID': [1, 2, 3, 4, 5],
        'Items': [['Milk', 'Bread', 'Diaper'],
                  ['Beer', 'Milk', 'Bread', 'Eggs'],
                  ['Beer', 'Coke', 'Diaper'],
                  ['Beer', 'Bread', 'Diaper'],
                  ['Milk', 'Bread', 'Coke']]}
df = pd.DataFrame(data)


# One-hot encode the baskets.
#   * explode() emits one row per (transaction, item) and keeps the
#     transaction's index label on every row;
#   * get_dummies() turns each distinct item into an indicator column;
#   * groupby(level=0).max() folds the rows back to one per transaction.
# max() (rather than sum()) keeps every cell at 0/1 even when an item is
# listed twice in the same basket, and astype(bool) hands mlxtend the boolean
# frame it expects instead of integer counts.
oht = pd.get_dummies(df['Items'].explode()).groupby(level=0).max().astype(bool)

# Itemsets that occur in at least 50% of the transactions.
frequent_itemsets = apriori(oht, min_support=0.5, use_colnames=True)

print(frequent_itemsets)

# Keep only the rules whose confidence is at least 0.7.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)

# Print association rules
print(rules)



   support       itemsets
0      0.6         (Beer)
1      0.8        (Bread)
2      0.6       (Diaper)
3      0.6         (Milk)
4      0.6  (Milk, Bread)
  antecedents consequents  antecedent support  consequent support  support  \
0      (Milk)     (Bread)                 0.6                 0.8      0.6   
1     (Bread)      (Milk)                 0.8                 0.6      0.6   

   confidence  lift  leverage  conviction  zhangs_metric  
0        1.00  1.25      0.12         inf            0.5  
1        0.75  1.25      0.12         1.6            1.0  




In [None]:
def load_data():
    """Return the five hard-coded sample transactions.

    Each transaction is a list of item names; a new list is built on every
    call, so callers may modify the result freely.
    """
    transactions = [
        ['bread', 'milk'],
        ['bread', 'diaper', 'beer', 'egg'],
        ['milk', 'diaper', 'beer', 'cola'],
        ['bread', 'milk', 'diaper', 'beer'],
        ['bread', 'milk', 'diaper', 'cola'],
    ]
    return transactions

def create_candidates(itemsets, k):
    """Join pairs of itemsets into candidate itemsets for level ``k``.

    Two itemsets are joined when their first ``k - 2`` elements are equal.
    The joined candidate is the sorted union of both itemsets, and each
    distinct candidate is returned once, in order of first appearance.

    Args:
        itemsets: list of itemsets, each a list of hashable, orderable items.
        k: target level; only its value ``k - 2`` is used, as the length of
            the prefix two itemsets must share to be joined.

    Returns:
        A list of sorted lists, one per distinct candidate.
    """
    prefix_len = k - 2
    joined = []
    for position, left in enumerate(itemsets):
        left_prefix = left[:prefix_len]
        # Only look at itemsets that come after `left`, so every unordered
        # pair is examined exactly once.
        for right in itemsets[position + 1:]:
            if right[:prefix_len] != left_prefix:
                continue
            merged = sorted(set(left) | set(right))
            if merged not in joined:
                joined.append(merged)
    return joined

def prune_candidates(candidates, prev_itemsets):
    """Keep the candidates whose one-item-shorter subsets are all known.

    For every candidate, each subset obtained by dropping exactly one element
    (order preserved) must appear in ``prev_itemsets``; otherwise the
    candidate is discarded.

    Args:
        candidates: list of candidate itemsets (lists).
        prev_itemsets: collection of itemsets the subsets are looked up in.

    Returns:
        The surviving candidates, in their original order.
    """
    survivors = []
    for candidate in candidates:
        for drop in range(len(candidate)):
            shorter = candidate[:drop] + candidate[drop + 1:]
            if shorter not in prev_itemsets:
                # One missing subset is enough to reject the candidate.
                break
        else:
            survivors.append(candidate)
    return survivors

def apriori(data, min_support):
    """Return every itemset contained in at least ``min_support`` transactions.

    This is a level-wise Apriori search:

    1. Frequent 1-itemsets are found by counting, for each item, the number
       of transactions that contain it.
    2. For each following level ``k``, candidates are generated from the
       frequent ``(k - 1)``-itemsets with ``create_candidates``, filtered
       with ``prune_candidates``, and kept only if enough transactions
       contain them.
    3. The search stops as soon as a level yields no frequent itemset.

    Note: within this file the name shadows ``mlxtend.frequent_patterns.apriori``
    imported in an earlier cell; this function takes raw transactions and an
    absolute support count instead.

    Args:
        data: iterable of transactions, each an iterable of hashable items
            that can be ordered relative to each other (e.g. strings).
        min_support: minimum number of transactions (an absolute count, not a
            fraction) that must contain an itemset for it to be frequent.

    Returns:
        A list of frequent itemsets, each a sorted list of items, grouped by
        itemset size with the smallest itemsets first.
    """
    from collections import Counter  # local import: no file-level dependency

    # Convert once so each containment test below is a fast set operation and
    # an item repeated inside a transaction is only counted once.
    transactions = [set(transaction) for transaction in data]

    # Level 1: count items directly instead of deriving single items as a
    # side effect of joining duplicated occurrences.
    item_counts = Counter(
        item for transaction in transactions for item in transaction
    )
    current_level = sorted(
        [item] for item, count in item_counts.items() if count >= min_support
    )

    frequent_itemsets = []
    k = 2
    while current_level:
        frequent_itemsets.extend(current_level)

        # Build level-k candidates from the frequent (k-1)-itemsets only, and
        # drop any candidate that has an infrequent (k-1)-subset.
        candidates = prune_candidates(
            create_candidates(current_level, k), current_level
        )

        # A candidate becomes frequent only after its support has actually
        # been counted against the transactions.
        current_level = [
            candidate
            for candidate in candidates
            if sum(
                1
                for transaction in transactions
                if transaction.issuperset(candidate)
            ) >= min_support
        ]
        k += 1

    return frequent_itemsets

# Demo: mine the sample transactions from load_data() and print every
# frequent itemset, one per line.
if __name__ == "__main__":
    data = load_data()
    # Absolute number of transactions (not a fraction) an itemset must
    # appear in to be reported.
    min_support = 3
    result = apriori(data, min_support)
    print("Frequent Itemsets:")
    for itemset in result:
        print(itemset)


Frequent Itemsets:
['bread', 'milk']
['bread']
['bread', 'diaper']
['diaper', 'milk']
['milk']
['beer', 'diaper']
['diaper']
['beer']


# **FP growth**
Using library

In [None]:
from mlxtend.frequent_patterns import fpgrowth
import pandas as pd

# One-hot encoded transactions: each row is a transaction, each column an
# item. The 0/1 flags are cast to bool because mlxtend expects a boolean
# frame and warns about other dtypes.
dataset = pd.DataFrame(
    {'A': [1, 1, 0, 1], 'B': [0, 1, 1, 1], 'C': [1, 1, 0, 1], 'D': [0, 1, 1, 1]}
).astype(bool)
# Itemsets that occur in at least 50% of the transactions.
freq_itemset = fpgrowth(dataset, min_support=0.5, use_colnames=True)
print(freq_itemset)

    support      itemsets
0      0.75           (C)
1      0.75           (A)
2      0.75           (D)
3      0.75           (B)
4      0.50        (D, C)
5      0.75        (A, C)
6      0.50        (A, B)
7      0.50        (A, D)
8      0.50     (A, B, C)
9      0.50     (A, D, C)
10     0.50     (A, B, D)
11     0.50  (A, B, D, C)
12     0.75        (B, D)
13     0.50        (B, C)
14     0.50     (D, B, C)


  and should_run_async(code)


In [None]:
# from mlxtend.preprocessing import TransactionEncoder
# from mlxtend.frequent_patterns import fpgrowth

# # Example usage:
# transactions = [
#     ['A', 'B', 'D'],
#     ['B', 'C', 'E'],
#     ['A', 'B', 'D', 'E'],
#     ['B', 'E']
# ]
# min_support = 0.5  # mlxtend expects a fraction of transactions in (0, 1], not an absolute count

# te = TransactionEncoder()
# te_ary = te.fit(transactions).transform(transactions)
# df = pd.DataFrame(te_ary, columns=te.columns_)

# frequent_itemsets = fpgrowth(df, min_support=min_support, use_colnames=True)

# print("Frequent Itemsets:", frequent_itemsets)


  and should_run_async(code)


In [None]:
'''
class FPNode:
    def __init__(self, item, count, parent):
        self.item = item
        self.count = count
        self.parent = parent
        self.children = {}

def build_tree(transactions, min_support):
    header_table = {}

    for transaction in transactions:
        for item in transaction:
            header_table[item] = header_table.get(item, 0) + 1

    header_table = {k: v for k, v in header_table.items() if v >= min_support}

    frequent_items = list(header_table.keys())
    frequent_items.sort(key=lambda item: header_table[item], reverse=True)

    root = FPNode(None, None, None)

    for transaction in transactions:
        sorted_items = [item for item in frequent_items if item in transaction]
        insert_tree(sorted_items, root, header_table)

    return root, header_table

def insert_tree(items, node, header_table):
    if items:
        current_item = items[0]
        if current_item in node.children:
            child = node.children[current_item]
        else:
            child = FPNode(current_item, 0, node)
            node.children[current_item] = child
            if current_item in header_table:
                update_header_table(header_table, child)

        child.count += 1
        insert_tree(items[1:], child, header_table)

def update_header_table(header_table, node):
    while node.parent is not None:
        header_table[node.item].append(node)
        node = node.parent

def fp_growth(tree, header_table, prefix, frequent_itemsets, min_support):
    for node in header_table.values():
        new_frequent_set = prefix.copy()
        new_frequent_set.add(node.item)
        frequent_itemsets.append(new_frequent_set)

        cond_pattern_base = []
        while node is not None:
            prefix_path = []
            ascend_tree(node, prefix_path)
            if prefix_path:
                cond_pattern_base.append(prefix_path)
            node = node.parent

        cond_tree, cond_header_table = build_tree(cond_pattern_base, min_support)
        if cond_header_table:
            fp_growth(cond_tree, cond_header_table, new_frequent_set, frequent_itemsets, min_support)

def ascend_tree(node, prefix_path):
    if node.parent is not None:
        prefix_path.append(node.item)
        ascend_tree(node.parent, prefix_path)

# Example usage:
transactions = [
    ['A', 'B', 'D'],
    ['B', 'C', 'E'],
    ['A', 'B', 'D', 'E'],
    ['B', 'E']
]
min_support = 2

root, header_table = build_tree(transactions, min_support)
frequent_itemsets = []
fp_growth(root, header_table, set(), frequent_itemsets, min_support)

print("Frequent Itemsets:", frequent_itemsets)
