In [1]:
from itertools import combinations
import pandas as pd

In [2]:
def convertToSet(transactions):
    return set(transactions.split(", "))

In [3]:
transactions = pd.read_csv("Amazon Transactions.csv")
transactions["Transaction"] = transactions["Transaction"].apply(convertToSet)
itemset = pd.read_csv("Amazon Items.csv")
support = 45
confidence = 90
threshold = (support / 100) * len(transactions)

In [4]:
def generateItemsets(prevItemsets, transactions, threshold, k):
    itemsets = {}
    itemList = list(prevItemsets.keys())

    for combo in combinations(itemList, k):
        combo = set(combo)
        count = sum(
            1
            for transaction in transactions["Transaction"]
            if combo.issubset(transaction)
        )
        if count >= threshold:
            itemsets[tuple(combo)] = count
    return itemsets

In [5]:
def generateRules(itemsets, transactions, min_support, min_confidence):
    # Go through the final dictionary and if the length is only one item ignore it
    # If the length is more than two generate all the possible combinations
    for itemset in itemsets.keys():
        k = len(itemset)
        if k < 2:
            continue
        for i in range(1, k):
            for combo in combinations(itemset, i):
                start = set(combo)
                itemset = set(itemset)
                res = itemset - start
                start_support = sum(
                    1
                    for transaction in transactions["Transaction"]
                    if start.issubset(transaction)
                )
                rule_support = sum(
                    1
                    for transaction in transactions["Transaction"]
                    if itemset.issubset(transaction)
                )
                confidence = rule_support / start_support
                if confidence >= min_confidence / 100:
                    print(
                        f"\nRule: {', '.join(start)} -> {', '.join(res)}, Confidence: {confidence}"
                    )

In [6]:
def generateFrequentItems(itemsets):
    freq = []
    freqString = """\nFrequent Items: """
    for itemset in itemsets:
        freq.append(itemset)
    for i in range(len(freq)):
        freqString += "{" + ", ".join(freq[i]) + "}"
        if i < len(freq) - 1:
            freqString += ", "
    return freqString

In [7]:
def apriori(transactions, min_support, min_confidence):
    itemsets = {}
    results = {}

    for index, row in transactions.iterrows():
        for item in row["Transaction"]:
            itemsets[item] = itemsets.get(item, 0) + 1

    threshold = (min_support / 100) * len(transactions)
    itemsets = {k: v for k, v in itemsets.items() if v >= threshold}
    # results.update(itemsets)
    k = 1

    while True:
        new = generateItemsets(itemsets, transactions, threshold, k)
        if not new:
            break
        results.update(new)
        k += 1
    generateRules(results, transactions, min_support, min_confidence)

    return results

In [8]:
results = apriori(transactions, 45, 90)
print(generateFrequentItems(results))


Rule: Java: The Complete Reference -> A Beginner’s Guide, Confidence: 0.9

Rule: Java: The Complete Reference -> Java For Dummies, Confidence: 1.0

Rule: Java: The Complete Reference -> A Beginner’s Guide, Java For Dummies, Confidence: 0.9

Rule: A Beginner’s Guide, Java: The Complete Reference -> Java For Dummies, Confidence: 1.0

Rule: A Beginner’s Guide, Java For Dummies -> Java: The Complete Reference, Confidence: 1.0

Rule: Java: The Complete Reference, Java For Dummies -> A Beginner’s Guide, Confidence: 0.9

Frequent Items: {A Beginner’s Guide}, {Java: The Complete Reference}, {Android Programming: The Big Nerd Ranch}, {Java For Dummies}, {A Beginner’s Guide, Java: The Complete Reference}, {A Beginner’s Guide, Java For Dummies}, {Java: The Complete Reference, Java For Dummies}, {Android Programming: The Big Nerd Ranch, Java For Dummies}, {A Beginner’s Guide, Java: The Complete Reference, Java For Dummies}


In [9]:
from apriori_python.apriori import apriori

In [10]:
items = pd.read_csv('Amazon Items.csv')
transactions = pd.read_csv('Amazon Transactions.csv')
itemsList = []
for index, row in transactions.iterrows():
    itemSet = row['Transaction'].split(', ')
    itemsList.append(itemSet)
print(itemsList)

[['A Beginner’s Guide', 'Java: The Complete Reference', 'Java For Dummies', 'Android Programming: The Big Nerd Ranch'], ['A Beginner’s Guide', 'Java: The Complete Reference', 'Java For Dummies'], ['A Beginner’s Guide', 'Java: The Complete Reference', 'Java For Dummies', 'Android Programming: The Big Nerd Ranch', 'Head First Java 2nd Edition'], ['Android Programming: The Big Nerd Ranch', 'Head First Java 2nd Edition ', 'Beginning Programming with Java'], ['Android Programming: The Big Nerd Ranch', 'Beginning Programming with Java', 'Java 8 Pocket Guide'], ['A Beginner’s Guide', 'Android Programming: The Big Nerd Ranch', 'Head First Java 2nd Edition'], ['A Beginner’s Guide', 'Head First Java 2nd Edition ', 'Beginning Programming with Java'], ['Java: The Complete Reference', 'Java For Dummies', 'Android Programming: The Big Nerd Ranch'], ['Java For Dummies', 'Android Programming: The Big Nerd Ranch', 'Head First Java 2nd Edition ', 'Beginning Programming with Java'], ['Beginning Programmi

In [11]:
minSupport = 0.45
minConf = 0.9

freqItemSet, rules = apriori(itemsList, minSupport, minConf)

In [12]:
for i, rule in enumerate(rules):
    print(f"Rule {i+1}: {rule}\n")

print(freqItemSet)

Rule 1: [{'Java: The Complete Reference'}, {'Java For Dummies'}, 1.0]

Rule 2: [{'A Beginner’s Guide', 'Java: The Complete Reference'}, {'Java For Dummies'}, 1.0]

Rule 3: [{'A Beginner’s Guide', 'Java For Dummies'}, {'Java: The Complete Reference'}, 1.0]

{1: {frozenset({'A Beginner’s Guide'}), frozenset({'Java For Dummies'}), frozenset({'Android Programming: The Big Nerd Ranch'}), frozenset({'Java: The Complete Reference'})}, 2: {frozenset({'Android Programming: The Big Nerd Ranch', 'Java For Dummies'}), frozenset({'A Beginner’s Guide', 'Java For Dummies'}), frozenset({'A Beginner’s Guide', 'Java: The Complete Reference'}), frozenset({'Java: The Complete Reference', 'Java For Dummies'})}, 3: {frozenset({'A Beginner’s Guide', 'Java: The Complete Reference', 'Java For Dummies'})}}


In [13]:
from fpgrowth_py import fpgrowth

In [14]:
freqItemSet, rules = fpgrowth(itemsList, minSupport, minConf)
print(rules)

[[{'A Beginner’s Guide', 'Java: The Complete Reference'}, {'Java For Dummies'}, 1.0], [{'A Beginner’s Guide', 'Java For Dummies'}, {'Java: The Complete Reference'}, 1.0], [{'Java: The Complete Reference'}, {'Java For Dummies'}, 1.0]]
