In [6]:
# Title: Implement Apriori for Association Rule Mining

# Task 1: Market Basket Analysis with Simple Transactions
# Step 1: Define a simple dataset of transactions.
# Step 2: Implement the Apriori algorithm using the mlxtend library.
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder

# Step 1: Five small market baskets, one inner list per transaction.
transactions = [
    ['Milk', 'Bread'],
    ['Milk', 'Diaper', 'Beer', 'Bread'],
    ['Milk', 'Diaper', 'Beer', 'Coke'],
    ['Bread', 'Egg', 'Milk'],
    ['Bread', 'Egg', 'Diaper', 'Milk', 'Beer'],
]

# Step 2: Encode the baskets as an item-per-column table, then mine
# frequent itemsets with mlxtend.
# The encoder is fitted before transforming so that te.columns_ is available
# to label the DataFrame columns.
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
df = pd.DataFrame(data=te_ary, columns=te.columns_)

# min_support=0.5 is the support threshold passed to apriori;
# use_colnames=True reports item names instead of column indices.
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)
print(frequent_itemsets)


# Task 2: Apriori Implementation with a Groceries Dataset
# Step 1: Load a sample dataset of grocery transactions. (Consider creating or downloading a CSV file of transactions)
# Step 2: Convert transactions for Apriori and execute the algorithm.
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder

# Step 1: Sample grocery transactions, defined inline here rather than read
# from a CSV file; each inner list is one basket.
data = [
    ['Milk', 'Bread', 'Eggs'],
    ['Milk', 'Diaper', 'Beer', 'Bread'],
    ['Milk', 'Diaper', 'Beer', 'Coke'],
    ['Bread', 'Eggs', 'Milk'],
    ['Bread', 'Eggs', 'Diaper', 'Milk', 'Beer'],
]

# Step 2: Encode the baskets as an item-per-column table and run Apriori.
# Fitting comes first so te.columns_ can supply the DataFrame column labels.
te = TransactionEncoder()
te.fit(data)
te_ary = te.transform(data)
df = pd.DataFrame(data=te_ary, columns=te.columns_)

# Same 0.5 support threshold as Task 1; itemsets are reported by item name.
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)
print(frequent_itemsets)


# Task 3: Finding Frequent Itemsets in Large Dataset
# Step 1: Use a pre-existing large dataset or generate a synthetic dataset.
# Step 2: Run Apriori and identify frequent itemsets.
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder

# Step 1: Hand-written synthetic transactions standing in for a large dataset
# (only eight baskets here; substitute your own large dataset at this point).
data = [
    ['Milk', 'Bread', 'Eggs'],
    ['Milk', 'Diaper', 'Beer', 'Bread'],
    ['Milk', 'Diaper', 'Beer', 'Coke'],
    ['Bread', 'Eggs', 'Milk'],
    ['Bread', 'Eggs', 'Diaper', 'Milk', 'Beer'],
    ['Coke', 'Milk', 'Beer', 'Bread'],
    ['Diaper', 'Bread', 'Beer', 'Coke'],
    ['Milk', 'Diaper', 'Beer', 'Bread'],
]

# Step 2: Encode the baskets as an item-per-column table and run Apriori.
# Fitting comes first so te.columns_ can supply the DataFrame column labels.
te = TransactionEncoder()
te.fit(data)
te_ary = te.transform(data)
df = pd.DataFrame(data=te_ary, columns=te.columns_)

# Same 0.5 support threshold as the earlier tasks; itemsets are reported
# by item name.
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)
print(frequent_itemsets)


   support              itemsets
0      0.6                (Beer)
1      0.8               (Bread)
2      0.6              (Diaper)
3      1.0                (Milk)
4      0.6        (Diaper, Beer)
5      0.6          (Milk, Beer)
6      0.8         (Bread, Milk)
7      0.6        (Diaper, Milk)
8      0.6  (Milk, Diaper, Beer)
    support              itemsets
0       0.6                (Beer)
1       0.8               (Bread)
2       0.6              (Diaper)
3       0.6                (Eggs)
4       1.0                (Milk)
5       0.6        (Diaper, Beer)
6       0.6          (Milk, Beer)
7       0.6         (Bread, Eggs)
8       0.8         (Bread, Milk)
9       0.6        (Diaper, Milk)
10      0.6          (Eggs, Milk)
11      0.6  (Milk, Diaper, Beer)
12      0.6   (Bread, Eggs, Milk)
    support               itemsets
0     0.750                 (Beer)
1     0.875                (Bread)
2     0.625               (Diaper)
3     0.875                 (Milk)
4     0.625        