# Code Implementation

In [92]:
import itertools
import pandas as pd
import numpy as np

In [93]:
# Sample data
transactions = [['Bread', 'Butter', 'Eggs', 'Milk'], ['Bread', 'Butter', 'Eggs'], ['Bread', 'Milk', 'Eggs'], 
                ['Bread', 'Butter', 'Milk'], ['Bread', 'Milk', 'Eggs'], ['Butter', 'Milk'], 
                ['Bread', 'Butter', 'Milk'], ['Bread', 'Milk'], ['Bread', 'Butter', 'Eggs', 'Milk'], 
                ['Bread', 'Eggs', 'Milk']]
transactions

[['Bread', 'Butter', 'Eggs', 'Milk'],
 ['Bread', 'Butter', 'Eggs'],
 ['Bread', 'Milk', 'Eggs'],
 ['Bread', 'Butter', 'Milk'],
 ['Bread', 'Milk', 'Eggs'],
 ['Butter', 'Milk'],
 ['Bread', 'Butter', 'Milk'],
 ['Bread', 'Milk'],
 ['Bread', 'Butter', 'Eggs', 'Milk'],
 ['Bread', 'Eggs', 'Milk']]

In [94]:
def find_frequent_itemsets(transactions, min_support):
    itemsets = {}
    transactions = [set(t) for t in transactions]
    num_transactions = len(transactions)
    
    for i in range(1, len(transactions[0])+1):
        itemsets[i] = {}
        candidates = set(itertools.chain(*transactions))
        
        for c in candidates:
            freq = sum([1 for t in transactions if c in t])
            support = freq / num_transactions
            if support >= min_support:
                itemsets[i][(c,)] = support
                
        if not itemsets[i]:
            del itemsets[i]
            break
            
        if i > 1:
            itemsets[i] = join_itemsets(itemsets[i-1], itemsets[1])
            
    return itemsets

def join_itemsets(itemset1, itemset2):
    joined_itemset = {}
    for i1 in itemset1:
        for i2 in itemset2:
            if len(set(i1) & set(i2)) == len(i1) - 1:
                joined_itemset[tuple(sorted(set(i1) | set(i2)))] = min(itemset1[i1], itemset2[i2])
                
    return joined_itemset

def generate_rules(frequent_itemsets, min_confidence):
    rules = []
    for i in range(2, len(frequent_itemsets)):
        for itemset in frequent_itemsets[i]:
            for j in range(1, len(itemset)):
                for antecedent in itertools.combinations(itemset, j):
                    antecedent = tuple(sorted(antecedent))
                    consequent = tuple(sorted(set(itemset) - set(antecedent)))
                    confidence = frequent_itemsets[i][itemset] / frequent_itemsets[len(antecedent)][antecedent]
                    if confidence >= min_confidence:
                        rules.append((antecedent, consequent, frequent_itemsets[i][itemset], confidence))
                        
    return rules

In [95]:
# Parameters
min_support = 0.3
min_confidence = 0.5

# Find frequent itemsets
frequent_itemsets = find_frequent_itemsets(transactions, min_support)

# Generate association rules
association_rules = generate_rules(frequent_itemsets, min_confidence)

def print_freq():
    # Print frequent itemsets and association rules
    print("Frequent itemsets:")
    for i in frequent_itemsets:
        for itemset in frequent_itemsets[i]:
            print(f"{itemset}: {frequent_itemsets[i][itemset]:.2f}")
        
def print_assoc():
    # Print the association rules with confidence
    print("Association rules:")
    for rule in association_rules:
        antecedent = ', '.join(rule[0])
        consequent = ', '.join(rule[1])
        support = rule[2]
        confidence = rule[3]
        print(f"{antecedent} => {consequent} (support: {support:.2f}, confidence: {confidence:.2f})")

In [96]:
print_freq()

Frequent itemsets:
('Bread',): 0.90
('Butter',): 0.60
('Eggs',): 0.60
('Milk',): 0.90
('Bread', 'Butter'): 0.60
('Bread', 'Eggs'): 0.60
('Bread', 'Milk'): 0.90
('Butter', 'Eggs'): 0.60
('Butter', 'Milk'): 0.60
('Eggs', 'Milk'): 0.60
('Bread', 'Butter'): 0.60
('Bread', 'Eggs'): 0.60
('Bread', 'Milk'): 0.90
('Butter', 'Eggs'): 0.60
('Butter', 'Milk'): 0.60
('Eggs', 'Milk'): 0.60
('Bread', 'Butter'): 0.60
('Bread', 'Eggs'): 0.60
('Bread', 'Milk'): 0.90
('Butter', 'Eggs'): 0.60
('Butter', 'Milk'): 0.60
('Eggs', 'Milk'): 0.60


In [97]:
print_assoc()

Association rules:
Bread => Butter (support: 0.60, confidence: 0.67)
Butter => Bread (support: 0.60, confidence: 1.00)
Bread => Eggs (support: 0.60, confidence: 0.67)
Eggs => Bread (support: 0.60, confidence: 1.00)
Bread => Milk (support: 0.90, confidence: 1.00)
Milk => Bread (support: 0.90, confidence: 1.00)
Butter => Eggs (support: 0.60, confidence: 1.00)
Eggs => Butter (support: 0.60, confidence: 1.00)
Butter => Milk (support: 0.60, confidence: 1.00)
Milk => Butter (support: 0.60, confidence: 0.67)
Eggs => Milk (support: 0.60, confidence: 1.00)
Milk => Eggs (support: 0.60, confidence: 0.67)
Bread => Butter (support: 0.60, confidence: 0.67)
Butter => Bread (support: 0.60, confidence: 1.00)
Bread => Eggs (support: 0.60, confidence: 0.67)
Eggs => Bread (support: 0.60, confidence: 1.00)
Bread => Milk (support: 0.90, confidence: 1.00)
Milk => Bread (support: 0.90, confidence: 1.00)
Butter => Eggs (support: 0.60, confidence: 1.00)
Eggs => Butter (support: 0.60, confidence: 1.00)
Butter =>

In [98]:
# function to read form csv and convert to list of lists
def read_csv(file):
    # Read CSV file and replace NaN values with 'NA'
    df = pd.read_csv(file).fillna('NA')

    # Convert the DataFrame to a list of lists of items
    transactions = df.values.tolist()

    # Convert each transaction to a set of items and remove 'NA' from the set
    for i in range(len(transactions)):
        transactions[i] = set([item for item in transactions[i] if item != 'NA'])

    transactions

    # convert each row to list of items from set
    transactions = [list(t) for t in transactions]
    return transactions
transactions = read_csv('transactions.csv')[:100]
transactions

[['eggs', 'burgers', 'meatballs'],
 ['chutney'],
 ['avocado', 'turkey'],
 ['whole wheat rice', 'energy bar', 'milk', 'green tea', 'mineral water'],
 ['low fat yogurt'],
 ['whole wheat pasta', 'french fries'],
 ['light cream', 'soup', 'shallot'],
 ['spaghetti', 'frozen vegetables', 'green tea'],
 ['french fries'],
 ['eggs', 'pet food'],
 ['cookies'],
 ['cooking oil', 'burgers', 'eggs', 'mineral water', 'turkey'],
 ['champagne', 'spaghetti', 'cookies'],
 ['mineral water', 'salmon'],
 ['mineral water'],
 ['cooking oil',
  'honey',
  'chicken',
  'shrimp',
  'low fat yogurt',
  'oil',
  'chocolate'],
 ['eggs', 'turkey'],
 ['extra dark chocolate',
  'chicken',
  'eggs',
  'tomatoes',
  'fresh tuna',
  'salmon',
  'black tea',
  'mineral water',
  'spaghetti',
  'turkey'],
 ['honey', 'milk', 'protein bar', 'french fries', 'meatballs'],
 ['eggs', 'shrimp', 'pepper', 'shampoo', 'red wine', 'chocolate', 'pasta'],
 ['rice', 'sparkling water'],
 ['ham', 'body spray', 'green tea', 'pancakes', 'min

In [99]:
# Parameters
min_support = 0.3
min_confidence = 0.5

# Find frequent itemsets
frequent_itemsets = find_frequent_itemsets(transactions, min_support)

# Generate association rules
association_rules = generate_rules(frequent_itemsets, min_confidence)

frequent_itemsets

{}