In [11]:
from typing import List, Tuple
from collections import defaultdict


class Itemset:
    def __init__(self, items: List[str], support: int):
        self.items = items
        self.support = support


def prefixspan(db: List[List[str]], minsup: int) -> List[Itemset]:
    freq_itemsets = []
    support_counts = defaultdict(int)
    for transaction in db:
        for item in set(transaction):
            support_counts[item] += 1
    freq_items = set(item for item, count in support_counts.items() if count >= minsup)
    for item in freq_items:
        freq_itemsets.append(Itemset([item], support_counts[item]))
    while freq_items:
        new_freq_items = set()
        new_db = []
        for transaction in db:
            if not set(freq_items).intersection(set(transaction)):
                continue
            new_transaction = []
            for item in transaction:
                if item in freq_items:
                    new_transaction.append(item)
            new_db.append(new_transaction)
            for i, item in enumerate(new_transaction):
                if i == 0:
                    prefix = []
                else:
                    prefix = new_transaction[:i]
                suffix = new_transaction[i+1:]
                new_itemset = prefix + suffix
                if new_itemset:
                    support_counts[str(new_itemset)] += 1
                    if support_counts[str(new_itemset)] == minsup:
                        new_freq_items.add(str(new_itemset))
        freq_items = new_freq_items
        for itemset in freq_items:
            freq_itemsets.append(Itemset(eval(itemset), support_counts[itemset]))
    return freq_itemsets
# Load the dataset from a CSV file
import pandas as pd
df = pd.read_csv('groceries.csv')
transactions = df.groupby(['Member_number', 'Date'])['itemDescription'].apply(list).tolist()

# Generate the frequent itemsets with a minimum support of 100
freq_itemsets = prefixspan(transactions, 100)



##Code adapted from 


['frozen vegetables'] 301
['brown bread'] 411
['UHT-milk'] 209
['napkins'] 248
['pip fruit'] 507
['canned beer'] 480
['processed cheese'] 114
['root vegetables'] 717
['soda'] 1030
['yogurt'] 894
['dessert'] 244
['domestic eggs'] 394
['chocolate'] 252
['tropical fruit'] 673
['rolls/buns'] 1161
['shopping bags'] 491
['herbs'] 112
['long life bakery product'] 177
['beverages'] 172
['ham'] 187
['salty snack'] 185
['pork'] 380
['berries'] 231
['frozen meals'] 185
['soft cheese'] 104
['red/blush wine'] 115
['hard cheese'] 148
['specialty bar'] 149
['newspapers'] 405
['citrus fruit'] 553
['cream cheese '] 254
['cat food'] 124
['oil'] 153
['sausage'] 629
['chewing gum'] 121
['ice cream'] 157
['coffee'] 356
['other vegetables'] 1290
['candy'] 142
['sugar'] 184
['sliced cheese'] 143
['frankfurter'] 401
['onions'] 218
['margarine'] 348
['white bread'] 247
['whipped/sour cream'] 465
['meat'] 184
['butter'] 377
['whole milk'] 1641
['butter milk'] 176
['bottled water'] 655
['hygiene articles'] 147
[

In [12]:


# Define a function to generate recommendations based on the frequent itemsets
def generate_recommendations(purchased_items, freq_itemsets, num_recommendations=5):
    # Create a dictionary to store the support counts for each item in the frequent itemsets
    support_counts = defaultdict(int)
    # Increment the support counts for each item that appears in the frequent itemsets
    for itemset in freq_itemsets:
        for item in itemset.items:
            if item not in purchased_items:
                support_counts[item] += itemset.support
    # Sort the items by their support counts in descending order
    sorted_items = sorted(support_counts.items(), key=lambda x: x[1], reverse=True)
    # Return the top N items as recommendations
    return [item for item, count in sorted_items[:num_recommendations]]

# Test the recommendation system
purchased_items = ['candy']
recommendations = generate_recommendations(purchased_items, freq_itemsets)
print(f"Recommendations for {purchased_items}: {recommendations}")


[<__main__.Itemset object at 0x000001F2AAB87250>, <__main__.Itemset object at 0x000001F2AAB872B0>, <__main__.Itemset object at 0x000001F2BBD9E580>, <__main__.Itemset object at 0x000001F2BBD9E5E0>, <__main__.Itemset object at 0x000001F2BBD9E250>, <__main__.Itemset object at 0x000001F2BBD9E400>, <__main__.Itemset object at 0x000001F2BBD9E460>, <__main__.Itemset object at 0x000001F2BBD9E2B0>, <__main__.Itemset object at 0x000001F2BBD9E1F0>, <__main__.Itemset object at 0x000001F2BBD9E670>, <__main__.Itemset object at 0x000001F2BC1FC550>, <__main__.Itemset object at 0x000001F2BC1FC5E0>, <__main__.Itemset object at 0x000001F2BC1FC760>, <__main__.Itemset object at 0x000001F2BC1FC730>, <__main__.Itemset object at 0x000001F2BC1FC4F0>, <__main__.Itemset object at 0x000001F2BC1FC7C0>, <__main__.Itemset object at 0x000001F2BC1FC820>, <__main__.Itemset object at 0x000001F2BC1FC880>, <__main__.Itemset object at 0x000001F2BC1FC8E0>, <__main__.Itemset object at 0x000001F2BC1FC940>, <__main__.Itemset o