In [52]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Read the raw purchase records
df = pd.read_csv('sampleData.csv')

# One transaction per user (all of that user's products), then one-hot encode
baskets = df.groupby('userid')['productname'].apply(list).tolist()
te = TransactionEncoder()
te.fit(baskets)
te_ary = te.transform(baskets)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# Mine itemsets whose support is at least 0.05
frequent_itemsets = apriori(df_encoded, min_support=0.05, use_colnames=True)

# Derive association rules with confidence of at least 0.1
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.1)

# Keep only rules with lift >= 3, confidence >= 0.5 and support >= 0.05
filtered_rules = rules.loc[
    rules['lift'].ge(3)
    & rules['confidence'].ge(0.5)
    & rules['support'].ge(0.05)
]

# The 20 surviving rules with the highest lift
top_rules = filtered_rules.nlargest(n=20, columns='lift')

# The 20 best-supported itemsets that hold two or more items
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
frequent_pairs = frequent_itemsets.loc[frequent_itemsets['length'].ge(2)]
top_frequent_pairs = frequent_pairs.nlargest(n=20, columns='support')

print("Top 20 Frequent Itemsets (Pairs or More):")
print(top_frequent_pairs)

print("\nTop Association Rules - Confidence vs Lift:")
print(top_rules)


Top 20 Frequent Itemsets (Pairs or More):
     support                                 itemsets  length
33      0.35                  (Chicken Breast, Bread)       2
48      0.35          (Chicken Breast, Salmon Fillet)       2
38      0.30               (Broccoli, Chicken Breast)       2
42      0.30                 (Carrot, Chicken Breast)       2
21      0.25                  (Apple, Chicken Breast)       2
41      0.25                (Broccoli, Salmon Fillet)       2
47      0.25                   (Milk, Chicken Breast)       2
12      0.20                         (Bread, Almonds)       2
14      0.20                (Chicken Breast, Almonds)       2
36      0.20                   (Bread, Salmon Fillet)       2
45      0.20                  (Carrot, Salmon Fillet)       2
112     0.20  (Carrot, Chicken Breast, Salmon Fillet)       3
18      0.15                           (Apple, Bread)       2
24      0.15                          (Banana, Bread)       2
27      0.15                

In [56]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

def find_associated_items(data: pd.DataFrame, product, top_n: int = 5,
                          min_support: float = 0.05, min_items: int = 1) -> pd.Series:
    """Return the highest-support frequent itemsets that contain ``product``.

    Rows of ``data`` are grouped by ``userid`` so that each user contributes
    one transaction made of all of that user's ``productname`` values. The
    transactions are one-hot encoded and mined with ``apriori``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``userid`` and ``productname``.
    product
        Item that every returned itemset has to contain.
    top_n : int, default 5
        Maximum number of itemsets to return.
    min_support : float, default 0.05
        Minimum support handed to ``apriori``.
    min_items : int, default 1
        Smallest itemset size to keep. With the default, the single-item set
        ``{product}`` is eligible; its support is never lower than that of a
        larger set containing ``product``, so it normally takes the first
        slot. Pass ``2`` to get only sets that combine ``product`` with at
        least one other item.

    Returns
    -------
    pandas.Series
        The ``itemsets`` values of the selected rows, highest support first.
        Empty when no frequent itemset qualifies.
    """
    # Preprocess data: one basket per user, then one-hot encode
    baskets = data.groupby('userid')['productname'].apply(list).tolist()
    te = TransactionEncoder()
    te_ary = te.fit(baskets).transform(baskets)
    df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

    # Find frequent itemsets
    frequent_itemsets = apriori(df_encoded, min_support=min_support, use_colnames=True)

    # Keep itemsets that include the product and are large enough.
    # The explicit bool cast matters when there are no frequent itemsets at
    # all: `.apply` on an empty column does not yield a boolean-typed Series,
    # and indexing with it would then not act as a row filter.
    keep = frequent_itemsets['itemsets'].apply(
        lambda items: product in items and len(items) >= min_items
    ).astype(bool)
    filtered_itemsets = frequent_itemsets[keep]

    # Sort itemsets by support and return the top N
    top_itemsets = filtered_itemsets.nlargest(top_n, 'support')

    return top_itemsets['itemsets']

# Load the data
df = pd.read_csv('sampleData.csv')

# Example usage: the highest-support itemsets that contain 'Chicken Breast'.
# One variable drives both the request and the printed header so the two
# cannot disagree (the header used to say 5 while 6 rows were requested).
product = 'Chicken Breast'
top_n = 6
associated_items = find_associated_items(df, product, top_n=top_n)
print(f"Top {top_n} itemsets associated with {product}:")
print(associated_items)


Top 5 itemsets associated with Chicken Breast:
6                    (Chicken Breast)
33            (Chicken Breast, Bread)
48    (Chicken Breast, Salmon Fillet)
38         (Broccoli, Chicken Breast)
42           (Carrot, Chicken Breast)
21            (Apple, Chicken Breast)
Name: itemsets, dtype: object
