In [17]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# --- Load ---------------------------------------------------------------
# Raw purchase records, read from a CSV expected in the working directory.
df = pd.read_csv('GroceriesData.csv')

# --- Clean --------------------------------------------------------------
# Rows without an item name cannot contribute to any basket, so drop them.
# The raw frame `df` is left untouched.
df_cleaned = df.dropna(subset=['itemDescription'])

# --- Build baskets ------------------------------------------------------
# One basket per Member_number: the list of every itemDescription recorded
# for that member.
# NOTE(review): this pools all of a member's rows into a single basket; if
# the file also carries a purchase date, per-visit baskets may be what is
# wanted -- confirm against the dataset schema.
transactions = (
    df_cleaned
    .groupby('Member_number')['itemDescription']
    .apply(list)
    .tolist()
)

# --- One-hot encode -----------------------------------------------------
# Boolean matrix: one row per basket, one column per distinct item.
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
transactions_df = pd.DataFrame(te_ary, columns=te.columns_)

# --- Mine frequent itemsets ---------------------------------------------
# Keep itemsets whose support is at least 0.05; report item names rather
# than column indices.
frequent_itemsets = apriori(
    transactions_df,
    min_support=0.05,
    use_colnames=True,
)

# --- Derive association rules -------------------------------------------
# Rules are filtered on confidence with a threshold of 0.2.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.2,
)

# --- Report -------------------------------------------------------------
# Plain-text dump of both result tables, each under its own heading.
print("Frequent Itemsets:", frequent_itemsets, sep="\n")
print("\nAssociation Rules:", rules, sep="\n")


Frequent Itemsets:
      support                                        itemsets
0    0.078502                                      (UHT-milk)
1    0.119548                                          (beef)
2    0.079785                                       (berries)
3    0.062083                                     (beverages)
4    0.158799                                  (bottled beer)
..        ...                                             ...
160  0.050539  (tropical fruit, whole milk, other vegetables)
161  0.071832          (whole milk, yogurt, other vegetables)
162  0.065162                  (whole milk, rolls/buns, soda)
163  0.065931                (whole milk, yogurt, rolls/buns)
164  0.054387                      (whole milk, yogurt, soda)

[165 rows x 2 columns]

Association Rules:
              antecedents               consequents  antecedent support  \
0                  (beef)        (other vegetables)            0.119548   
1                  (beef)              (who