In [31]:
# Import necessary libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import matplotlib.pyplot as plt
import seaborn as sns


In [32]:
df = pd.read_csv('data/Order1.csv')

In [33]:
df.dropna(inplace=True)

In [47]:
print(df.head())

# Step 2: Generate the list of transactions
# Create a list of transactions by grouping the data by 'Member_number'
# Assuming 'itemDescription' is the column representing the items purchased in the order
transactions = df.groupby('Member_number')['itemDescription'].apply(list).values.tolist()

# Step 3: Convert the list of transactions into a one-hot encoded format
# This is necessary for the apriori algorithm to work
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_transactions = pd.DataFrame(te_ary, columns=te.columns_)

# Step 4: Train Apriori on the dataset
# Apply the Apriori algorithm to find frequent itemsets with a minimum support of 1%
frequent_itemsets = apriori(df_transactions, min_support=0.01, use_colnames=True)

# Display the frequent itemsets
print("Frequent Itemsets:")
print(frequent_itemsets)

# Step 5: Generate Association Rules
# Generate association rules using the frequent itemsets with a minimum lift of 1.0
# Ensure frequent_itemsets is not empty and contains valid data
if not frequent_itemsets.empty:
    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)
    print("Association Rules:")
    print(rules)
else:
    print("No frequent itemsets found with the given support threshold.")
# Step 6: Visualize the results if association rules exist
if not frequent_itemsets.empty:
    # Plot the support of the association rules
    plt.figure(figsize=(10,6))
    sns.barplot(x='support', y='antecedents', data=rules)
    plt.title('Association Rules Support')
    plt.show()

    # Optionally, you can also plot other metrics like confidence or lift
    # Plot lift
    plt.figure(figsize=(10,6))
    sns.barplot(x='lift', y='antecedents', data=rules)
    plt.title('Association Rules Lift')
    plt.show()    

   Member_number       Date   itemDescription
0           1808 2015-07-21    tropical fruit
1           2552 2015-01-05        whole milk
2           2300 2015-09-19         pip fruit
3           1187 2015-12-12  other vegetables
4           3037 2015-02-01        whole milk
Frequent Itemsets:
       support                                           itemsets
0     0.015393                            (Instant food products)
1     0.078502                                         (UHT-milk)
2     0.031042                                    (baking powder)
3     0.119548                                             (beef)
4     0.079785                                          (berries)
...        ...                                                ...
3011  0.011031     (whipped/sour cream, soda, whole milk, yogurt)
3012  0.010518  (bottled water, rolls/buns, whole milk, yogurt...
3013  0.013597  (rolls/buns, whole milk, yogurt, other vegetab...
3014  0.010005  (rolls/buns, whole milk, yogu

TypeError: association_rules() missing 1 required positional argument: 'num_itemsets'