In [None]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
# Notebook display settings: cap rendered rows at 8, never elide columns
pd.options.display.max_rows = 8
pd.options.display.max_columns = None

In [None]:
# Load the transactions from the Excel workbook.
# NOTE: the path is relative, so it resolves against the kernel's working directory.
df = pd.read_excel('../../sqllite/Online Retail.xlsx')

In [None]:
# Clean the loaded transactions in one non-mutating chain. Each step returns a
# new frame, so no column is ever assigned onto a filtered slice (which is what
# triggers pandas' SettingWithCopyWarning) and the cell is safe to re-run.
df = (
    # Rows without an invoice number cannot be assigned to a basket
    df.dropna(subset=['InvoiceNo'])
    # Cast to str so the .str accessor works on every invoice number
    .assign(InvoiceNo=lambda d: d['InvoiceNo'].astype('str'))
    # Remove credit transactions (invoice numbers starting with 'C')
    .loc[lambda d: ~d['InvoiceNo'].str.startswith('C')]
    # Remove leading/trailing whitespace from the Description column
    .assign(Description=lambda d: d['Description'].str.strip())
)


In [None]:
# Inspect the cleaned transactions
df

In [None]:
# Pivot the transactions into an invoice x product matrix: one row per
# InvoiceNo, one column per Description, each cell the summed Quantity.
# unstack(fill_value=0) writes 0 for products absent from an invoice directly,
# so no NaN intermediate, no separate fillna pass, and no index round trip
# (reset_index -> set_index) is needed; InvoiceNo is already the index.
basket = (
    df.groupby(['InvoiceNo', 'Description'])['Quantity']
    .sum()
    .unstack(fill_value=0)
)

basket

In [None]:
# Binarize the basket: True where the invoice's summed quantity for the item is
# positive, False otherwise. The vectorized comparison replaces the per-cell
# applymap lambda (deprecated in recent pandas and slow on a wide matrix) and
# yields the boolean frame that apriori works with directly.
basket_sets = basket > 0

# Find frequent itemsets with a minimum support of 0.02
# (support is measured over the rows of basket_sets, i.e. invoices)
frequent_itemsets = apriori(basket_sets, min_support=0.02, use_colnames=True)


In [None]:
# Inspect the binarized invoice x product matrix passed to apriori
basket_sets

In [None]:
# Derive association rules from the frequent itemsets, filtering on the
# confidence metric with a minimum threshold of 0.5
rules = association_rules(
    frequent_itemsets,
    metric='confidence',
    min_threshold=0.5,
)


In [None]:

# Order the rules from highest to lowest lift
rules = rules.sort_values('lift', ascending=False)

In [None]:
# Display the top 10 association rules (rules is already sorted by lift).
print("Top 10 Association Rules:")
# display.max_rows is capped at 8 for this notebook, which would truncate a
# 10-row frame to its first and last rows; lift the cap for this output only.
# `display` is the built-in provided by the IPython/Jupyter kernel.
with pd.option_context('display.max_rows', 10):
    display(rules.head(10))

In [None]:
# # Find frequent itemsets with minimum support threshold of 0.02
# frequent_itemsets = apriori(basket_sets, min_support=0.02, use_colnames=True)

# # Store frequent itemsets in a DataFrame
# df_frequent_itemsets = pd.DataFrame(frequent_itemsets)

# # Print frequent itemsets DataFrame
# print("Frequent Itemsets:")
# print(df_frequent_itemsets)

# # Generate association rules with minimum confidence threshold of 0.5
# rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.5)

# # Sort the rules by lift in descending order
# rules.sort_values(by='lift', ascending=False, inplace=True)

# # Store association rules in a DataFrame
# df_association_rules = pd.DataFrame(rules)

# # Display the top 10 association rules DataFrame
# print("\nTop 10 Association Rules:")
# #print(df_association_rules.head(10))
# df_association_rules
