In [None]:
# lp15 : Apriori
# Import necessary libraries
import inspect

import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Step 1: Data Preprocessing
# Read the raw order records from the CSV file into a DataFrame.
data = pd.read_csv('./Datasets/Order1.csv')

# Print a heading followed by the first rows (DataFrame.head() default)
# so the column layout can be checked before any further processing.
print("Dataset Preview:", data.head(), sep="\n")

# The dataset is assumed to provide the columns 'Member_number', 'Date'
# and 'itemDescription'.

# Step 2: Generate the List of Transactions
# Every item recorded for a member is collected into a single list, and each
# member's list is then treated as one transaction (basket). Note that 'Date'
# is not part of the grouping, so purchases made on different days by the
# same member end up in the same basket.
items_by_member = data.groupby('Member_number')['itemDescription']
transactions = [list(items) for _, items in items_by_member]

# Step 3: Prepare the Transactions for Apriori
# Let the TransactionEncoder learn the item vocabulary first, then encode
# each transaction as one row with one column per distinct item.
te = TransactionEncoder().fit(transactions)
te_ary = te.transform(transactions)

# Wrap the encoded array in a DataFrame whose column labels are the item names
# learned by the encoder.
df = pd.DataFrame(data=te_ary, columns=te.columns_)

# Step 4: Train Apriori on the Dataset
# Mine the frequent itemsets from the encoded transactions.
# min_support is a fraction of all transactions, not an absolute count:
# 0.05 keeps only itemsets that occur in at least 5% of the transactions.
# Adjust the threshold as needed.
frequent_itemsets = apriori(
    df,
    min_support=0.05,
    use_colnames=True,  # report item names instead of column indices
)
print("\nFrequent Itemsets : \n", frequent_itemsets)

# Step 5: Generate Association Rules
# Derive association rules from the frequent itemsets, keeping only rules
# whose confidence is at least 0.4.
rule_kwargs = {"metric": "confidence", "min_threshold": 0.4}

# mlxtend releases disagree on the `num_itemsets` argument: some require it,
# others do not accept it at all. Instead of asking the reader to edit the
# call by hand when it fails, pass the argument only if the installed
# association_rules() declares it. The value is the number of transactions
# (one row of `df` per transaction) rather than a None placeholder.
if "num_itemsets" in inspect.signature(association_rules).parameters:
    rule_kwargs["num_itemsets"] = len(df)

rules = association_rules(frequent_itemsets, **rule_kwargs)
print("\nRules : \n")
# Bare expression: presumably the last line of a notebook cell, where it
# renders the rules table as the cell output.
rules


Dataset Preview:
   Member_number        Date   itemDescription
0           1808  21-07-2015    tropical fruit
1           2552  05-01-2015        whole milk
2           2300  19-09-2015         pip fruit
3           1187  12-12-2015  other vegetables
4           3037  01-02-2015        whole milk
      Instant food products  UHT-milk  abrasive cleaner  artif. sweetener  \
0                     False     False             False             False   
1                     False     False             False             False   
2                     False     False             False             False   
3                     False     False             False             False   
4                     False     False             False             False   
...                     ...       ...               ...               ...   
3893                  False     False             False             False   
3894                  False     False             False             False   
3895    

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(beef),(other vegetables),0.119548,0.376603,0.050795,0.424893,1.128223,1.0,0.005773,1.083966,0.129082,0.114055,0.077462,0.279885
1,(beef),(whole milk),0.119548,0.458184,0.064135,0.536481,1.170886,1.0,0.009360,1.168919,0.165762,0.124875,0.144508,0.338229
2,(bottled beer),(other vegetables),0.158799,0.376603,0.068497,0.431341,1.145345,1.0,0.008692,1.096257,0.150857,0.146703,0.087805,0.306610
3,(bottled beer),(whole milk),0.158799,0.458184,0.085428,0.537964,1.174124,1.0,0.012669,1.172672,0.176297,0.160714,0.147247,0.362207
4,(bottled water),(other vegetables),0.213699,0.376603,0.093894,0.439376,1.166680,1.0,0.013414,1.111969,0.181695,0.189147,0.100694,0.344347
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,"(rolls/buns, soda)",(whole milk),0.119805,0.458184,0.065162,0.543897,1.187072,1.0,0.010269,1.187926,0.179041,0.127064,0.158197,0.343057
74,"(whole milk, soda)",(rolls/buns),0.151103,0.349666,0.065162,0.431239,1.233288,1.0,0.012326,1.143422,0.222829,0.149588,0.125432,0.308797
75,"(rolls/buns, yogurt)",(whole milk),0.111339,0.458184,0.065931,0.592166,1.292420,1.0,0.014917,1.328521,0.254605,0.130922,0.247283,0.368031
76,"(whole milk, yogurt)",(rolls/buns),0.150590,0.349666,0.065931,0.437819,1.252106,1.0,0.013275,1.156805,0.237041,0.151802,0.135550,0.313187
