In [8]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Toy market-basket data: each inner list is the set of items in one transaction.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

# Encode the transactions as a table with one column per item; the column
# names are taken from the fitted encoder (te.columns_).
te = TransactionEncoder()
te_ary = te.fit_transform(dataset)
df = pd.DataFrame(data=te_ary, columns=te.columns_)

# Mine the itemsets whose support is at least 0.6, labelled by item name
# rather than by column index.
frequent_itemsets = apriori(df, use_colnames=True, min_support=0.6)
print(frequent_itemsets)

# Derive association rules from those itemsets, thresholding on lift (0.7).
rules = association_rules(frequent_itemsets, min_threshold=0.7, metric="lift")

print(rules)

    support                     itemsets
0       0.8                       (Eggs)
1       1.0               (Kidney Beans)
2       0.6                       (Milk)
3       0.6                      (Onion)
4       0.6                     (Yogurt)
5       0.8         (Kidney Beans, Eggs)
6       0.6                (Onion, Eggs)
7       0.6         (Milk, Kidney Beans)
8       0.6        (Kidney Beans, Onion)
9       0.6       (Kidney Beans, Yogurt)
10      0.6  (Kidney Beans, Onion, Eggs)
              antecedents            consequents  antecedent support  \
0          (Kidney Beans)                 (Eggs)                 1.0   
1                  (Eggs)         (Kidney Beans)                 0.8   
2                 (Onion)                 (Eggs)                 0.6   
3                  (Eggs)                (Onion)                 0.8   
4                  (Milk)         (Kidney Beans)                 0.6   
5          (Kidney Beans)                 (Milk)                 1.0   
6   

In [3]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# 1. support(A⇒ B) =P(A ∪ B)

# Support (Books) = Freq (Books)/Total transactions made

# Support (Books) = 6/100 = 0.06 (i.e. 6%)

# 2. Confidence: It is the ratio of combined transactions to individual transactions.

# confidence(A⇒ B) =P(B|A)

# Confidence (Books) = Combined transactions/Individual transaction

# Confidence (Books) = 0.06/0.08 = 0.75

# 3. Lift: It is the ratio of the confidence of the rule to the support of the consequent: lift(A⇒ B) = confidence(A⇒ B) / support(B).

# Lift = 0.75/0.10 = 7.5

# If the value of lift < 1, the items are bought together less often than would be expected if they were independent.
# If the value of lift > 1, the items are bought together more often than would be expected if they were independent.
# If the value of lift = 1, then the purchase of the antecedent makes no difference to the consequent.

# Load the input data from data.csv.
df1 = pd.read_csv('data.csv')

# Keep only the invoice identifier and the category of each row.
df1 = df1[['InvoiceID', 'Category']]

# Keep only the rows of invoices that have more than one row. 'size' counts
# rows per invoice, so a category repeated within an invoice counts each time.
rows_per_invoice = df1.groupby('InvoiceID')['Category'].transform('size')
df1 = df1[rows_per_invoice > 1]

# Collapse each invoice to a single row whose 'Category' value is the
# comma-joined text of all its categories.
# NOTE(review): the next cell splits this text on ',' again, so a category
# name that itself contains a comma would be broken into several items, and
# str() turns a missing category into the literal text 'nan' - confirm that
# neither case occurs in data.csv.
dataset_str = df1.groupby('InvoiceID').agg({
    'Category': lambda x: ','.join(str(i) for i in x)
}).reset_index()

#dataset_str.to_csv("output.csv")

In [4]:
# Rebuild the transactions: split each row's comma-joined 'Category' string
# into a list of category names, giving one list per row of dataset_str.
data = [joined.split(',') for joined in dataset_str['Category']]

# Encode the transactions with TransactionEncoder and wrap the result in a
# DataFrame whose columns are named after the encoded items (te.columns_).
te = TransactionEncoder()
transactions = pd.DataFrame(te.fit_transform(data), columns=te.columns_)

transactions

Unnamed: 0,Accessories,Add-on item Cat,Babycare,Batteries,Biscuits,Bottled Water,Bread Offsite,Candy Category,Carbonated Drinks - CSD,Cheese,...,Steamed Bun,Supplement,Tea,Tofu,Toys,Vegetables Cat,Vegetarian/Organic food,Whole Milk,Wine,Yogurt
0,False,False,False,False,False,True,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38616,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
38617,False,False,False,False,False,False,True,False,True,False,...,False,False,False,False,False,False,False,False,False,False
38618,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
38619,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [6]:
# Mine frequent itemsets from the encoded transactions with Apriori, using a
# minimum support of 0.001 and labelling itemsets by column name instead of
# column index.
frequent_itemsets = apriori(
    transactions,
    min_support=0.001,
    use_colnames=True,
)

frequent_itemsets

Unnamed: 0,support,itemsets
0,0.007276,(Add-on item Cat)
1,0.003055,(Batteries)
2,0.030760,(Biscuits)
3,0.159654,(Bottled Water)
4,0.055462,(Bread Offsite)
...,...,...
826,0.002020,"(Rice Offsite, Tea, Other Cuisine)"
827,0.001295,"(Potato Chips, Tea, Rice Offsite)"
828,0.001217,"(Potato Chips, Tea, Snacks for Drinks)"
829,0.001010,"(Rice Offsite, Tea, Snacks for Drinks)"


In [7]:
# Derive association rules from the frequent itemsets, thresholding on lift.
# The threshold is 0, so rules with lift below 1 remain in the result.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=0,
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Add-on item Cat),(Instant Noodles),0.007276,0.156210,0.004505,0.619217,3.963995,0.003369,2.215934
1,(Instant Noodles),(Add-on item Cat),0.156210,0.007276,0.004505,0.028841,3.963995,0.003369,1.022206
2,(Add-on item Cat),(Other Cuisine),0.007276,0.091116,0.001528,0.209964,2.304358,0.000865,1.150434
3,(Other Cuisine),(Add-on item Cat),0.091116,0.007276,0.001528,0.016766,2.304358,0.000865,1.009652
4,(Biscuits),(Bottled Water),0.030760,0.159654,0.003651,0.118687,0.743400,-0.001260,0.953516
...,...,...,...,...,...,...,...,...,...
2117,"(Rice Offsite, Yogurt)",(Tea),0.011626,0.166127,0.001062,0.091314,0.549663,-0.000870,0.917669
2118,"(Tea, Yogurt)",(Rice Offsite),0.006525,0.162036,0.001062,0.162698,1.004087,0.000004,1.000791
2119,(Rice Offsite),"(Tea, Yogurt)",0.162036,0.006525,0.001062,0.006552,1.004087,0.000004,1.000027
2120,(Tea),"(Rice Offsite, Yogurt)",0.166127,0.011626,0.001062,0.006390,0.549663,-0.000870,0.994731


In [8]:
# Print a compact view of the rules: both sides of each rule plus its
# support, lift and confidence.
summary_columns = ['antecedents', 'consequents', 'support', 'lift', 'confidence']
print(rules[summary_columns])


                  antecedents              consequents   support      lift  \
0           (Add-on item Cat)        (Instant Noodles)  0.004505  3.963995   
1           (Instant Noodles)        (Add-on item Cat)  0.004505  3.963995   
2           (Add-on item Cat)          (Other Cuisine)  0.001528  2.304358   
3             (Other Cuisine)        (Add-on item Cat)  0.001528  2.304358   
4                  (Biscuits)          (Bottled Water)  0.003651  0.743400   
...                       ...                      ...       ...       ...   
2117  (Rice  Offsite, Yogurt)                    (Tea)  0.001062  0.549663   
2118            (Tea, Yogurt)          (Rice  Offsite)  0.001062  1.004087   
2119          (Rice  Offsite)            (Tea, Yogurt)  0.001062  1.004087   
2120                    (Tea)  (Rice  Offsite, Yogurt)  0.001062  0.549663   
2121                 (Yogurt)     (Rice  Offsite, Tea)  0.001062  0.552562   

      confidence  
0       0.619217  
1       0.028841  
2     