**Utility functions**

In [25]:
def do_while(check_condition, action):
  action()
  while check_condition():
    action()

  and should_run_async(code)


# Data Selection

In [26]:
datasets = {
  '1': 'Amazon',
  '2': 'Best Buy',
  '3': 'K-Mart',
  '4': 'Nike',
  '5': 'Custom',
  '6': 'Generic',
}


dataset = 0
attempted = 0

def read_dataset_input():
  global attempted, dataset
  print("Invalid selection. Try again. \n") if attempted > 0 else print("Select a dataset: \n")
  attempted += 1
  dataset = input(" 1. Amazon \n 2. Best Buy \n 3. K-mart \n 4. Nike \n 5. Custom \n 6. Generic \n")
  if dataset in datasets.keys(): print("You selected: ", datasets[dataset])

input_read_condition = lambda: dataset not in datasets.keys()
do_while(input_read_condition, read_dataset_input)


  and should_run_async(code)


Select a dataset: 

 1. Amazon 
 2. Best Buy 
 3. K-mart 
 4. Nike 
 5. Custom 
 6. Generic 
5
You selected:  Custom


# Defining support and confidence

In [27]:
support = None
confidence = None

def read_threshold():
  global support, confidence
  support = input("Enter support threshold: ")
  confidence = input("Enter confidence threshold: ")

def threshold_read_condition():
  global support, confidence
  try:
    support = float(support)
    confidence = float(confidence)
  except ValueError:
    print("Invalid input type. Try again.")
    return True
  if 0 <= support <= 1 and 0 <= confidence <= 1:
    return False
  print("Invalid input range. Try again.")
  return True
do_while(threshold_read_condition, read_threshold)

print("Generating association rules for ", datasets[dataset], " dataset with support: ", support, " and confidence: ", confidence)

  and should_run_async(code)


Enter support threshold: 0.5
Enter confidence threshold: 0.5
Generating association rules for  Custom  dataset with support:  0.5  and confidence:  0.5


# Reading Transactions

In [29]:
import pandas as pd

raw_dataset = pd.read_csv('./datasets/' + datasets[dataset] + '.csv', usecols=[1])
transactions_list = [transaction[0].split(',') for transaction in raw_dataset.values.tolist()]
transactions_list = [[item.strip() for item in transaction] for transaction in transactions_list]

print(raw_dataset)

                Transactions
0     ink, pen, cheese, bag 
1  milk, pen, juice, cheese 
2               milk, juice 
3       juice, milk, cheese 
4     ink, pen, cheese, bag 
5  milk, pen, juice, cheese 


  and should_run_async(code)


# Arpiori Using Package

In [30]:
import time

  and should_run_async(code)


In [31]:
pip install mlxtend

  and should_run_async(code)




In [32]:
from mlxtend.frequent_patterns import apriori, association_rules

start_time = time.time()

all_items = set(item for sublist in transactions_list for item in sublist)

df = pd.DataFrame([{item: (item in transaction) for item in all_items} for transaction in transactions_list])

frequent_itemsets = apriori(df, min_support=support, use_colnames=True)

rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=confidence)

rules_sorted = rules.sort_values(by='lift', ascending=False)

rules_filtered = rules[['antecedents', 'consequents', 'support', 'confidence']]
rules_filtered.columns = ['Antecedents', 'Consequents', 'Support', 'Confidence']

print(rules_filtered)

end_time = time.time()
print("Time taken: ", end_time - start_time, " seconds")

        Antecedents      Consequents   Support  Confidence
0             (pen)         (cheese)  0.666667        1.00
1          (cheese)            (pen)  0.666667        0.80
2           (juice)           (milk)  0.666667        1.00
3            (milk)          (juice)  0.666667        1.00
4           (juice)         (cheese)  0.500000        0.75
5          (cheese)          (juice)  0.500000        0.60
6            (milk)         (cheese)  0.500000        0.75
7          (cheese)           (milk)  0.500000        0.60
8     (juice, milk)         (cheese)  0.500000        0.75
9   (juice, cheese)           (milk)  0.500000        1.00
10   (milk, cheese)          (juice)  0.500000        1.00
11          (juice)   (milk, cheese)  0.500000        0.75
12           (milk)  (juice, cheese)  0.500000        0.75
13         (cheese)    (juice, milk)  0.500000        0.60
Time taken:  0.02274775505065918  seconds


  and should_run_async(code)


# FP-growth Using Package

In [33]:
from mlxtend.frequent_patterns import fpgrowth, association_rules

start_time = time.time()
frequent_itemsets = fpgrowth(df, min_support=support, use_colnames=True)

rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=confidence)

rules_filtered = rules[['antecedents', 'consequents', 'support', 'confidence']]
rules_filtered.columns = ['Antecedents', 'Consequents', 'Support', 'Confidence']

print(rules_filtered)

end_time = time.time()
print("Time taken: ", end_time - start_time, " seconds")

        Antecedents      Consequents   Support  Confidence
0             (pen)         (cheese)  0.666667        1.00
1          (cheese)            (pen)  0.666667        0.80
2            (milk)         (cheese)  0.500000        0.75
3          (cheese)           (milk)  0.500000        0.60
4           (juice)           (milk)  0.666667        1.00
5            (milk)          (juice)  0.666667        1.00
6           (juice)         (cheese)  0.500000        0.75
7          (cheese)          (juice)  0.500000        0.60
8     (juice, milk)         (cheese)  0.500000        0.75
9   (juice, cheese)           (milk)  0.500000        1.00
10   (milk, cheese)          (juice)  0.500000        1.00
11          (juice)   (milk, cheese)  0.500000        0.75
12           (milk)  (juice, cheese)  0.500000        0.75
13         (cheese)    (juice, milk)  0.500000        0.60
Time taken:  0.01715230941772461  seconds


  and should_run_async(code)


# Implementing Apiori

**Generating frequent items**

In [34]:
from itertools import combinations

min_sup = support * len(transactions_list)
freq_itemset_support = {}


def count_item_freq(itemsets):
  itemset_support = {}
  for transaction in transactions_list:
    for itemset in itemsets:
      for item in itemset:
        if item not in transaction:
          break
      else:
        itemset_support[itemset] = itemset_support.get(itemset, 0) + 1
  return itemset_support


def prune_items(last_freq_itemset):
  return {itemset:(sup/len(transactions_list)) for itemset,sup in last_freq_itemset.items() if sup >= min_sup}

def make_n_itemset(n_itemset):
  n = len(n_itemset[0])
  return list(combinations(list(set(item for s in n_itemset for item in s)), n + 1))

start_time = time.time()

new_item_set_list = list(set((item,) for transaction in transactions_list for item in transaction))

while new_item_set_list:
  itemset_support = count_item_freq(new_item_set_list)
  freq_itemsets = prune_items(itemset_support)
  freq_itemset_support.update(freq_itemsets)
  if len(freq_itemsets) == 0:
    break
  new_item_set_list = make_n_itemset(list(freq_itemsets.keys()))

for itemset, sup in freq_itemset_support.items():
  print(itemset, sup)

('pen',) 0.6666666666666666
('cheese',) 0.8333333333333334
('juice',) 0.6666666666666666
('milk',) 0.6666666666666666
('pen', 'cheese') 0.6666666666666666
('milk', 'juice') 0.6666666666666666
('milk', 'cheese') 0.5
('juice', 'cheese') 0.5
('milk', 'juice', 'cheese') 0.5


  and should_run_async(code)


**Mining Association Rules**

In [35]:
index = 1
for itemset, sup in freq_itemset_support.items():
  if len(itemset) < 2:
    continue
  for i in range(1, len(itemset)):
    for antecedent in combinations(itemset, i):
      consequent = tuple(set(itemset) - set(antecedent))
      conf = freq_itemset_support[itemset] / freq_itemset_support[antecedent]
      if conf >= confidence:
        print("Rule ", index, ": ", antecedent, "->", consequent)
        print("Confidence: ", conf*100, "%")
        print("Support: ", freq_itemset_support[itemset]*100, "%")
        print("\n")
        index += 1

end_time = time.time()
print("Time taken: ", end_time - start_time, " seconds")

Rule  1 :  ('pen',) -> ('cheese',)
Confidence:  100.0 %
Support:  66.66666666666666 %


Rule  2 :  ('cheese',) -> ('pen',)
Confidence:  80.0 %
Support:  66.66666666666666 %


Rule  3 :  ('milk',) -> ('juice',)
Confidence:  100.0 %
Support:  66.66666666666666 %


Rule  4 :  ('juice',) -> ('milk',)
Confidence:  100.0 %
Support:  66.66666666666666 %


Rule  5 :  ('milk',) -> ('cheese',)
Confidence:  75.0 %
Support:  50.0 %


Rule  6 :  ('cheese',) -> ('milk',)
Confidence:  60.0 %
Support:  50.0 %


Rule  7 :  ('juice',) -> ('cheese',)
Confidence:  75.0 %
Support:  50.0 %


Rule  8 :  ('cheese',) -> ('juice',)
Confidence:  60.0 %
Support:  50.0 %


Rule  9 :  ('milk',) -> ('juice', 'cheese')
Confidence:  75.0 %
Support:  50.0 %


Rule  10 :  ('juice',) -> ('milk', 'cheese')
Confidence:  75.0 %
Support:  50.0 %


Rule  11 :  ('cheese',) -> ('juice', 'milk')
Confidence:  60.0 %
Support:  50.0 %


Rule  12 :  ('milk', 'juice') -> ('cheese',)
Confidence:  75.0 %
Support:  50.0 %


Rule  13 :  

  and should_run_async(code)
