In [13]:
import pandas as pd
from itertools import combinations

class EClAT:
    """Frequent-itemset and association-rule miner using vertical tid-sets (ECLAT style).

    Intended call order (each step fills state the next one reads):

    1. ``read_data_from_excel``      -> ``excel_data``, ``number_of_transactions``
    2. ``change_to_vertical``        -> ``vertical_data``
    3. ``generate_level1_frequent_items``
    4. ``generate_level2_or_more_frequent_items``
    5. ``generate_all_association_rules``

    Attributes:
        sheet_path: Path handed to ``pandas.read_excel``.
        min_support: Minimum number of transactions (an absolute count, not a
            fraction) an itemset must occur in to be kept.
        min_confidence: Confidence at or above which a rule is labelled "strong".
        excel_data: One list of item names per transaction.
        vertical_data: Maps item -> set of transaction ids containing it.
        allfreq_itemsets: Maps itemset tuple -> set of transaction ids.
        number_of_transactions: Row count of the loaded sheet.
    """

    def __init__(self, sheet_path, min_support, min_confidence):
        """Store the thresholds; nothing is read or computed yet."""
        self.sheet_path = sheet_path
        self.min_support = min_support
        self.min_confidence = min_confidence
        self.excel_data = None
        self.vertical_data = None
        self.allfreq_itemsets = {}
        self.number_of_transactions = None

    @staticmethod
    def _parse_items(raw_items):
        """Split one comma-separated cell into a list of clean item names.

        Surrounding whitespace is stripped and empty tokens are dropped, so
        ``"A, B,,C"`` yields ``['A', 'B', 'C']`` rather than treating ``" B"``
        and ``""`` as items of their own. A missing cell yields ``[]``.
        """
        if not isinstance(raw_items, str):
            # A blank spreadsheet cell is expected to come back from pandas as
            # NaN (a float), which has no .split(); treat it as "no items".
            if pd.isna(raw_items):
                return []
            raw_items = str(raw_items)
        tokens = (token.strip() for token in raw_items.split(','))
        return [token for token in tokens if token]

    @staticmethod
    def _format_itemset(items):
        """Render ``items`` like a set literal, but in the order given.

        Printing a real ``set`` of strings can order its elements differently
        from one interpreter run to the next; joining an ordered sequence
        keeps the printed rules reproducible.
        """
        return "{" + ", ".join(repr(item) for item in items) + "}"

    def read_data_from_excel(self):
        """Load the sheet, echo every row, and store the transactions.

        Reads the ``TiD`` and ``items`` columns. ``TiD`` is only displayed;
        ``items`` is parsed into a list of item names per row.
        """
        data = pd.read_excel(self.sheet_path)
        self.number_of_transactions = data.shape[0]
        transactions = []
        print("\ndisplay original data")
        for _, row in data.iterrows():
            print(f"transactions_id : {row['TiD']}  row :{row['items']}")
            transactions.append(self._parse_items(row['items']))
        self.excel_data = transactions

    def change_to_vertical(self):
        """Build and print the item -> transaction-id-set mapping.

        Transaction ids are positional: the first entry of ``excel_data`` is
        id 1, the second id 2, and so on.
        """
        vertical_data = {}
        for transaction_id, items in enumerate(self.excel_data, start=1):
            for item in items:
                # A set absorbs an item listed twice in one transaction.
                vertical_data.setdefault(item, set()).add(transaction_id)
        print("\n display vertical data")
        for item, transactions_ids in vertical_data.items():
            print(f"{item}: {transactions_ids}")
        self.vertical_data = vertical_data

    def generate_level1_frequent_items(self):
        """Record and print every single item that meets ``min_support``."""
        print("\n frequent itemsets:")
        for item, transactions_ids in self.vertical_data.items():
            if len(transactions_ids) >= self.min_support:
                self.allfreq_itemsets[(item,)] = transactions_ids
                print(f" {item} : {transactions_ids}")

    def generate_level2_or_more_frequent_items(self):
        """Grow frequent itemsets one size at a time until a size yields none.

        Each pass pairs every itemset found so far (of any size), keeps the
        unions that contain exactly ``current_length`` items, and intersects
        the two tid-sets to get the union's transactions. Newly found itemsets
        are printed and merged into ``allfreq_itemsets`` after the pass.
        """
        current_length = 2
        while True:
            level_itemsets = {}
            known = list(self.allfreq_itemsets.items())
            for (left, left_tids), (right, right_tids) in combinations(known, 2):
                # Sorting gives one canonical tuple per itemset.
                candidate = tuple(sorted(set(left).union(right)))
                if len(candidate) != current_length or candidate in level_itemsets:
                    continue
                shared_tids = set(left_tids).intersection(right_tids)
                if len(shared_tids) >= self.min_support:
                    level_itemsets[candidate] = shared_tids
                    print(f" {list(candidate)} : {shared_tids}")
            if not level_itemsets:
                break
            self.allfreq_itemsets.update(level_itemsets)
            current_length += 1

    def calculate_support(self, transaction_id, items):
        """Return how many transactions contain every item in ``items``.

        Args:
            transaction_id: Unused; kept so existing callers keep working.
            items: Non-empty sequence of item names present in ``vertical_data``.

        Returns:
            Size of the intersection of the items' transaction-id sets.
        """
        tidsets = [self.vertical_data[item] for item in items]
        return len(tidsets[0].intersection(*tidsets[1:]))

    def generate_all_association_rules(self):
        """Print every rule ``X --> Y`` obtainable from the frequent itemsets.

        For each frequent itemset with at least two items, every non-empty
        proper subset ``X`` is paired with the remainder ``Y``. Each line shows
        whether confidence reaches ``min_confidence`` ("strong"/"weak"), the
        confidence, the lift, and the correlation implied by the lift.
        """
        print(" \n display association rules")

        total = self.number_of_transactions
        for itemset, transactions_id in self.allfreq_itemsets.items():
            support_itemset = len(transactions_id)  # same for every split of this itemset
            for size in range(1, len(itemset)):
                for before_arrow in combinations(itemset, size):
                    # Both sides keep the itemset's own (sorted) item order.
                    after_arrow = tuple(item for item in itemset if item not in before_arrow)
                    if not after_arrow:
                        continue

                    support_before_arrow = self.calculate_support(transactions_id, list(before_arrow))
                    support_after_arrow = self.calculate_support(transactions_id, list(after_arrow))
                    probability_before_arrow = support_before_arrow / total
                    probability_after_arrow = support_after_arrow / total

                    if support_before_arrow != 0:
                        confidence = support_itemset / support_before_arrow
                    else:
                        confidence = 0

                    lift = (support_itemset / total) / (probability_before_arrow * probability_after_arrow)

                    if confidence >= self.min_confidence:
                        rule = "strong rule : "
                    else:
                        rule = "weak rule : "
                    rule += (
                        f"{self._format_itemset(before_arrow)} --> {self._format_itemset(after_arrow)}"
                        f" : confidence: {confidence:.4f} , lift: {lift:.4f}"
                    )
                    if lift > 1:
                        rule += " , positive Correlation"
                    elif lift < 1:
                        rule += " , negative Correlation"
                    else:
                        rule += " , independent"
                    print(rule)



In [14]:
import math
# Run the whole mining pipeline on the workbook below.
sheet_path = '/content/Horizontal_Format (1) (1).xlsx'

# Both thresholds are typed in interactively and kept as the raw strings;
# they are converted only when handed to the miner.
min_support = input("Enter the minimum support : ")
min_confidence = input("Enter the minimum confidence : ")

eclat = EClAT(
    sheet_path,
    min_support=math.ceil(float(min_support)),  # support is rounded up to a whole count
    min_confidence=float(min_confidence),
)

# Each stage fills state that the next one reads, so the order is fixed.
for stage in (
    eclat.read_data_from_excel,
    eclat.change_to_vertical,
    eclat.generate_level1_frequent_items,
    eclat.generate_level2_or_more_frequent_items,
    eclat.generate_all_association_rules,
):
    stage()

Enter the minimum support : 3
Enter the minimum confidence : 0.8

display original data
transactions_id : 1  row :Mz,O,N,K,E,Y
transactions_id : 2  row :D,O,N,K,E,Y
transactions_id : 3  row :Mz,A,K,E
transactions_id : 4  row :Mz,U,C,K,Y
transactions_id : 5  row :C,O,O,K,I,E

 display vertical data
Mz: {1, 3, 4}
O: {1, 2, 5}
N: {1, 2}
K: {1, 2, 3, 4, 5}
E: {1, 2, 3, 5}
Y: {1, 2, 4}
D: {2}
A: {3}
U: {4}
C: {4, 5}
I: {5}

 frequent itemsets:
 Mz : {1, 3, 4}
 O : {1, 2, 5}
 K : {1, 2, 3, 4, 5}
 E : {1, 2, 3, 5}
 Y : {1, 2, 4}
 ['K', 'Mz'] : {1, 3, 4}
 ['K', 'O'] : {1, 2, 5}
 ['E', 'O'] : {1, 2, 5}
 ['E', 'K'] : {1, 2, 3, 5}
 ['K', 'Y'] : {1, 2, 4}
 ['E', 'K', 'O'] : {1, 2, 5}
 
 display association rules
weak rule : {'K'} --> {'Mz'} : confidence: 0.6000 , lift: 1.0000 , independent
strong rule : {'Mz'} --> {'K'} : confidence: 1.0000 , lift: 1.0000 , independent
weak rule : {'K'} --> {'O'} : confidence: 0.6000 , lift: 1.0000 , independent
strong rule : {'O'} --> {'K'} : confidence: 1.0000 ,

# New Section