<img src="../../figures/logo-esi-sba.png" width="700" height="126" align="center" alt="Logo">

# Book recommendation system
*Prepared by* 
- Benghenima Hafsa (h.benghenima@esi-sba.dz) 
- Ghandouz Amina (a.ghandouz@esi-sba.dz) 
- Benahmed Firdaws (f.benahmed@esi-sba.dz) 

## Notebook5 : Eclat Algorithm


### Packages

In [2]:
from collections import defaultdict
from itertools import combinations
from tqdm import tqdm
import pandas as pd  

### Load Data

In [3]:
transaction_data =  pd.read_csv("../../data/cleaned_df.csv")
print(transaction_data)

        User-ID        ISBN  Book-Rating  \
0        276729  0521795028            6   
1        276744  038550120X            7   
2        276747  0060517794            9   
3        276747  0671537458            9   
4        276747  0679776818            8   
...         ...         ...          ...   
321073   276704  0345386108            6   
321074   276704  0743211383            7   
321075   276704  1563526298            9   
321076   276709  0515107662           10   
321077   276721  0590442449           10   

                                               Book-Title  
0       The Amsterdam Connection : Level 4 (Cambridge ...  
1                                         A Painted House  
2                                Little Altars Everywhere  
3                                       Waiting to Exhale  
4                       Birdsong: A Novel of Love and War  
...                                                   ...  
321073                                        Winte

### declaration of the eclat algorithm 

In [4]:
def eclat(transactions, min_support):

    # Step 1: Convert transactions to vertical data format
    vertical_data = defaultdict(list)
    for tid, transaction in enumerate(tqdm(transactions, desc="Converting transactions to vertical data")):
        for item in transaction:
            vertical_data[item].append(tid)
     # Step 2: Get frequent single-item itemsets
    frequent_itemsets = []
    for item, tids in tqdm(vertical_data.items(), desc="Getting frequent single-item itemsets"):
        support = len(tids)
        if support >= min_support:
            frequent_itemsets.append(([item], tids))
    print(frequent_itemsets)
    answer = []
    answer.extend(frequent_itemsets)
      # Step 3: Generate frequent itemsets
    k = 2
    while True:
        candidates = generate_candidates(frequent_itemsets, k)
        answer.extend(candidates)
        if not candidates:
            break
        frequent_itemsets = candidates
        print(frequent_itemsets)
        k += 1

    return answer, vertical_data

def generate_candidates(frequent_itemsets, k):
    candidates = []
    itemsets = [itemset for itemset, _ in frequent_itemsets]
    for itemset_pair in tqdm(combinations(itemsets, 2), desc=f"Generating candidates (k={k})"):
        itemset1, itemset2 = itemset_pair
        union_itemset = set(itemset1) | set(itemset2)
        if len(union_itemset) == k:
            candidate = list(set(sorted(union_itemset , key=str)))
            if candidate not in [itemset for itemset, _ in candidates]:
                tids1 = [tids for itemset, tids in frequent_itemsets if itemset == itemset1][0]
                tids2 = [tids for itemset, tids in frequent_itemsets if itemset == itemset2][0]
                candidate_tids = intersect(tids1, tids2)
                if len(candidate_tids) >= min_support:
                    candidates.append((candidate, candidate_tids))
    return candidates
def intersect(tids1, tids2):
    return [tid for tid in tids1 if tid in tids2]

def generate_association_rules(frequent_itemsets, vertical_data, min_confidence):
    association_rules = []
    for itemset, tids in tqdm(frequent_itemsets, desc="Generating association rules"):
        if len(itemset) > 1:
            for i in range(1, len(itemset)):
                for antecedent in combinations(itemset, i):
                    antecedent = list(antecedent)
                    consequent = [item for item in itemset if item not in antecedent]
                    antecedent_tids = set([tid for item in antecedent for tid in vertical_data[item]])
                    itemset_tids = set(tids)
                    confidence = len(itemset_tids) / len(antecedent_tids)
                    if confidence >= min_confidence:
                        association_rules.append((antecedent, consequent, confidence))
    return association_rules 
def convert_to_transactions(transaction_data):
    transactions = []
    for _, row in tqdm(transaction_data.iterrows(), desc="Converting transaction data to transactions"):
        transaction = [item for item in row[1:] if pd.notnull(item)]
        transactions.append(transaction)
    return transactions

### the work 

In [5]:
transactions = convert_to_transactions(transaction_data)
min_support = 30
min_confidence = 0.4

frequent_itemsets, vertical_data = eclat(transactions, min_support)
association_rules = generate_association_rules(frequent_itemsets, vertical_data, min_confidence)

# Print the frequent itemsets
print("Frequent Itemsets:")
for itemset, tids in frequent_itemsets:
    print(f"Itemset: {itemset}, Support: {len(tids)}")

# Print the association rules
print("\nAssociation Rules:")
cnt = 0
for antecedent, consequent, confidence in association_rules:
    print(f"{cnt}::{antecedent} => {consequent}, Confidence: {confidence:.2f}")
    cnt += 1

Converting transaction data to transactions: 321078it [00:50, 6334.41it/s] 
Converting transactions to vertical data: 100%|██████████| 321078/321078 [00:01<00:00, 281231.16it/s]
Getting frequent single-item itemsets: 100%|██████████| 245701/245701 [00:00<00:00, 1571249.27it/s]


[([6], [0, 7, 16, 21, 28, 29, 39, 64, 68, 90, 94, 111, 113, 115, 117, 126, 130, 134, 144, 149, 154, 155, 171, 172, 179, 214, 217, 219, 220, 224, 231, 246, 252, 258, 261, 274, 281, 285, 289, 290, 295, 296, 298, 300, 302, 335, 350, 388, 391, 393, 407, 416, 423, 428, 432, 433, 435, 444, 488, 546, 549, 598, 600, 614, 617, 618, 626, 681, 686, 693, 696, 707, 725, 733, 742, 746, 747, 748, 768, 781, 787, 791, 795, 798, 800, 805, 833, 836, 857, 873, 875, 892, 894, 908, 932, 937, 950, 957, 958, 960, 961, 962, 968, 970, 992, 996, 1011, 1021, 1042, 1044, 1045, 1046, 1049, 1052, 1054, 1055, 1058, 1079, 1081, 1089, 1099, 1103, 1106, 1108, 1128, 1150, 1160, 1190, 1201, 1202, 1204, 1210, 1212, 1225, 1226, 1234, 1249, 1252, 1265, 1275, 1311, 1346, 1349, 1369, 1387, 1412, 1415, 1416, 1423, 1424, 1426, 1440, 1441, 1447, 1451, 1453, 1454, 1455, 1458, 1459, 1462, 1468, 1472, 1475, 1477, 1484, 1494, 1513, 1515, 1526, 1528, 1530, 1543, 1545, 1575, 1583, 1602, 1603, 1632, 1634, 1653, 1661, 1672, 1684, 1686, 1

Generating candidates (k=2): 1983036it [1:13:01, 452.62it/s] 


[(['A Painted House', 6], [12721, 25892, 27430, 28150, 31967, 41849, 52757, 57109, 59706, 87554, 91794, 118344, 126489, 139673, 141898, 155028, 163936, 177181, 179106, 183423, 194689, 206660, 213353, 222702, 226677, 227229, 237191, 251971, 252008, 254373, 289733, 307085]), (['The Summons', 6], [34940, 47318, 53422, 66046, 71254, 84743, 86957, 94891, 109886, 129297, 149804, 156845, 161756, 162815, 169228, 170559, 189257, 194491, 200992, 209260, 209671, 229514, 241332, 242775, 244568, 246398, 248092, 285041, 286760, 292274, 292278, 294697, 300238, 311651, 311819]), (["Bridget Jones's Diary", 6], [20273, 28884, 33081, 40376, 40535, 61040, 71362, 76114, 85109, 94020, 115152, 128125, 131998, 133694, 137070, 151347, 153759, 156008, 156869, 159416, 161512, 167718, 175085, 179075, 180099, 210618, 211183, 214003, 232701, 245671, 251681, 254867, 255703, 263541, 298172, 313278]), (['0316666343', 6], [24050, 25275, 28228, 47259, 49027, 51472, 59267, 60881, 74042, 77785, 77865, 80433, 130847, 13464

Generating candidates (k=3): 1345620it [00:42, 31803.58it/s] 


[(['The Lovely Bones: A Novel', '0316666343', 6], [24050, 25275, 28228, 47259, 49027, 51472, 59267, 60881, 74042, 77785, 77865, 80433, 130847, 134642, 148067, 152249, 157734, 160506, 161206, 163241, 177200, 178322, 182701, 200615, 208878, 214057, 215353, 223219, 274996, 278918, 282618, 290051, 297642]), (['0971880107', 6, 'Wild Animus'], [4294, 6823, 8585, 17597, 20247, 26427, 27880, 30400, 32905, 37776, 38147, 38183, 38305, 43716, 54334, 59027, 68335, 72968, 74609, 77157, 80350, 80400, 80924, 88780, 90343, 97975, 99793, 102200, 104043, 110773, 111932, 114418, 131346, 131375, 139849, 152136, 152655, 153312, 163222, 169676, 171416, 171485, 180194, 184572, 186759, 190410, 193393, 193472, 196472, 197478, 205911, 222987, 228948, 230087, 230322, 231863, 241171, 246586, 247517, 268451, 282200, 288150, 288784, 298626, 304312, 311559]), (['A Painted House', '044023722X', 7], [1858, 9803, 19315, 33070, 45183, 49498, 52620, 58465, 62869, 83838, 87836, 100160, 101249, 104261, 105889, 111114, 1282

Generating candidates (k=4): 44850it [00:00, 190107.24it/s]
Generating association rules: 100%|██████████| 3933/3933 [00:26<00:00, 149.19it/s]  

Frequent Itemsets:
Itemset: [6], Support: 31645
Itemset: ['038550120X'], Support: 68
Itemset: [7], Support: 66305
Itemset: ['A Painted House'], Support: 300
Itemset: [9], Support: 60625
Itemset: ['Little Altars Everywhere'], Support: 32
Itemset: [8], Support: 91584
Itemset: ['0684867621'], Support: 47
Itemset: ['The Girl Who Loved Tom Gordon : A Novel'], Support: 47
Itemset: [10], Support: 70919
Itemset: ['043935806X'], Support: 201
Itemset: ['Harry Potter and the Order of the Phoenix (Book 5)'], Support: 206
Itemset: ['False Memory'], Support: 71
Itemset: ['0440498058'], Support: 80
Itemset: ['A Wrinkle In Time'], Support: 80
Itemset: ['0060096195'], Support: 51
Itemset: ['The Boy Next Door'], Support: 57
Itemset: ['0786817070'], Support: 62
Itemset: ['Artemis Fowl (Artemis Fowl, Book 1)'], Support: 94
Itemset: ['Move to Strike'], Support: 35
Itemset: ['Toxin'], Support: 40
Itemset: ['Whispers'], Support: 68
Itemset: ["Ender's Game (Ender Wiggins Saga (Paperback))"], Support: 145
Item


