<img src="../../figures/logo-esi-sba.png" width="700" height="126" align="center" alt="Logo">

# Book recommendation system
*Prepared by* 
- Benghenima Hafsa (h.benghenima@esi-sba.dz) 
- Ghandouz Amina (a.ghandouz@esi-sba.dz) 
- Benahmed Firdaws (f.benahmed@esi-sba.dz) 

## Notebook4 : FP-Growth Algorithm

In [75]:
from scipy.sparse import coo_matrix
from scipy.sparse import csr_matrix
import pandas as pd
import numpy as np
import pyfpgrowth

### Load the Dataset

In [76]:
file_path = "../../data/cleaned_df.csv"
df = pd.read_csv(file_path)

df.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title
0,276729,0521795028,6,The Amsterdam Connection : Level 4 (Cambridge ...
1,276744,038550120X,7,A Painted House
2,276747,0060517794,9,Little Altars Everywhere
3,276747,0671537458,9,Waiting to Exhale
4,276747,0679776818,8,Birdsong: A Novel of Love and War


### Transform the Data into Transaction format

In [77]:
# Step 1: convert categorical columns to codes
book_titles = pd.Categorical(df['Book-Title']).codes
user_ids = pd.Categorical(df['User-ID']).codes

book_titles[:5], user_ids[:5]

(array([ 84049,   2671,  52658, 111122,  11695], dtype=int32),
 array([60796, 60797, 60798, 60798, 60798], dtype=int32))

In [78]:
# Step 2: create a sparse matrix with rows as users and columns as books
sparse_matrix = coo_matrix(
    (df['Book-Rating'], (user_ids, book_titles)),
    shape=(len(np.unique(user_ids)), len(np.unique(book_titles)))
)

In [79]:
# Step 3: convert to csr format
sparse_matrix = sparse_matrix.tocsr()

In [80]:
# Step 4: convert sparse matrix to a list of transactions
transactions = []

for user_row in sparse_matrix:
    books = user_row.indices 
    transactions.append(books.tolist())

transactions[:1]

[[37178, 59304, 103998]]

In [81]:
# Step 5: map indices back to book titles

book_titles = pd.Categorical(df['Book-Title']).categories
transactions_with_titles = [[book_titles[i] for i in transaction] for transaction in transactions]

transactions_with_titles[:1]


[['Goodbye to the Buttermilk Sky',
  'More Cunning Than Man: A Social History of Rats and Man',
  'The Witchfinder (Amos Walker Mystery Series)']]

### Apply FP-Growth

In [82]:
# Step 1: run frequent pattern growth
patterns = pyfpgrowth.find_frequent_patterns(transactions_with_titles, 20)

In [83]:
# Step 2: generate association rules
rules = pyfpgrowth.generate_association_rules(patterns, 0.5)

### Interpret the Results

In [84]:
# function to display rules
def display_rules(rules):
    for antecedent, (consequent, confidence) in list(rules.items()):
        print(f"Rule: {antecedent} -> {consequent} (Confidence: {confidence:.2f})")

In [85]:
# display the association rules
print("association rules:")
display_rules(rules)

association rules:
Rule: ('The Green Mile: The Mouse on the Mile (Green Mile Series)',) -> ("The Green Mile: Coffey's Hands (Green Mile Series)",) (Confidence: 0.71)
Rule: ("The Green Mile: Coffey's Hands (Green Mile Series)",) -> ('The Two Dead Girls (Green Mile Series)',) (Confidence: 0.73)
Rule: ('The Two Dead Girls (Green Mile Series)',) -> ("The Green Mile: Coffey's Hands (Green Mile Series)",) (Confidence: 0.63)
Rule: ("The Green Mile: Coffey's Hands (Green Mile Series)", 'The Green Mile: Night Journey (Green Mile Series)') -> ('The Two Dead Girls (Green Mile Series)',) (Confidence: 0.95)
Rule: ("The Green Mile: Coffey's Hands (Green Mile Series)", 'The Two Dead Girls (Green Mile Series)') -> ('The Green Mile: Night Journey (Green Mile Series)',) (Confidence: 0.91)
Rule: ('The Green Mile: Night Journey (Green Mile Series)', 'The Two Dead Girls (Green Mile Series)') -> ("The Green Mile: Coffey's Hands (Green Mile Series)",) (Confidence: 0.87)
Rule: ('The Wide Window (A Series of U

In [86]:
# function to filter and display rules related to a specific book
def display_rules_for_book(rules, book_title):
    found = False
    for rule in rules:
        antecedent = rule
        consequent = rules[rule][0]
        confidence = rules[rule][1]
        # Check if the book title is in the antecedent or consequent
        if book_title in antecedent or book_title in consequent:
            print(f"association rules related to '{book_title}':")
            found = True
            print(f"Rule: {antecedent} -> {consequent} (Confidence: {confidence:.2f})")
    if not found:
        print(f"No rules found for '{book_title}'.")

In [87]:
book_title = 'Interview with the Vampire'

display_rules_for_book(rules, book_title)

association rules related to 'Interview with the Vampire':
Rule: ('Interview with the Vampire', 'The Queen of the Damned (Vampire Chronicles (Paperback))') -> ('The Vampire Lestat (Vampire Chronicles, Book II)',) (Confidence: 0.76)
association rules related to 'Interview with the Vampire':
Rule: ('Interview with the Vampire', 'The Tale of the Body Thief (Vampire Chronicles (Paperback))') -> ('The Vampire Lestat (Vampire Chronicles, Book II)',) (Confidence: 0.78)
association rules related to 'Interview with the Vampire':
Rule: ('The Queen of the Damned (Vampire Chronicles (Paperback))', 'The Vampire Lestat (Vampire Chronicles, Book II)') -> ('Interview with the Vampire',) (Confidence: 0.65)
association rules related to 'Interview with the Vampire':
Rule: ('The Tale of the Body Thief (Vampire Chronicles (Paperback))', 'The Vampire Lestat (Vampire Chronicles, Book II)') -> ('Interview with the Vampire',) (Confidence: 0.78)
association rules related to 'Interview with the Vampire':
Rule: (