In [1]:
# Step 1: Import The Packages

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Step 2: Load The Data
# Read goodreads_data.csv (path is relative to the working directory) into a
# DataFrame that every later cell refers to as `data`.
# NOTE(review): the loaded frame has an 'Unnamed: 0' column, which looks like
# a previously saved index -- confirm, and consider index_col=0 if so.
data = pd.read_csv('goodreads_data.csv')

In [4]:
# Inspect dtypes and non-null counts before cleaning; Description is the only
# column with missing values (9923 of 10000 non-null).
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Unnamed: 0   10000 non-null  int64  
 1   Book         10000 non-null  object 
 2   Author       10000 non-null  object 
 3   Description  9923 non-null   object 
 4   Genres       10000 non-null  object 
 5   Avg_Rating   10000 non-null  float64
 6   Num_Ratings  10000 non-null  object 
 7   URL          10000 non-null  object 
dtypes: float64(1), int64(1), object(6)
memory usage: 625.1+ KB


In [5]:
# Step 3: Fill The Empty Rows
# Description has missing values (9923 of 10000 non-null). Fill them with an
# empty string rather than a placeholder sentence: this column is later
# vectorized with TF-IDF, and a placeholder's words would become rare shared
# tokens that make every description-less book look similar to the others.
data['Description'] = data['Description'].fillna('')

In [6]:
# Re-check the frame: Description should now report 10000 non-null values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Unnamed: 0   10000 non-null  int64  
 1   Book         10000 non-null  object 
 2   Author       10000 non-null  object 
 3   Description  10000 non-null  object 
 4   Genres       10000 non-null  object 
 5   Avg_Rating   10000 non-null  float64
 6   Num_Ratings  10000 non-null  object 
 7   URL          10000 non-null  object 
dtypes: float64(1), int64(1), object(6)
memory usage: 625.1+ KB


In [30]:
# Step 4 : Clean The Title
import re

def clean_Title(title):
  """Return ``title`` without bracketed annotations and with tidy whitespace.

  Every span that opens with ``(``, ``[`` or ``{`` and runs to the nearest
  ``)``, ``]`` or ``}`` is removed, e.g. ``"Dune (Dune, #1)"`` -> ``"Dune"``.
  Runs of whitespace are then collapsed to a single space and the ends are
  trimmed, so removing a bracket group from the middle of a title does not
  leave a double space behind (``"A [x] B"`` -> ``"A B"``).

  Parameters
  ----------
  title : str
    Raw title text.

  Returns
  -------
  str
    The cleaned title; may be empty if the title was only a bracket group.
  """
  without_brackets = re.sub(r'[\(\[\{].*?[\)\]\}]', '', title)
  # split()/join collapses internal whitespace runs and strips both ends.
  return ' '.join(without_brackets.split())

# Coerce every title to text first, then clean each one element-wise.
data['Book'] = data['Book'].astype(str).map(clean_Title)

In [10]:
# Step 5 : Combine The Description And Genres
# One text field per book: the description, a single space, then the genres.
data['Text'] = data['Description'].str.cat(data['Genres'], sep=" ")

In [11]:
# Step 6: Create TF-IDF vectors from combined text
# stop_words='english' makes the vectorizer drop scikit-learn's built-in
# English stop-word list before weighting terms.
vectorizer = TfidfVectorizer(stop_words='english')
# Learn the vocabulary from data['Text'] and produce one TF-IDF row per book
# (row i corresponds to the i-th row of `data`).
tfidf_matrix = vectorizer.fit_transform(data['Text'])

In [14]:
# Step 7 : Compute cosine similarity matrix
# With a single argument the rows are compared against themselves, giving the
# full book-by-book similarity matrix.
consie_sim = cosine_similarity(tfidf_matrix)

In [31]:
#Step 8 : Book Recommendations Function
def recommend_books(title, data, consie_sim, top_k=5):
  """Return the books most similar to ``title``.

  Parameters
  ----------
  title : str
    Title to look up. It is compared with ``data['Book']`` ignoring case and
    surrounding whitespace; the first matching row is used as the query.
  data : pandas.DataFrame
    Book table providing the columns 'Book', 'Author', 'Avg_Rating', 'URL'.
  consie_sim : array-like
    Dense square similarity matrix; row/column ``i`` must correspond to the
    ``i``-th row of ``data`` by position.
  top_k : int, default 5
    Maximum number of recommendations. Values below 1 give an empty frame.

  Returns
  -------
  pandas.DataFrame or str
    The 'Book', 'Author', 'Avg_Rating' and 'URL' columns of the most similar
    books, best match first, or a "not found" message string when no title
    matches.
  """
  wanted = title.strip().lower()
  is_match = (data['Book'].str.strip().str.lower() == wanted).to_numpy()
  positions = np.flatnonzero(is_match)
  if positions.size == 0:
    return f"The Book '{title}' Not Found In The DataBase"
  # Use the row *position*, not the index label: the similarity matrix is
  # positional, so a label would point at the wrong row (or out of bounds)
  # whenever `data` does not carry a default 0..n-1 index.
  query_pos = int(positions[0])

  scores = np.asarray(consie_sim[query_pos], dtype=float).ravel()
  # Stable sort on the negated scores: descending similarity, ties kept in
  # their original row order.
  ranked = np.argsort(-scores, kind='stable')
  # Drop the query book explicitly. Assuming it sorts first is wrong when an
  # earlier row ties with it (e.g. a duplicate entry with identical text).
  ranked = ranked[ranked != query_pos][:max(int(top_k), 0)]

  return data.iloc[ranked][['Book', 'Author', 'Avg_Rating', 'URL']]

In [35]:
# Interactive Terminal
def interactive_recommender(data, consie_sim,top_k=5):
  """Prompt for book titles in a loop and print recommendations for each.

  The loop ends when the user types 'end' (any case, surrounding whitespace
  ignored), or when the prompt is closed or interrupted. Blank input is
  skipped and the prompt is shown again.

  Parameters
  ----------
  data : pandas.DataFrame
    Book table passed through to ``recommend_books``.
  consie_sim : array-like
    Similarity matrix passed through to ``recommend_books``.
  top_k : int, default 5
    Number of recommendations requested per title.
  """
  while True:
    try:
      title = input("Enter a book title (or type 'end' to exit): ").strip()
    except (EOFError, KeyboardInterrupt):
      # A closed stdin or an interrupted prompt is treated as a request to
      # leave, instead of surfacing a traceback.
      title = "end"
    if title.lower() == "end":
      print("Goodbye, Have A Good Day")
      break
    if not title:
      # Nothing typed: ask again rather than searching for an empty title.
      continue
    recommendations = recommend_books(title, data, consie_sim, top_k)
    if isinstance(recommendations, str):
      # recommend_books signals "not found" with a message string.
      print(recommendations)
    else:
      print(f'Top {top_k} for {title}')
      print(recommendations.to_string(index=False))
# Start the interactive prompt; this blocks until the user types 'end'.
interactive_recommender(data, consie_sim, top_k=5)



Enter a book title (or type 'end' to exit): Harry Potter and the Chamber of Secrets
Top 5 for Harry Potter and the Chamber of Secrets
                                     Book       Author  Avg_Rating                                                                                URL
      Harry Potter and the Goblet of Fire J.K. Rowling        4.56          https://www.goodreads.com/book/show/6.Harry_Potter_and_the_Goblet_of_Fire
 Harry Potter and the Prisoner of Azkaban J.K. Rowling        4.58     https://www.goodreads.com/book/show/5.Harry_Potter_and_the_Prisoner_of_Azkaban
Harry Potter and the Order of the Phoenix J.K. Rowling        4.50    https://www.goodreads.com/book/show/2.Harry_Potter_and_the_Order_of_the_Phoenix
 Harry Potter and the Philosopher’s Stone J.K. Rowling        4.47 https://www.goodreads.com/book/show/72193.Harry_Potter_and_the_Philosopher_s_Stone
   Harry Potter and the Half-Blood Prince J.K. Rowling        4.58       https://www.goodreads.com/book/show/1.Harry