In [1]:
import pandas as pd 
# Location of the cleaned, clustered book catalogue.
# NOTE(review): this is an absolute, machine-specific path; consider deriving it
# from a configurable data directory so the notebook runs on other machines.
DATA_PATH = r"/Users/priyankamalavade/Desktop/BRS/data/cleaned_clustered_books.csv"

df = pd.read_csv(DATA_PATH)
print(df.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() would emit an extra "None" line after the summary.
df.info()

                                           Book Name          Author  Rating  \
0  Think Like a Monk: The Secret of How to Harnes...      Jay Shetty     4.9   
1  Ikigai: The Japanese Secret to a Long and Happ...   Héctor García     4.6   
2  The Subtle Art of Not Giving a F*ck: A Counter...     Mark Manson     4.4   
3  Atomic Habits: An Easy and Proven Way to Build...     James Clear     4.6   
4  Life's Amazing Secrets: How to Find Balance an...  Gaur Gopal Das     4.6   

   Number of Reviews    Price  \
0              313.0  10080.0   
1             3658.0    615.0   
2            15838.8  10378.0   
3             4614.0    888.0   
4             4302.0   1005.0   

                                         Description  \
0  over the past three years, jay shetty has beco...   
1                         brought to you by penguin.   
2  in this generation-defining self-help guide, a...   
3                         brought to you by penguin.   
4  stop going through life,  start growi

# Content-based recommendation

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import joblib

In [3]:
# Step 1: Fit a TF-IDF vectorizer (English stop words removed) on the
# 'cleaned_text' column and encode every book as one row of the resulting matrix.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(raw_documents=df['cleaned_text'])

In [4]:
# Step 2: Pairwise cosine similarity between every pair of books.
# With the second argument omitted, the matrix is compared against itself.
cosine_sim = cosine_similarity(tfidf_matrix)


In [5]:
# Step 3: Create a mapping from book name to row index.
book_indices = pd.Series(df.index, index=df['Book Name'])
# Series.drop_duplicates() de-duplicates the *values* (the row indices, which
# are already unique), so it would leave repeated titles in place and make
# book_indices.get(title) return a Series instead of a single index.
# De-duplicate on the title index instead, keeping the first occurrence.
book_indices = book_indices[~book_indices.index.duplicated(keep='first')]


In [6]:
# Persist the fitted vectorizer and the similarity matrix for the deployed app.
vectorizer_path = '/Users/priyankamalavade/Desktop/BRS/streamlit_app/model/tfidf_vectorizer.pkl'
similarity_path = '/Users/priyankamalavade/Desktop/BRS/streamlit_app/model/cosine_similarity_matrix.pkl'

joblib.dump(tfidf, vectorizer_path)
joblib.dump(cosine_sim, similarity_path)


['/Users/priyankamalavade/Desktop/BRS/streamlit_app/model/cosine_similarity_matrix.pkl']

In [7]:
# Recommendation function
def get_content_based_recommendations(title, top_n=5):
    """Recommend the books whose text is most similar to ``title``.

    Looks the title up in ``book_indices``, ranks every other book by its
    cosine similarity to it (row ``idx`` of ``cosine_sim``) and returns the
    best ``top_n``.

    Args:
        title: Exact 'Book Name' of the query book.
        top_n: Maximum number of recommendations to return; values <= 0
            yield an empty frame.

    Returns:
        A DataFrame with the 'Book Name', 'Author' and 'Genre' columns of the
        recommended books, most similar first, or the string
        "Book not found." when ``title`` is not in ``book_indices``.

    Note:
        Row labels from ``book_indices`` are used as positions into
        ``cosine_sim`` and ``df.iloc``, so ``df`` must keep its default
        0..n-1 index.
    """
    idx = book_indices.get(title)
    if idx is None:
        return "Book not found."
    # A title that occurs more than once makes the lookup return a Series;
    # fall back to its first occurrence so the row lookup stays one-dimensional.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    scores = np.asarray(cosine_sim[idx], dtype=float)
    # Stable descending sort: ties keep their original row order.
    order = np.argsort(-scores, kind='stable')
    # Drop the query book explicitly. Slicing off the first sorted entry is not
    # enough: another book can tie with (or beat) the self-similarity, e.g. for
    # identical texts or for an all-zero TF-IDF row.
    order = order[order != idx][:max(top_n, 0)]
    return df[['Book Name', 'Author', 'Genre']].iloc[order]


In [8]:
# Example: content-based recommendations for a single title
query_title = "Think Like a Monk: The Secret of How to Harness the Power of Positivity and Be Happy Now"
recommendations = get_content_based_recommendations(query_title)
recommendations

Unnamed: 0,Book Name,Author,Genre
1137,The Facebook Effect: The Inside Story of the C...,David Kirkpatrick,Social Media
2729,One Million Followers: How I Built a Massive S...,Brendan Kane,Religion & Philosophy
1657,The Simulation Hypothesis: An MIT Computer Sci...,Rizwan Virk,Cybernetics
839,Social Media Marketing Workbook 2020,Jason McDonald PhD,Social Media
1387,Social Media Marketing 2020 Mastery 4 Books Bu...,Brandon J. Artley,Compulsive Disorders


# Clustering-based recommendation

In [None]:

# Lookup table: book name -> cluster label (for repeated names the last row wins)
cluster_map = dict(zip(df['Book Name'], df['cluster']))

In [10]:
# Define recommendation function
def get_cluster_based_recommendations(book_title, top_n=5):
    """Return up to ``top_n`` other books from the same cluster as ``book_title``.

    The cluster is looked up in ``cluster_map``; matching rows are returned in
    their original ``df`` order with the 'Book Name', 'Author' and 'Genre'
    columns. Returns the string "Book not found." for an unknown title.
    """
    try:
        target_cluster = cluster_map[book_title]
    except KeyError:
        return "Book not found."

    in_same_cluster = df['cluster'] == target_cluster
    # The query book itself is never recommended.
    is_other_book = df['Book Name'] != book_title

    columns = ['Book Name', 'Author', 'Genre']
    return df.loc[in_same_cluster & is_other_book, columns].head(top_n)


In [11]:
# Example: cluster-based recommendations for a single title
book_title = "Think Like a Monk: The Secret of How to Harness the Power of Positivity and Be Happy Now"
get_cluster_based_recommendations(book_title=book_title)

Unnamed: 0,Book Name,Author,Genre
2,The Subtle Art of Not Giving a F*ck: A Counter...,Mark Manson,Personal Success
4,Life's Amazing Secrets: How to Find Balance an...,Gaur Gopal Das,Literary Essays
6,Sapiens,Yuval Noah Harari,Anthropology
7,The Intelligent Investor Rev Ed.,Benjamin Graham,Personal Finance
8,Rich Dad Poor Dad: What the Rich Teach Their K...,Robert T. Kiyosaki,Personal Finance


# Hybrid recommendation

In [12]:
def get_hybrid_recommendations(title, top_n=5, cluster_boost=1.2):
    """Recommend books by combining text similarity, cluster membership and rating.

    Every candidate book is scored as

        cosine similarity to the query * cluster factor * rating

    where the cluster factor is ``cluster_boost`` for books in the query
    book's cluster and 1.0 otherwise.

    Args:
        title: Exact 'Book Name' of the query book.
        top_n: Maximum number of recommendations to return; values <= 0
            yield an empty frame.
        cluster_boost: Multiplier applied to books that share the query
            book's cluster (default 1.2).

    Returns:
        A DataFrame with the 'Book Name', 'Author', 'Genre' and 'Rating'
        columns of the recommended books, best score first, or the string
        "Book not found." when ``title`` is not in ``book_indices``.

    Note:
        Row labels from ``book_indices`` are used as positions into
        ``cosine_sim`` and ``df``, so ``df`` must keep its default 0..n-1
        index. Books with a missing rating get a NaN score and rank last.
    """
    idx = book_indices.get(title)
    if idx is None:
        return "Book not found."
    # A title that occurs more than once makes the lookup return a Series;
    # fall back to its first occurrence so the row lookup stays one-dimensional.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Step 1: Content-based similarity of every book to the query book
    similarity = np.asarray(cosine_sim[idx], dtype=float)

    # Step 2: Cluster factor, computed for all rows at once instead of one
    # scalar df.loc lookup per row
    clusters = df['cluster'].to_numpy()
    boost = np.where(clusters == clusters[idx], cluster_boost, 1.0)

    # Step 3: Final score = similarity * cluster factor * rating
    ratings = df['Rating'].to_numpy(dtype=float)
    final_scores = similarity * boost * ratings

    # Step 4: Sort and return top N. The sort is stable and places NaN scores
    # last; the query book is removed explicitly because it is not guaranteed
    # to sort first (ties, all-zero TF-IDF rows).
    order = np.argsort(-final_scores, kind='stable')
    order = order[order != idx][:max(top_n, 0)]
    return df[['Book Name', 'Author', 'Genre', 'Rating']].iloc[order]


In [13]:
# Example: hybrid recommendations for a single title
hybrid_query_title = "Think Like a Monk: The Secret of How to Harness the Power of Positivity and Be Happy Now"
get_hybrid_recommendations(hybrid_query_title)

Unnamed: 0,Book Name,Author,Genre,Rating
1137,The Facebook Effect: The Inside Story of the C...,David Kirkpatrick,Social Media,4.4
839,Social Media Marketing Workbook 2020,Jason McDonald PhD,Social Media,4.5
1657,The Simulation Hypothesis: An MIT Computer Sci...,Rizwan Virk,Cybernetics,4.4
2729,One Million Followers: How I Built a Massive S...,Brendan Kane,Religion & Philosophy,4.3
2307,Built to Serve: Find Your Purpose and Become t...,Evan Carmichael,Fiction Sagas,4.8
