In [29]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [31]:
# Read the book catalogue CSV from disk. The bare `df` on the last line makes
# the notebook render the loaded frame as this cell's output.
file_path = "br.csv"
df = pd.read_csv(filepath_or_buffer=file_path)
df

Unnamed: 0,book_id,title,author,genre,rating,num_reviews,year_published
0,1,Pride and Prejudice,Christopher Paolini,Science Fiction,3.5,2418,2021
1,2,Rich Dad Poor Dad,Andy Weir,Fiction,3.3,63,1956
2,3,The Shining,Mark Manson,Non-Fiction,3.5,619,1997
3,4,The Alchemist,Robin Sharma,Fiction,4.0,912,1999
4,5,Circe,Robin Sharma,Science Fiction,4.2,3988,1967
...,...,...,...,...,...,...,...
995,996,Rich Dad Poor Dad,Madeline Miller,Thriller,4.9,1659,1990
996,997,It Ends with Us,Charles Duhigg,Non-Fiction,3.6,978,1972
997,998,Pride and Prejudice,Sarah J. Maas,Mystery,3.0,2334,1960
998,999,Pride and Prejudice,Napoleon Hill,Self-Help,3.8,4170,1983


In [33]:
# Count missing values per column (`isna` is the canonical spelling of `isnull`).
df.isna().sum()


book_id           0
title             0
author            0
genre             0
rating            0
num_reviews       0
year_published    0
dtype: int64

In [35]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Content-based filtering: each book is described by its genre and author text
# joined with a single space (the rating column is not part of this string).
df["combined_features"] = df["genre"].str.cat(df["author"], sep=" ")

# Turn the descriptions into TF-IDF vectors (English stop words removed), then
# compare every book with every other book using cosine similarity. Row i of
# `similarity_matrix` holds the scores of book i against all books.
vectorizer = TfidfVectorizer(stop_words="english")
feature_matrix = vectorizer.fit_transform(df["combined_features"])
similarity_matrix = cosine_similarity(feature_matrix)

# Function to get book recommendations based on a given book title
def recommend_books(book_title, top_n=5, books=None, similarity=None):
    """Recommend the titles most similar to ``book_title``.

    Parameters
    ----------
    book_title : str
        Exact title to look up in the ``"title"`` column.
    top_n : int, default 5
        Maximum number of titles to return. Values <= 0 yield an empty list.
    books : pandas.DataFrame, optional
        Frame with a ``"title"`` column. Defaults to the notebook-level ``df``.
    similarity : array-like, optional
        Square matrix whose row ``i`` holds the similarity of the ``i``-th row
        of ``books`` (by position) to every other row. Defaults to the
        notebook-level ``similarity_matrix``.

    Returns
    -------
    list of str
        Up to ``top_n`` distinct titles, most similar first. The queried title
        is never included, even when it occurs on several rows.
    str
        A "not found" message when ``book_title`` is absent (kept for
        backward compatibility with existing callers).
    """
    if books is None:
        books = df
    if similarity is None:
        similarity = similarity_matrix

    titles = books["title"].to_numpy()

    # Work with row *positions*, not index labels: the similarity matrix is
    # positional, so labels would be wrong for any non-default index.
    matches = np.flatnonzero(titles == book_title)
    if matches.size == 0:
        return f"Book '{book_title}' not found in the dataset."
    book_pos = int(matches[0])

    # Stable descending sort so ties keep their original row order.
    scores = np.asarray(similarity[book_pos], dtype=float)
    ranked_positions = np.argsort(-scores, kind="stable")

    # Skip the queried title explicitly instead of assuming it sorts first
    # (another row with identical features ties at the top), and de-duplicate
    # because the same title can appear on many rows.
    seen_titles = {book_title}
    recommended_books = []
    for pos in ranked_positions:
        if len(recommended_books) >= top_n:
            break
        candidate = titles[pos]
        if candidate in seen_titles:
            continue
        seen_titles.add(candidate)
        recommended_books.append(candidate)
    return recommended_books

# Demonstrate the recommender on a fixed example title so the cell is
# reproducible under "Restart Kernel -> Run All" (an input() prompt would block
# a non-interactive run and make the shown result depend on what was typed).
# Change the title below to try another book.
example_book = "Rich Dad Poor Dad"
print(recommend_books(example_book))


Enter a string:  Rich Dad Poor Dad


['Deep Work', 'Dune', 'An Ember in the Ashes', 'The Shadow of the Wind', 'Red Queen']
