In [None]:
import ast
import pickle
import webbrowser
from collections import *

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Load the three raw CSV tables. Book rows with a missing value in any
# column are discarded straight away.
books = pd.read_csv('Books.csv').dropna()
ratings = pd.read_csv('Ratings.csv')
users = pd.read_csv('Users.csv')

# Models: Popularity-Based Approach

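The weighted rating (WR) of a book is

$$WR = \frac{v}{v+m}\,R + \frac{m}{v+m}\,C$$

where:

- $R$ = average (mean) rating of the book
- $v$ = number of ratings (votes) the book received
- $m$ = minimum number of ratings required for a book to be listed (the original formula uses this as the threshold for a Top 250 list)
- $C$ = the mean rating across the whole dataset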

In [None]:
# Keep only the books whose ISBN is exactly 10 characters long.
has_10_char_isbn = books["ISBN"].map(len) == 10
books = books[has_10_char_isbn]

# ISBNs that survived the filter, as a set for fast membership tests.
unique_ISBN = set(books["ISBN"].unique())

# Keep a rating row only if it refers to a known book and its value is not 0.
is_known_book = ratings["ISBN"].isin(unique_ISBN)
is_nonzero = ratings["bookRating"] != 0
ratings = ratings[is_known_book & is_nonzero]

In [None]:

# Per-book rating statistics: the mean rating and the number of ratings for
# every ISBN, computed in a single groupby pass instead of three.
avg_rating = (
    ratings.groupby("ISBN")["bookRating"]
    .agg(avg_rating="mean", num_ratings="count")
    .reset_index()  # turn the ISBN group key into an ordinary column
    .reset_index()  # adds the positional "index" column that a later cell drops
)

# Show the books ordered from most to least rated.
avg_rating.sort_values("num_ratings", ascending=False)

In [None]:

# Weighted rating (WR) = (v / (v + m)) * R + (m / (v + m)) * C
# where, for each book:
#   R = mean rating of the book
#   v = number of ratings the book received
#   m = minimum number of ratings required to be listed
#   C = mean rating across all ratings

# m is the 99th percentile of the per-book rating counts.
m = avg_rating["num_ratings"].quantile(0.99)
C = ratings["bookRating"].mean()

def weighted_rating(x, m = m, C=C):
    """Return the weighted rating for one book or for a whole table of books.

    Parameters
    ----------
    x : mapping-like
        Anything indexable by ``"num_ratings"`` and ``"avg_rating"``: a single
        row, or a DataFrame whose columns are then combined element-wise.
    m : float
        Rating-count threshold; the larger it is, the more a book's score is
        pulled towards ``C``.
    C : float
        Mean rating over all ratings.

    Returns
    -------
    The value ``v/(v+m) * R + m/(v+m) * C`` with ``v = x["num_ratings"]`` and
    ``R = x["avg_rating"]``.
    """
    v = x["num_ratings"]
    R = x["avg_rating"]
    return (v/(v+m) * R) + (m/(m+v) * C)


# The formula is plain arithmetic, so evaluate it on whole columns at once
# rather than calling the function once per row.
avg_rating["weighted_rating"] = weighted_rating(avg_rating)

In [None]:
# Order the books best-first by weighted rating and discard the helper
# "index" column.
avg_rating = (
    avg_rating
    .sort_values("weighted_rating", ascending=False)
    .drop(columns=["index"])
)

In [None]:
avg_rating.head(10)

In [None]:
# Save the rating table to avg_rating.csv, without writing the row index.
avg_rating.to_csv("avg_rating.csv", index=False)


Book covers of the top-rated books according to the model.

In [None]:
# ISBNs of the ten books with the highest weighted rating.
ranked_by_weight = avg_rating.sort_values("weighted_rating", ascending=False)
l = ranked_by_weight["ISBN"].head(10).values

In [None]:
# Show the cover image and print the title of every ISBN in `l`.

from IPython.display import Image

for isbn in l:
    # Look the book up once and reuse the matching row for both fields.
    book_row = books[books["ISBN"] == isbn]
    display(Image(url=book_row["imageURLM"].values[0]))
    print(book_row["bookTitle"].values[0])

# Collaborative Filtering

A recommendation technique that leverages users' collective behavior and preferences to make personalized recommendations.

Steps involved in collaborative filtering:

Data representation: Create a table with users as rows and items (ISBNs) as columns to capture user-item interactions or ratings.

Standardization: Normalize the ratings within each user to remove biases and bring them to a common scale.

Similarity calculation: Compute item-item similarity using metrics like cosine similarity based on user ratings or interactions.

Recommendation generation: Find the most similar items to a given item and recommend them to users interacting with the original item.

Collaborative filtering taps into the idea that users with similar tastes for certain items are likely to have similar tastes for others, providing personalized recommendations based on user behavior and preferences.

In [None]:
# ISBNs of the most-rated books. Despite the variable name, head(1000) keeps
# the 1000 books with the highest number of ratings.
top_600 = avg_rating.sort_values("num_ratings", ascending=False).head(1000)["ISBN"].values

In [None]:
# Turn the ISBN array into a set for fast membership tests, then keep only
# the ratings that refer to one of those books.
top_600 = set(top_600)
is_top_book = ratings["ISBN"].isin(top_600)
new_ratings = ratings[is_top_book]

In [None]:
# Reshape to a user x book table: one row per "User-ID", one column per ISBN,
# each cell holding the rating that user gave that book (NaN where none).
# NOTE(review): the other columns used in this notebook are camelCase
# ("bookRating", "bookTitle"); confirm that "User-ID" matches the Ratings.csv header.

new_ratings = new_ratings.pivot(index="User-ID", columns="ISBN", values="bookRating")

In [None]:
# Cells with no rating (NaN after the pivot) become 0.
new_ratings = new_ratings.fillna(0)

In [None]:
new_ratings.head()

In [None]:
# Centre a 1-D vector of ratings on its mean and scale it by its value range,
# so the result has mean 0 and every value lies between -1 and 1.

def standardize(row):
    """Mean-centre ``row`` and divide by its range (max - min).

    Parameters
    ----------
    row : pandas.Series
        One vector of ratings.

    Returns
    -------
    pandas.Series
        ``(row - row.mean()) / (row.max() - row.min())``. If every value in
        ``row`` is the same the range is 0; an all-zero vector is returned in
        that case instead of the NaNs that 0/0 would produce.
    """
    centered = row - row.mean()
    spread = row.max() - row.min()
    if spread == 0:
        # Constant vector: there is no variation to scale. Return zeros so
        # that no NaN is introduced into later similarity computations.
        return centered * 0.0
    return centered / spread

# NOTE(review): DataFrame.apply defaults to axis=0, so `standardize` receives
# one column at a time (one ISBN across all users), not one user row. The
# notebook text describes normalising within each user, which would need
# axis=1 - confirm which is intended.
new_ratings = new_ratings.apply(standardize)

In [None]:
new_ratings

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Item-item similarity. The table is transposed first so that each row is one
# ISBN's vector of ratings; cosine_similarity then compares books, not users.
ratings_by_book = new_ratings.T
item_similarity = cosine_similarity(ratings_by_book)

In [None]:
# Label both axes of the similarity matrix with the ISBNs it was built from.
isbn_labels = new_ratings.columns
item_similarity_df = pd.DataFrame(
    item_similarity,
    index=isbn_labels,
    columns=isbn_labels,
)
item_similarity_df.head()

In [None]:
def get_similar_books(book_code, user_rating, similarity_df=None, neutral_rating=5):
    """Score every book by its similarity to one rated book.

    Parameters
    ----------
    book_code : str
        ISBN of the rated book; must be a column of the similarity table.
    user_rating : float
        Rating the user gave ``book_code``.
    similarity_df : pandas.DataFrame, optional
        Square item-item similarity table. Defaults to the notebook-level
        ``item_similarity_df``.
    neutral_rating : float, optional
        Rating treated as neutral. Ratings above it give similar books a
        positive score, ratings below it a negative one. Defaults to 5.

    Returns
    -------
    pandas.Series
        Scores indexed by ISBN, sorted from highest to lowest.

    Raises
    ------
    KeyError
        If ``book_code`` is not a column of the similarity table.
    """
    if similarity_df is None:
        similarity_df = item_similarity_df
    weighted = similarity_df[book_code] * (user_rating - neutral_rating)
    return weighted.sort_values(ascending=False)

In [None]:
def get_recommendations(book_ratings, similarity_df=None, top_n=10, neutral_rating=5):
    """Recommend books from a user's ratings via item-item similarity.

    For every rated book, each book's similarity to it is multiplied by
    ``rating - neutral_rating``; the per-book scores are summed over all rated
    books and the highest-scoring books the user has not rated are returned.

    NOTE: a later cell of this notebook defines another function with the
    same name, which replaces this one once that cell has run.

    Parameters
    ----------
    book_ratings : dict
        Maps ISBN -> rating given by the user.
    similarity_df : pandas.DataFrame, optional
        Square item-item similarity table. Defaults to the notebook-level
        ``item_similarity_df``.
    top_n : int, optional
        Maximum number of ISBNs to return. Defaults to 10.
    neutral_rating : float, optional
        Rating treated as neutral. Defaults to 5.

    Returns
    -------
    list
        Up to ``top_n`` ISBNs, best first. Empty if ``book_ratings`` is empty.

    Raises
    ------
    KeyError
        If a rated ISBN is not a column of the similarity table.
    """
    if similarity_df is None:
        similarity_df = item_similarity_df
    if not book_ratings:
        return []

    # Build every per-book score vector first and concatenate once, instead
    # of growing an (initially dtype-less) Series inside the loop.
    per_book_scores = [
        similarity_df[book] * (rating - neutral_rating)
        for book, rating in book_ratings.items()
    ]
    totals = pd.concat(per_book_scores).groupby(level=0).sum()
    totals = totals.sort_values(ascending=False)

    # Never recommend a book the user has already rated.
    unseen = [isbn for isbn in totals.index if isbn not in book_ratings]
    return unseen[:top_n]

In [None]:
# Sample user: ISBN -> rating given.
d = {
    "059035342X": 9,
    "0345370775": 10,
    "044021145X": 8,
    "0440214041": 10,
    "0440211727": 7,
}

# Show the books the user rated.
for isbn in d:
    book_row = books[books["ISBN"] == isbn]
    display(Image(url=book_row["imageURLM"].values[0]))
    print(book_row["bookTitle"].values[0])

print("-------------------------------------------")
l = get_recommendations(d)

# Show the books recommended for those ratings.
for isbn in l:
    book_row = books[books["ISBN"] == isbn]
    display(Image(url=book_row["imageURLM"].values[0]))
    print(book_row["bookTitle"].values[0])

In [None]:
# Write one line per book in `top_600` to books.txt: "<title> <ISBN>".
# The `with` block closes the file even if a title lookup raises part-way.
with open("books.txt", "w") as f:
    for i in top_600:
        name = books[books["ISBN"] == i]["bookTitle"].values[0]
        f.write(name + " " + i + "\n")

In [None]:
# Read the whole of books.txt back into one string.
# Note: the name `data` is reused for a DataFrame in a later cell.
with open('books.txt', 'r') as f:
    data = f.read()

In [None]:
data

In [None]:

# Ratings supplied by a friend: ISBN -> rating given.

d = {
    "0439139597": 7,
    "0345391802": 8,
    "0590353403": 6,
    "0439064864": 3,
    "0316769487": 8,
    "0439136350": 5,
    "059035342X": 8,
    "0684801523": 10,
    "0439136369": 5,
    "043935806X": 4,
    "0439064872": 3,
    "0804111359": 9,
    "0451526341": 9,
    "0156628708": 9,
    "0451524934": 7
}

# Show the cover and title of every rated book. The loop variable stays `i`
# because the next cell reads it after the loop has finished.
for i in d:
    rated_book = books[books["ISBN"] == i]
    display(Image(url=rated_book["imageURLM"].values[0]))
    print(rated_book["bookTitle"].values[0])

In [None]:
# Shows the cover of whichever ISBN `i` currently holds. `i` is not set in
# this cell: it is left over from the loop in the previous cell (the last key of `d`).
Image(url=books[books["ISBN"] == i]["imageURLM"].values[0])

In [None]:
# Recommendations for the ratings stored in `d`.
l = get_recommendations(d)

for isbn in l:
    recommended = books[books["ISBN"] == isbn]
    display(Image(url=recommended["imageURLM"].values[0]))
    print(recommended["bookTitle"].values[0])

# Samyak was satisfied with the recommendations

In [None]:
# Persist the item-item similarity table to item_similarity_df.pkl.
# The `with` block guarantees the file handle is flushed and closed;
# `pickle` is already imported at the top of the notebook.
with open("item_similarity_df.pkl", "wb") as pkl_file:
    pickle.dump(item_similarity_df, pkl_file)

# Content Based Recommendations
Created a numpy array called main_matrix with dimensions (50000, 900) to represent ratings for each book across 900 unique genres.

Populated main_matrix by assigning average ratings to the corresponding positions based on the book's genres.

Defined the get_recommendations(s) function to generate book recommendations based on the user's genre preferences.

Initialized a numpy array called like (shape: (900,)) with zeros to represent the user's genre preferences.

Calculated recommendation scores by performing matrix multiplication between main_matrix and like.

Sorted the recommendation scores and obtained the top 5 book recommendations based on the highest scores.

Printed the titles of the top 5 recommended books from the original dataset.

In [None]:
# Content-based recommender: score every book by the ratings it carries in
# the genres the user likes.
# NOTE: `get_recommendations` below reuses the name of the collaborative
# filtering function defined earlier in this notebook and replaces it from
# this cell onwards.

data = pd.read_csv("books_1.Best_Books_Ever.csv")

# Explicit copy, so that replacing the "genres" column below does not write
# into a slice of `data`.
df = data[["isbn", "genres", "rating"]].copy()

# Each genre list is stored in the CSV as a Python-literal string.
# ast.literal_eval parses literals only; the eval() used before would have
# executed any code found in the file.
df["genres"] = df["genres"].apply(ast.literal_eval)

# Every distinct genre that occurs in the dataset.
s = set()
for book_genres in df["genres"]:
    s.update(book_genres)

# Sort the genres so the column order is the same on every run (the
# iteration order of a set of strings can differ between sessions).
genre_names = sorted(s)
d = {genre: column for column, genre in enumerate(genre_names)}

# Record which genre each matrix column stands for, one genre per line.
with open("genres_order.txt", "w", encoding="utf-8") as f:
    for genre in genre_names:
        f.write(genre + "\n")

# main_matrix[i, j] is the rating of book i if it is tagged with genre j,
# and 0 otherwise. The shape follows the data instead of being hard-coded.
main_matrix = np.zeros((len(df), len(d)))
for row, (book_genres, rating) in enumerate(zip(df["genres"], df["rating"])):
    for genre in book_genres:
        main_matrix[row, d[genre]] = rating


def get_recommendations(s, top_n=5):
    """Rank all books for a list of liked genres and print the best titles.

    Parameters
    ----------
    s : iterable of str
        Genres the user likes. Genres that do not occur in the dataset are
        ignored.
    top_n : int, optional
        Number of titles to print. Defaults to 5.

    Returns
    -------
    list of (score, row) tuples
        One entry per book, sorted from highest to lowest score, where
        ``row`` is the book's row label in ``data``. A fresh list is built on
        every call, so results of earlier calls never leak into later ones.
    """
    like = np.zeros(main_matrix.shape[1])
    for genre in s:
        column = d.get(genre)
        if column is None:
            # Unknown genre: skip it rather than silently treating it as
            # whichever genre happens to sit in column 0.
            continue
        like[column] = 1

    scores = np.dot(main_matrix, like)
    ranked = sorted(((score, row) for row, score in enumerate(scores)), reverse=True)

    for _score, row in ranked[:top_n]:
        print(data.loc[row, "title"])
    return ranked


# Keep the ranking in `l`; the next cell prints the cover URLs of its top 5.
l = get_recommendations(["Fiction", "Romance", "Magic", "Vampires", "Action"])

with open('genre_matrix.pkl', 'wb') as file:
    pickle.dump(main_matrix, file)
with open("booktitle_chatbot.txt", "w", encoding="utf-8") as file:
    for title in data["title"]:
        file.write(title + "\n")
        
    

In [None]:
# Print the cover-image URL of the five best-ranked books in `l`, whose
# entries are (score, row label) pairs.
for _score, row_label in l[:5]:
    print(data.loc[row_label, "coverImg"])

In [None]:
# Ask for a comma-separated list of genres, e.g. "Fiction, Romance".
# Whitespace around each entry is stripped and empty entries are dropped, so
# "Fiction, Romance" yields "Romance" rather than " Romance".
list_genere = [x.strip() for x in input().split(",") if x.strip()]

# get_recommendations returns (score, row label) pairs, best first. Keep the
# titles of the top five as the recommendation list; previously this list
# was filled with every title in the dataset.
# NOTE(review): assumes the returned list holds only this call's results.
ranked = get_recommendations(list_genere)
list_recomm = [data.loc[row_label, "title"] for _score, row_label in ranked[:5]]

with open('genre_matrix.pkl', 'wb') as file:
    pickle.dump(main_matrix, file)
with open("booktitle_chatbot.txt", "w", encoding="utf-8") as file:
    for title in data["title"]:
        file.write(title + "\n")

In [None]:
list_recomm

In [None]:
# Collect the cover-image URL of every title in list_recomm, taking the
# first matching row for each title.
rec_books = pd.read_csv("books_1.Best_Books_Ever.csv")
list_url = []
for title in list_recomm:
    matching_covers = rec_books.loc[rec_books["title"] == title, "coverImg"]
    image_url = matching_covers.values[0]
    list_url.append(image_url)

In [None]:
list_url

In [None]:
# Display the cover of one specific title, looked up by exact title match.
rec_books = pd.read_csv("books_1.Best_Books_Ever.csv")
is_goblet_of_fire = rec_books["title"] == "Harry Potter and the Goblet of Fire"
image_url = rec_books.loc[is_goblet_of_fire, "coverImg"].values[0]
Image(image_url)