In [None]:
import numpy as np 
import pandas as pd

# Load the book catalogue and the user ratings from CSV files in the working directory.
books, ratings = (pd.read_csv(csv_name) for csv_name in ("Books.csv", "Ratings.csv"))

In [None]:
# Peek at the first five rows of the book catalogue.
books.head(n=5)

In [None]:
# Peek at the first five rows of the ratings table.
ratings.head(n=5)

In [None]:
# Attach ratings to every book on ISBN. how="left" keeps books without any
# matching rating; their rating columns come back as NaN.
df = pd.merge(books, ratings, on="ISBN", how="left")
df.head(n=5)

In [None]:
# (rows, columns) of the merged books/ratings frame.
df.shape

In [None]:
# Number of missing values in each column of the merged frame.
df.isna().sum()

In [None]:
# Rows that have no rating value (books the left merge could not match to a rating).
df.loc[df["Book-Rating"].isna()]

In [None]:
# Drop every row that is missing any of the listed fields, then re-count the
# nulls per column to see what is left.
df = df.dropna(
    axis="index",
    how="any",
    subset=["Book-Rating", "User-ID", "Book-Author", "Publisher", "Image-URL-L"],
)

df.isna().sum()

In [None]:
# How often each rating value occurs, most frequent first.
df["Book-Rating"].value_counts(sort=True, ascending=False)

In [None]:
# Keep only rows whose rating is above zero (a zero is treated as "no rating"
# here), then look at the remaining rating distribution.
df = df.loc[df["Book-Rating"].gt(0)]
df["Book-Rating"].value_counts()

In [None]:
# Number of ratings each title received, most rated first.
df["Book-Title"].value_counts(sort=True, ascending=False)

In [None]:
# One-column frame indexed by title, holding the number of ratings per title.
rating_counts = df["Book-Title"].value_counts().to_frame()

In [None]:
# Titles with fewer than 100 ratings are considered too rare to compare users on.
# rating_counts has a single column, but its label depends on the pandas version
# ("Book-Title" before 2.0, "count" from 2.0 on), so it is selected by position
# rather than by name.
rare_books = rating_counts[rating_counts.iloc[:, 0] < 100].index

# Keep only the ratings that belong to the remaining, frequently rated titles.
common_books = df[~df["Book-Title"].isin(rare_books)]
common_books["Book-Title"].value_counts()

In [None]:
# Number of ratings each user gave among the common books, most active first.
common_books["User-ID"].value_counts(sort=True, ascending=False)

In [None]:
# Drop users with fewer than 10 ratings among the common books: every row is
# mapped to its user's rating count and kept only if that count reaches 10.
common_books = common_books.loc[
    common_books["User-ID"].map(common_books["User-ID"].value_counts()).ge(10)
]
common_books["User-ID"].value_counts()

In [None]:
# User x title matrix of ratings: one row per user, one column per title,
# NaN where the user did not rate the title. Repeated (user, title) pairs are
# combined by pivot_table's default aggregation.
user_book_df = pd.pivot_table(
    common_books,
    values="Book-Rating",
    index=["User-ID"],
    columns=["Book-Title"],
)

In [None]:
# Target user the recommendations are built for. Despite the name, the ID is
# hard-coded rather than sampled at run time.
random_user = 1320

In [None]:
# Slice the user x title matrix down to the target user's row
# (empty if that user ID is not in the matrix).
random_user_df = user_book_df.loc[user_book_df.index == random_user]
random_user_df

In [None]:
# Titles the target user has rated: the columns holding at least one non-null value.
books_read = random_user_df.loc[:, random_user_df.notna().any(axis=0)].columns.tolist()
books_read

In [None]:
# Number of titles the target user has rated.
len(books_read)

In [None]:
# Every user's ratings, restricted to the titles the target user has rated.
books_read_df = user_book_df.loc[:, books_read]
books_read_df

In [None]:
# For each user, count how many of the target user's titles they have rated too.
user_book_count = books_read_df.notna().sum(axis=1)
user_book_count

In [None]:
# organizing the data set

In [None]:
# Tidy table with one row per user: the user ID and the number of the target
# user's titles that user has rated.
# The table is rebuilt from books_read_df instead of reshaping user_book_count
# in place, so re-running this cell gives the same two-column frame rather than
# failing on a second reset_index().
user_book_count = (
    books_read_df.notna()
    .sum(axis=1)
    .rename("book_count")
    .rename_axis("User_id")
    .reset_index()
)
user_book_count.sort_values("book_count", ascending=False)

In [None]:
# IDs of users who rated more than 5 of the titles the target user rated.
users_same_book = user_book_count.loc[user_book_count["book_count"].gt(5), "User_id"]
users_same_book

In [None]:
# Ratings of the overlapping users plus the target user, restricted to the
# titles the target user has rated.
# The target user's row already lives in books_read_df, so it is selected with
# a mask instead of being concatenated on top: appending it again produced a
# duplicated index label whenever the target user was also in users_same_book.
final_df = books_read_df[
    books_read_df.index.isin(users_same_book) | (books_read_df.index == random_user)
]

In [None]:
# Pairwise correlations between users, reshaped to one row per ordered user pair.
# Rows with a repeated index label are dropped first: the target user can appear
# twice in final_df, which would otherwise repeat labels in the matrix.
user_corr = final_df[~final_df.index.duplicated(keep="first")].T.corr()

# Both axes of the matrix carry the same name, so the two index levels are
# renamed before reset_index() turns them into columns.
# Pairs are not de-duplicated by correlation value: that would discard distinct
# user pairs that merely share the same coefficient.
corr_df = (
    user_corr.unstack()
    .rename_axis(["user_id_1", "user_id_2"])
    .reset_index(name="corr")
)

# A user paired with themselves says nothing about similarity to other users.
corr_df = corr_df[corr_df["user_id_1"] != corr_df["user_id_2"]]

# Correlation of every other user with the target user.
top_users = corr_df.loc[
    corr_df["user_id_1"] == random_user, ["user_id_2", "corr"]
].reset_index(drop=True)

top_users

In [None]:
# Keep only the users whose correlation with the target user exceeds 60%,
# ordered from most to least similar.
# NOTE(review): the cutoff was previously written as 0.06 (6%), which contradicts
# the stated 60% — confirm that 0.60 is the intended value.
MIN_CORRELATION = 0.60

top_users = (
    corr_df.loc[
        (corr_df["user_id_1"] == random_user) & (corr_df["corr"] > MIN_CORRELATION),
        ["user_id_2", "corr"],
    ]
    .sort_values(by="corr", ascending=False)
    # Renamed so the column lines up with the "User-ID" key of the ratings table.
    .rename(columns={"user_id_2": "User-ID"})
    .reset_index(drop=True)
)
top_users

In [None]:
# Collect every rating given by the similar users, keeping their correlation
# alongside each rating.
# Ratings of 0 are excluded, matching the earlier filtering of df: they are
# treated as "no rating" and would drag the averages computed later towards zero.
explicit_ratings = ratings.loc[
    ratings["Book-Rating"] > 0, ["User-ID", "ISBN", "Book-Rating"]
]
top_users_ratings = top_users.merge(explicit_ratings, how="inner", on="User-ID")

# The target user must not contribute to their own recommendations.
top_users_ratings = top_users_ratings[top_users_ratings["User-ID"] != random_user]

top_users_ratings

In [None]:
# Score each book by the mean of (user correlation x user rating) over the
# similar users who rated it.
# The weighted_rating column is created here because nothing earlier defines
# it; aggregating it without this step raises a KeyError.
recommendation_df = (
    top_users_ratings
    .assign(weighted_rating=top_users_ratings["corr"] * top_users_ratings["Book-Rating"])
    .groupby("ISBN")
    .agg({"weighted_rating": "mean"})
    .reset_index()
)
recommendation_df

In [None]:
# The ten best-scoring books among those whose weighted rating exceeds 6.5,
# highest score first.
books_to_be_recommend = (
    recommendation_df.loc[recommendation_df["weighted_rating"].gt(6.5)]
    .sort_values(by="weighted_rating", ascending=False)
    .head(10)
)

books_to_be_recommend

In [None]:
# Look up the titles of the recommended ISBNs. df can hold many rows per ISBN,
# so repeated (ISBN, title) matches are collapsed after the merge.

pd.merge(books_to_be_recommend, df.loc[:, ["ISBN", "Book-Title"]]).drop_duplicates()