# Load Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from joblib import dump, load
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity

# Load File with Data

In [None]:
# Location of the ratings CSV, relative to the notebook's working directory.
# NOTE(review): the "_no0" suffix suggests zero ratings were stripped
# beforehand -- confirm against the data-preparation step.
file_path = 'data/Ratings_no0.csv'
# Later cells read the 'User-ID', 'ISBN' and 'Rating' columns of this frame.
df = pd.read_csv(file_path)

# Organize Columns

In [None]:
# Ratings must be numeric for the aggregations below; values that cannot be
# parsed become NaN instead of raising.
df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce')

# Add an integer code per distinct user and per distinct book next to the raw
# identifiers (the raw columns are kept untouched).
user_id_categories = df['User-ID'].astype('category')
isbn_categories = df['ISBN'].astype('category')
df['User-ID-Cat'] = user_id_categories.cat.codes
df['ISBN-Cat'] = isbn_categories.cat.codes

# Explore Database

In [None]:
# Per-book statistics: mean rating and number of ratings, keyed by ISBN.
ratings_by_isbn = df.groupby('ISBN')['Rating']
average_ratings = ratings_by_isbn.mean()
rating_counts = ratings_by_isbn.count()

# Place both statistics side by side and turn the ISBN index into a column.
ratings_info = pd.concat(
    [average_ratings, rating_counts],
    axis=1,
    keys=['AverageRating', 'RatingCount'],
).reset_index()

# Only books with more than 100 ratings are considered for the ranking.
has_enough_ratings = ratings_info['RatingCount'].gt(100)
filtered_ratings = ratings_info.loc[has_enough_ratings]

# Best average first; ties are broken by the larger number of ratings.
top_rated_books = filtered_ratings.sort_values(
    by=['AverageRating', 'RatingCount'],
    ascending=[False, False],
)

print(top_rated_books.head(10)[['ISBN', 'AverageRating', 'RatingCount']])

# Filter Data

In [None]:
# Keep only books that appear in more than 5 rating rows ...
isbn_counts = df['ISBN'].value_counts()
books_filtered = isbn_counts[isbn_counts > 5].index.tolist()

# ... and only users that appear in more than 3 rating rows.
user_counts = df['User-ID'].value_counts()
users_filtered = user_counts[user_counts > 3].index.tolist()

# A row survives only when both its book and its user passed the thresholds.
# Both thresholds are evaluated on the unfiltered frame.
is_kept_book = df['ISBN'].isin(books_filtered)
is_kept_user = df['User-ID'].isin(users_filtered)
df_filtered = df[is_kept_book & is_kept_user]

print(f"Number of unique users after reducing sparsity: {df_filtered['User-ID'].nunique()}")
print(f"Number of unique books after reducing sparsity: {df_filtered['ISBN'].nunique()}")
print(df_filtered['ISBN'].head())

# Create and Train Model

In [None]:
# user_item_matrix = df_filtered.pivot_table(index='User-ID', columns='ISBN', values='Rating').fillna(0)

# cosine_sim = cosine_similarity(user_item_matrix)

# np.fill_diagonal(cosine_sim, 0)

# user_similarity_df = pd.DataFrame(cosine_sim, index=user_item_matrix.index, columns=user_item_matrix.index)

# # Save the user similarity DataFrame
# dump(user_similarity_df, 'user_similarity_df.joblib')

# # Save the user-item matrix
# dump(user_item_matrix, 'user_item_matrix.joblib')

# Loading Model

In [None]:
# Restore the precomputed artifacts instead of rebuilding them in this session.
# The file names match the dump() calls in the commented-out training cell.
# NOTE(security): joblib.load is pickle-based and can execute arbitrary code
# while unpickling -- only load files produced by a trusted run of this notebook.
user_item_matrix = load("user_item_matrix.joblib")
user_similarity_df = load("user_similarity_df.joblib")

# Recommendation Function

In [None]:
def recalculate_user_similarity(user_item_matrix):
    """Return the pairwise cosine similarity between the rows of the matrix.

    Missing ratings (NaN) are treated as 0 for the similarity computation;
    the matrix passed in is not modified.

    Args:
        user_item_matrix: DataFrame with one row per user and one column
            per book.

    Returns:
        A square DataFrame whose index and columns are both the row labels
        of ``user_item_matrix``.
    """
    user_ids = user_item_matrix.index
    dense_ratings = user_item_matrix.fillna(0)
    return pd.DataFrame(
        cosine_similarity(dense_ratings),
        index=user_ids,
        columns=user_ids,
    )
    
def add_temporary_user_and_recalculate_similarity(favorite_isbns, user_item_matrix, favorite_rating=10):
    """Append a synthetic user built from favorite books and recompute similarity.

    The synthetic user gets ``favorite_rating`` for every favorite ISBN that
    is a column of ``user_item_matrix`` and NaN for every other book.
    Favorites that are not columns of the matrix are ignored. The matrix
    passed in is left untouched; an extended copy is returned.

    Args:
        favorite_isbns: Iterable of ISBN labels; repeated entries count once.
        user_item_matrix: DataFrame with one row per user and one column per
            book. The new row label is ``index.max() + 1``, so the index is
            assumed to be numeric and non-empty.
        favorite_rating: Rating assigned to each recognised favorite.

    Returns:
        A tuple ``(user_similarity_df, user_item_matrix, temp_user_id)``:
        the similarity matrix including the synthetic user, the extended
        user-item matrix, and the row label of the synthetic user.
    """
    known_isbns = user_item_matrix.columns
    # dict.fromkeys removes repeats while keeping order: a repeated ISBN would
    # give the Series below a duplicate index, which reindex() rejects.
    valid_isbns = [isbn for isbn in dict.fromkeys(favorite_isbns) if isbn in known_isbns]
    temp_user_id = user_item_matrix.index.max() + 1
    temp_user_row = pd.Series(
        data=favorite_rating,
        index=valid_isbns,
        name=temp_user_id,
    ).reindex(known_isbns, fill_value=np.nan)
    # concat builds a new frame, so the caller's matrix is not mutated.
    user_item_matrix = pd.concat([user_item_matrix, temp_user_row.to_frame().T], ignore_index=False)
    user_similarity_df = recalculate_user_similarity(user_item_matrix)

    return user_similarity_df, user_item_matrix, temp_user_id

In [None]:
def recommend_books_with_updated_similarity(favorite_isbns, user_item_matrix, original_user_similarity_df, top_n=10):
    """Recommend books liked by the users most similar to a set of favorites.

    A temporary user holding the favorites is added to a copy of the
    matrix, the ``top_n`` most similar real users are selected, and the books
    with the highest mean rating among those users are returned. The caller's
    ``user_item_matrix`` is not modified.

    Args:
        favorite_isbns: ISBN labels of the books the reader likes.
        user_item_matrix: DataFrame with one row per user and one column per
            book.
        original_user_similarity_df: Unused; similarity is recomputed with the
            temporary user included. Kept so existing callers keep working.
        top_n: Maximum number of neighbours to use and of books to return.

    Returns:
        A list of at most ``top_n`` ISBN labels, best first, never containing
        a favorite. The list is empty when no user shares any book with the
        favorites (for example when none of the favorites is in the matrix).
    """
    user_similarity_df, extended_matrix, temp_user_id = add_temporary_user_and_recalculate_similarity(
        favorite_isbns, user_item_matrix
    )

    # Remove the temporary user before ranking so exactly top_n real users are
    # selected, even when the temporary user would not be in the top slice.
    neighbour_scores = user_similarity_df.loc[temp_user_id].drop(temp_user_id)
    # A similarity of 0 means no shared book; such users carry no signal and
    # would only add arbitrary titles to the result.
    neighbour_scores = neighbour_scores[neighbour_scores > 0]
    if neighbour_scores.empty:
        return []

    similar_users = neighbour_scores.nlargest(top_n).index
    similar_users_ratings = (
        extended_matrix.loc[similar_users]
        .mean(axis=0)
        .drop(favorite_isbns, errors='ignore')
    )
    # The extended matrix and similarity frame are local copies, so no cleanup
    # of the temporary user is needed.
    return similar_users_ratings.nlargest(top_n).index.tolist()

# Example of Using the Function

In [None]:
# ISBNs are written exactly as they appear as column labels of the
# user-item matrix.
favorite_isbns = ["451523881", "1592244343", "192815490"]
recommended_books_list = recommend_books_with_updated_similarity(
    favorite_isbns,
    user_item_matrix=user_item_matrix,
    original_user_similarity_df=user_similarity_df,
)
print(recommended_books_list)

# Get Title by ISBN

In [None]:
def get_book_titles(isbn_list, timeout=10):
    """Look up book titles on Open Library, one request per ISBN.

    ISBNs that are not 13 characters long are left-padded with zeros to 10
    characters before the lookup. A failed lookup (network error, non-200
    status, or unparseable body) is reported on stdout and that ISBN is left
    out of the result, so one bad lookup does not abort the others.

    Args:
        isbn_list: Iterable of ISBNs; each is converted with ``str()`` for
            the lookup.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A dict mapping each ISBN, as given, to its title. The value is
        ``'Not found'`` when the service does not know the ISBN and
        ``'Title not found'`` when the record has no title.
    """
    titles = {}
    for isbn in isbn_list:
        # str() lets numeric ISBNs through; len()/zfill() only exist on str.
        isbn_text = str(isbn)
        formatted_isbn = isbn_text if len(isbn_text) == 13 else isbn_text.zfill(10)
        key = f"ISBN:{formatted_isbn}"
        url = f"https://openlibrary.org/api/books?bibkeys={key}&format=json&jscmd=data"

        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Error fetching ISBN {isbn}: {exc}")
            continue

        if response.status_code != 200:
            print(f"Error fetching ISBN {isbn}: {response.status_code}")
            continue

        try:
            data = response.json()
        except ValueError as exc:
            # Raised when the body is not valid JSON.
            print(f"Error fetching ISBN {isbn}: {exc}")
            continue

        if key in data:
            titles[isbn] = data[key].get('title', 'Title not found')
        else:
            titles[isbn] = 'Not found'
    return titles

# Print the favorites first, then the recommendations, separated by a rule.
# Titles are fetched before the section heading is printed, so any lookup
# error messages appear above the heading of the section they belong to.
for section_number, (section_heading, section_isbns) in enumerate((
    ("Favorite Books:", favorite_isbns),
    ("Recommended Books:", recommended_books_list),
)):
    if section_number > 0:
        print("---")
    book_titles = get_book_titles(section_isbns)
    print(section_heading)
    for isbn, title in book_titles.items():
        print(f"{isbn}: {title}")