In [64]:
# Import the libraries

import pandas as pd 
import numpy as np
from scipy.sparse import csr_matrix as sparse_matrix
import os
from sklearn.decomposition import TruncatedSVD
from sklearn.neighbors import NearestNeighbors

### Tuncated SVD Model

In [65]:
# Load the dataset

book_info = pd.read_csv("books_info_svd_sample.csv")
book_info.head()

Unnamed: 0,book_id,title,authors
0,751357383,DK Eyewitness Readers - Level 2: Dinosaur Dinn...,Lee Davis
1,943403596,Derek Jeter,Richard Brenner
2,9728423853,Um estranho em Goa (SÃ©rie oriental viagens),Jose Eduardo Agualusa
3,385093187,Aeneid of Virgil,Virgil
4,760734658,The Essential Dictionary of Investing &amp; Fi...,Jerry M. Rosenberg


In [66]:
# Load the dataset

book_ratings = pd.read_csv("book_ratings_svd_sample.csv")
book_ratings.head()

Unnamed: 0,user_id,book_id,rating
0,252695,0312961677,7
1,173784,0670030643,8
2,119283,067091021X,8
3,184299,0441004822,8
4,278111,0375412123,5


In [67]:
# Define function to build a user-book matrix based on book ratings.
book_id_mapping = {book_id: idx for idx, book_id in enumerate(book_info['book_id'].unique())}
user_id_mapping = {user_id: idx for idx, user_id in enumerate(book_ratings['user_id'].unique())}


def build_user_book_matrix(book_info, book_ratings):
    
    # Create mappings for book IDs and user IDs
    #book_id_mapping = {book_id: idx for idx, book_id in enumerate(book_info['book_id'].unique())}
    #user_id_mapping = {user_id: idx for idx, user_id in enumerate(book_ratings['user_id'].unique())}

    num_users = len(user_id_mapping)
    num_books = len(book_id_mapping)
    user_book_matrix = np.zeros((num_users, num_books))
    
    # Fill the user-book matrix with ratings
    try:
        for _, row in book_ratings.iterrows():
            user_id = user_id_mapping[row['user_id']]
            book_id = book_id_mapping[row['book_id']]
            rating = row['rating']
            user_book_matrix[user_id, book_id] = rating
    except Exception:
        print("exception")
        pass

    return user_book_matrix

In [68]:
# Define function to perfrom SVC on the user_book matrix and train a nearest neighbors model

def train_models(user_book_matrix, num_latent_factors=10, num_neighbors=5):
    
    # Perform Truncated SVD on the user-book matrix
    svd = TruncatedSVD(n_components=num_latent_factors, random_state=42)
    svd_model = svd.fit_transform(user_book_matrix)
    
    # Train a nearest neighbors model on the SVD-transformed matrix
    knn_model = NearestNeighbors(n_neighbors=num_neighbors, metric='cosine')
    knn_model.fit(svd_model)

    return svd_model, knn_model

In [69]:
# Use model to generate book recommendations for a given user

def get_recommendations(user_id, svd_model, knn_model, book_info, book_ratings, num_recommendations=10):
    
    # Get the embedding of the target user
    user_embedding = svd_model[user_id].reshape(1, -1)
    
    # Find similar users based on the nearest neighbors model
    _, indices = knn_model.kneighbors(user_embedding)
    similar_user_ids = indices.flatten()
    print("similar_user_ids")
    print(similar_user_ids)
    recommendations = []
    for similar_user_id in similar_user_ids:
        similar_user_id_mapping=[k for k, v in user_id_mapping.items() if v == similar_user_id][0]
        rated_books = set(book_ratings.loc[book_ratings['user_id'] == similar_user_id_mapping, 'book_id'])
        unrated_books = [book_id for book_id in book_info['book_id'].unique() if book_id not in rated_books]
        recommendations.extend(book_id for book_id in rated_books if book_id not in recommendations)
        if len(recommendations) >= num_recommendations:
            break
    recommendations = recommendations[:num_recommendations]
    recommended_books = [book_info.loc[book_info['book_id'] == book_id, 'title'].values[0] for book_id in recommendations]
    return recommended_books



In [70]:

# Build the user-book matrix and train the models

user_book_matrix = build_user_book_matrix(book_info, book_ratings)
svd_model, knn_model = train_models(user_book_matrix)
#print(svd_model)
#print(knn_model)
# Get book recommendations for a specific user and print the recommended book titles
user_id = 500 #an example
recommendations = get_recommendations(int(user_id), svd_model, knn_model, book_info, book_ratings)
print("Recommended books:")
for book_title in recommendations:
    print(book_title)

similar_user_ids
[ 500 2725  672 1240 2282]
Recommended books:
Raging Heart: The Intimate Story of the Tragic Marriage of O.J. and Nicole Brown Simpson
The All-american Cowboy Cookbook : Over 300 Recipes From the World's Greatest Cowboys
Grace Point
The Diaries of Adam and Eve
Fallout
The WAR AGAINST BOYS: How Misguided Feminism Is Harming Our Young Men


In [71]:
user_id = 2935 #an example
recommendations = get_recommendations(int(user_id), svd_model, knn_model, book_info, book_ratings)
print("Recommended books:")
for book_title in recommendations:
    print(book_title)

similar_user_ids
[2935 1245  133 1089  506]
Recommended books:
Blue Gold : A Novel from the NUMA Files
Hawkmistress (Darkover)
Somebody's Baby
Lincoln : A Photobiography
From This Day Forward
Dynamic Great Lakes


# KNN (K-Nearest Neighbors) Model

In [72]:
ratings_new = pd.read_csv('ratings002_cleaned.csv')

In [73]:
ratings_new

Unnamed: 0.1,Unnamed: 0,user_id,book_id,rating
0,0,276725,034545104X,0
1,1,276726,0155061224,5
2,2,276727,0446520802,0
3,3,276729,052165615X,3
4,4,276729,0521795028,6
...,...,...,...,...
1149775,1149775,276704,1563526298,9
1149776,1149776,276706,0679447156,0
1149777,1149777,276709,0515107662,10
1149778,1149778,276721,0590442449,10


In [74]:
ratings_new.drop('Unnamed: 0', axis=1, inplace=True)

In [75]:
ratings_new

Unnamed: 0,user_id,book_id,rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6
...,...,...,...
1149775,276704,1563526298,9
1149776,276706,0679447156,0
1149777,276709,0515107662,10
1149778,276721,0590442449,10


In [76]:
books_with_ratings = pd.read_csv('books_with_ratings_copy.csv')

In [77]:
books_with_ratings.head()

Unnamed: 0,book_id,avg_rating,rating_count,title,authors,publication_year,Cover_image,mod_title
0,0155061224,5.0,5,Rites of Passage,Judith Rae,2001,http://images.amazon.com/images/P/0155061224.0...,rites of passage
1,052165615X,3.0,3,Help!: Level 1,Philip Prowse,1999,http://images.amazon.com/images/P/052165615X.0...,help level 1
2,0521795028,6.0,6,The Amsterdam Connection : Level 4 (Cambridge ...,Sue Leather,2001,http://images.amazon.com/images/P/0521795028.0...,the amsterdam connection level 4 cambridge en...
3,038550120X,7.58,614,A Painted House,JOHN GRISHAM,2001,http://images.amazon.com/images/P/038550120X.0...,a painted house
4,0060517794,8.0,240,Little Altars Everywhere,Rebecca Wells,2003,http://images.amazon.com/images/P/0060517794.0...,little altars everywhere


In [78]:
# Merge the "books" dataframe with ratings 

books_with_ratings_users = ratings_new.merge(books_with_ratings,on='book_id')

In [79]:
books_with_ratings_users.head()

Unnamed: 0,user_id,book_id,rating,avg_rating,rating_count,title,authors,publication_year,Cover_image,mod_title
0,276725,034545104X,0,6.29,176,Flesh Tones: A Novel,M. J. Rose,2002,http://images.amazon.com/images/P/034545104X.0...,flesh tones a novel
1,2313,034545104X,5,6.29,176,Flesh Tones: A Novel,M. J. Rose,2002,http://images.amazon.com/images/P/034545104X.0...,flesh tones a novel
2,6543,034545104X,0,6.29,176,Flesh Tones: A Novel,M. J. Rose,2002,http://images.amazon.com/images/P/034545104X.0...,flesh tones a novel
3,8680,034545104X,5,6.29,176,Flesh Tones: A Novel,M. J. Rose,2002,http://images.amazon.com/images/P/034545104X.0...,flesh tones a novel
4,10314,034545104X,9,6.29,176,Flesh Tones: A Novel,M. J. Rose,2002,http://images.amazon.com/images/P/034545104X.0...,flesh tones a novel


In [80]:
book_info = books_with_ratings_users

In [81]:
book_info.head()

Unnamed: 0,user_id,book_id,rating,avg_rating,rating_count,title,authors,publication_year,Cover_image,mod_title
0,276725,034545104X,0,6.29,176,Flesh Tones: A Novel,M. J. Rose,2002,http://images.amazon.com/images/P/034545104X.0...,flesh tones a novel
1,2313,034545104X,5,6.29,176,Flesh Tones: A Novel,M. J. Rose,2002,http://images.amazon.com/images/P/034545104X.0...,flesh tones a novel
2,6543,034545104X,0,6.29,176,Flesh Tones: A Novel,M. J. Rose,2002,http://images.amazon.com/images/P/034545104X.0...,flesh tones a novel
3,8680,034545104X,5,6.29,176,Flesh Tones: A Novel,M. J. Rose,2002,http://images.amazon.com/images/P/034545104X.0...,flesh tones a novel
4,10314,034545104X,9,6.29,176,Flesh Tones: A Novel,M. J. Rose,2002,http://images.amazon.com/images/P/034545104X.0...,flesh tones a novel


In [82]:
# Save dataframe
book_info.to_csv('books_with_userid')

In [95]:
# Take 1 % data as sample

rating_books_sample = book_info.sample(frac=0.01, random_state=3)

# Shape of the sample data
rating_books_sample.shape

(7251, 10)

In [96]:
rating_books_sample

Unnamed: 0,user_id,book_id,rating,avg_rating,rating_count,title,authors,publication_year,Cover_image,mod_title
127918,244627,0440111811,10,7.25,145,Changes,Danielle Steel,1984,http://images.amazon.com/images/P/0440111811.0...,changes
216788,233444,0553260219,0,6.33,38,Secrets of Harry Bright,Joseph Wambaugh,1986,http://images.amazon.com/images/P/0553260219.0...,secrets of harry bright
296444,124487,0553577778,0,8.00,24,"Infinity's Shore (The Uplift Saga, Book 5)",David Brin,1997,http://images.amazon.com/images/P/0553577778.0...,infinitys shore the uplift saga book 5
638880,172971,3499228114,9,8.50,17,Die schÃ?Â¶nen Seiten der Angst. Das Lesebuch ...,Wolfgang HÃ?Â¤mmerling,2000,http://images.amazon.com/images/P/3499228114.0...,die schnen seiten der angst das lesebuch des n...
645633,107301,0146000102,0,8.00,8,The Pavilion on the Links (Penguin 60s S.),Robert Louis Stevenson,1995,http://images.amazon.com/images/P/0146000102.0...,the pavilion on the links penguin 60s s
...,...,...,...,...,...,...,...,...,...,...
58509,215627,1400034779,10,8.05,1473,The No. 1 Ladies' Detective Agency (Today Show...,Alexander McCall Smith,2003,http://images.amazon.com/images/P/1400034779.0...,the no 1 ladies detective agency today show bo...
645214,95353,088179144X,10,10.00,10,Making Twig Garden Furniture,Abby Ruoff,1997,http://images.amazon.com/images/P/088179144X.0...,making twig garden furniture
283349,175886,0375826688,0,8.39,235,"Eragon (Inheritance, Book 1)",Christopher Paolini,2004,http://images.amazon.com/images/P/0375826688.0...,eragon inheritance book 1
164832,252036,0441003257,10,8.20,713,Good Omens,Neil Gaiman,1996,http://images.amazon.com/images/P/0441003257.0...,good omens


In [85]:
# Create Item-user matrix using pivot_table()
rating_books_pivot = rating_books_sample.pivot_table(index='title', columns='user_id', values='rating').fillna(0)

# Show top-5 records
rating_books_pivot.head()

user_id,17,243,254,507,585,640,753,850,876,882,...,278188,278255,278418,278435,278449,278529,278582,278633,278778,278843
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Earth Prayers From around the World: 365 Prayers, Poems, and Invocations for Honoring the Earth",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
08/15 Heute,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"1,000 More Jokes for Kids",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10 Lb. Penalty,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100 Cross-Stitch Christmas Ornaments,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [86]:
# Import NearestNeighbors
from sklearn.neighbors import NearestNeighbors

# Build NearestNeighbors Object
model_nn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=7, n_jobs=-1)

# Fit the NearestNeighbor
model_nn.fit(rating_books_pivot)

In [97]:
#Generate Recommendations

# Get top 10 nearest neighbors 
indices = model_nn.kneighbors(rating_books_pivot.loc[['Changes']], 10, return_distance=False)

# Print the recommended books
print("Recommended Books:")
print("==================")

for index, value in enumerate(rating_books_pivot.iloc[indices[0]].index):
    print((index+1),". ",value)

Recommended Books:
1 .  Manner of Death
2 .  Changes
3 .  Spec Ops Squad: Holding the Line (Spec Ops Squad)
4 .  Spectre (Star Trek)
5 .  Speaking With the Angel
6 .  Speaks the Nightbird, Vol. 2: Evil Unveiled
7 .  Spence and Lila: A Novel
8 .  Special Forces: A Guided Tour of U.S. Army Special Forces
9 .  Speak Scotch or whistle
10 .  Spence And Lila
