In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise.model_selection import cross_validate
import difflib
import random

In [8]:
ratings_data = pd.read_csv('Data/Ejemplo/ratings.csv')
books_metadata = pd.read_csv('Data/Ejemplo/books.csv')
ratings_data.head(10)

Unnamed: 0,book_id,user_id,rating
0,1,314,5
1,1,439,3
2,1,588,5
3,1,1169,4
4,1,1185,4
5,1,2077,4
6,1,2487,4
7,1,2900,5
8,1,3662,4
9,1,3922,5


In [10]:
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_data[['user_id', 'book_id', 'rating']], reader)

In [12]:
svd = SVD(verbose=True, n_epochs=10)
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8567  0.8553  0.8557  0.8559  0.0006  
MAE (testset)     0.6754  0.6751  0.6751  0.6752  0.0001  
Fit time          26.15   25.69   24.24   25.36   0.81    
Test time         4.31    3.83    3.79    3.97    0.23    


{'test_rmse': array([0.85669629, 0.85527592, 0.85572407]),
 'test_mae': array([0.67537642, 0.67509129, 0.6751373 ]),
 'fit_time': (26.15003728866577, 25.689040899276733, 24.2445011138916),
 'test_time': (4.30610990524292, 3.831223964691162, 3.7872822284698486)}

In [13]:
svd.predict(uid=10, iid=100)

Prediction(uid=10, iid=100, r_ui=None, est=3.7544608408987608, details={'was_impossible': False})

In [15]:
def get_book_id(book_title, metadata):
    
    """
    Gets the book ID for a book title based on the closest match in the metadata dataframe.
    """
    
    existing_titles = list(metadata['title'].values)
    closest_titles = difflib.get_close_matches(book_title, existing_titles)
    book_id = metadata[metadata['title'] == closest_titles[0]]['id'].values[0]
    return book_id

def get_book_info(book_id, metadata):
    
    """
    Returns some basic information about a book given the book id and the metadata dataframe.
    """
    
    book_info = metadata[metadata['id'] == book_id][['id', 'isbn', 
                                                    'authors', 'title', 'original_title']]
    return book_info.to_dict(orient='records')

def predict_review(user_id, book_title, model, metadata):
    
    """
    Predicts the review (on a scale of 1-5) that a user would assign to a specific book. 
    """
    
    book_id = get_book_id(book_title, metadata)
    review_prediction = model.predict(uid=user_id, iid=book_id)
    return review_prediction.est

def generate_recommendation(user_id, model, metadata, thresh=4):
    
    """
    Generates a book recommendation for a user based on a rating threshold. Only
    books with a predicted rating at or above the threshold will be recommended
    """
    
    book_titles = list(metadata['title'].values)
    random.shuffle(book_titles)
    
    for book_title in book_titles:
        rating = predict_review(user_id, book_title, model, metadata)
        if rating >= thresh:
            book_id = get_book_id(book_title, metadata)
            return get_book_info(book_id, metadata)

In [30]:
for i in range(5):
    print(generate_recommendation(30944, svd, books_metadata))

[{'id': 1820, 'isbn': '61449067', 'authors': 'Michael  Grant', 'title': 'Hunger (Gone, #2)', 'original_title': 'Hunger '}]
[{'id': 9216, 'isbn': '345484088', 'authors': 'Terry Brooks', 'title': "Armageddon's Children (Genesis of Shannara, #1)", 'original_title': "Armageddon's Children"}]
[{'id': 1721, 'isbn': '1586608290', 'authors': 'Oswald Chambers', 'title': 'My Utmost for His Highest', 'original_title': 'My Utmost for His Highest'}]
[{'id': 574, 'isbn': '60899220', 'authors': 'Anthony Bourdain', 'title': 'Kitchen Confidential: Adventures in the Culinary Underbelly', 'original_title': 'Kitchen Confidential: Adventures in the Culinary Underbelly'}]
[{'id': 3739, 'isbn': '312368577', 'authors': "Madeleine L'Engle", 'title': 'Many Waters (A Wrinkle in Time Quintet, #4)', 'original_title': 'Many Waters'}]


In [24]:



ratings_data['user_id'].value_counts()

30944    200
12874    200
12381    199
52036    199
28158    199
        ... 
10351      2
16592      2
24343      2
41314      2
27590      2
Name: user_id, Length: 53424, dtype: int64

In [21]:
ratings_data

Unnamed: 0,book_id,user_id,rating
0,1,314,5
1,1,439,3
2,1,588,5
3,1,1169,4
4,1,1185,4
...,...,...,...
981751,10000,48386,5
981752,10000,49007,4
981753,10000,49383,5
981754,10000,50124,5
