In [10]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from surprise import Dataset, Reader, SVD

# Load the two raw tables: per-review ratings and per-book metadata.
libro_ratings = pd.read_csv('data/Books_rating.csv')
libro_data = pd.read_csv('data/Books_data.csv')

# Preview the first rows of each table (ratings first, then book data).
print(libro_ratings.head(), libro_data.head(), sep='\n')

           Id                           Title  Price         User_id  \
0  1882931173  Its Only Art If Its Well Hung!    NaN   AVCGYZL8FQQTD   
1  0826414346        Dr. Seuss: American Icon    NaN  A30TK6U7DNS82R   
2  0826414346        Dr. Seuss: American Icon    NaN  A3UH4UZ4RSVO82   
3  0826414346        Dr. Seuss: American Icon    NaN  A2MVUWT453QH61   
4  0826414346        Dr. Seuss: American Icon    NaN  A22X4XUPKF66MR   

                          profileName review/helpfulness  review/score  \
0               Jim of Oz "jim-of-oz"                7/7           4.0   
1                       Kevin Killian              10/10           5.0   
2                        John Granger              10/11           5.0   
3  Roy E. Perry "amateur philosopher"                7/7           4.0   
4     D. H. Richards "ninthwavestore"                3/3           4.0   

   review/time                                   review/summary  \
0    940636800           Nice collection of Julie Strai

In [11]:
# Keep only the columns the collaborative-filtering model needs and drop every
# row where any of them is missing. `dropna()` returns a new frame, so
# `libro_ratings` is left untouched and re-running this cell is idempotent.
popularidad = libro_ratings[['Title', 'User_id', 'review/score']].dropna()

# The columns are passed in (user, item, rating) order; the reader declares
# that ratings lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(popularidad[['User_id', 'Title', 'review/score']], reader)
trainset = data.build_full_trainset()

# SVD initialises its factors randomly; pinning the seed makes the fitted
# model (and every ranking derived from it) reproducible across kernel restarts.
entrenamiento = SVD(random_state=42)
entrenamiento.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1fcf15e1520>

In [12]:
# Vectorise the book descriptions with TF-IDF, ignoring English stop words.
# Missing descriptions become empty strings so the vectoriser accepts them.
tfidf = TfidfVectorizer(stop_words='english')
libro_data['description'] = libro_data['description'].fillna('')
tfidf_matrix = tfidf.fit_transform(libro_data['description'])

# Brute-force nearest-neighbour search using cosine distance between rows.
nn = NearestNeighbors(metric='cosine', algorithm='brute')
nn.fit(tfidf_matrix)

# Lookup from title to row position in `libro_data` / `tfidf_matrix`.
# Positions (not index labels) are stored because both `tfidf_matrix[...]` and
# `.iloc[...]` are positional. De-duplication has to happen on the *index*
# (the titles): `Series.drop_duplicates()` only compares the values, which are
# all distinct here, so it would leave repeated titles in place and make
# `indices[title]` return a Series instead of a single row number.
posiciones = pd.Series(range(len(libro_data)), index=libro_data['Title'])
indices = posiciones[~posiciones.index.duplicated(keep='first')]

In [13]:
def recomendacion_de_libros(titulo, entrenamiento, df, libro_data, nn, n=3,
                            title_index=None, features=None, user_id=0):
    """Recommend up to ``n`` books similar to ``titulo``, ranked by predicted rating.

    The book's nearest neighbours in feature space are fetched from ``nn``;
    each candidate title is then scored with ``entrenamiento.predict`` and the
    candidates are returned from highest to lowest estimate.

    Args:
        titulo: Title of the book to find recommendations for.
        entrenamiento: Fitted rating model exposing ``predict(uid, iid)`` whose
            result has an ``est`` attribute.
        df: Unused; kept so existing callers do not break.
        libro_data: DataFrame with a ``'Title'`` column, row-aligned with
            ``features``.
        nn: Fitted neighbour index exposing ``kneighbors(X, n_neighbors=...)``.
        n: Maximum number of recommendations to return.
        title_index: Mapping/Series from title to row position. Defaults to the
            notebook-level ``indices``.
        features: Row-sliceable feature matrix with a ``shape`` attribute.
            Defaults to the notebook-level ``tfidf_matrix``.
        user_id: User id passed to ``entrenamiento.predict``. The default ``0``
            reproduces the original hard-coded call.
            NOTE(review): ``0`` presumably matches no real ``User_id``, so the
            score is not personalised - confirm this is intended.

    Returns:
        A list of recommended titles (best first), or a one-element list with
        an error message when ``titulo`` is not in ``title_index``.
    """
    # Fall back to the objects built earlier in the notebook.
    if title_index is None:
        title_index = indices
    if features is None:
        features = tfidf_matrix

    if titulo not in title_index:
        return ["El libro no se encuentra en la base de datos."]
    if n <= 0:
        return []

    idx = title_index[titulo]
    # A title that appears more than once yields a Series; use its first row.
    if hasattr(idx, "iloc"):
        idx = idx.iloc[0]
    idx = int(idx)

    # Ask for one extra neighbour (the query book itself is normally among the
    # results) but never more than the number of available rows.
    k = min(n + 1, features.shape[0])
    _, vecinos = nn.kneighbors(features[idx:idx + 1], n_neighbors=k)

    # Drop the query row by value rather than by position: with tied distances
    # (e.g. identical descriptions) it is not guaranteed to come first.
    book_indices = [int(i) for i in vecinos[0] if int(i) != idx][:n]
    candidatos = libro_data['Title'].iloc[book_indices]

    puntuados = [
        (title, entrenamiento.predict(user_id, title).est)
        for title in candidatos
    ]
    # Stable sort: candidates with equal estimates keep their similarity order.
    puntuados.sort(key=lambda par: par[1], reverse=True)
    return [title for title, _ in puntuados]

In [15]:
# Example query: recommendations for a single known title (default n).
titulo = "The Church of Christ: A Biblical Ecclesiology for Today"
recomendacion = recomendacion_de_libros(
    titulo=titulo,
    entrenamiento=entrenamiento,
    df=popularidad,
    libro_data=libro_data,
    nn=nn,
)
print(recomendacion)

['The Church (Contours of Christian Theology)', 'The Church: Our Story: Catholic Tradtion, Mission, and Practice', 'Joseph (People of the Promise)']
