In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# --- 1. Cargar los datos ---
ratings = pd.read_csv('casa_cultura_data/ratings.csv')
copies = pd.read_csv('casa_cultura_data/copies(ejemplares).csv')         # copy_id, book_id
books = pd.read_csv('casa_cultura_data/books.csv', quotechar='"', sep=',', encoding='utf-8', on_bad_lines='skip')           # book_id, title
# Eliminar nulos
ratings.dropna(subset=['user_id', 'copy_id', 'rating'], inplace=True)
copies.dropna(subset=['copy_id', 'book_id'], inplace=True)
books.dropna(subset=['book_id', 'title'], inplace=True)
# Eliminar ratings fuera del rango
ratings = ratings[(ratings['rating'] >= 1) & (ratings['rating'] <= 5)]
# Eliminar duplicados si los hubiera
ratings.drop_duplicates(inplace=True)
copies.drop_duplicates(inplace=True)
books.drop_duplicates(subset='book_id', inplace=True)

In [3]:
ratings.head()

Unnamed: 0,user_id,copy_id,rating
0,1,11,5
1,1,43,4
2,1,44,5
3,1,56,4
4,1,71,3


In [4]:
copies.head()

Unnamed: 0,copy_id,book_id
0,1,1
1,2,1
2,3,1
3,4,1
4,5,2


In [5]:
books.head()

Unnamed: 0,isbn,authors,original_publication_year,original_title,title,language_code,book_id,image_url
0,1554681723.0,Garth Stein,2006.0,The Art of Racing in the Rain,The Art of Racing in the Rain,eng,216,https://images.gr-assets.com/books/1377206302m...
1,,Rainbow Rowell,2013.0,,Fangirl,eng,324,https://images.gr-assets.com/books/1499565420m...
2,679735771.0,Bret Easton Ellis,1991.0,American Psycho,American Psycho,eng,499,https://images.gr-assets.com/books/1436934349m...
3,,Jojo Moyes,2015.0,After You,"After You (Me Before You, #2)",eng,566,https://images.gr-assets.com/books/1429029729m...
4,2266079999.0,Carl Sagan,1985.0,Contact,Contact,eng,1003,https://images.gr-assets.com/books/1408792653m...


In [6]:
# --- 2. Unir ratings con books ---
# Paso 1: Obtener book_id a partir de copy_id
ratings_books = ratings.merge(copies, on='copy_id', how='left')

# Paso 2: Unir con libros para tener títulos
ratings_books = ratings_books.merge(books[['book_id', 'title']], on='book_id', how='left')

In [7]:
# --- 3. Filtrar solo valoraciones positivas (>= 4) ---
ratings_books = ratings_books[ratings_books['rating'] >= 4]

In [8]:
# --- 4. Crear tabla de transacciones usuario-libros ---
# --- FILTRAR POR LIBROS POPULARES Y USUARIOS ACTIVOS ANTES DE CREAR EL BASKET ---
# Filtrar libros con al menos N valoraciones positivas
min_users = 100
libros_filtrados = ratings_books['title'].value_counts()
libros_populares = libros_filtrados[libros_filtrados >= min_users].index

# Filtrar usuarios que han valorado al menos M libros
min_books_rated = 3
user_activity = ratings_books['user_id'].value_counts()
usuarios_activos = user_activity[user_activity >= min_books_rated].index

# Aplicar ambos filtros a ratings_books antes de construir el basket
filtered_ratings = ratings_books[
    ratings_books['title'].isin(libros_populares) &
    ratings_books['user_id'].isin(usuarios_activos)
]
# Crear tabla de transacciones usuario-libros (basket)
basket = filtered_ratings.groupby(['user_id', 'title'])['rating'].count().unstack().fillna(0)
basket = basket > 0  # Convertir a booleano para Apriori

In [9]:
# --- 5. Aplicar Apriori ---
frequent_itemsets = apriori(basket, min_support=0.05, use_colnames=True)

In [10]:
# --- 6. Generar reglas ---
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)
rules = rules.sort_values(by='lift', ascending=False)

In [11]:
# --- 7. Mostrar recomendaciones ---
def recomendar_libros(basado_en_libro, top_n=5):
    basado_en_libro = basado_en_libro.lower()
    recomendados = []

    for _, row in rules.iterrows():
        if basado_en_libro in [x.lower() for x in row['antecedents']]:
            recomendados.extend(row['consequents'])

    # Eliminar duplicados y limitar a N
    recomendados = list(set(recomendados))
    return recomendados[:top_n]

In [12]:
# Ejemplo de uso:
print("Si te gustó 'The Hobbit', también podrías leer:")
print(recomendar_libros('The Hobbit'))

Si te gustó 'The Hobbit', también podrías leer:
['The Two Towers (The Lord of the Rings, #2)', 'Harry Potter and the Goblet of Fire (Harry Potter, #4)', 'Of Mice and Men', 'Harry Potter and the Chamber of Secrets (Harry Potter, #2)', '1984']
