In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [4]:
from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import precision_at_k, recall_at_k  

from scipy.sparse import csr_matrix



In [11]:
ratings = pd.read_csv('data/ratings.csv') # Поставленные оценки
books = pd.read_csv('data/books.csv') # Информация о книгах
tags = pd.read_csv('data/tags.csv') # Информация о тегах
book_tags = pd.read_csv('data/book_tags.csv') # Книги с тегами 

In [12]:
book_tags.shape

(999912, 3)

In [13]:
book_tags = book_tags.merge(books[['book_id', 'goodreads_book_id']], how='left')
book_tags[book_tags['goodreads_book_id'] == 5]

Unnamed: 0,goodreads_book_id,tag_id,count,book_id
300,5,11557,40087,18
301,5,11305,39330,18
302,5,8717,17944,18
303,5,33114,12856,18
304,5,30574,11909,18
...,...,...,...,...
395,5,20781,299,18
396,5,32345,298,18
397,5,12600,282,18
398,5,3379,277,18


In [14]:
book_tags = book_tags[book_tags['tag_id'].isin(tags['tag_id'])]
book_tags.shape

(300738, 4)

In [15]:
ratings_matrix = csr_matrix((ratings.rating,(ratings.user_id,ratings.book_id))) 
# Передаём в качестве аргументов в функцию выставленный рейтинг (это будут значения матрицы), 
# а также id пользователя и id книги (это будут индексы для строк и столбцов матрицы)

In [25]:
meta_matrix  = csr_matrix(([1]*len(book_tags),(book_tags.book_id,book_tags.tag_id))) 

In [26]:
ratings_matrix.mean()

0.007086188900997592

In [27]:
model = LightFM(
    loss='warp-kos', # Определяем функцию потерь
    random_state=42, # Фиксируем случайное разбиение
    learning_rate=0.05, # Темп обучения
    no_components=100 # Размерность вектора для представления данных в модели
)

In [28]:
train, test = random_train_test_split(
    ratings_matrix, # Общая выборка
    test_percentage=0.2, # Размер тестовой выборки
    random_state=42 # Генератор случайных чисел
)

In [29]:
model = model.fit(
    train, # Обучающая выборка
    item_features=meta_matrix, # Признаки товаров
    epochs=10, # Количество эпох
    verbose=True # Отображение обучения
)

Epoch: 100%|██████████| 10/10 [06:47<00:00, 40.72s/it]


In [31]:
pres_1 = precision_at_k(model=model, test_interactions=test, item_features = meta_matrix)

In [35]:
np.mean(pres_1)

0.023531597

________________

In [1]:
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from tensorflow.keras.models import Model

In [5]:
df = pd.read_csv('data/ratings.csv')

In [9]:
train, test = train_test_split(
    df, # Общая выборка
    test_size=0.2, # Размер тестовой выборки
    random_state=42 # Генератор случайных чисел
)

In [10]:
train.shape

(785404, 3)

In [16]:
n_books = df['book_id'].nunique()
n_books

10000

In [17]:
n_users = df['user_id'].nunique()
n_users

53424

In [18]:
book_input = Input(shape=[1], name="Book-Input")
book_embedding = Embedding(n_books+1, 5, name="Book-Embedding")(book_input)
book_vec = Flatten(name="Flatten-Books")(book_embedding)

In [19]:
user_input = Input(shape=[1], name="User-Input")
user_embedding = Embedding(n_users+1, 5, name="User-Embedding")(user_input)
user_vec = Flatten(name="Flatten-Users")(user_embedding)

In [20]:
conc = Concatenate()([book_vec, user_vec])

In [21]:
fc1 = Dense(128, activation='relu')(conc)
fc2 = Dense(32, activation='relu')(fc1)
out = Dense(1)(fc2)

In [22]:
model2 = Model([user_input, book_input], out)

In [23]:
model2.compile(optimizer = 'adam',loss =  'mean_squared_error')

In [24]:
history = model2.fit([train.user_id, train.book_id], train.rating, epochs=5, verbose=1)

Epoch 1/5
Epoch 2/5

KeyboardInterrupt: 

In [None]:
fc1 = Dense(128, activation='relu')(conc)
fc2 = Dense(32, activation='relu')(fc1)
fc3 = Dense(8, activation='relu')(fc2)
out = Dense(1)(fc3)

model2 = Model([user_input, book_input], out)
model2.compile('adam', 'mean_squared_error')
result = model2.fit([train.user_id, train.book_id], train.rating, epochs=10, verbose=1)
model2.evaluate([test.user_id, test.book_id], test.rating)