# Задание к теме «Гибридные рекомендательные системы»

Что делать?

Датасет ml-latest

Вспомнить подходы, которые мы разбирали

Выбрать понравившийся подход к гибридным системам

Написать свою

In [1]:
from surprise import Dataset
from surprise import SVD
from surprise import KNNWithMeans
from surprise import accuracy
from surprise import Reader
from surprise.model_selection import train_test_split

import pandas as pd

In [2]:
movies = pd.read_csv('../1_Введение и классификация рекомендательных систем/movies.csv')
ratings = pd.read_csv('../1_Введение и классификация рекомендательных систем/ratings.csv')

In [3]:
movies_with_ratings = movies.join(ratings.set_index('movieId'), on='movieId').reset_index(drop=True)
movies_with_ratings.dropna(inplace=True)

In [4]:
dataset = pd.DataFrame({
    'uid': movies_with_ratings.userId,
    'iid': movies_with_ratings.title,
    'rating': movies_with_ratings.rating
})

In [5]:
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(dataset, reader)

In [6]:
trainset, testset = train_test_split(data, test_size=.15, random_state=42)

# алгоритм SVD

In [7]:
%%time
algo = SVD(n_factors=20, n_epochs=20)
algo.fit(trainset)

Wall time: 2.69 s


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x22b90054460>

In [9]:
current_user_id = 2.0
user_movies = movies_with_ratings[(movies_with_ratings.userId == current_user_id) & (movies_with_ratings.rating >= 4.5)].title.unique()

scores = []
titles = []

for movie in movies_with_ratings.title.unique():
    if movie in user_movies:
        continue
    estimation = algo.predict(uid=current_user_id, iid=movie).est
    if estimation >= 4:
        scores.append(estimation)
        titles.append(movie)

In [10]:
recom_SVD = pd.DataFrame({'titles': titles, 'scores': scores})
recom_SVD.sort_values('scores', ascending=False)

Unnamed: 0,titles,scores
48,"Streetcar Named Desire, A (1951)",4.382404
59,Lawrence of Arabia (1962),4.365447
93,Cool Hand Luke (1967),4.347261
26,Dr. Strangelove or: How I Learned to Stop Worr...,4.319528
161,"Lord of the Rings: The Fellowship of the Ring,...",4.318722
...,...,...
142,Double Indemnity (1944),4.002062
188,Harry Potter and the Prisoner of Azkaban (2004),4.001084
8,Heavenly Creatures (1994),4.000586
149,Gladiator (2000),4.000426


# алгоритм KNNWithMeans 

In [11]:
algo2 = KNNWithMeans(k=50, sim_options={'name': 'pearson_baseline', 'user_based': True})
algo2.fit(trainset)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x22b90da3f70>

In [12]:
scores = []
titles = []

for movie in movies_with_ratings.title.unique():
    if movie in user_movies:
        continue
    estimation = algo2.predict(uid=current_user_id, iid=movie).est
    if estimation >= 4:
        scores.append(estimation)
        titles.append(movie)

# Гибридная рекомендательная система

In [13]:
recom_KNN = pd.DataFrame({'titles': titles, 'scores': scores})
recom_KNN.sort_values('scores', ascending=False)

Unnamed: 0,titles,scores
1202,Dune (2000),5.000000
783,"Legend of Drunken Master, The (Jui kuen II) (1...",5.000000
258,Stalker (1979),5.000000
1787,13 Assassins (Jûsan-nin no shikaku) (2010),5.000000
1778,Emma (2009),5.000000
...,...,...
104,Sliver (1993),4.002198
209,E.T. the Extra-Terrestrial (1982),4.001777
1504,Flushed Away (2006),4.001472
126,James and the Giant Peach (1996),4.001154


In [14]:
# соединяем рекомендации по двум алгоритмам, причем возьмем только общие
recom_hybrid = recom_KNN.merge(recom_SVD, on='titles')
recom_hybrid

Unnamed: 0,titles,scores_x,scores_y
0,Casino (1995),4.255229,4.115316
1,"City of Lost Children, The (Cité des enfants p...",4.573885,4.091203
2,Twelve Monkeys (a.k.a. 12 Monkeys) (1995),4.168820,4.051618
3,"Usual Suspects, The (1995)",4.123757,4.144446
4,Bottle Rocket (1996),4.894162,4.009032
...,...,...,...
201,Untitled Spider-Man Reboot (2017),4.879492,4.088350
202,Spotlight (2015),4.371084,4.065076
203,Planet Earth (2006),4.820853,4.002551
204,Logan (2017),4.801976,4.209087


In [15]:
# усредним оценку по обоим алгоритмам
recom_hybrid['hybrid_score'] = (recom_hybrid['scores_x'] + recom_hybrid['scores_y'])/2
recom_hybrid.sort_values('hybrid_score', ascending=False)

Unnamed: 0,titles,scores_x,scores_y,hybrid_score
41,"Streetcar Named Desire, A (1951)",4.859958,4.382404,4.621181
124,Guess Who's Coming to Dinner (1967),4.968083,4.260743,4.614413
125,"Hustler, The (1961)",5.000000,4.187470,4.593735
96,"Sweet Hereafter, The (1997)",5.000000,4.182300,4.591150
36,To Catch a Thief (1955),5.000000,4.178950,4.589475
...,...,...,...,...
171,Serenity (2005),4.049757,4.017932,4.033845
104,"Untouchables, The (1987)",4.042248,4.017512,4.029880
13,"Fugitive, The (1993)",4.040585,4.017945,4.029265
173,V for Vendetta (2006),4.022304,4.002505,4.012404
