In [3]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader
from surprise import SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Sample movie catalogue: 'title' and 'description' are parallel lists
# (entry i of each list belongs to the same movie).
movie_data = {
    'title': [
        'Inception',
        'The Matrix',
        'Interstellar',
        'The Dark Knight',
        'Pulp Fiction',
    ],
    'description': [
        'A thief who steals corporate secrets through the use of dream-sharing technology.',
        'A computer hacker learns from mysterious rebels about the true nature of his reality.',
        'A team of explorers travel through a wormhole in space in an attempt to ensure humanity\'s survival.',
        'When the menace known as the Joker emerges, Batman must accept one of the greatest psychological tests.',
        'The lives of two mob hitmen, a boxer, and a gangster intertwine in a series of incidents.',
    ],
}

# Sample user ratings: three parallel lists, one entry per (user, movie, rating).
rating_data = {
    'user_id': [1, 1, 2, 2, 3, 3, 4, 4, 5],
    'movie_id': [1, 2, 1, 3, 2, 4, 3, 5, 4],
    'rating': [5, 4, 5, 3, 2, 4, 3, 5, 4],
}

# 1. Content-based filtering: turn the movie descriptions into TF-IDF vectors
#    (English stop words are removed by the vectorizer).
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(
    movie_data['description']
)

# Pairwise cosine similarity between the TF-IDF rows (movie vs. movie).
cosine_sim = cosine_similarity(
    tfidf_matrix,
    tfidf_matrix,
)

# Content-based recommendations from the description similarity matrix
def content_based_recommendations(title, cosine_sim=cosine_sim, movie_data=movie_data, top_n=5):
    """Return the titles most similar in content to ``title``.

    Args:
        title: Title to look up in ``movie_data['title']``.
        cosine_sim: Similarity matrix indexed like ``movie_data['title']``;
            row ``i`` holds the similarity of movie ``i`` to every movie.
        movie_data: Mapping that contains a ``'title'`` list.
        top_n: Maximum number of titles to return (defaults to 5, the
            cut-off that was previously hard-coded).

    Returns:
        A list of at most ``top_n`` titles ordered by decreasing similarity.
        The queried movie itself is never part of the result.

    Raises:
        ValueError: If ``title`` is not present in ``movie_data['title']``
            (raised by ``list.index``).
    """
    titles = movie_data['title']
    idx = titles.index(title)

    # Exclude the queried movie by index instead of dropping the first ranked
    # entry: with tied scores (e.g. duplicate descriptions) or an all-zero
    # TF-IDF row the movie is not guaranteed to be ranked first, and slicing
    # by position would then return the movie itself as a recommendation.
    scored = [
        (other_idx, score)
        for other_idx, score in enumerate(cosine_sim[idx])
        if other_idx != idx
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)

    return [titles[other_idx] for other_idx, _ in scored[:max(top_n, 0)]]

# 2. Collaborative filtering on the user ratings
# Fixed seed so the train/test split, the SVD initialisation and therefore the
# reported RMSE and the recommendations are reproducible across runs.
RANDOM_STATE = 42

# Wrap the ratings in the structures the Surprise library expects.
reader = Reader(rating_scale=(1, 5))
ratings_df = pd.DataFrame(rating_data)
data = Dataset.load_from_df(ratings_df[['user_id', 'movie_id', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2, random_state=RANDOM_STATE)

# Collaborative-filtering model (SVD): fit on the train split, evaluate on the test split.
svd = SVD(random_state=RANDOM_STATE)
svd.fit(trainset)
predictions = svd.test(testset)
accuracy.rmse(predictions)

# Collaborative recommendations based on the model's predicted ratings
def collaborative_recommendations(user_id, svd_model=svd, top_n=5):
    """Return the titles with the highest predicted rating for ``user_id``.

    Movie ids are treated as 1-based positions in ``movie_data['title']``,
    which is how the function maps an id back to a title. The id range is
    derived from the catalogue length instead of being hard-coded, so the
    function keeps working when movies are added or removed.

    Movies the user has already rated are not filtered out.

    Args:
        user_id: Raw user id passed to ``svd_model.predict``.
        svd_model: Fitted model exposing ``predict(uid, iid)`` whose result
            has an ``est`` attribute.
        top_n: Maximum number of titles to return.

    Returns:
        A list of at most ``top_n`` titles ordered by decreasing estimate.
    """
    titles = movie_data['title']

    # Predict the user's rating for every movie in the catalogue.
    estimates = [
        (movie_id, svd_model.predict(user_id, movie_id).est)
        for movie_id in range(1, len(titles) + 1)
    ]

    # Highest estimate first; the sort is stable, so ties keep catalogue order.
    estimates.sort(key=lambda pair: pair[1], reverse=True)
    return [titles[movie_id - 1] for movie_id, _ in estimates[:top_n]]

# 3. Combine the results of both models
def hybrid_recommendations(user_id, movie_title):
    """Merge content-based and collaborative recommendations.

    Calls ``content_based_recommendations(movie_title)`` and then
    ``collaborative_recommendations(user_id)`` and returns the union of the
    two title lists. No scores are averaged or weighted: the result is a
    plain ``set``, so duplicates collapse and ranking order is not kept.

    Args:
        user_id: User id forwarded to the collaborative model.
        movie_title: Seed title forwarded to the content-based model.

    Returns:
        A ``set`` of recommended movie titles.
    """
    by_content = content_based_recommendations(movie_title)
    by_ratings = collaborative_recommendations(user_id)

    # Union of both lists, inserted in the same order as list concatenation.
    return {*by_content, *by_ratings}

# Example: hybrid recommendations for one user and one seed movie
user_id, movie_title = 1, 'Inception'
recommendations = hybrid_recommendations(
    user_id=user_id,
    movie_title=movie_title,
)
print("Гибридные рекомендации:", recommendations)


ModuleNotFoundError: No module named 'surprise'

In [5]:
pip install scikit-surprise


Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-surprise
  Using cached scikit_surprise-1.1.4.tar.gz (154 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for scikit-surprise [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[75 lines of output][0m
  [31m   [0m running bdist_wheel
  [31m   [0m running build
  [31m   [0m running build_py
  [31m   [0m creating build/lib.linux-x86_64-cpython-311/surprise
  [31m   [0m copying surprise/__init__.py -> build/lib.linux-x86_64-cpython-311/surprise
  [31m   [0m copying 