In [3]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt
from sklearn.decomposition import NMF


In [2]:
ratings = pd.read_csv('ml-latest-small/ratings.csv')
movies = pd.read_csv('ml-latest-small/movies.csv')

all_movies = movies['movieId'].unique()

user_movie_matrix = ratings.pivot_table(
    index='userId',
    columns='movieId',
    values='rating',
    fill_value=0
)

for movie_id in all_movies:
    if movie_id not in user_movie_matrix.columns:
        user_movie_matrix[movie_id] = 0

user_movie_matrix = user_movie_matrix.reindex(columns=all_movies, fill_value=0)

print(user_movie_matrix)

movieId  1       2       3       4       5       6       7       8       \
userId                                                                    
1           4.0     0.0     4.0     0.0     0.0     4.0     0.0     0.0   
2           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
3           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
4           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
5           4.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
...         ...     ...     ...     ...     ...     ...     ...     ...   
606         2.5     0.0     0.0     0.0     0.0     0.0     2.5     0.0   
607         4.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
608         2.5     2.0     2.0     0.0     0.0     0.0     0.0     0.0   
609         3.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
610         5.0     0.0     0.0     0.0     0.0     5.0     0.0     0.0   

movieId  9       10     

In [5]:
# NMF
nmf = NMF(n_components=20, random_state=42, max_iter=500)
nmf_matrix = nmf.fit_transform(user_movie_matrix)  
nmf_components = nmf.components_ 



In [6]:
nmf_pred = np.dot(nmf_matrix, nmf_components)

In [7]:
def recommend_movies_nmf(user_id, n_recommendations=10):
    user_idx = user_movie_matrix.index.get_loc(user_id)
    user_ratings = nmf_pred[user_idx]  

    rated_movies = ratings[ratings['userId'] == user_id]['movieId'].tolist()
    watched_movies = movies[movies['movieId'].isin(rated_movies)][['movieId', 'title']]

    movie_idx = [i for i, movie in enumerate(user_movie_matrix.columns) if movie not in rated_movies]

    recommended_movie_indices = np.argsort(user_ratings[movie_idx])[-n_recommendations:]
    recommended_movie_ids = [user_movie_matrix.columns[i] for i in recommended_movie_indices]
    recommendations = movies[movies['movieId'].isin(recommended_movie_ids)][['movieId', 'title']]

    return watched_movies, recommendations

In [8]:
user_id = 1  
watched_movies, recommendations = recommend_movies_nmf(user_id)

print(f"Фильмы, которые пользователь {user_id} уже посмотрел:")
print(watched_movies, '\n')

print(f"Рекомендации для пользователя {user_id}:")
print(recommendations)

Фильмы, которые пользователь 1 уже посмотрел:
      movieId                           title
0           1                Toy Story (1995)
2           3         Grumpier Old Men (1995)
5           6                     Heat (1995)
43         47     Seven (a.k.a. Se7en) (1995)
46         50      Usual Suspects, The (1995)
...       ...                             ...
2802     3744                    Shaft (2000)
2836     3793                    X-Men (2000)
2847     3809          What About Bob? (1991)
2991     4006  Transformers: The Movie (1986)
3673     5060    M*A*S*H (a.k.a. MASH) (1970)

[232 rows x 2 columns] 

Рекомендации для пользователя 1:
      movieId                                  title
475       542                      Son in Law (1993)
615       780   Independence Day (a.k.a. ID4) (1996)
736       956                  Penny Serenade (1941)
831      1092                  Basic Instinct (1992)
874      1163                 Mina Tannenbaum (1994)
952      1253  Day the Ea

In [None]:
# 2. Разделим данные на обучающую и тестовую выборки
train_data, test_data = train_test_split(user_movie_matrix, test_size=0.2, random_state=42)

# 3. Применяем NMF для матричной факторизации
nmf = NMF(n_components=20, random_state=42)
nmf_matrix = nmf.fit_transform(train_data)

# 4. Прогнозируем значения на тестовой выборке
nmf_pred = np.dot(nmf_matrix, nmf.components_)

# 5. Оценим качество предсказания (например, с использованием MSE)
mse = mean_squared_error(test_data.values, nmf_pred)
print(f'Mean Squared Error (NMF): {mse}')

# 6. Функция для получения рекомендаций
def recommend_movies_nmf(user_id, n_recommendations=10):
    user_idx = train_data.index.get_loc(user_id)
    user_ratings = nmf_pred[user_idx]
    
    # Сортируем по убыванию значений предсказанных рейтингов и исключаем уже просмотренные фильмы
    rated_movies = ratings[ratings['userId'] == user_id]['movieId'].tolist()
    movie_idx = [i for i, movie in enumerate(user_movie_matrix.columns) if movie not in rated_movies]
    
    recommended_movie_indices = np.argsort(user_ratings[movie_idx])[-n_recommendations:]
    recommended_movie_ids = [user_movie_matrix.columns[i] for i in recommended_movie_indices]
    
    return movies[movies['movieId'].isin(recommended_movie_ids)][['movieId', 'title']]

# Пример рекомендации для пользователя
user_id = 1
recommendations = recommend_movies_nmf(user_id)
print(f"Рекомендации для пользователя {user_id}:")
print(recommendations)


