<a href="https://colab.research.google.com/github/VyacheslavGusev/Rec_sys/blob/main/Hybrid_Rec_sys.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors

In [11]:
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')

In [12]:
movies_with_ratings = pd.merge(movies, ratings, on='movieId').reset_index(drop=True)
movies_with_ratings.dropna(inplace=True)

In [13]:
movies_with_ratings.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0,964982703
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,847434962
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5,1106635946
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5,1510577970
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5,1305696483


In [14]:
user_movie_ratings = movies_with_ratings.pivot_table(index='userId', columns='title', values='rating').fillna(0)

In [23]:
user_movie_ratings.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
# Реализуем коллаборативную фильтрацию с помощью метода ближайших соседей
knn_model = NearestNeighbors(metric='minkowski', algorithm='ball_tree')
knn_model.fit(user_movie_ratings)

In [25]:
# Реализуем контентную фильтрацию на основе жанров фильмов
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(movies['genres'])
content_similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [26]:
def get_hybrid_recommendations(user_id, num_recommendations=5):
    user_ratings = user_movie_ratings.loc[user_id].values.reshape(1, -1)

    # Получение ближайших соседей пользователя
    _, user_indices = knn_model.kneighbors(user_ratings, n_neighbors=5)

    # Объединение рекомендаций коллаборативной и контентной фильтрации
    recommendations = set()
    for idx in user_indices.flatten():
        similar_movies = list(movies_with_ratings['title'][movies_with_ratings['userId'] == idx])
        recommendations.update(similar_movies)

    similar_movies = list(movies_with_ratings['title'].unique())
    for movie in user_movie_ratings.columns:
        if movie not in recommendations:
            similar_movies.append(movie)

    user_profile = movies_with_ratings[movies_with_ratings['userId'] == user_id]['genres'].values
    user_similarity = cosine_similarity(tfidf.transform(user_profile), tfidf_matrix).flatten()
    similar_movies = [(movie, score) for movie, score in zip(similar_movies, user_similarity)]
    similar_movies.sort(key=lambda x: x[1], reverse=True)

    return [movie[0] for movie in similar_movies[:num_recommendations]]


Попробуем применить модель для рекомендаций для пользователей

In [27]:
user_id = 123
recommendations = get_hybrid_recommendations(user_id)
print("Рекомендации для пользователя", user_id, ":")
for i, movie in enumerate(recommendations, 1):
    print(i, ".", movie)

Рекомендации для пользователя 123 :
1 . Seven (a.k.a. Se7en) (1995)
2 . Just Cause (1995)
3 . Underneath (1995)
4 . True Crime (1996)
5 . Rear Window (1954)


