In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import seaborn as sns

# Carregar os dados
!wget https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
!unzip ml-latest-small.zip -d movielens_data

# Carregar os arquivos CSV
ratings = pd.read_csv('movielens_data/ml-latest-small/ratings.csv')
movies = pd.read_csv('movielens_data/ml-latest-small/movies.csv')

# Exibir as primeiras linhas dos dados
print(ratings.head())
print(movies.head())


--2024-10-04 22:55:07--  https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 978202 (955K) [application/zip]
Saving to: ‘ml-latest-small.zip’


2024-10-04 22:55:08 (3.23 MB/s) - ‘ml-latest-small.zip’ saved [978202/978202]

Archive:  ml-latest-small.zip
   creating: movielens_data/ml-latest-small/
  inflating: movielens_data/ml-latest-small/links.csv  
  inflating: movielens_data/ml-latest-small/tags.csv  
  inflating: movielens_data/ml-latest-small/ratings.csv  
  inflating: movielens_data/ml-latest-small/README.txt  
  inflating: movielens_data/ml-latest-small/movies.csv  
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815

In [5]:
# Criar a matriz de utilidade
utility_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Exibir a matriz de utilidade
print(utility_matrix.head())



movieId  1       2       3       4       5       6       7       8       \
userId                                                                    
1           4.0     0.0     4.0     0.0     0.0     4.0     0.0     0.0   
2           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
3           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
4           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
5           4.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   

movieId  9       10      ...  193565  193567  193571  193573  193579  193581  \
userId                   ...                                                   
1           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
2           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
3           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
4           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0

In [6]:
def recommend_movies_user_based(user_id, num_recommendations=5):
    user_similarity = cosine_similarity(utility_matrix)
    user_similarity_df = pd.DataFrame(user_similarity, index=utility_matrix.index, columns=utility_matrix.index)

    similar_users = user_similarity_df[user_id].sort_values(ascending=False).iloc[1:]  # Ignorar o próprio usuário
    similar_users_indices = similar_users.index

    similar_users_ratings = utility_matrix.loc[similar_users_indices]

    recommendations = similar_users_ratings.mean(axis=0).sort_values(ascending=False)
    recommended_movies = recommendations[recommendations > 0].index.tolist()[:num_recommendations]

    return movies[movies['movieId'].isin(recommended_movies)]

# Exemplo de recomendação para um usuário
user_id = 1
print("Recomendações Baseadas em Usuário:")
print(recommend_movies_user_based(user_id))


Recomendações Baseadas em Usuário:
      movieId                             title                       genres
257       296               Pulp Fiction (1994)  Comedy|Crime|Drama|Thriller
277       318  Shawshank Redemption, The (1994)                  Crime|Drama
314       356               Forrest Gump (1994)     Comedy|Drama|Romance|War
510       593  Silence of the Lambs, The (1991)        Crime|Horror|Thriller
1939     2571                Matrix, The (1999)       Action|Sci-Fi|Thriller


In [7]:
def recommend_movies_item_based(movie_id, num_recommendations=5):
    item_similarity = cosine_similarity(utility_matrix.T)
    item_similarity_df = pd.DataFrame(item_similarity, index=utility_matrix.columns, columns=utility_matrix.columns)

    similar_items = item_similarity_df[movie_id].sort_values(ascending=False).iloc[1:]  # Ignorar o próprio item
    recommended_movies = similar_items.index.tolist()[:num_recommendations]

    return movies[movies['movieId'].isin(recommended_movies)]

# Exemplo de recomendação para um filme
movie_id = 1  # "Toy Story (1995)"
print("Recomendações Baseadas em Item:")
print(recommend_movies_item_based(movie_id))


Recomendações Baseadas em Item:
      movieId                                      title  \
224       260  Star Wars: Episode IV - A New Hope (1977)   
314       356                        Forrest Gump (1994)   
418       480                       Jurassic Park (1993)   
615       780       Independence Day (a.k.a. ID4) (1996)   
2355     3114                         Toy Story 2 (1999)   

                                           genres  
224                       Action|Adventure|Sci-Fi  
314                      Comedy|Drama|Romance|War  
418              Action|Adventure|Sci-Fi|Thriller  
615              Action|Adventure|Sci-Fi|Thriller  
2355  Adventure|Animation|Children|Comedy|Fantasy  


In [8]:
def recommend_movies_ensemble(user_id, movie_id, num_recommendations=5):
    user_recommendations = recommend_movies_user_based(user_id, num_recommendations)
    item_recommendations = recommend_movies_item_based(movie_id, num_recommendations)

    # Combinar as recomendações
    combined_recommendations = pd.Series(user_recommendations['movieId'].tolist() + item_recommendations['movieId'].tolist()).value_counts()

    # Obter as top N recomendações
    final_recommendations = combined_recommendations.index.tolist()[:num_recommendations]

    return movies[movies['movieId'].isin(final_recommendations)]

# Exemplo de recomendação Ensemble
print("Recomendações Ensemble:")
print(recommend_movies_ensemble(user_id, movie_id))


Recomendações Ensemble:
      movieId                             title                       genres
257       296               Pulp Fiction (1994)  Comedy|Crime|Drama|Thriller
277       318  Shawshank Redemption, The (1994)                  Crime|Drama
314       356               Forrest Gump (1994)     Comedy|Drama|Romance|War
510       593  Silence of the Lambs, The (1991)        Crime|Horror|Thriller
1939     2571                Matrix, The (1999)       Action|Sci-Fi|Thriller


In [9]:
from sklearn.model_selection import train_test_split

# Dividir os dados em treino e teste
train_data, test_data = train_test_split(ratings, test_size=0.2, random_state=42)

# Criar a matriz de utilidade para o conjunto de treino
utility_matrix_train = train_data.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Criar a matriz de utilidade para o conjunto de teste
utility_matrix_test = test_data.pivot(index='userId', columns='movieId', values='rating').fillna(0)


In [10]:
def recommend_movies_user_based(user_id, num_recommendations=5, utility_matrix=utility_matrix_train):
    user_similarity = cosine_similarity(utility_matrix)
    user_similarity_df = pd.DataFrame(user_similarity, index=utility_matrix.index, columns=utility_matrix.index)

    similar_users = user_similarity_df[user_id].sort_values(ascending=False).iloc[1:]  # Ignorar o próprio usuário
    similar_users_indices = similar_users.index

    similar_users_ratings = utility_matrix.loc[similar_users_indices]

    recommendations = similar_users_ratings.mean(axis=0).sort_values(ascending=False)
    recommended_movies = recommendations[recommendations > 0].index.tolist()[:num_recommendations]

    return recommended_movies

def recommend_movies_item_based(movie_id, num_recommendations=5, utility_matrix=utility_matrix_train):
    item_similarity = cosine_similarity(utility_matrix.T)
    item_similarity_df = pd.DataFrame(item_similarity, index=utility_matrix.columns, columns=utility_matrix.columns)

    similar_items = item_similarity_df[movie_id].sort_values(ascending=False).iloc[1:]  # Ignorar o próprio item
    recommended_movies = similar_items.index.tolist()[:num_recommendations]

    return recommended_movies

def recommend_movies_ensemble(user_id, movie_id, num_recommendations=5):
    user_recommendations = recommend_movies_user_based(user_id, num_recommendations)
    item_recommendations = recommend_movies_item_based(movie_id, num_recommendations)

    combined_recommendations = pd.Series(user_recommendations + item_recommendations).value_counts()
    final_recommendations = combined_recommendations.index.tolist()[:num_recommendations]

    return final_recommendations


In [11]:
def evaluate_recommendations(num_recommendations=5):
    hits = 0
    total_recommendations = 0

    for user_id in utility_matrix_test.index:
        # Verificar quais filmes foram realmente avaliados pelo usuário no conjunto de teste
        true_movies = utility_matrix_test.loc[user_id][utility_matrix_test.loc[user_id] > 0].index.tolist()

        # Se não houver filmes avaliados, pular o usuário
        if not true_movies:
            continue

        # Fazer recomendações usando o modelo
        recommended_movies = recommend_movies_ensemble(user_id, movie_id=1, num_recommendations=num_recommendations)

        # Contar acertos
        hits += len(set(recommended_movies) & set(true_movies))
        total_recommendations += num_recommendations

    # Calcular a precisão
    precision = hits / total_recommendations if total_recommendations > 0 else 0
    return precision

precision_score = evaluate_recommendations(num_recommendations=5)
print(f"Acurácia (Precisão) do Modelo: {precision_score:.4f}")


KeyboardInterrupt: 