Sistemas de Recomendação: Filtragem Colaborativa
Construa um sistema de recomendação baseado em filtragem colaborativa usando um conjunto de dados de avaliações de filmes. Utilize tanto a abordagem baseada em usuário quanto a abordagem baseada em item.


In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# 1. Load the MovieLens dataset
# Uses the MovieLens 100K subset for simplicity. The file is fetched over
# HTTP, so this cell needs network access.
url = 'https://files.grouplens.org/datasets/movielens/ml-100k/u.data'
# Column names are supplied explicitly through names=, so pandas treats the
# first line of the tab-separated file as data rather than as a header.
columns = ['user_id', 'item_id', 'rating', 'timestamp']
ratings = pd.read_csv(url, sep='\t', names=columns)

In [4]:
# 2. Pre-processing
# Build the ratings matrix: one row per user_id, one column per item_id, with
# the rating as the cell value. (user, item) pairs without a rating are NaN;
# duplicate pairs, if any, are combined by pivot_table's default aggregation.
ratings_matrix = ratings.pivot_table(index='user_id', columns='item_id', values='rating')

In [5]:
# Replace NaN with 0 so that similarity can be computed.
# NOTE: from here on a stored 0 means "not rated"; get_user_recommendations
# relies on that when it tests `== 0`.
ratings_matrix.fillna(0, inplace=True)

In [6]:
# 3. User-based collaborative filtering
# Cosine similarity between every pair of rows (users) of the ratings matrix.
user_similarity = cosine_similarity(ratings_matrix)

In [10]:
# Wrap the user-similarity array in a DataFrame labelled by user_id on both
# axes, so similarities can be looked up by user ID.
user_similarity_df = pd.DataFrame(user_similarity, index=ratings_matrix.index, columns=ratings_matrix.index)
# display() is supplied by IPython/Jupyter; it is not imported in this file.
display(user_similarity_df)

user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,0.166931,0.047460,0.064358,0.378475,0.430239,0.440367,0.319072,0.078138,0.376544,...,0.369527,0.119482,0.274876,0.189705,0.197326,0.118095,0.314072,0.148617,0.179508,0.398175
2,0.166931,1.000000,0.110591,0.178121,0.072979,0.245843,0.107328,0.103344,0.161048,0.159862,...,0.156986,0.307942,0.358789,0.424046,0.319889,0.228583,0.226790,0.161485,0.172268,0.105798
3,0.047460,0.110591,1.000000,0.344151,0.021245,0.072415,0.066137,0.083060,0.061040,0.065151,...,0.031875,0.042753,0.163829,0.069038,0.124245,0.026271,0.161890,0.101243,0.133416,0.026556
4,0.064358,0.178121,0.344151,1.000000,0.031804,0.068044,0.091230,0.188060,0.101284,0.060859,...,0.052107,0.036784,0.133115,0.193471,0.146058,0.030138,0.196858,0.152041,0.170086,0.058752
5,0.378475,0.072979,0.021245,0.031804,1.000000,0.237286,0.373600,0.248930,0.056847,0.201427,...,0.338794,0.080580,0.094924,0.079779,0.148607,0.071459,0.239955,0.139595,0.152497,0.313941
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0.118095,0.228583,0.026271,0.030138,0.071459,0.111852,0.107027,0.095898,0.039852,0.071460,...,0.066039,0.431154,0.258021,0.226449,0.432666,1.000000,0.087687,0.180029,0.043264,0.144250
940,0.314072,0.226790,0.161890,0.196858,0.239955,0.352449,0.329925,0.246883,0.120495,0.342961,...,0.327153,0.107024,0.187536,0.181317,0.175158,0.087687,1.000000,0.145152,0.261376,0.241028
941,0.148617,0.161485,0.101243,0.152041,0.139595,0.144446,0.059993,0.146145,0.143245,0.090305,...,0.046952,0.203301,0.288318,0.234211,0.313400,0.180029,0.145152,1.000000,0.101642,0.095120
942,0.179508,0.172268,0.133416,0.170086,0.152497,0.317328,0.282003,0.175322,0.092497,0.212330,...,0.226440,0.073513,0.089588,0.129554,0.099385,0.043264,0.261376,0.101642,1.000000,0.182465


In [8]:
# 4. Item-based collaborative filtering
# Transposing puts items on the rows, so this is the cosine similarity
# between every pair of items.
item_similarity = cosine_similarity(ratings_matrix.T)

In [12]:
# Wrap the item-similarity array in a DataFrame labelled by item_id on both
# axes, so similarities can be looked up by movie ID.
item_similarity_df = pd.DataFrame(item_similarity, index=ratings_matrix.columns, columns=ratings_matrix.columns)
# display() is supplied by IPython/Jupyter; it is not imported in this file.
display(item_similarity_df)

item_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,0.402382,0.330245,0.454938,0.286714,0.116344,0.620979,0.481114,0.496288,0.273935,...,0.035387,0.0,0.000000,0.000000,0.035387,0.0,0.0,0.0,0.047183,0.047183
2,0.402382,1.000000,0.273069,0.502571,0.318836,0.083563,0.383403,0.337002,0.255252,0.171082,...,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.078299,0.078299
3,0.330245,0.273069,1.000000,0.324866,0.212957,0.106722,0.372921,0.200794,0.273669,0.158104,...,0.000000,0.0,0.000000,0.000000,0.032292,0.0,0.0,0.0,0.000000,0.096875
4,0.454938,0.502571,0.324866,1.000000,0.334239,0.090308,0.489283,0.490236,0.419044,0.252561,...,0.000000,0.0,0.094022,0.094022,0.037609,0.0,0.0,0.0,0.056413,0.075218
5,0.286714,0.318836,0.212957,0.334239,1.000000,0.037299,0.334769,0.259161,0.272448,0.055453,...,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.094211
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1678,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.000000,0.000000,0.000000,1.0,1.0,1.0,0.000000,0.000000
1679,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.000000,0.000000,0.000000,1.0,1.0,1.0,0.000000,0.000000
1680,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.000000,0.000000,0.000000,1.0,1.0,1.0,0.000000,0.000000
1681,0.047183,0.078299,0.000000,0.056413,0.000000,0.000000,0.051498,0.082033,0.057360,0.000000,...,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,1.000000,0.000000


In [13]:
# Recommend movies to a user with user-based collaborative filtering
def get_user_recommendations(user_id, num_recommendations=5, ratings=None, similarity=None):
    """Return the best-scoring movies that ``user_id`` has not rated yet.

    Each unrated movie is scored with the sum, over every other user, of
    ``similarity(user_id, other) * rating(other, movie)``. The sums are not
    normalised, so movies rated by many users tend to rank higher.

    Args:
        user_id: Label of the target user; must be present in both tables.
        num_recommendations: Maximum number of movies to return.
        ratings: User x movie DataFrame in which 0 means "not rated".
            Defaults to the module-level ``ratings_matrix``.
        similarity: User x user similarity DataFrame. Defaults to the
            module-level ``user_similarity_df``.

    Returns:
        A list of ``(movie_id, score)`` tuples ordered by descending score.
        Empty when the user has rated every movie or there are no other users.

    Raises:
        KeyError: If ``user_id`` is missing from ``ratings`` or ``similarity``.
    """
    if ratings is None:
        ratings = ratings_matrix
    if similarity is None:
        similarity = user_similarity_df

    user_ratings = ratings.loc[user_id]

    # Exclude the target user by label. Slicing off the first entry of the
    # sorted similarities is unsafe: a tie at 1.0 (or float rounding) can put
    # another user first and leave the target user among its own neighbours.
    neighbour_weights = similarity[user_id].drop(user_id)

    # A stored rating of 0 marks a movie the user has not rated.
    unrated_movies = ratings.columns[(user_ratings == 0).to_numpy()]
    if neighbour_weights.empty or unrated_movies.empty:
        return []

    # One matrix-vector product replaces the users x movies Python loop.
    neighbour_ratings = ratings.loc[neighbour_weights.index, unrated_movies]
    totals = neighbour_weights.to_numpy() @ neighbour_ratings.to_numpy()
    scores = pd.Series(totals, index=unrated_movies)

    # Stable sort keeps tied movies in column order, making results repeatable.
    top = scores.sort_values(ascending=False, kind="mergesort").iloc[:num_recommendations]
    return list(top.items())

In [14]:
def get_item_recommendations(movie_id, num_recommendations=5, similarity=None):
    """Return the IDs of the movies most similar to ``movie_id``.

    Args:
        movie_id: Label of the reference movie; must be a column (and row)
            of the similarity table.
        num_recommendations: Maximum number of movie IDs to return.
        similarity: Movie x movie similarity DataFrame. Defaults to the
            module-level ``item_similarity_df``.

    Returns:
        A list of movie IDs ordered by descending similarity, never
        containing ``movie_id`` itself.

    Raises:
        KeyError: If ``movie_id`` is missing from ``similarity``.
    """
    if similarity is None:
        similarity = item_similarity_df

    # Exclude the reference movie by label. Dropping the first sorted entry is
    # wrong whenever another movie ties at similarity 1.0 (identical rating
    # columns): the reference movie could survive and a real match be lost.
    neighbours = similarity[movie_id].drop(movie_id)

    # Stable sort keeps tied movies in index order, making results repeatable.
    ranked = neighbours.sort_values(ascending=False, kind="mergesort")
    return ranked.index[:num_recommendations].tolist()

# Try out the recommendations
user_id = 1  # Change the user ID as needed
user_recommendations = get_user_recommendations(user_id)
item_recommendations = get_item_recommendations(1)  # Change the movie ID as needed

In [15]:
# Show the user-based recommendations as (movie_id, score) pairs.
print(f"Recomendações para o usuário {user_id}:")
print(user_recommendations)

# Plain string: there is nothing to interpolate, so no f-prefix is needed.
# NOTE(review): the movie ID in this message is hard-coded and has to be kept
# in sync with the argument passed to get_item_recommendations.
print("\nRecomendações de filmes semelhantes para o filme ID 1:")
print(item_recommendations)

Recomendações para o usuário 1:
[(318, 472.05529036884326), (423, 433.9720062420637), (357, 415.6271777986989), (286, 404.10289939530526), (288, 393.35046489592884)]

Recomendações de filmes semelhantes para o filme ID 1:
[50, 181, 121, 117, 405]
