# MovieLens 데이터셋을 활용한 추천 시스템 구축

In [2]:
import networkx as nx
import pandas as pd
import numpy as np

In [4]:
# 1. Load the data
# u.data is tab-separated and is read without a header row; the four fields
# are named explicitly here.
ratings = pd.read_csv(
    './data/ml-100k/u.data',
    sep='\t',
    names=['user_id', 'item_id', 'rating', 'timestamp'],
)
# u.item is pipe-separated and latin-1 encoded. Only the first two fields
# (item id and title) are used, so read just those instead of inventing
# placeholder names for every remaining column and dropping them afterwards.
movies = pd.read_csv(
    './data/ml-100k/u.item',
    sep='|',
    encoding='latin-1',
    header=None,
    usecols=[0, 1],
    names=['item_id', 'title'],
)

In [5]:
# Preview the first five rows of each frame, ratings first and movies second.
for preview in (ratings.head(), movies.head()):
    display(preview)

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


Unnamed: 0,item_id,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [None]:
# 2. Preprocess
# Keep only the three rating columns used downstream, then attach each
# movie's title through an inner join on item_id.
ratings = pd.merge(
    ratings.loc[:, ['user_id', 'item_id', 'rating']],
    movies,
    on='item_id',
)

In [12]:
# 3. 그래프 생성
G = nx.Graph()

# 사용자 노드 추가
user_ids = ratings['user_id'].unique()
G.add_nodes_from(user_ids, node_type='user')

# 아이템 노드 추가
item_ids = ratings['item_id'].unique()
G.add_nodes_from(item_ids, node_type='item')

# 사용자-아이템 엣지 추가 (평점 4 이상인 경우만 연결)
threshold = 4
filtered_ratings = ratings[ratings['rating'] >= threshold]
for idx, row in filtered_ratings.iterrows():
    user = row['user_id']
    item = row['item_id']
    G.add_edge(user, item, edge_type='rated')

In [21]:
# 4. 그래프 기반 추천 알고리즘 적용 (예: Personalized PageRank)
def personalized_pagerank(graph, user, alpha=0.85):
    personalization = {node: 0 for node in graph.nodes()}
    personalization[user] = 1
    pr = nx.pagerank(graph, alpha=alpha, personalization=personalization)
    return pr

# 특정 사용자에 대한 추천
user_id = 196  # 예시 사용자 ID
pr_scores = personalized_pagerank(G, user_id)

# 아이템 노드 중에서 해당 사용자가 아직 평가하지 않은 아이템을 추천
interacted_items = set(ratings[ratings['user_id'] == user_id]['item_id'])
candidate_items = [node for node in G.nodes() if G.nodes[node].get('node_type') == 'item' and node not in interacted_items]
recommendations = sorted(candidate_items, key=lambda x: pr_scores.get(x, 0), reverse=True)[:10]

# 추천 결과 출력
recommended_movies = movies[movies['item_id'].isin(recommendations)]
print('Recommendations for user {}:'.format(user_id))
print(recommended_movies['title'].tolist())

Recommendations for user 196:
['Toy Story (1995)', 'Twelve Monkeys (1995)', 'Star Wars (1977)', 'Shawshank Redemption, The (1994)', 'Willy Wonka and the Chocolate Factory (1971)', 'Raiders of the Lost Ark (1981)', 'Dead Poets Society (1989)', 'Leaving Las Vegas (1995)', 'Star Trek V: The Final Frontier (1989)', 'Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963)']
