In [25]:
import pandas as pd
import numpy as np
# 간단한 영화 데이터 생성
movie_data = {
    'movie_id': [101, 102, 103, 104, 105],
    'title': ['Movie A', 'Movie B', 'Movie C', 'Movie D', 'Movie E'],
    'genre': ['Action', 'Comedy', 'Action', 'Comedy', 'Drama']
}

user_ratings = {
    'user_id': [1, 1, 1, 2, 2],
    'movie_id': [101, 102, 103, 104, 105],
    'rating': [5, 3, 4, 4, 5]
}

movie_df = pd.DataFrame(movie_data)
user_ratings_df = pd.DataFrame(user_ratings)

# 장르를 one-hot encoding하여 콘텐츠 기반 행렬 생성
movie_df['genre_encoded'] = pd.get_dummies(movie_df['genre']).values.tolist()

# 특정 사용자가 평가한 영화의 장르 벡터를 가져옴
target_user_ratings = user_ratings_df[user_ratings_df['user_id'] == 1]
print("===========================================================================================")
print("target_user_ratings\n\n",target_user_ratings)
print("===========================================================================================")
rated_movies = movie_df[movie_df['movie_id'].isin(target_user_ratings['movie_id'])]
print("rated_movies\n\n",rated_movies)
print("===========================================================================================")
user_genre_profile = np.mean(np.array(rated_movies['genre_encoded'].tolist()), axis=0)
print(f"user_genre_profile = {user_genre_profile}")
print("===========================================================================================")
# 나머지 영화와의 유사도 계산
movie_df['similarity'] = movie_df['genre_encoded'].apply(lambda x: np.dot(user_genre_profile, x))       # 내적
print(f"movie_df['genre_encoded']  \n\n{movie_df['genre_encoded']}")
print("===========================================================================================")
print(f"movie_df['similarity']  \n\n{movie_df['similarity']}")
print("===========================================================================================")
# 추천할 영화 선택 (사용자가 보지 않은 영화)
recommendations = movie_df[~movie_df['movie_id'].isin(target_user_ratings['movie_id'])].sort_values(by='similarity', ascending=False)
print(f"Recommendations for user 1 based on content: {recommendations[['title', 'similarity']]}")

target_user_ratings

    user_id  movie_id  rating
0        1       101       5
1        1       102       3
2        1       103       4
rated_movies

    movie_id    title   genre         genre_encoded
0       101  Movie A  Action  [True, False, False]
1       102  Movie B  Comedy  [False, True, False]
2       103  Movie C  Action  [True, False, False]
user_genre_profile = [0.66666667 0.33333333 0.        ]
movie_df['genre_encoded']  

0    [True, False, False]
1    [False, True, False]
2    [True, False, False]
3    [False, True, False]
4    [False, False, True]
Name: genre_encoded, dtype: object
movie_df['similarity']  

0    0.666667
1    0.333333
2    0.666667
3    0.333333
4    0.000000
Name: similarity, dtype: float64
Recommendations for user 1 based on content:      title  similarity
3  Movie D    0.333333
4  Movie E    0.000000


In [34]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# 간단한 사용자-아이템 평점 데이터 생성
data = {
    'user_id': [1, 1, 1, 2, 2, 3, 3, 4, 4, 4],
    'item_id': [101, 102, 103, 101, 104, 101, 102, 103, 104, 105],
    'rating': [5, 3, 4, 4, 5, 3, 4, 2, 4, 5]
}

df = pd.DataFrame(data)
user_item_matrix = df.pivot_table(index='user_id', columns='item_id', values='rating').fillna(0)
print("===========================================================================================")
print("user_item_matrix\n\n",user_item_matrix)
# 사용자 간 유사도 계산 (코사인 유사도)
user_similarity = cosine_similarity(user_item_matrix)
print("===========================================================================================")
print("user_similarity\n\n",user_similarity)
user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)


# 특정 사용자의 추천 아이템 (예: user_id=1)
target_user = 1
similar_users = user_similarity_df[target_user].sort_values(ascending=False).index[1:]

print("===========================================================================================")
print("similar_users\n\n",similar_users)

# 유사한 사용자의 아이템 중에서, target_user가 평가한 아이템 구해두었다가 나중에 제외할 것
items_rated_by_target = user_item_matrix.loc[target_user, user_item_matrix.loc[target_user] > 0].index

print("===========================================================================================")
print("items_rated_by_target\n\n",items_rated_by_target)

recommendations = []
for user in similar_users:
    items = user_item_matrix.loc[user, user_item_matrix.loc[user] > 0].index
    new_recommendations = [item for item in items if item not in items_rated_by_target]
    recommendations.extend(new_recommendations)
    if len(recommendations) >= 2:
        break

print(f"Recommendations for user {target_user}: {set(recommendations)}")

user_item_matrix

 item_id  101  102  103  104  105
user_id                         
1        5.0  3.0  4.0  0.0  0.0
2        4.0  0.0  0.0  5.0  0.0
3        3.0  4.0  0.0  0.0  0.0
4        0.0  0.0  2.0  4.0  5.0
user_similarity

 [[1.         0.4417261  0.76367532 0.16865481]
 [0.4417261  1.         0.37481703 0.4656202 ]
 [0.76367532 0.37481703 1.         0.        ]
 [0.16865481 0.4656202  0.         1.        ]]
similar_users

 Index([3, 2, 4], dtype='int64', name='user_id')
items_rated_by_target

 Index([101, 102, 103], dtype='int64', name='item_id')
Recommendations for user 1: {104, 105}


In [32]:
# 협업 필터링을 통한 추천 결과 (간단한 예시)
collaborative_recommendations = [101, 104]

# 콘텐츠 기반 필터링을 통한 추천 결과
content_recommendations = [105, 102]

# 하이브리드 추천 (가중치 결합)
final_recommendations = list(set(collaborative_recommendations + content_recommendations))
print(f"Hybrid Recommendations: {final_recommendations}")

Hybrid Recommendations: [104, 105, 101, 102]
