# Train 데이터 불러오기

In [1]:
import numpy as np
import pandas as pd
import scipy

df = pd.read_csv('../data/train_data_by_time.csv')
df.movie_title = df.movie_title.str[:-1]

# Implicit 라이브러리를 활용한 ALS 모델 학습

In [2]:
from implicit.evaluation import *
from implicit.als import AlternatingLeastSquares as ALS
from implicit.bpr import BayesianPersonalizedRanking as BPR

users = list(np.sort(df['user_id'].unique()))
movies = list(df['movie_title'].unique())

rows = df['user_id'].astype('category').cat.codes
cols = df['movie_title'].astype('category').cat.codes
rating_matrix = scipy.sparse.csr_matrix((df['rating'], (rows, cols)))

R = scipy.sparse.csr_matrix.toarray(rating_matrix)

als_model = ALS(factors=20, regularization=0.01, iterations=10)
als_model.fit(rating_matrix)

result = np.dot(als_model.user_factors, als_model.item_factors.T) * 5
result = pd.DataFrame(result.T, index=movies)

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 10/10 [00:00<00:00, 24.27it/s]


# 추천 영화 목록 출력함수

In [3]:
def get_recommand_movies(user, cos_x=result, top=20):
    # 해당 user가 평점을 높게 줄 것으로 예상되는 영화 목록 내림차순
    re_li = cos_x.iloc[:, user-1].sort_values(ascending=False).index
    
    # 해당 user가 이미 시청한 영화 삭제
    watched = list(df[df['user_id'] == user]['movie_title'])
    re_li.drop(watched)
    
    return re_li[:top]

# Test 데이터 불러오기

In [4]:
test = pd.read_csv('../data/test_data_by_time.csv')
test.movie_title = test.movie_title.str[:-1]

# 성능평가 함수

In [5]:
def test_score():
    precision_list = []
    recall_list = []
    for i in test['user_id'].unique():
        test_title = list(test[(test.rating>3) & (test.user_id==i)].movie_title)
        try:
            if len(test_title)>1:
                recommended_list = list(get_recommand_movies(i))
                count=0
                for value in test_title:
                    if value in recommended_list:
                        count+=1
            
            precision = count/len(recommended_list)
            recall = count/len(test_title)
            precision_list.append(precision)
            recall_list.append(recall)

        except:
            pass

    return f'precision: {np.mean(precision_list)}, recall: {np.mean(recall_list)}'

In [6]:
test_score()

'precision: 0.029621848739495794, recall: 0.054939682631716266'