# 아이템 기반 최근점 이웃 협업 필터링 실습
- 사용자가 영화의 평점을 매긴 사용자 - 영화 평점 행렬 데이터셋 이용
- https://grouplens.org/datasets/movielens/
- 속도를 감안해 1M 데이터 셋 사용

http://files.grouplens.org/datasets/movielens/ml-latest-small-README.html

In [1]:
import numpy as np
import pandas as pd

In [None]:
# 데이터 셋 3가지 : 
'''
1. movies.dat : 영화 관련 데이터
2. ratings.dat : 영화 평점 데이터

'''

In [28]:
movies = pd.read_csv('./data/movielens/ml-latest-small/movies.csv')

In [31]:
movies.head(3)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance


In [24]:
ratings = pd.read_csv('./data/movielens/ml-latest-small/ratings.csv')

In [32]:
ratings.head(3)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224


In [30]:
movies.shape, ratings.shape

((9742, 3), (100836, 4))

### 데이터 전처리

In [36]:
# 1. ratings의 timestamp 컬럼삭제 
ratings = ratings[['userId','movieId','rating']]
ratings.head(3)

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0


In [43]:
# 2. 피벗 테이블 구성
## 본 예제에서 기본이 될 데이터 셋 구성
ratings_matrix = ratings.pivot_table(
    'rating'
    , index = 'userId'
    , columns= 'movieId'
)

In [44]:
ratings_matrix.head(3)

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,


In [45]:
# movieId 를 title로 매치시키는 작업 필요
# title 추출
# 1. merge
rating_movies = pd.merge(ratings, movies, on='movieId')

# columns = 'title' 로 title 컬럼으로 pivot 수행
ratings_matrix = rating_movies.pivot_table(
    'rating'
    , index='userId'
    , columns= 'title'
)

In [46]:
# movieID가 title로 바뀐 피봇테이블 확인
ratings_matrix.head(3)

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,4.0,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,


- ratings_movies : 영화와 평점 데이터셋을 조인한 데이터셋
- ratings_matrix : 유저와 영화평점의 피봇테이블

In [48]:
# 유저가 안본 영화에 대해서는 NaN 이 되어있음 => 0으로 바꾸기
ratings_matrix = ratings_matrix.fillna(0)
ratings_matrix.head(3)

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 영화와 영화들 간 유사도 추출

In [55]:
# 행열 전치 : transpose()
ratings_matrix_T = ratings_matrix.transpose()
ratings_matrix_T.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'71 (2014),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
'Hellboy': The Seeds of Creation (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Round Midnight (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Salem's Lot (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Til There Was You (1997),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [56]:
# 영화 간 유사도 추출 
from sklearn.metrics.pairwise import cosine_similarity

item_sim = cosine_similarity(ratings_matrix_T, ratings_matrix_T)

item_sim_df = pd.DataFrame(
      data=item_sim
    , index=ratings_matrix.columns   # 영화 제목
    , columns=ratings_matrix.columns  # 동일하게 영화제목
)

In [58]:
item_sim_df.shape

(9719, 9719)

In [60]:
# 평점 기반 유사도
# 해당 영화 간 받은 평점이 유사하면 유사도가 높음
item_sim_df.head(2)

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'71 (2014),1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.141653,0.0,...,0.0,0.342055,0.543305,0.707107,0.0,0.0,0.139431,0.327327,0.0,0.0
'Hellboy': The Seeds of Creation (2004),0.0,1.0,0.707107,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [63]:
item_sim_df['Godfather, The (1972)'].sort_values(ascending=False)[:5]

title
Godfather, The (1972)                        1.000000
Godfather: Part II, The (1974)               0.821773
Goodfellas (1990)                            0.664841
One Flew Over the Cuckoo's Nest (1975)       0.620536
Star Wars: Episode IV - A New Hope (1977)    0.595317
Name: Godfather, The (1972), dtype: float64

###  아이템 기반 최근접 이웃 협업 필터링으로 개인화된 영화 추천
- 개인에게 최적화된 영화 추천 구현

In [66]:
# R^= S*R /|S|
# R : 실제 평점
# S : 유사도
# 파라미터들
# ratings_arr : 실제 평점
# item_sim_arr : 유사도
# top_N개가 아닌 모든 예측평점을 구하는 함수 생성

def predict_rating(ratings_arr, item_sim_arr):
    ratings_pred = ratings_arr.dot(item_sim_arr)/np.array([np.abs(item_sim_arr).sum(axis=1)])
    return ratings_pred

# 예측평점ratings_pred 를 반환

- ratings_movies : 영화와 평점 조인 데이터
- ratinvs_matrix : 유저와 영화와 평점을 피봇해둔 데이터
- item_sim_df : 영화간 유사도

In [69]:
ratings_pred = predict_rating(ratings_matrix.values, item_sim_df.values)
ratings_pred_matrix = pd.DataFrame(ratings_pred, index=ratings_matrix.index, columns=ratings_matrix.columns)
ratings_pred_matrix.head(3)
# 유저별 영화 예측 평점을 구했다. # 정규화된 값이므로 1이 최대

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.070345,0.577855,0.321696,0.227055,0.206958,0.194615,0.249883,0.102542,0.157084,0.178197,...,0.113608,0.181738,0.133962,0.128574,0.006179,0.21207,0.192921,0.136024,0.292955,0.720347
2,0.01826,0.042744,0.018861,0.0,0.0,0.035995,0.013413,0.002314,0.032213,0.014863,...,0.01564,0.020855,0.020119,0.015745,0.049983,0.014876,0.021616,0.024528,0.017563,0.0
3,0.011884,0.030279,0.064437,0.003762,0.003749,0.002722,0.014625,0.002085,0.005666,0.006272,...,0.006923,0.011665,0.0118,0.012225,0.0,0.008194,0.007017,0.009229,0.01042,0.084501


In [70]:
# 위의 예측평점과, 실제 유저가 준 평점을 비교해서 오차를 산출
# MSE 방식
from sklearn.metrics import mean_squared_error

# 사용자가 평점을 부여한 영화에 대해서만, 예측과의 오차를 리턴하는 함수 생성
def get_mse(pred, actual):
    pred = pred[actual.nonzero()].flatten()  #flatten() : 데이터 프레임을 1차원으로 변경, nonzero():0이 아닌 애들만 true 반환
    actual = actual[actual.nonzero()].flatten()  
    return mean_squared_error(pred, actual)

In [123]:
# mse 오차 구하기
get_mse(ratings_pred, ratings_matrix.values)


9.895354759094706

In [117]:
ratings_pred.shape

(610, 9719)

- 스케일이 달라서 오차가크다

- 특정 영화와 가장 비슷한 유사도를 가지는 영화 상위top_n개에 대해서만 유사도 벡터 적용해보기

In [120]:
def predict_rating_topsim(ratings_arr, item_sim_arr, n=20):
    # 사용자 아이템 평점 행렬 크기만큼 0으로 채운 저장공간 초기화
    pred = np.zeros(ratings_arr.shape)
    # 사용자-아이템 평점행렬의 열 크기만큼 Loop 수행
    for col in range(ratings_arr.shape[1]):  # 영화개수만큼 반복
        # [start : end : step] 구조를 사용, step이 -1이므로 역순으로 정렬됨.
        top_n_items = [np.argsort(item_sim_arr[:, col])[:-n-1:-1]]   # 역순으로 top_n개(20개) 인덱스 추출
        # 개인화된 예측 평점을 계산
        for row in range(ratings_arr.shape[0]):  # 유저 명수만큼 반복  
            # 유저별(row), 영화별(col) 예측 평점, # 내적구하기위해 Transpose. T
            pred[row, col] = item_sim_arr[col, :][top_n_items].dot(ratings_arr[row, :][top_n_items].T)
            pred[row, col] /= np.sum(np.abs(item_sim_arr[col, :][top_n_items]))
    
    return pred

In [76]:
ratings_matrix.shape

(610, 9719)

In [77]:
ratings_matrix.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [105]:
print(item_sim[:,0].argsort()[:-21:-1])
len(item_sim[:,0].argsort()[:-21:-1])


[   0  179 7085 6471 2253 5591 7674 7095 2247 3584 4925 3565 7537 8267
 7676 5111  183 8251 3990  199]


20

In [121]:
# 위 함수 적용하여 특정영화와 비슷 한 20개만
pred = predict_rating_topsim(ratings_matrix.values, item_sim_df.values, n=20)

  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':


In [122]:
get_mse(pred, ratings_matrix.values)
# 기존의 9.89보다 향상된 3.69

3.695009387428144

In [124]:
# 계산된 예측 평점 데이터는 DataFrame으로 재생성
ratings_pred_matrix = pd.DataFrame(data=ratings_pred, index= ratings_matrix.index, columns=ratings_matrix.columns)

In [126]:
# 사용자 기반 예측 평점 처리된 데이터프레임
ratings_pred_matrix.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.070345,0.577855,0.321696,0.227055,0.206958,0.194615,0.249883,0.102542,0.157084,0.178197,...,0.113608,0.181738,0.133962,0.128574,0.006179,0.21207,0.192921,0.136024,0.292955,0.720347
2,0.01826,0.042744,0.018861,0.0,0.0,0.035995,0.013413,0.002314,0.032213,0.014863,...,0.01564,0.020855,0.020119,0.015745,0.049983,0.014876,0.021616,0.024528,0.017563,0.0
3,0.011884,0.030279,0.064437,0.003762,0.003749,0.002722,0.014625,0.002085,0.005666,0.006272,...,0.006923,0.011665,0.0118,0.012225,0.0,0.008194,0.007017,0.009229,0.01042,0.084501
4,0.049145,0.277628,0.160448,0.206892,0.309632,0.042337,0.130048,0.116442,0.099785,0.097432,...,0.051269,0.076051,0.055563,0.054137,0.008343,0.159242,0.100941,0.062253,0.146054,0.231187
5,0.007278,0.066951,0.041879,0.01388,0.024842,0.01824,0.026405,0.018673,0.021591,0.018841,...,0.009689,0.022246,0.01336,0.012378,0.0,0.025839,0.023712,0.018012,0.028133,0.052315


In [127]:
ratings_pred_matrix.shape

(610, 9719)

In [156]:
# 9번 유저의  실제 평점 출력
user_rating_id = ratings_matrix.loc[9, :]
# 9번  유저가 이미 본영화들 확인
user_rating_id[user_rating_id>0].sort_values(ascending=False)[:10]

title
Adaptation (2002)                                                                 5.0
Austin Powers in Goldmember (2002)                                                5.0
Lord of the Rings: The Fellowship of the Ring, The (2001)                         5.0
Lord of the Rings: The Two Towers, The (2002)                                     5.0
Producers, The (1968)                                                             5.0
Citizen Kane (1941)                                                               5.0
Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)    5.0
Back to the Future (1985)                                                         5.0
Glengarry Glen Ross (1992)                                                        4.0
Sunset Blvd. (a.k.a. Sunset Boulevard) (1950)                                     4.0
Name: 9, dtype: float64

In [147]:
# 유저별 안본 영화 반환하는 함수 생성
def get_unseen_movies(ratings_matrix, userId):
    # 해당 유저id의 실제 평점 추출
    user_rating = ratings_matrix.loc[userId,:]
    # 해당 유저가 이미 본 영화의 인덱스 추출
    already_seen = user_rating[user_rating>0].index.tolist()
    
    # 모든 영화명을 list 객체로 만듬
    movies_list = ratings_matrix.columns.tolist()
    # 안본 영화를 리스트로 추출
    unseen_list = [movie for movie in movies_list if movie not in already_seen]
    
    return unseen_list

In [148]:
# 9번 유저가 관람하지 않은 영화명 추출
unseen_list = get_unseen_movies(ratings_matrix, 9)

In [149]:
# 예측평점 추출
# 가장 예측 평점이 높은 순으로 정렬
def recomm_movie_by_userid(pred_df, userId, unseen_list, top_n=10):
    recomm_movies = pred_df.loc[userId, unseen_list].sort_values(ascending=False)[:top_n]
    return recomm_movies

In [154]:
recomm_movies = recomm_movie_by_userid(ratings_pred_matrix, 9 , unseen_list, top_n=10)

In [155]:
recomm_movies

title
Venom (1982)                                      0.303278
Dr. Goldfoot and the Bikini Machine (1965)        0.258705
Frankie and Johnny (1966)                         0.234754
English Vinglish (2012)                           0.214774
Harmonists, The (1997)                            0.169338
Story of Women (Affaire de femmes, Une) (1988)    0.163884
3:10 to Yuma (1957)                               0.163884
Passenger, The (Professione: reporter) (1975)     0.163884
Child, The (L'enfant) (2005)                      0.163884
Cassandra's Dream (2007)                          0.163884
Name: 9, dtype: float64

In [158]:
recomm_movies_df = pd.DataFrame(
    recomm_movies.values, index=recomm_movies.index, columns=['pred_score']
)
recomm_movies_df.head()

Unnamed: 0_level_0,pred_score
title,Unnamed: 1_level_1
Venom (1982),0.303278
Dr. Goldfoot and the Bikini Machine (1965),0.258705
Frankie and Johnny (1966),0.234754
English Vinglish (2012),0.214774
"Harmonists, The (1997)",0.169338
