In [4]:
import pandas as pd

In [5]:
# Load the user-movie ratings table and preview its first five rows.
ratings = pd.read_csv('ratings.csv')
ratings.head(5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [6]:
# (number of rows, number of columns) of the ratings table
ratings.shape

(100836, 4)

In [7]:
# Number of distinct users, number of distinct movies
ratings['userId'].nunique(), ratings['movieId'].nunique()

(610, 9724)

In [8]:
!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit-surprise-1.1.1.tar.gz (11.8 MB)
[K     |████████████████████████████████| 11.8 MB 4.7 MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1630134 sha256=4394e4b58d817f5c91ca61b509d1183681a6218719326003799de9952693b6b6
  Stored in directory: /root/.cache/pip/wheels/76/44/74/b498c42be47b2406bd27994e16c5188e337c657025ab400c1c
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.1


In [10]:
from surprise import SVD, Reader
from surprise.dataset import DatasetAutoFolds

import os

# Surprise's file-based loader is pointed at a header-less CSV. Export one from
# the already-loaded `ratings` frame when it is missing, so the notebook also
# runs from a fresh kernel; an existing file is left untouched.
if not os.path.exists('ratings_noh.csv'):
    ratings.to_csv('ratings_noh.csv', index=False, header=False)

# Each line of the file is "user,item,rating,timestamp"; ratings lie in [0.5, 5].
reader = Reader(line_format='user item rating timestamp', sep=',', rating_scale=(0.5, 5))
data_folds = DatasetAutoFolds('ratings_noh.csv', reader=reader)

In [11]:
# Use the entire dataset as training data (no held-out split)
trainset=data_folds.build_full_trainset()

In [12]:
# Create the SVD model and train it on the full trainset.
# random_state is fixed at 2022; n_factors / n_epochs are the tunable knobs.
model = SVD(
    n_factors=50,
    n_epochs=20,
    random_state=2022,
)
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f7493ae8510>

- 사용자 Id:9, 영화 Id:42

In [13]:
# Movie metadata
mdf = pd.read_csv('movies.csv')
# Preview the last five of the first forty rows.
mdf.iloc[:40].iloc[-5:]

Unnamed: 0,movieId,title,genres
35,39,Clueless (1995),Comedy|Romance
36,40,"Cry, the Beloved Country (1995)",Drama
37,41,Richard III (1995),Drama|War
38,42,Dead Presidents (1995),Action|Crime|Drama
39,43,Restoration (1995),Drama


In [14]:
# Check whether user 9 has already rated movie 42 (number of matching rows).
movieIds = ratings.loc[ratings['userId'] == 9, 'movieId']
movieIds[movieIds == 42].count()

0

In [15]:
# Predicted rating of user 9 for movie 42 (the `est` field is the predicted rating).
# The ids are passed as strings; the training data was read from a text file.
# `r_ui = None` only reflects that no true rating was passed to predict().
# `was_impossible: False` reports that the algorithm was able to produce an
# estimate; it does NOT say whether the user has already seen the movie.
uid,mid=str(9),str(42)
model.predict(uid,mid,verbose=True)

user: 9          item: 42         r_ui = None   est = 3.25   {'was_impossible': False}


Prediction(uid='9', iid='42', r_ui=None, est=3.249924377339538, details={'was_impossible': False})

In [17]:
# Movies user 9 has already rated vs. every movie in the catalogue.
seen_movies = ratings[ratings.userId == 9]['movieId'].tolist()
total_movies = mdf.movieId.tolist()

# Membership tests against a set are O(1); testing against the list made the
# filter below scan `seen_movies` once per catalogue entry.
seen_movie_set = set(seen_movies)
unseen_movies = [movie for movie in total_movies if movie not in seen_movie_set]

# (number of rated movies, number of candidate movies to score)
len(seen_movies), len(unseen_movies)

(46, 9696)

In [18]:
# Score every movie user 9 has not rated yet; ids are passed as strings.
uid = str(9)
predictions = [
    model.predict(uid, str(movie_id))
    for movie_id in unseen_movies
]


In [19]:
def sortkey_est(pred):
    """Sort key: return the estimated rating (`est`) of a prediction object."""
    estimated_rating = pred.est
    return estimated_rating

In [28]:
# Same user and same candidate movies as `predictions`, so reuse those results
# instead of scoring every unseen movie a second time. list() makes a separate
# list, so sorting `predictions` in place later does not reorder this one.
predict_lst = list(predictions)


In [29]:
# Rank predictions from highest to lowest estimated rating, then preview the top five.
predictions = sorted(predictions, key=sortkey_est, reverse=True)
predictions[0:5]

[Prediction(uid='9', iid='318', r_ui=None, est=4.070330794979969, details={'was_impossible': False}),
 Prediction(uid='9', iid='1217', r_ui=None, est=4.063731956995097, details={'was_impossible': False}),
 Prediction(uid='9', iid='1261', r_ui=None, est=4.051908410348554, details={'was_impossible': False}),
 Prediction(uid='9', iid='1204', r_ui=None, est=4.0227662213503805, details={'was_impossible': False}),
 Prediction(uid='9', iid='3275', r_ui=None, est=4.011500870494226, details={'was_impossible': False})]

In [34]:
# Take the ten highest-ranked predictions once instead of re-slicing for each list
# (`predictions` was sorted by `est`, descending, in the previous cell).
top_predictions = predictions[:10]

# Prediction ids are strings; convert back to int to match mdf.movieId.
top_movie_ids = [int(pred.iid) for pred in top_predictions]
top_movie_ratings = [pred.est for pred in top_predictions]

# Select the title by column name rather than column position, and use a loop
# variable that does not shadow the built-in `id`.
top_movie_titles = [
    mdf.loc[mdf['movieId'] == movie_id, 'title'].iloc[0]
    for movie_id in top_movie_ids
]


In [35]:
# Assemble the recommendation table: movie title column first, predicted rating second.
top_df = pd.DataFrame({'영화명': top_movie_titles})
top_df['예상평점'] = top_movie_ratings

# Label the (0..n-1) row index, then display the table.
top_df.index.name = 'mdf_index'
top_df

Unnamed: 0_level_0,영화명,예상평점
mdf_index,Unnamed: 1_level_1,Unnamed: 2_level_1
0,"Shawshank Redemption, The (1994)",4.070331
1,Ran (1985),4.063732
2,Evil Dead II (Dead by Dawn) (1987),4.051908
3,Lawrence of Arabia (1962),4.022766
4,"Boondock Saints, The (2000)",4.011501
5,"Amelie (Fabuleux destin d'Amélie Poulain, Le) ...",3.999696
6,Spotlight (2015),3.985348
7,Boogie Nights (1997),3.979885
8,"Usual Suspects, The (1995)",3.978625
9,"Philadelphia Story, The (1940)",3.978415


In [32]:
# NOTE(review): repeats the preview already shown right after ratings.csv is loaded;
# looks like a leftover scratch cell. Consider removing it.
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [33]:
# NOTE(review): the saved output of this cell lists movie ids, not titles. It was
# executed (In [33]) before the cell that builds the title list (In [34]).
# Restart the kernel and run all cells to refresh it.
top_movie_titles

[318, 1217, 1261, 1204, 3275, 4973, 142488, 1673, 50, 898]