## 잠재요인 모델 기반 협업 필터링

In [1]:
from surprise import Dataset
data = Dataset.load_builtin('ml-100k')

In [3]:
import pandas as pd
ratings = pd.DataFrame(data.raw_ratings, columns = ['user','item','rate','timestamp'])
ratings.head()

Unnamed: 0,user,item,rate,timestamp
0,196,242,3.0,881250949
1,186,302,3.0,891717742
2,22,377,1.0,878887116
3,244,51,2.0,880606923
4,166,346,1.0,886397596


In [6]:
u_cols=['user_id','age','sex','occupation','zip_code']
users = pd.read_csv('ml-100k/u.user', sep='|', names = u_cols)
print(users.shape)
users.head()

(943, 5)


Unnamed: 0,user_id,age,sex,occupation,zip_code
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [7]:
m_cols=['movie_id','title','release_date','video_release_data','imdb_url']
movies = pd.read_csv('ml-100k/u.item', sep='|', names = m_cols, usecols =range(5), encoding='latin1')
print(movies.shape)
movies.head()

(1682, 5)


Unnamed: 0,movie_id,title,release_date,video_release_data,imdb_url
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995)


In [10]:
from surprise.model_selection import train_test_split
from surprise import KNNBasic
from surprise import SVD
trainset, testset = train_test_split(data, test_size=0.5, random_state=0)
model = SVD(n_factors=100, random_state=0)
predictions = model.fit(trainset).test(testset)
predictions[:3]

[Prediction(uid='18', iid='211', r_ui=5.0, est=3.9276307122324847, details={'was_impossible': False}),
 Prediction(uid='903', iid='187', r_ui=5.0, est=4.448397987705509, details={'was_impossible': False}),
 Prediction(uid='283', iid='393', r_ui=4.0, est=3.7683050633419897, details={'was_impossible': False})]

In [11]:
from surprise import accuracy
rmse = accuracy.rmse(predictions)

RMSE: 0.9565


In [12]:
def recommend(predictions, n, k):
    print("recommend list")
    uids = [pred.uid for pred in predictions][:n]
    for uid in uids:
        seen_movies = ratings[ratings.user == uid]['item'].tolist()
        total_movies = movies['movie_id'].tolist()
        unseen_movies = [movie for movie in total_movies if movie not in seen_movies]
        pred = [model.predict(str(uid), str(item)) for item in unseen_movies]
        pred.sort(key=lambda pred: pred[3], reverse=True)
        top_pred = pred[:k]
        top_ids = [int(pred.iid) for pred in top_pred]
        top_titles = movies[movies.movie_id.isin(top_ids)]['title']
        top_rating = [pred.est for pred in top_pred]
        top_preds=[(id, title, rating) for id, title, rating in zip(top_ids, top_titles, top_rating)]
        print("ID", uid)
        for top_movie in top_preds:
            print(top_movie[1], ":", top_movie[2])
recommend(predictions, 3, 3)

recommend list
ID 18
Usual Suspects, The (1995) : 4.737288357661713
Wrong Trousers, The (1993) : 4.71249408605412
Chinatown (1974) : 4.632246151774594
ID 903
Star Wars (1977) : 4.754275130090958
To Kill a Mockingbird (1962) : 4.740851710161463
North by Northwest (1959) : 4.703333276928568
ID 283
Wrong Trousers, The (1993) : 5
Good Will Hunting (1997) : 5
Close Shave, A (1995) : 5


출처 : https://www.youtube.com/watch?v=ioUwsOt4LgI