In [None]:
from surprise import SVD, Dataset, accuracy
from surprise.model_selection import train_test_split

# Load the 'ml-100k' dataset through Surprise's built-in loader.
data = Dataset.load_builtin("ml-100k")

# Hold out 25% of the ratings for evaluation; the seed fixes the split.
trainset, testset = train_test_split(data, random_state=0, test_size=0.25)

In [None]:
# Seed the model like every other SVD in this notebook so that the RMSE
# reported further down is reproducible across kernel restarts.
algo = SVD(random_state=0)
algo.fit(trainset)

In [None]:
# Run the fitted model over the held-out test set and preview the first five
# prediction objects.
predictions = algo.test(testset)
predictions[:5]

In [None]:
# Compact view of the first three predictions: (user id, item id, estimate).
[(p.uid, p.iid, p.est) for p in predictions[:3]]

In [None]:
# Predict a single (user, item) pair; both ids are passed as strings.
uid, iid = str(196), str(302)
pred = algo.predict(uid, iid)
pred

In [None]:
# Compute the RMSE of the test-set predictions.
accuracy.rmse(predictions)

In [None]:
import pandas as pd
import os

HOME = os.path.expanduser('~')

# Read the ratings file from the user's Downloads folder.
ratings = pd.read_csv(HOME + '/Downloads/ratings.csv')
# Write a copy without the index column or the header row.
# (The method is `to_csv`; the previous `to_scv` raised AttributeError.)
ratings.to_csv(HOME + '/Downloads/ratings_noh.csv', index=False, header=False)

In [None]:
from surprise import Reader

# Each line holds four fields in this order. pandas writes CSV fields separated
# by a bare ',' (no trailing space), so `sep` must be ',' as well; with ', '
# no line splits into four fields and parsing fails.
reader = Reader(line_format='user item rating timestamp', sep=',', rating_scale=(0.5, 5))
data = Dataset.load_from_file(HOME + '/Downloads/ratings_noh.csv', reader=reader)

In [None]:
# Split the dataset currently bound to `data`. Without this step the model is
# fit and scored on whatever `trainset`/`testset` were left over from the
# earlier ml-100k split.
trainset, testset = train_test_split(data, test_size=.25, random_state=0)

algo = SVD(n_factors=50, random_state=0)

algo.fit(trainset)
predictions = algo.test(testset)
accuracy.rmse(predictions)

In [None]:
# Build the dataset straight from the in-memory ratings frame, using only the
# user, item and rating columns.
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader=reader)

# Same 75/25 seeded split and seeded 50-factor SVD as before.
trainset, testset = train_test_split(data, random_state=0, test_size=0.25)
algo = SVD(random_state=0, n_factors=50)

algo.fit(trainset)
predictions = algo.test(testset)
accuracy.rmse(predictions)

In [None]:
from surprise.model_selection import cross_validate

# 5-fold cross-validation of a seeded SVD, reporting RMSE and MAE.
algo = SVD(random_state=0)
cross_validate(
    algo,
    data,
    cv=5,
    measures=['RMSE', 'MAE'],
    verbose=True,
)

In [None]:
from surprise.model_selection import GridSearchCV

# Candidate values per SVD hyperparameter for the grid search.
param_grid = {
    'n_epochs': [20, 40, 60],
    'n_factors': [50, 100, 200],
}
gs = GridSearchCV(SVD, param_grid, cv=3, measures=['rmse', 'mae'])
gs.fit(data)

# Best RMSE and the parameter combination that produced it.
print(gs.best_score['rmse'], gs.best_params['rmse'])

In [None]:
from surprise.dataset import DatasetAutoFolds

# pandas writes CSV fields separated by a bare ',' (no trailing space), so
# `sep` must be ',' here; with ', ' no line splits into four fields.
reader = Reader(line_format='user item rating timestamp', sep=',', rating_scale=(.5, 5.))
data_folds = DatasetAutoFolds(ratings_file=HOME + '/Downloads/ratings_noh.csv', reader=reader)

# Use every rating for training (no held-out test split).
trainset = data_folds.build_full_trainset()

In [None]:
# Fit a seeded SVD (50 factors, 20 epochs) on the current `trainset`.
algo = SVD(n_factors=50, n_epochs=20, random_state=0)
algo.fit(trainset)

In [None]:
movies = pd.read_csv(HOME + '/Downloads/movies.csv')

# Check whether user 9 has rated movie 42.
movieIds = ratings[ratings['userId']==9]['movieId']
if movieIds[movieIds==42].count() == 0:
    print("No ratings")

# Select the row whose movieId is 42. Comparing the whole frame (`movies==42`)
# only masks every cell element-wise and prints a frame full of NaN.
print(movies[movies['movieId']==42])

In [None]:
# Predict user 9's rating for movie 42; ids are passed as strings and
# verbose=True makes predict print its result.
uid, iid = str(9), str(42)
pred = algo.predict(uid, iid, verbose=True)

In [None]:
def get_unseen_surprise(ratings, movies, userId):
    """Return the ids of the movies that ``userId`` has not rated.

    Also prints the number of rated movies, the number of candidate (unrated)
    movies and the total number of movies.

    Args:
        ratings: DataFrame with ``userId`` and ``movieId`` columns.
        movies: DataFrame with a ``movieId`` column.
        userId: Value compared against ``ratings['userId']``.

    Returns:
        list: ``movieId`` values from ``movies``, in their original order,
        that are absent from the user's rated movies.
    """
    seen_movies = ratings[ratings['userId'] == userId]['movieId'].tolist()
    total_movies = movies['movieId'].tolist()

    # A set makes each membership test O(1) instead of scanning the seen list
    # once per catalogue entry.
    seen_lookup = set(seen_movies)
    unseen_movies = [movie for movie in total_movies if movie not in seen_lookup]

    print('평점 매긴 영화 수: ', len(seen_movies), ' 추천 대상 영화수: ', len(unseen_movies),
          ' 전체 영화 수: ', len(total_movies))
    # The original returned the undefined name `un_seen_movies` (NameError).
    return unseen_movies

In [None]:
def recomm_movie_by_surprise(algo, userId, unseen_movies, top_n=10, movies_df=None):
    """Return the ``top_n`` highest-estimated movies for a user.

    Args:
        algo: Object exposing ``predict(uid, iid)`` whose result has ``iid``
            and ``est`` attributes.
        userId: User id; converted with ``str`` before calling ``predict``.
        unseen_movies: Iterable of candidate movie ids; each is converted with
            ``str`` before calling ``predict``.
        top_n: Maximum number of recommendations to return.
        movies_df: Optional DataFrame with ``movieId`` and ``title`` columns
            used to resolve titles. When omitted, the module-level ``movies``
            frame is used, as in the original implementation.

    Returns:
        list[tuple]: ``(movie_id, title, estimated_rating)`` tuples ordered by
        estimated rating, highest first. ``title`` is ``None`` when the id is
        not present in the movies frame.
    """
    if movies_df is None:
        # Fall back to the notebook-level frame the function always relied on.
        movies_df = movies

    predictions = [algo.predict(str(userId), str(movieId)) for movieId in unseen_movies]
    predictions.sort(key=lambda pred: pred.est, reverse=True)
    top_predictions = predictions[:top_n]

    # Resolve each title by id. Filtering the frame with `isin` returned titles
    # in catalogue order, which paired them with the wrong id and rating.
    title_by_id = dict(zip(movies_df['movieId'].tolist(), movies_df['title'].tolist()))

    top_movie_preds = [
        (int(pred.iid), title_by_id.get(int(pred.iid)), pred.est)
        for pred in top_predictions
    ]
    return top_movie_preds

In [None]:
# Recommend ten movies for user 9 out of the ones that user has not rated.
unseen_movies = get_unseen_surprise(ratings, movies, 9)
top_movie_preds = recomm_movie_by_surprise(algo, 9, unseen_movies, top_n=10)

print('### Top-10 추천 영화 리스트 ###')
# Each entry is (movie id, title, estimated rating); show title and rating.
for entry in top_movie_preds:
    print(entry[1], ": ", entry[2])