<a href="https://colab.research.google.com/github/hipster4020/RecommendationSystem/blob/master/LatentFactorCollaborativeFiltering_Surprise.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [130]:
# Surprise 설치
!pip install scikit-surprise



In [131]:
# Library import
from surprise import SVD
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise.dataset import DatasetAutoFolds

import os

# 맥os의 경우 라이브러리를 중복 사용해 오류가 발생할 수 있으므로 환경변수를 아래와 같이 설정한다.
os.environ['KMP_DUPLICATE_LIB_OK']='True'


import pandas as pd
from surprise import Reader, Dataset

# **데이터 가공 및 변환**

In [132]:
ratings = pd.read_csv('/content/sample_data/Book-Ratings.csv')
books = pd.read_csv('/content/sample_data/BX-Books.csv')
bookratings = pd.merge(ratings, books, on='ISBN')
bookratings.rename(columns={"ISBN":"item"}, inplace=True)
books.rename(columns={"Book-Title":"title", "ISBN":"item"}, inplace=True)

  interactivity=interactivity, compiler=compiler, result=result)


In [133]:
# ratings_noh.csv 파일로 언로드 시 인덱스와 헤더를 모두 제거한 새로운 파일 생성.
bookratings.to_csv('/content/sample_data/Book-Ratings_noh.csv', index=False, header=False)

# **OS 파일 데이터로 Surprise 데이터 세트로 로딩**

In [134]:
col = 'user item rating'
reader = Reader(line_format=col, sep=',', rating_scale=(1, 10))
data=Dataset.load_from_file('/content/sample_data/Book-Ratings_noh.csv', reader=reader)

In [135]:
trainset, testset = train_test_split(data, test_size=.25, random_state=0)

# 수행 시마다 동일한 결과를 도출하기 위해 random_state 설정
algo = SVD(n_factors=50, random_state=0)

# 학습 데이터 세트로 학습하고 나서 테스트 데이터 세트로 평점 예측 후 RMSE 평가
algo.fit(trainset)
predictions = algo.test(testset)
accuracy.rmse(predictions)

RMSE: 3.5305


3.530515750032821

# **Surprise를 이용한 개인화 도서 추천 시스템 구축**



In [136]:
# 다음 코드는 train_test_split()으로 분리되지 않은 데이터 세트에 fit()을 호출해 오류가 발생한다.
data = Dataset.load_from_df(bookratings[['user','item','rating']], reader)
algo = SVD(n_factors=50, random_state=0)
algo.fit(data)

AttributeError: ignored

In [137]:
col = 'user item rating'
reader = Reader(line_format=col, sep=',', rating_scale=(1, 10))
# DatasetAutoFolds 클래스를 ratings_noh.csv 파일 기반으로 생성.
data_folds = DatasetAutoFolds(ratings_file='/content/sample_data/Book-Ratings_noh.csv', reader=reader)

# 전체 데이터를 학습 데이터로 생성함.
trainset = data_folds.build_full_trainset()

In [138]:
algo = SVD(n_epochs=20, n_factors=50, random_state=0)
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f50554389b0>

In [139]:
bookratings.head(5)

Unnamed: 0,user,item,rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,,,
1,2313,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,,,
2,6543,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,,,
3,8680,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,,,
4,10314,034545104X,9,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,,,


In [140]:
# userId=9 ISBN 데이터를 추출해 ISBN '034545104X' 데이터가 있는지 확인.
ISBNs = bookratings[bookratings['user']==9]['item']

if ISBNs[ISBNs=='034545104X'].count() ==0 :
  print('사용자 아이디 2313는 ISBN ''034545104X''의 평점 없음')

# 도서에 대한 상세 속성 정보 DataFrame 로딩
print(books[books['item']=='034545104X'])

사용자 아이디 2313는 ISBN 034545104X의 평점 없음
            item                 title  ... Unnamed: 9 Unnamed: 10
2966  034545104X  Flesh Tones: A Novel  ...        NaN         NaN

[1 rows x 11 columns]


In [141]:
uid = str(9)
iid = str('034545104X')

pred = algo.predict(uid, iid, verbose=True)

user: 9          item: 034545104X r_ui = None   est = 2.31   {'was_impossible': False}


In [142]:
def get_unread_surprise(bookratings, books, userId):
  # 입력값으로 들어온 userId에 해당하는 사용자가 평점을 매긴 모든 도서를 리스트로 생성
  read_books = bookratings[bookratings['user']==userId]['item'].tolist()

  # 모든 도셔의 ISBN를 리스트로 생성.
  total_books = books['item'].tolist()

  # 모든 도서의 ISBN 중 이미 평점을 매긴 도서의 ISBN를 제외한 후 리스트로 생성
  unread_books = [book for book in total_books if book not in read_books]
  print('평점 매긴 도서 수 : ', len(read_books), '추천 대상 도서 수 : ', len(unread_books),
        '전체 도서 수 : ', len(total_books))
  
  return unread_books

unread_books = get_unread_surprise(bookratings, books, 276847)

평점 매긴 도서 수 :  47 추천 대상 도서 수 :  271332 전체 도서 수 :  271379


In [143]:
def recomm_book_by_surprise(algo, userId, unread_books, top_n=10):

  # 알고리즘 객체의 predict() 메서드를 평점이 없는 영화에 반복 수행한 후 결과를 list 객체로 저장
  predictions = [algo.predict(str(userId), str(item)) for item in unread_books]
  
  # predictions list 객체는 surprise의 Prediction 객체를 원소로 가지고 있음.
  # [Prediction(uid='276847', iid='1', est=3.69), Prediction(uid='276847', iid='2', est=2.98),,,,]

  # 이를 est 값으로 정렬하기 위해서 아래의 sortkey_eat 함수를 정의함.
  # sortkey_est 함수는 list 객체의 sort() 함수의 키 값으로 사용되어 정렬 수행.
  def sortkey_est(pred):
    return pred.est
    
  # sortkey_est() 반환값의 내림 차순으로 정렬 수행하고 top_n개의 최상위 값 추출.
  predictions.sort(key=sortkey_est, reverse=True)
  top_predictions = predictions[:top_n]

  # top_n으로 추출된 영화의 정보 추출, 영화 아이디, 추천 예상 평점, 제목 추출
  top_book_ids = [ int(pred.iid) for pred in top_predictions]
  top_book_rating = [ pred.est for pred in top_predictions]
  top_book_titles = bookratings[bookratings.item.isin(top_book_ids)]['Book-Title']

  top_book_preds = [ (id, rating) for id, rating in 
                      zip(top_book_ids, top_book_rating)]
  return top_book_preds

unread_books = get_unread_surprise(bookratings, books, 276847)
top_book_preds = recomm_book_by_surprise(algo, 276847, unread_books, top_n=10)

print('  ')
print('  ')
print('***************************')
print('*** 추천 영화 리스트 Top 10 ***')
print('  ')
for top_book in top_book_preds :
  print(top_book[0], ":", top_book[1])

평점 매긴 도서 수 :  47 추천 대상 도서 수 :  271332 전체 도서 수 :  271379
  
  
***************************
*** 추천 영화 리스트 Top 10 ***
  
440234743 : 10
440225701 : 10
671042858 : 10
60976845 : 10
451526341 : 10
345335465 : 10
385503857 : 10
60932759 : 10
312195516 : 10
156027321 : 10


In [144]:
list = ['440234743', '440225701', '671042858', '60976845', '451526341', '345335465', '385503857', '60932759', '312195516', '156027321']

bookstitle=books[books.item.isin(list)]
bookstitle

Unnamed: 0,item,title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,Unnamed: 8,Unnamed: 9,Unnamed: 10
18,440234743,The Testament,John Grisham,1999,Dell,http://images.amazon.com/images/P/0440234743.0...,http://images.amazon.com/images/P/0440234743.0...,http://images.amazon.com/images/P/0440234743.0...,,,
52,440225701,The Street Lawyer,JOHN GRISHAM,1999,Dell,http://images.amazon.com/images/P/0440225701.0...,http://images.amazon.com/images/P/0440225701.0...,http://images.amazon.com/images/P/0440225701.0...,,,
126,671042858,The Girl Who Loved Tom Gordon,Stephen King,2000,Pocket,http://images.amazon.com/images/P/0671042858.0...,http://images.amazon.com/images/P/0671042858.0...,http://images.amazon.com/images/P/0671042858.0...,,,
134,60976845,Little Altars Everywhere: A Novel,Rebecca Wells,1996,Perennial,http://images.amazon.com/images/P/0060976845.0...,http://images.amazon.com/images/P/0060976845.0...,http://images.amazon.com/images/P/0060976845.0...,,,
285,451526341,Animal Farm,George Orwell,2004,Signet,http://images.amazon.com/images/P/0451526341.0...,http://images.amazon.com/images/P/0451526341.0...,http://images.amazon.com/images/P/0451526341.0...,,,
453,345335465,Dragonflight (Dragonriders of Pern Trilogy (Pa...,Anne McCaffrey,1991,Del Rey Books,http://images.amazon.com/images/P/0345335465.0...,http://images.amazon.com/images/P/0345335465.0...,http://images.amazon.com/images/P/0345335465.0...,,,
480,385503857,Oryx and Crake,Margaret Atwood,2003,Nan A. Talese,http://images.amazon.com/images/P/0385503857.0...,http://images.amazon.com/images/P/0385503857.0...,http://images.amazon.com/images/P/0385503857.0...,,,
519,60932759,Daughter of Fortune,Isabel Allende,2000,Perennial,http://images.amazon.com/images/P/0060932759.0...,http://images.amazon.com/images/P/0060932759.0...,http://images.amazon.com/images/P/0060932759.0...,,,
522,312195516,The Red Tent (Bestselling Backlist),Anita Diamant,1998,Picador USA,http://images.amazon.com/images/P/0312195516.0...,http://images.amazon.com/images/P/0312195516.0...,http://images.amazon.com/images/P/0312195516.0...,,,
563,156027321,Life of Pi,Yann Martel,2003,Harvest Books,http://images.amazon.com/images/P/0156027321.0...,http://images.amazon.com/images/P/0156027321.0...,http://images.amazon.com/images/P/0156027321.0...,,,
