<a href="https://colab.research.google.com/github/emilyliublair/Machine-Learning-Projects/blob/main/book_recommendation_using_knn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# import libraries
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt

In [None]:
# get data files
!wget https://cdn.freecodecamp.org/project-data/books/book-crossings.zip

!unzip book-crossings.zip

books_filename = 'BX-Books.csv'
ratings_filename = 'BX-Book-Ratings.csv'

In [None]:
# import csv data into dataframes
# Both files are read as ';'-separated, ISO-8859-1 encoded text. header=0
# combined with `names` discards the file's own header labels in favour of
# the short column names given here.

# Book catalogue: one row per ISBN, all columns kept as strings.
df_books = pd.read_csv(
    books_filename,
    sep=";",
    encoding="ISO-8859-1",
    header=0,
    names=['isbn', 'title', 'author'],
    usecols=['isbn', 'title', 'author'],
    dtype=dict(isbn='str', title='str', author='str'),
)

# Ratings: one row per (user, isbn) rating.
df_ratings = pd.read_csv(
    ratings_filename,
    sep=";",
    encoding="ISO-8859-1",
    header=0,
    names=['user', 'isbn', 'rating'],
    usecols=['user', 'isbn', 'rating'],
    dtype=dict(user='int32', isbn='str', rating='float32'),
)

In [None]:
# merge 2 datasets
# Join each rating to its book row on ISBN (merge's default inner join, so
# only ISBNs present in both frames survive).
data = df_books.merge(df_ratings, on='isbn')

# Number of ratings per user, counted on the merged frame.
user_count = (
    data.groupby('user')['rating']
    .count()
    .reset_index()
    .rename(columns={'rating': 'user_count'})
)

# Number of ratings per book (ISBN), counted on the merged frame.
book_count = (
    data.groupby('isbn')['rating']
    .count()
    .reset_index()
    .rename(columns={'rating': 'book_count'})
)

# Attach both counts to every rating row so they can be filtered on later.
data = data.merge(user_count, on='user').merge(book_count, on='isbn')

In [None]:
# clean out dataset to only retain statistically significant data
# Keep a rating row only when its user has at least 200 ratings AND its book
# (ISBN) has at least 100 ratings, using the counts attached in the previous cell.
data = data[(data['user_count'] >= 200) & (data['book_count'] >= 100)]

# Keep the first row for each (title, user) pair; the pivot on title/user in
# a later cell needs those pairs to be unique.
data = data.drop_duplicates(subset=['title', 'user'])

In [None]:
# rearrange dataframe with title first and ratings as values
# One row per title, one column per user. Passing values as a list makes the
# column index two-level: ('rating', <user>).
data_pivot = data.pivot(index='title', columns='user', values=['rating'])

# A (title, user) pair without a rating becomes 0.
data_pivot = data_pivot.fillna(0)

In [None]:
# build model
# Brute-force nearest-neighbour search using cosine distance between the
# rows (titles) of the pivot table.
model = NearestNeighbors(
    metric='cosine',
    algorithm='brute',
)
model.fit(data_pivot)

In [None]:
# function to return recommended books
def get_recommends(book = "", n_neighbors = 5, pivot = None, knn = None):
  """Return the nearest-neighbour recommendations for a book title.

  Args:
    book: Title to look up in the index of the pivot table.
    n_neighbors: Number of neighbours requested from the model. The closest
      hit is treated as the queried book itself and dropped, so the result
      holds n_neighbors - 1 recommendations.
    pivot: Title-indexed ratings table; defaults to the global `data_pivot`.
    knn: Fitted object exposing `kneighbors`; defaults to the global `model`.

  Returns:
    A two-element list `[book, [[title, distance], ...]]` whose inner list
    runs from the most distant neighbour to the closest one.

  Raises:
    ValueError: if `book` is not a title in the pivot table.
  """
  if pivot is None:
    pivot = data_pivot
  if knn is None:
    knn = model

  matches = pivot.index == book
  if not matches.any():
    # Fail early with a readable message instead of an opaque error from the
    # reshape / kneighbors calls on an empty selection.
    raise ValueError("Unknown book title: {!r}".format(book))

  # Hand the model a plain (1, n_features) array rather than a DataFrame.
  query = pivot.loc[matches].to_numpy().reshape(1, -1)
  distances, indices = knn.kneighbors(query, n_neighbors=n_neighbors, return_distance=True)

  # Position 0 is the nearest hit, assumed to be the queried book itself, so
  # it is skipped by position.
  neighbours = [
    [pivot.index[idx], dist]
    for idx, dist in zip(indices[0][1:], distances[0][1:])
  ]

  # kneighbors returns hits closest-first; reverse to farthest-first.
  return [book, neighbours[::-1]]


# Example query: fetch and print the recommendations for one title.
books = get_recommends("The Queen of the Damned (Vampire Chronicles (Paperback))")
print(books)

In [None]:
# Print the recommendations for the same title the test below queries.
books = get_recommends("Where the Heart Is (Oprah's Book Club (Paperback))")
print(books)

def test_book_recommendation():
  """Check get_recommends() against reference values and print the verdict.

  The queried title must come back as the first element of the result, and
  each of the first two recommendations must be one of the reference titles
  with a distance less than 0.05 away from the reference distance at the
  same position. Nothing is returned; the outcome is printed.
  """
  query_title = "Where the Heart Is (Oprah's Book Club (Paperback))"
  expected_titles = ["I'll Be Seeing You", 'The Weight of Water', 'The Surgeon', 'I Know This Much Is True']
  expected_distances = [0.8, 0.77, 0.77, 0.77]

  recommends = get_recommends(query_title)

  passed = True
  if recommends[0] != query_title:
    passed = False

  # Only the first two recommendations are compared.
  for position in range(2):
    entry = recommends[1][position]
    title_unknown = entry[0] not in expected_titles
    distance_off = abs(entry[1] - expected_distances[position]) >= 0.05
    if title_unknown or distance_off:
      passed = False

  if not passed:
    print("You havn't passed yet. Keep trying!")
  else:
    print("You passed the challenge! 🎉🎉🎉🎉🎉")

# Run the check; it prints whether the recommendations match the reference values.
test_book_recommendation()