# Book Recommender

### Import libraries and Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances, cosine_distances, cosine_similarity

In [2]:
# Load the book-ratings table from ./data/clean; the first CSV column becomes the index
books_df = pd.read_csv('./data/clean/books_df.csv', index_col=0)

In [3]:
# Preview a single row to check which columns are available
books_df.head(1)

Unnamed: 0,book_name,book_id,user_id,rating
0,Agile Web Development with Rails: A Pragmatic ...,13083,1,5


### Prepare recommender using cosine distances

In [4]:
# Build a book x user ratings table: one row per title, one column per user
pivot_df = books_df.pivot_table(values='rating', index='book_name', columns='user_id')

# Missing (book, user) ratings are filled with 0, then the table is stored in CSR form
sparse_df = sparse.csr_matrix(pivot_df.fillna(value=0))

# Cosine distance between every pair of book rows (smaller distance = more similar)
recommender = pairwise_distances(sparse_df, metric='cosine')

# Wrap the distance matrix in a dataframe labelled by book title on both axes
rec_df = pd.DataFrame(data=recommender, index=pivot_df.index, columns=pivot_df.index)

### Define book recommender function

In [5]:
def book_recommender(book_title, n=5, distances=None):
    """Return the titles most similar to ``book_title`` with their cosine similarities.

    Parameters
    ----------
    book_title : str
        Title to look up; must be a column label of the distance frame.
    n : int, default 5
        Number of recommendations to return.
    distances : pandas.DataFrame, optional
        Square frame of pairwise cosine distances labelled by title on both
        axes. Defaults to the notebook-level ``rec_df``.

    Returns
    -------
    pandas.Series
        Cosine similarities (``1 - distance``) indexed by title, most similar
        first, excluding ``book_title`` itself.

    Raises
    ------
    KeyError
        If ``book_title`` is not a column of the distance frame.
    """
    if distances is None:
        distances = rec_df

    # Remove the queried title by label instead of assuming it sorts to
    # position 0: another book at distance 0 (or floating-point noise) could
    # otherwise push the query itself into its own recommendations.
    neighbours = distances[book_title].drop(labels=book_title, errors="ignore")

    # Stable sort keeps tied distances in a deterministic order.
    closest = neighbours.sort_values(kind="mergesort").head(n)
    return 1 - closest

### Example Recommendations

In [6]:
# Pass a book title to the recommender function; the result lists the most
# similar titles with their cosine similarities, highest first
book_recommender('To Kill a Mockingbird')

book_name
The Great Gatsby                      0.631748
The Catcher in the Rye                0.583633
The Adventures of Huckleberry Finn    0.516044
Animal Farm                           0.512917
Lord of the Flies                     0.504994
Name: To Kill a Mockingbird, dtype: float64

Similar to "To Kill a Mockingbird" on Goodreads.com:
- The Great Gatsby
- The Catcher in the Rye
- Animal Farm
- Lord of the Flies

In [7]:
# Most similar titles to '1984' by cosine similarity
book_recommender('1984')

book_name
Animal Farm               0.567584
Brave New World           0.543643
Lord of the Flies         0.539524
The Catcher in the Rye    0.526647
Fahrenheit 451            0.505689
Name: 1984, dtype: float64

Similar to "1984" on Goodreads.com:
- The Catcher in the Rye
- Fahrenheit 451
- Brave New World
- Lord of the Flies

In [8]:
# Titles must match the dataset's book_name exactly, including the series suffix
book_recommender("The Fellowship of the Ring (The Lord of the Rings, #1)")

book_name
The Two Towers (The Lord of the Rings, #2)            0.786718
The Return of the King (The Lord of the Rings, #3)    0.689853
The Hobbit, or There and Back Again                   0.518376
The Count of Monte Cristo                             0.437408
The Eye of the World (The Wheel of Time, #1)          0.381910
Name: The Fellowship of the Ring (The Lord of the Rings, #1), dtype: float64

Similar to "The Fellowship of the Ring" on Goodreads.com:
- The Two Towers
- The Hobbit, or There and Back Again

In [9]:
# Most similar titles to 'Life of Pi' by cosine similarity
book_recommender('Life of Pi')

book_name
Memoirs of a Geisha                                  0.484109
The Curious Incident of the Dog in the Night-Time    0.463734
The Kite Runner                                      0.460003
Water for Elephants                                  0.452543
The Girl Who Played with Fire (Millennium, #2)       0.452521
Name: Life of Pi, dtype: float64

Similar to "Life of Pi" on Goodreads.com:
- The Kite Runner
- Memoirs of a Geisha
- The Curious Incident of the Dog in the Night-Time
- Water for Elephants

### Export for streamlit app

In [10]:
# Add the book titles as an explicit first column for the streamlit app.
# The guard keeps this cell re-runnable: without it, a second run raises
# "ValueError: cannot insert book_name, already exists".
if "book_name" not in rec_df.columns:
    rec_df.insert(0, "book_name", rec_df.index)

# Save rec_df for use in the streamlit app.
# NOTE(review): the extension is "pk1" (digit one), probably meant as ".pkl";
# left unchanged because the app presumably loads this exact path — confirm.
rec_df.to_pickle('./streamlit_app/data/books.pk1')