### Notebook purpose
To create a search engine for our list of books, so our recommendation page wont have to require the exact title of the book and more user friendly. <br>
User then will be directed to use the `isbn_index` in our recommendation system to find the similar books.

In [49]:
import logging
logging.captureWarnings(True)

import numpy as np
import pandas as pd

import pickle
import re

from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.metrics.pairwise import cosine_similarity

In [44]:
df = pd.read_csv("data/clean_data.csv")
images = pd.read_csv("data/images.csv")

df.head()

Unnamed: 0.1,Unnamed: 0,isbn,book_title,book_author,year_of_publication,publisher,mod_title,user_id,book_rating,location,age,isbn_index
0,0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,classical mythology,2.0,0.0,"stockton, california, usa",18.0,25028
1,1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,clara callan,8.0,5.0,"timmins, ontario, canada",24.0,73
2,2,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,clara callan,11400.0,0.0,"ottawa, ontario, canada",49.0,73
3,3,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,clara callan,11676.0,8.0,"n/a, n/a, n/a",24.0,73
4,4,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,clara callan,41385.0,0.0,"sudbury, ontario, canada",24.0,73


In [45]:
vectorizer = TfidfVectorizer()
tfidf = vectorizer.fit_transform(df["mod_title"])

In [109]:
def make_clickable(val):
    return '<a target="_blank" href="{}">Goodreads</a>'.format(val, val)

def show_image(val):
    return '<a href="{}"><img src="{}" width=50></img></a>'.format(val, val)

def search(query,vectorizer):
    processed = re.sub("[^a-zA-Z0-9 ]", "", query.lower())
    query_vec = vectorizer.transform([query])
    similarity = cosine_similarity(query_vec, tfidf).flatten()
    indices = np.argpartition(similarity, -10)[-10:]
    results = df.iloc[indices].merge(images,how = 'left', on = "isbn")[["isbn_index","isbn","book_title","book_author","year_of_publication","image_url_s"]].drop_duplicates()
    results = results.head(5)
    # results = results.sort_values("ratings", ascending=False)
    
    # return results.style.format({'image_url_s': show_image}) #use this if you want a dataframe as an output
    return results.set_index("isbn_index").T.to_dict() # use this if you want a dictionary as an output

In [110]:
search("harry potter", vectorizer)

{125414: {'isbn': '059035342X',
  'book_title': "Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback))",
  'book_author': 'J. K. Rowling',
  'year_of_publication': 1999,
  'image_url_s': 'http://images.amazon.com/images/P/059035342X.01.THUMBZZZ.jpg'},
 85032: {'isbn': '043955490X',
  'book_title': 'Harry Potter and the Goblet of Fire (Harry Potter)',
  'book_author': 'J. K. Rowling',
  'year_of_publication': 2003,
  'image_url_s': 'http://images.amazon.com/images/P/043955490X.01.THUMBZZZ.jpg'},
 85031: {'isbn': '0439554896',
  'book_title': 'Harry Potter and the Chamber of Secrets (Harry Potter)',
  'book_author': 'J. K. Rowling',
  'year_of_publication': 2003,
  'image_url_s': 'http://images.amazon.com/images/P/0439554896.01.THUMBZZZ.jpg'}}

In [111]:
pickle.dump(vectorizer, open('pickles/search_define.pkl','wb'))
pickle.dump(tfidf, open('pickles/search_fit_transform.pkl','wb'))
pickle.dump(search, open('pickles/search_result.pkl','wb'))

In [112]:
search_result = pickle.load(open('pickles/search_result.pkl', 'rb'))
search_define = pickle.load(open('pickles/search_define.pkl', 'rb'))
search_fit_transform = pickle.load(open('pickles/search_fit_transform.pkl', 'rb'))

In [113]:
search_result("harry potter",search_define)

{125414: {'isbn': '059035342X',
  'book_title': "Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback))",
  'book_author': 'J. K. Rowling',
  'year_of_publication': 1999,
  'image_url_s': 'http://images.amazon.com/images/P/059035342X.01.THUMBZZZ.jpg'},
 85032: {'isbn': '043955490X',
  'book_title': 'Harry Potter and the Goblet of Fire (Harry Potter)',
  'book_author': 'J. K. Rowling',
  'year_of_publication': 2003,
  'image_url_s': 'http://images.amazon.com/images/P/043955490X.01.THUMBZZZ.jpg'},
 85031: {'isbn': '0439554896',
  'book_title': 'Harry Potter and the Chamber of Secrets (Harry Potter)',
  'book_author': 'J. K. Rowling',
  'year_of_publication': 2003,
  'image_url_s': 'http://images.amazon.com/images/P/0439554896.01.THUMBZZZ.jpg'}}