In [None]:
import pandas as pd
from elasticsearch import Elasticsearch, helpers

In [None]:
# Client for the Elasticsearch node at http://localhost:9200; the indexing
# and search cells below all go through this single instance.
es = Elasticsearch([{'host': 'localhost', 'port': 9200, 'scheme': 'http'}])

In [None]:
def index_data(records: list, index: str):
    """Lazily turn row dicts into Elasticsearch bulk-index actions.

    Args:
        records: Documents to index, one dict per document (for example the
            output of ``DataFrame.to_dict(orient='records')``). Any iterable
            of dicts works; it is consumed exactly once.
        index: Name of the target index, copied into every action.

    Yields:
        One ``{"_index": index, "_source": record}`` dict per record, in
        input order. No ``_id`` is set.
    """
    for record in records:
        yield {"_index": index, "_source": record}

In [None]:
# Read the books CSV into one dict per row, then bulk-index every row as a
# document of the 'books' index.
book_records = (
    pd.read_csv('../data/pp_books.csv')
    .to_dict(orient='records')
)
book_actions = index_data(book_records, index='books')
helpers.bulk(es, actions=book_actions)

In [None]:
# Read the ratings CSV into one dict per row, then bulk-index every row as a
# document of the 'ratings' index.
rating_records = (
    pd.read_csv('../data/ratings.csv')
    .to_dict(orient='records')
)
rating_actions = index_data(rating_records, index='ratings')
helpers.bulk(es, actions=rating_actions)

In [None]:
def search_books(size: int, query: str, uid: int, percent: float = 0.1) -> list:
    """Search the ``books`` index and re-rank the hits with one user's ratings.

    A ``multi_match`` query is run over the textual book fields. The ratings
    that user ``uid`` gave to the returned ISBNs are then fetched from the
    ``ratings`` index, ``2 * rating`` is added to the score of every rated
    hit, the hits are sorted by the adjusted score and only the top
    ``percent`` share is returned.

    Uses the module-level ``es`` client.

    Args:
        size: Maximum number of hits requested from each index.
        query: Free-text search string.
        uid: Id of the user whose ratings boost the scores.
        percent: Fraction of the retrieved hits to keep (0.1 keeps the top
            10%). Values above 1 keep everything.

    Returns:
        The kept Elasticsearch hit dicts, best first, with ``_score`` updated
        in place. Empty when nothing matched or ``percent`` is not positive;
        otherwise at least one hit is returned, even when
        ``len(hits) * percent`` is below 1.
    """
    fields = ["book_author", "book_title", "summary", "category", "publisher"]
    # Only search the year field when the query is a plain decimal number.
    # isdecimal() is used instead of isnumeric() because the latter also
    # accepts characters such as '½' or '²', which are not valid integers.
    if query.isdecimal():
        fields.append("year_of_publication")

    books_res = es.search(
        index='books',
        size=size,
        query={
            "multi_match": {
                "query": query,
                "type": "most_fields",
                "fields": fields
            }
        })
    book_hits = books_res['hits']['hits']

    # Nothing to rank (or nothing requested): skip the ratings round trip.
    # The percent guard also stops a negative value from turning the final
    # slice into "drop the last k hits".
    if not book_hits or percent <= 0:
        return []

    isbn_list = [book_hit['_source']['isbn'] for book_hit in book_hits]

    # NOTE(review): if 'isbn' is dynamically mapped as analyzed text, this
    # un-analyzed terms lookup may miss ISBNs containing an upper-case 'X';
    # confirm the mapping (a keyword field may be needed).
    ratings_res = es.search(
        index='ratings',
        size=size,
        query={
            "bool": {
                "must": [
                    {"term": {"uid": uid}},
                    {"terms": {"isbn": isbn_list}}
                ]
            }
        })
    rating_hits = ratings_res['hits']['hits']
    rated_isbn_dict = {
        rating_hit['_source']['isbn']: rating_hit['_source']['rating']
        for rating_hit in rating_hits
    }

    # Boost books the user has rated: each rating point is worth 2 score
    # points on top of the text-relevance score.
    for book_hit in book_hits:
        isbn = book_hit['_source']['isbn']
        if isbn in rated_isbn_dict:
            book_hit['_score'] += 2 * rated_isbn_dict[isbn]

    book_hits.sort(key=lambda hit: hit['_score'], reverse=True)

    # int() truncates, so small result sets used to collapse to zero results
    # (e.g. 7 hits * 0.1 -> 0). Always keep the best hit when there is one.
    keep = max(1, int(len(book_hits) * percent))
    return book_hits[:keep]

In [None]:
# Ask for the search text and for the user whose ratings personalise the
# ranking.
term = input("Enter search term: ")
# input() returns a string while search_books declares an integer uid;
# convert here so a malformed id fails immediately with a ValueError
# instead of reaching Elasticsearch.
user_id = int(input("Enter user ID: "))

# Retrieve up to 50 candidates; search_books keeps its default top share.
results = search_books(size=50, query=term, uid=user_id)

In [None]:
# Tabulate the ranked hits: the adjusted score (rounded to 2 decimals) comes
# first, followed by the book fields read from each hit's '_source'.
df_columns = ['score', 'isbn', 'book_title', 'book_author', 'summary', 'category', 'publisher', 'year_of_publication']
# Every column after 'score' is named exactly like its '_source' key.
source_fields = df_columns[1:]
df_data = [
    [round(hit['_score'], 2)] + [hit['_source'][field] for field in source_fields]
    for hit in results
]

pd.DataFrame(data=df_data, columns=df_columns)