In [1]:
from elasticsearch import Elasticsearch, helpers
from pandas import read_csv, DataFrame, merge
from csv import DictReader

## Connect to Elasticsearch

In [2]:
# Returns an Elasticsearch connection object.
def connect_to_elastic():
    """Create a client for the local Elasticsearch node and check that it responds.

    Returns:
        The Elasticsearch client configured for http://localhost:9200, once a
        ping has succeeded.

    Raises:
        ValueError: with the message 'Connection failed' when the ping raises
            or reports that the node is not reachable.
    """
    es = Elasticsearch([{'host':'localhost', 'port':9200, 'scheme': 'http'}], verify_certs=True)
    try:
        reachable = es.ping()
    except Exception as ex:
        # Bind the exception properly (a bare `except ex:` would look up an
        # undefined name and fail with NameError) and keep the original cause
        # chained for debugging.
        raise ValueError('Connection failed') from ex
    if not reachable:
        # A falsy ping result means the node did not answer; fail loudly here
        # instead of handing None back to the caller.
        raise ValueError('Connection failed')
    return es

In [3]:
# Notebook-level client handle. Note that, in the cells visible here,
# bulk_to_elastic() opens its own connection instead of reusing this one.
es = connect_to_elastic()

## Data Denormalization for each possible query

In [4]:
# Load the book catalogue; the path is relative to the notebook's working directory.
books = read_csv('Books/BX-Books.csv')
# Preview the first rows to check which columns were read.
books.head()

Unnamed: 0,isbn,book_title,book_author,year_of_publication,publisher,summary,category
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,Provides an introduction to classical myths pl...,['social science']
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,"In a small town in Canada, Clara Callan reluct...",['actresses']
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,"Here, for the first time in paperback, is an o...",['1940-1949']
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,"Describes the great flu epidemic of 1918, an o...",['medical']
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton & Company,A look at the incredibly well-preserved ancien...,['design']


In [6]:
# Load the individual ratings; the cells below rely on its 'uid', 'isbn'
# and 'rating' columns.
user_ratings = read_csv('Books/BX-Book-Ratings.csv')
# Preview the first rows to check which columns were read.
user_ratings.head()

Unnamed: 0,uid,isbn,rating
0,2,195153448,0
1,8,2005018,5
2,11400,2005018,0
3,11676,2005018,8
4,41385,2005018,0


In [7]:
# Average rating per book: discard the user id, then take the mean of the
# remaining columns within each isbn group (isbn becomes the result's index).
avg_user_ratings = (
    user_ratings
    .drop(columns=['uid'])
    .groupby(by=['isbn'])
    .mean()
)
avg_user_ratings.head()

Unnamed: 0_level_0,rating
isbn,Unnamed: 1_level_1
0000913154,8.0
0001010565,0.0
0001046438,9.0
0001046713,0.0
000104687X,6.0


In [8]:
# Attach each book's average rating to its title and summary. merge() uses an
# inner join by default, so books without any rating are left out.
books_avg_ratings = merge(
    left=books[['isbn', 'book_title', 'summary']],
    right=avg_user_ratings,
    left_on='isbn',
    right_on='isbn',
)
books_avg_ratings.head()

Unnamed: 0,isbn,book_title,summary,rating
0,195153448,Classical Mythology,Provides an introduction to classical myths pl...,0.0
1,2005018,Clara Callan,"In a small town in Canada, Clara Callan reluct...",4.928571
2,60973129,Decision in Normandy,"Here, for the first time in paperback, is an o...",5.0
3,374157065,Flu: The Story of the Great Influenza Pandemic...,"Describes the great flu epidemic of 1918, an o...",4.272727
4,393045218,The Mummies of Urumchi,A look at the incredibly well-preserved ancien...,0.0


In [10]:
# Persist the denormalized table for bulk indexing. Making 'isbn' the index
# means to_csv() writes it as the first column in place of the default
# integer row index.
books_avg_ratings.set_index('isbn').to_csv('books_avg_ratings.csv')

In [11]:
# Stores the rows of a CSV file in Elasticsearch.
def bulk_to_elastic(filename, index_name):
    """Bulk-index every row of a CSV file into an Elasticsearch index.

    Args:
        filename: path of a UTF-8 encoded CSV file whose first row holds the
            field names.
        index_name: name of the index the documents are sent to.

    Each CSV row is passed to ``helpers.bulk`` as one dictionary keyed by the
    header row; all values are strings, exactly as ``DictReader`` yields them.
    A missing file is reported on stdout instead of being raised.

    Raises:
        ValueError: if ``connect_to_elastic`` does not hand back a client.
    """
    es = connect_to_elastic()
    if es is None:
        # Without a client, helpers.bulk would fail later with an unrelated
        # error; stop here with the same message the connection helper uses.
        raise ValueError('Connection failed')
    try:
        # newline='' is what the csv module requires of files it reads;
        # without it, line breaks embedded in quoted fields are translated
        # before the parser sees them.
        with open(filename, encoding='utf8', newline='') as csv_file:
            # Each row becomes a dict keyed by the header row.
            reader = DictReader(csv_file)
            # Stream the rows to the target index.
            helpers.bulk(es, reader, index=index_name)
    except FileNotFoundError:
        print('File "' + str(filename) + '" doesn\'t exists..')

In [12]:
# Index the exported CSV into the 'books_avg_ratings' index.
bulk_to_elastic('books_avg_ratings.csv', 'books_avg_ratings')