# Project Ανάκτηση Πληροφορίας
### Part 1

#### Imports:

In [19]:
try:
    import os
    import sys
    
    import elasticsearch
    from elasticsearch import Elasticsearch 
    import pandas as pd
    from elasticsearch import helpers
    print("All Modules Loaded ! ")
    import warnings   
    warnings.filterwarnings('ignore')
except Exception as e:
    print("Some Modules are Missing {}".format(e))

All Modules Loaded ! 


In [20]:
# List the entries of the current working directory, one per line, to confirm
# that the dataset files are present.
for x in os.listdir("."):
    print(x)

.ipynb_checkpoints
BX-Book-Ratings.csv
BX-Books.csv
BX-Users.csv
Code.py
D2V_Model.model
Datasets.zip
information_retrieval_project_2021.docx
ir2021_1053708_1066488.zip
output.csv
Part_1.ipynb
Part_2.ipynb
Part_3.0.ipynb
Part_4.0.ipynb
vector.jpg


## Read Dataset

In [21]:
# Load the books table from the CSV file in the working directory into a DataFrame.
df = pd.read_csv("BX-Books.csv")

## Creating the Elasticsearch Client

In [22]:
# Address of the Elasticsearch node the client connects to (local machine).
ENDPOINT = "http://localhost:9200/"

# Client handle used by every later cell. The timeout is set to 600
# (presumably seconds, to leave room for the bulk upload — confirm against
# the installed client version).
es = Elasticsearch(hosts=ENDPOINT, timeout=600)

In [23]:
# Connectivity check against the configured node; the recorded result is True.
es.ping()

True

In [24]:
# Preview the first two rows to sanity-check the parsed columns.
df.head(2)

Unnamed: 0,isbn,book_title,book_author,year_of_publication,publisher,summary,category
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,Provides an introduction to classical myths pl...,['social science']
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,"In a small town in Canada, Clara Callan reluct...",['actresses']


In [25]:
# Column names available in the dataset; the bulk-action generator reads them by name.
df.columns

Index(['isbn', 'book_title', 'book_author', 'year_of_publication', 'publisher',
       'summary', 'category'],
      dtype='object')

In [26]:
# (rows, columns) of the loaded table.
df.shape

(134692, 7)

In [27]:
# Count missing values per column; the recorded result is zero for every column.
df.isna().sum()

isbn                   0
book_title             0
book_author            0
year_of_publication    0
publisher              0
summary                0
category               0
dtype: int64

#### Convert the DataFrame to a list of record dicts in order to send it to Elasticsearch

In [35]:
# One dict per row, keyed by column name (a list of records).
df2 = df.to_dict(orient="records")

In [36]:
# Inspect the first record to see the dict layout produced from the DataFrame.
df2[0]

{'isbn': '0195153448',
 'book_title': 'Classical Mythology',
 'book_author': 'Mark P. O. Morford',
 'year_of_publication': 2002,
 'publisher': 'Oxford University Press',
 'summary': 'Provides an introduction to classical myths placing the addressed\ntopics within their historical context, discussion of archaeological\nevidence as support for mythical events, and how these themes have\nbeen portrayed in literature, art, ...',
 'category': "['social science']"}

#### Format data


In [32]:
def generator(df2):
    """Yield one Elasticsearch bulk-index action per book record.

    Parameters
    ----------
    df2 : iterable of dict
        Book records. Each record is read with ``dict.get`` for the keys
        ``isbn``, ``book_title``, ``book_author``, ``summary``, ``category``,
        ``year_of_publication`` and ``publisher``; missing text fields fall
        back to ``""`` and a missing ``isbn`` or year falls back to ``None``.

    Yields
    ------
    dict
        An action for the ``books_elk`` index whose ``_id`` is the record's
        ISBN and whose ``_source`` holds the renamed book fields.
    """
    for line in df2:
        yield {
            '_index': 'books_elk',
            '_type': '_doc',
            '_id': line.get("isbn", None),
            '_source': {
                "title": line.get("book_title", ""),
                "author": line.get("book_author", ""),
                "summary": line.get("summary", ""),
                "category": line.get("category", ""),
                "year": line.get("year_of_publication", None),
                "publisher": line.get("publisher", ""),
            },
        }
    # No explicit ``raise StopIteration`` here: since PEP 479 (Python 3.7+)
    # raising it inside a generator is converted to RuntimeError, which would
    # make every full iteration fail. Falling off the end of the function
    # ends the generator correctly.

In [37]:
# Build a generator over the records so a single action can be inspected by hand.
custom = generator(df2)

In [38]:
# Pull the first bulk action to verify its structure before uploading.
next(custom)

{'_index': 'books_elk',
 '_type': '_doc',
 '_id': '0195153448',
 '_source': {'title': 'Classical Mythology',
  'author': 'Mark P. O. Morford',
  'summary': 'Provides an introduction to classical myths placing the addressed\ntopics within their historical context, discussion of archaeological\nevidence as support for mythical events, and how these themes have\nbeen portrayed in literature, art, ...',
  'category': "['social science']",
  'year': 2002,
  'publisher': 'Oxford University Press'}}

## Upload to Elasticsearch

In [41]:
try:
    # Stream every book action to the cluster through the bulk helper.
    res = helpers.bulk(es, generator(df2))
    print("Working")
except Exception as e:
    # Report the failure instead of discarding it silently; otherwise a failed
    # or partial upload is indistinguishable from a cell that never ran.
    print("Bulk upload failed: {}".format(e))

## Query

In [42]:
# Ask the user for a search phrase, then build a full-text `match` query on the
# title field. `_source` restricts each returned hit to its title.
# NOTE(review): the body asks for 10 hits while the search call passes size=5,
# and the recorded run printed 5 titles — confirm which limit is intended.
inp = input("Search Book: ")
myquery = dict(
    _source=["title"],
    size=10,
    query={"match": {"title": inp}},
)

Search Book: Harry Potter


### Response 

In [43]:
# Run the prepared query against the books index and keep the raw response.
res = es.search(index='books_elk', body=myquery, size=5)

In [44]:
# Display the raw search response (timing, shard info and the scored hits).
res

{'took': 3085,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 142, 'relation': 'eq'},
  'max_score': 18.176891,
  'hits': [{'_index': 'books_elk',
    '_type': '_doc',
    '_id': '059035342X',
    '_score': 18.176891,
    '_source': {'title': "Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback))"}},
   {'_index': 'books_elk',
    '_type': '_doc',
    '_id': '043965548X',
    '_score': 18.176891,
    '_source': {'title': 'Harry Potter and the Prisoner of Azkaban (Harry Potter)'}},
   {'_index': 'books_elk',
    '_type': '_doc',
    '_id': '1594130027',
    '_score': 17.492256,
    '_source': {'title': 'Harry Potter and the Prisoner of Azkaban (Harry Potter (Paperback))'}},
   {'_index': 'books_elk',
    '_type': '_doc',
    '_id': '031226481X',
    '_score': 17.38835,
    '_source': {'title': 'We Love Harry Potter!'}},
   {'_index': 'books_elk',
    '_type': '_doc',
    '_id': '0970844204',
    '_score'

Extract and print the book titles from the received response

In [45]:
# Walk the returned hits in ranked order and print "<rank> : <title>" for each
# hit that carries a `_source` payload, followed by a blank line.
r = res["hits"].get('hits')
for rank, hit in enumerate(r, start=1):
    if '_source' in hit:
        print(rank, ":", hit['_source']['title'], '\n')

1 : Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback)) 

2 : Harry Potter and the Prisoner of Azkaban (Harry Potter) 

3 : Harry Potter and the Prisoner of Azkaban (Harry Potter (Paperback)) 

4 : We Love Harry Potter! 

5 : The Magical Worlds of Harry Potter 

