### Delete index

In [None]:
import requests

# Shared request headers; later cells reuse this `headers` dict.
headers = {'Content-Type': 'application/json'}

# Delete the "news" index and show the server's JSON reply.
res = requests.delete('http://localhost:9200/news', headers=headers)
print(res.json())

### Create new index with static mapping

In [None]:
import requests

# Static mapping for the "news" index; sent verbatim as the request body.
json_body='''
    {
        "mappings":
            {
              "properties":{
                "url" :{"type":"text"}, 
                "title":{"type":"text","analyzer": "standard"},
                "description":{"type" : "text","analyzer": "standard"},  
                "author":{"type":"text"},
                "publishedAt":{"type":"date","format":"yyyy-MM-dd HH:mm:ss"},
                "source":{"type":"keyword"}
                }
            }
        }
'''

# Shared request headers; later cells reuse this `headers` dict.
headers = {'Content-Type': 'application/json'}

# PUT on the index URL creates the index with the mapping above.
res = requests.put(
    'http://localhost:9200/news',
    headers=headers,
    data=json_body,
)
print(res.json())

### Insert news articles into the index

In [None]:
import pandas as pd
# Load the articles; the first CSV row holds the column names and the first
# column is used as the frame's index.
filename = "news.csv"
df = pd.read_csv(
    filename,
    encoding='utf-8',
    header=[0],
    index_col=[0],
)

In [None]:
# Keep only the part of each timestamp before the first '.'.
# NOTE(review): presumably this strips fractional seconds so values match the
# 'yyyy-MM-dd HH:mm:ss' format declared in the index mapping - confirm.
# The vectorised .str accessor leaves missing values as NaN instead of raising
# AttributeError the way `x.split` does on a float NaN.
df['publishedAt'] = df['publishedAt'].str.split('.', n=1).str[0]

In [None]:
# Print a summary of the frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Display the last rows of the frame as a quick sanity check.
df.tail()

### Index each article as a document

In [None]:
# Post every row of the frame to the "news" index as its own document.
# iterrows() yields exactly one Series per row; the previous `df.loc[i]`
# lookup returns a whole DataFrame when an index label is repeated, which
# serialises to the wrong JSON shape and is posted once per duplicate.
for i, row in df.iterrows():
    print(i)
    json_body = row.to_json()
    res = requests.post(
        'http://localhost:9200/news/_doc',
        headers=headers,
        data=json_body,
    )
    print(res.json())

### Count the number of indexed documents

In [None]:
import requests

# The Count API is exposed at /<index>/_count. The old typed form
# (/news/_doc/_count) does not match the typeless mapping this notebook
# creates, so query the index-level endpoint instead.
res = requests.get('http://localhost:9200/news/_count')
print(res.json())

### Search

In [None]:
def search_text(text):
    """Build the JSON body of a `query_string` search.

    Args:
        text: Query expression placed in `query_string.query`.

    Returns:
        str: Compact JSON request body.
    """
    import json  # local import: this notebook has no shared json import

    # json.dumps escapes quotes/backslashes in `text`, which plain string
    # concatenation turned into malformed JSON. ensure_ascii=False keeps
    # non-ASCII text as-is and the compact separators reproduce the previous
    # hand-written layout for ordinary inputs.
    body = {"query": {"query_string": {"query": text}}}
    return json.dumps(body, ensure_ascii=False, separators=(",", ":"))

def search_match_field(field,text):
    """Build the JSON body of a `match` query on a single field.

    Args:
        field: Name of the field to match against.
        text: Text to match.

    Returns:
        str: Compact JSON request body.
    """
    import json  # local import: this notebook has no shared json import

    # Serialise through json.dumps so special characters in `field`/`text`
    # are escaped rather than breaking the hand-assembled JSON.
    body = {"query": {"match": {field: text}}}
    return json.dumps(body, ensure_ascii=False, separators=(",", ":"))

def search_match_field_wildcard(field,text):
    """Build the JSON body of a `wildcard` query on a single field.

    Args:
        field: Name of the field to search.
        text: Wildcard pattern (for example ``*abc*``).

    Returns:
        str: Compact JSON request body.
    """
    import json  # local import: this notebook has no shared json import

    # Serialise through json.dumps so quotes/backslashes in the pattern are
    # escaped rather than breaking the hand-assembled JSON.
    body = {"query": {"wildcard": {field: text}}}
    return json.dumps(body, ensure_ascii=False, separators=(",", ":"))

def search_multi_match_fields(fields,text):
    """Build the JSON body of a `multi_match` query over several fields.

    Args:
        fields: Either a JSON array given as a string (for example
            ``'["title","description"]'``), a bare field name, or an
            iterable of field names.
        text: Text to search for.

    Returns:
        str: Compact JSON request body (also printed, as before).
    """
    import json  # local import: this notebook has no shared json import

    if isinstance(fields, str):
        try:
            field_list = json.loads(fields)
        except ValueError:
            # Not JSON: treat the string as one plain field name.
            field_list = [fields]
        if isinstance(field_list, str):
            field_list = [field_list]
    else:
        field_list = list(fields)

    # json.dumps escapes special characters in `text`, which plain string
    # concatenation turned into malformed JSON.
    body = {"query": {"multi_match": {"query": text, "fields": field_list}}}
    json_body = json.dumps(body, ensure_ascii=False, separators=(",", ":"))
    print(json_body)  # echo the generated query for the notebook reader
    return json_body

def search_query_range_date(start,end,field="publishedAt",date_format=None):
    """Build the JSON body of a boosted `range` query on a date field.

    The query previously targeted a field named ``date``; the index mapping
    and the indexed rows only carry ``publishedAt``, so that is now the
    default target.

    Args:
        start: Lower bound (inclusive, ``gte``).
        end: Upper bound (inclusive, ``lte``).
        field: Date field to filter on.
        date_format: Optional value for the range query's ``format`` key;
            omitted from the body when None.
            NOTE(review): the mapping declares 'yyyy-MM-dd HH:mm:ss', so
            date-only bounds may need ``date_format='yyyy-MM-dd'`` - confirm
            against the cluster.

    Returns:
        str: Compact JSON request body.
    """
    import json  # local import: this notebook has no shared json import

    bounds = {"gte": start, "lte": end, "boost": 2.0}
    if date_format is not None:
        bounds["format"] = date_format
    body = {"query": {"range": {field: bounds}}}
    return json.dumps(body, ensure_ascii=False, separators=(",", ":"))

def search_match_range_date(start,end,field,text,date_field="publishedAt",date_format=None):
    """Build a `bool` query: `match` on one field AND a date `range`.

    The range clause previously targeted a field named ``date``; the index
    mapping and the indexed rows only carry ``publishedAt``, so that is now
    the default target.

    Args:
        start: Lower date bound (inclusive, ``gte``).
        end: Upper date bound (inclusive, ``lte``).
        field: Field for the `match` clause.
        text: Text for the `match` clause.
        date_field: Date field for the `range` clause.
        date_format: Optional value for the range clause's ``format`` key;
            omitted from the body when None.
            NOTE(review): the mapping declares 'yyyy-MM-dd HH:mm:ss', so
            date-only bounds may need ``date_format='yyyy-MM-dd'`` - confirm
            against the cluster.

    Returns:
        str: Compact JSON request body.
    """
    import json  # local import: this notebook has no shared json import

    bounds = {"gte": start, "lte": end}
    if date_format is not None:
        bounds["format"] = date_format
    # Both clauses sit under `must`, so a hit has to satisfy each of them.
    body = {
        "query": {
            "bool": {
                "must": [
                    {"match": {field: text}},
                    {"range": {date_field: bounds}},
                ]
            }
        }
    }
    return json.dumps(body, ensure_ascii=False, separators=(",", ":"))

def search_fuzzy_match(field,text,operator,fuzziness):
    """Build the JSON body of a fuzzy `match` query on a single field.

    Args:
        field: Name of the field to match against.
        text: Text to match.
        operator: Value for the match query's ``operator`` key.
        fuzziness: Value for the ``fuzziness`` key. A numeric string such as
            ``'2'`` is emitted as a JSON number (as before); any other string
            (for example ``'AUTO'``) is emitted as a JSON string; non-string
            values are passed through unchanged.

    Returns:
        str: Compact JSON request body.
    """
    import json  # local import: this notebook has no shared json import

    fuzz = fuzziness
    if isinstance(fuzz, str):
        try:
            # '2' -> 2, '"AUTO"' -> 'AUTO'
            fuzz = json.loads(fuzz)
        except ValueError:
            # Bare words like AUTO are not JSON; keep them as plain strings
            # (raw concatenation used to emit them unquoted, i.e. invalid).
            pass

    body = {
        "query": {
            "match": {
                field: {"query": text, "operator": operator, "fuzziness": fuzz}
            }
        }
    }
    return json.dumps(body, ensure_ascii=False, separators=(",", ":"))

In [None]:
# Run the search_text() query against the "news" index and show the hits.
json_body = search_text("קורונה")
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['hits'])

In [None]:
# Run the search_match_field() query on "title" and show the hits.
json_body = search_match_field("title", "קורונה")
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['hits'])

In [None]:
# Run the search_multi_match_fields() query over title and description.
json_body = search_multi_match_fields('["title","description"]', "סגר")
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['hits'])

In [None]:
# Run the search_query_range_date() query for the given date bounds.
json_body = search_query_range_date('2020-10-10', '2020-10-12')
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['hits'])

In [None]:
# Run the search_match_range_date() query: title match plus date bounds.
json_body = search_match_range_date('2020-10-10', '2020-10-12', 'title', 'חיסון')
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['hits'])

In [None]:
# Run the search_match_field_wildcard() query on "title" and show the hits.
json_body = search_match_field_wildcard('title', '*משכנ*')
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['hits'])

In [None]:
# Run the search_fuzzy_match() query on "title" and show the hits.
json_body = search_fuzzy_match('title', 'מגפה', 'and', '2')
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['hits'])

### Aggregate query

In [None]:
def aggregate_query():
    """Return the JSON body of a `terms` aggregation on `publishedAt`.

    `size` is 0, so the request asks for aggregation buckets only.
    """
    return (
        '{"size":0,'
        '"aggs":{"dates":{"terms":{"field":"publishedAt"}}}}'
    )

# Call the function defined in this cell; the previous name `aggrete_query`
# was a typo and raised NameError.
json_body = aggregate_query()
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['aggregations'])

In [None]:
def aggregate_field_query():
    """Return the JSON body of a two-level `terms` aggregation.

    Buckets by `publishedAt`, then by `source` inside each bucket; `size` is
    0, so the request asks for aggregation buckets only.
    """
    return (
        '{ "size":0, '
        '"aggs":{"dates":{"terms":{"field":"publishedAt"},'
        '"aggs": {"sources":{"terms":{"field":"source"}}}}}}'
    )

# Call the function defined in this cell; the previous name `aggrete_query`
# does not exist and raised NameError.
json_body = aggregate_field_query()
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['aggregations'])


In [None]:
def aggregate_fields_quer():
    """Return the JSON body of a fuzzy title match with nested aggregations.

    The `match` query on `title` filters the documents; they are then
    bucketed by `publishedAt` and, inside each bucket, by `source`. `size` is
    0, so the request asks for aggregation buckets only.
    """
    return (
        '{ "size":0,'
        '"query":{"match":{"title":{"query":"בנק ישראל","fuzziness": 2}}}, '
        '"aggs":{"dates":{"terms":{"field":"publishedAt"},'
        '"aggs": {"sources":{"terms":{"field":"source"}}}}}}'
    )

# Call the function defined in this cell; the previous name `aggrete_query`
# does not exist and raised NameError.
json_body = aggregate_fields_quer()
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['aggregations'])