### Delete index

In [None]:
import requests

# JSON content-type header sent with the request.
headers = {'Content-Type': 'application/json'}

# Delete the "news" index and print the JSON reply from Elasticsearch.
res = requests.delete(
    'http://localhost:9200/news',
    headers=headers,
)
print(res.json())

### Create new index with static mapping

In [None]:
import requests

# JSON content-type header sent with the request.
headers = {'Content-Type': 'application/json'}

# Explicit mapping for the "news" index: url and title are text fields
# (title with the standard analyzer), publishedAt is a yyyy-MM-dd date and
# source is a keyword.
json_body='''
    {
        "mappings":
            {
              "properties":{
                "url" :{"type":"text"}, 
                "title":{"type":"text","analyzer": "standard"},
                "publishedAt":{"type":"date","format":"yyyy-MM-dd"},
                "source":{"type":"keyword"}
                }
            }
        }
'''

# Create the index with that mapping and print the JSON reply.
res = requests.put(
    'http://localhost:9200/news',
    headers=headers,
    data=json_body,
)
print(res.json())

### Load data

In [None]:
import pandas as pd
# Read news.csv as UTF-8, taking row 0 as the header and column 0 as the index.
df = pd.read_csv(
    "news.csv",
    encoding='utf-8',
    header=[0],
    index_col=[0],
)
# Print a summary of the loaded frame.
df.info()

### Indexing the documents

In [None]:
import json

# Index every row of ``df`` as one document in the "news" index.
for i, r in df.iterrows():
    doc = {
        "url": r['url'],
        # Double quotes were always stripped from titles; kept so that
        # re-indexed titles match what earlier runs stored.
        "title": r['title'].replace('"', ''),
        "publishedAt": r['publishedAt'],
        "source": r['source'],
    }
    # json.dumps escapes quotes, backslashes and control characters that the
    # previous string concatenation passed through verbatim (producing an
    # invalid request body). allow_nan=False makes a missing (NaN) value
    # raise instead of emitting the non-JSON token ``NaN``.
    json_body = json.dumps(doc, ensure_ascii=False, allow_nan=False)
    res = requests.post(
        'http://localhost:9200/news/_doc',
        headers=headers,
        data=json_body.encode('utf-8'),
    )

### Count the number of indexed documents

In [None]:
import requests
import plotly.graph_objects as go

# Ask Elasticsearch how many documents the "news" index holds.
# The index-level ``/news/_count`` endpoint is used instead of the typed
# ``/news/_doc/_count`` form, which is deprecated for typeless indices such
# as the one created above.
res = requests.get('http://localhost:9200/news/_count')
print(res.json())

### Search

In [None]:
def query_sigle_term(field,term):
    """Build the JSON body of a ``term`` query limited to 10 hits.

    Args:
        field: Name of the document field to search.
        term: Term to look for in ``field``.

    Returns:
        The request body as a JSON string.
    """
    import json  # local import keeps this cell runnable on its own

    # Serialising a dict escapes quotes and backslashes in ``field`` and
    # ``term``; ensure_ascii=False keeps non-ASCII terms readable.
    body = {"size": 10, "query": {"term": {field: term}}}
    return json.dumps(body, ensure_ascii=False)

# Run the single-term query on the title field and print the "hits" section.
json_body = query_sigle_term("title", "קורונה")
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['hits'])

In [None]:
def query_multiple_terms(field,terms):
    """Build the JSON body of a ``terms`` query limited to 10 hits.

    Args:
        field: Name of the document field to search.
        terms: The terms to match, either as a JSON array string
            (e.g. ``'["a","b"]'``, the original calling convention) or as
            an iterable of values.

    Returns:
        The request body as a JSON string.

    Raises:
        ValueError: If ``terms`` is a string that is not valid JSON.
    """
    import json  # local import keeps this cell runnable on its own

    # Accept both the legacy pre-serialised array and a real sequence.
    if isinstance(terms, str):
        values = json.loads(terms)
    else:
        values = list(terms)

    # Serialising a dict escapes quotes and backslashes in the field name
    # and in every term.
    body = {"size": 10, "query": {"terms": {field: values}}}
    return json.dumps(body, ensure_ascii=False)

# Run the multi-term query on the title field and print the "hits" section.
json_body = query_multiple_terms('title', '["קורונה","חיסון"]')
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['hits'])

In [None]:
def query_range_date(start,end):
    """Build the JSON body of a ``range`` query on ``publishedAt``.

    Args:
        start: Lower bound, sent as ``gte``.
        end: Upper bound, sent as ``lte``.

    Returns:
        The request body as a JSON string. The range clause carries a
        ``boost`` of 2.0.
    """
    import json  # local import keeps this cell runnable on its own

    # Serialising a dict guarantees well-formed JSON whatever the bounds
    # contain.
    body = {
        "query": {
            "range": {
                "publishedAt": {"gte": start, "lte": end, "boost": 2.0},
            },
        },
    }
    return json.dumps(body, ensure_ascii=False)

# Run the date-range query for 2020-10-10 .. 2020-10-14 and print the full reply.
json_body = query_range_date('2020-10-10', '2020-10-14')
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json())

In [None]:
def wildcard_query(field,text):
    """Build the JSON body of a ``wildcard`` query.

    Args:
        field: Name of the document field to search.
        text: Wildcard pattern to match against ``field``.

    Returns:
        The request body as a JSON string.
    """
    import json  # local import keeps this cell runnable on its own

    # Serialising a dict escapes quotes and backslashes; a backslash in the
    # pattern previously produced an invalid JSON escape.
    body = {"query": {"wildcard": {field: text}}}
    return json.dumps(body, ensure_ascii=False)

# Run the wildcard query on the title field and print the "hits" section.
json_body = wildcard_query('title', '*קורונ*')
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['hits'])

### Aggregate query

In [None]:
def aggregate_query():
    """Return the JSON body of a per-day document-count aggregation.

    The body requests no hits (``size`` 0) and a ``date_histogram`` named
    ``group_by_day`` over ``publishedAt`` with a ``day`` interval.

    NOTE(review): Elasticsearch 7.2+ deprecates ``interval`` in favour of
    ``calendar_interval`` -- confirm against the cluster version in use.
    """
    # The layout (including the odd indentation) mirrors the text that has
    # always been sent, line for line.
    lines = (
        '',
        '        {',
        '      "size": 0,',
        '      "aggs": {',
        '        "group_by_day": {',
        '          "date_histogram": {',
        '            "field": "publishedAt",',
        '            "interval": "day"',
        '          }',
        '        }',
        '      }',
        '}',
        '    ',
    )
    return '\n'.join(lines)

# Run the per-day aggregation and print the "aggregations" section.
json_body = aggregate_query()
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['aggregations'])

In [None]:
def aggregate_field_query():
    """Return the JSON body of a per-day, per-source count aggregation.

    The body requests no hits (``size`` 0) and a ``date_histogram`` named
    ``group_by_day`` over ``publishedAt``; each day bucket is further split
    by a ``terms`` sub-aggregation named ``group_by_source`` on ``source``.

    NOTE(review): Elasticsearch 7.2+ deprecates ``interval`` in favour of
    ``calendar_interval`` -- confirm against the cluster version in use.
    """
    # The layout (including the odd indentation) mirrors the text that has
    # always been sent, line for line.
    lines = (
        '',
        '        {',
        '      "size": 0,',
        '      "aggs": {',
        '        "group_by_day": {',
        '          "date_histogram": {',
        '            "field": "publishedAt",',
        '            "interval": "day"',
        '          },',
        '          "aggs": {',
        '            "group_by_source": {',
        '              "terms": {',
        '                "field": "source"',
        '              }',
        '            }',
        '          }',
        '        }',
        '      }',
        '}',
        '    ',
    )
    return '\n'.join(lines)

# Run the per-day / per-source aggregation and print the "aggregations" section.
json_body = aggregate_field_query()
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['aggregations'])

In [None]:
def aggregate_fields_query(text):
    """Build a fuzzy title search aggregated by date and then by source.

    Args:
        text: Text to match against ``title`` (``match`` query with a
            ``fuzziness`` of 2).

    Returns:
        The request body as a JSON string. It requests no hits (``size`` 0)
        and a ``terms`` aggregation named ``dates`` on ``publishedAt`` with
        a nested ``terms`` aggregation named ``sources`` on ``source``.
    """
    import json  # local import keeps this cell runnable on its own

    # Serialising a dict escapes quotes and backslashes in ``text``.
    body = {
        "size": 0,
        "query": {"match": {"title": {"query": text, "fuzziness": 2}}},
        "aggs": {
            "dates": {
                "terms": {"field": "publishedAt"},
                "aggs": {"sources": {"terms": {"field": "source"}}},
            },
        },
    }
    return json.dumps(body, ensure_ascii=False)

# Run the fuzzy title search with date/source aggregation and print the
# "aggregations" section.
json_body = aggregate_fields_query("קורונה")
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['aggregations'])

### Highlight query

In [None]:
def highlight_query(field,text):
    """Build the JSON body of a ``match`` query with highlighting.

    Args:
        field: Name of the document field to search and to highlight.
        text: Text to match against ``field``.

    Returns:
        The request body as a JSON string.
    """
    import json  # local import keeps this cell runnable on its own

    # Serialising a dict escapes quotes and backslashes in ``field`` and
    # ``text``.
    body = {
        "query": {"match": {field: text}},
        "highlight": {"fields": {field: {}}},
    }
    return json.dumps(body, ensure_ascii=False)

# Run the highlighted match query on the title field and print the "hits" section.
json_body = highlight_query('title', 'קורונה')
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json()['hits'])

In [None]:
def suggest_query(field,text):
    """Build the JSON body of a ``match`` query plus a term suggester.

    Args:
        field: Name of the document field used by both the query and the
            suggester.
        text: Text to match and to request suggestions for.

    Returns:
        The request body as a JSON string; the suggester is named
        ``suggestion``.
    """
    import json  # local import keeps this cell runnable on its own

    # Serialising a dict escapes quotes and backslashes in ``field`` and
    # ``text``.
    body = {
        "query": {"match": {field: text}},
        "suggest": {
            "suggestion": {"text": text, "term": {"field": field}},
        },
    }
    return json.dumps(body, ensure_ascii=False)

# Run the match + term-suggester query and print the full reply.
# The query targets "title": the index mapping and the indexed documents
# contain only url, title, publishedAt and source, so the previously used
# "description" field does not exist in the index.
json_body = suggest_query('title', 'חגיג')
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
print(res.json())

### Dashboard with Plotly

In [None]:
!pip install chart_studio

In [None]:
import ipywidgets as widgets
from IPython.display import display
import seaborn as sns
import matplotlib.pyplot as plt
from bidi.algorithm import get_display
import requests
import pandas as pd

# One Output widget per dashboard panel.
plot_output = widgets.Output()
number_output = widgets.Output()
pie_output = widgets.Output()
output = widgets.Output()

# Fetch the per-day counts and load the buckets into a frame sorted by date.
json_body = aggregate_query()
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)
df = pd.DataFrame(data=res.json()['aggregations']['group_by_day']['buckets'])
df['date'] = pd.to_datetime(df['key_as_string']).dt.date
df = df.sort_values(by='date')

# Draw the articles-per-day bar chart into the plot panel.
plot_output.clear_output()
with plot_output:
    import plotly.express as px
    fig = px.bar(df, x="date", y="doc_count", hover_data=['date'])
    fig.update_layout(bargap=0.1)
    fig.show()
    
# Render the total number of indexed articles as a single-number indicator.
with number_output:
    import plotly.graph_objects as go
    fig = go.Figure()
    # Use the index-level ``/news/_count`` endpoint rather than the typed
    # ``/news/_doc/_count`` form, which is deprecated for typeless indices.
    res = requests.get('http://localhost:9200/news/_count')
    fig.add_trace(go.Indicator(
        mode="number",
        value=int(res.json()['count']),
        title={"text": "Number of Articles"},
        domain={'row': 0, 'column': 0}))
    fig.show()
    
# Fetch the per-day / per-source counts.
json_body = aggregate_field_query()
res = requests.get(
    'http://localhost:9200/news/_search',
    headers=headers,
    data=json_body.encode('utf-8'),
)

# Flatten the nested buckets into three parallel lists, one entry per
# (day, source) pair; source names go through get_display before plotting.
dates, sources, doc_counts = [], [], []
for b in res.json()['aggregations']['group_by_day']['buckets']:
    for source in b['group_by_source']['buckets']:
        dates.append(b['key_as_string'])
        sources.append(get_display(source['key']))
        doc_counts.append(source['doc_count'])

df = pd.DataFrame()
df['date'] = dates
df['sources'] = sources
df['doc_count'] = doc_counts
df['date'] = pd.to_datetime(df['date']).dt.date
df = df.sort_values(by='date')

# Replace the content of the plot panel with a bar chart grouped by source.
plot_output.clear_output()
with plot_output:
    import plotly.express as px
    fig = px.bar(
        df,
        x="date",
        y="doc_count",
        color="sources",
        hover_data=['date'],
        barmode='group',
    )
    fig.update_layout(bargap=0.1)
    fig.show()

# Pie chart of document counts per source.
with pie_output:
    import plotly.express as px
    fig = px.pie(df, values='doc_count', names='sources', title='')
    fig.update_traces(textposition='outside')
    fig.show()

def btn_eventhandler(obj):
    """Search titles for the text in the search box and show the hits.

    Sends a ``term`` query (at most 10 hits) on ``title`` using the current
    value of the ``search`` widget, then renders the de-duplicated hits as a
    table inside the ``output`` widget. When nothing matches, the panel is
    cleared and a short message is shown instead.

    Args:
        obj: The object passed by the button's click callback (unused).
    """
    import json  # local import keeps this cell runnable on its own

    # json.dumps escapes quotes/backslashes typed by the user, which the
    # previous string concatenation turned into an invalid request body.
    json_body = json.dumps(
        {"size": 10, "query": {"term": {"title": search.value}}},
        ensure_ascii=False,
    )
    res = requests.get(
        'http://localhost:9200/news/_search',
        headers=headers,
        data=json_body.encode('utf-8'),
    )
    lst = [s['_source'] for s in res.json()['hits']['hits']]

    output.clear_output()
    if not lst:
        # An empty frame has no 'title' column, so the table code below
        # would raise and leave the previous results on screen.
        with output:
            print('No results found.')
        return

    df = pd.DataFrame(data=lst)
    df = df.drop_duplicates(['title'])
    # Titles and sources go through get_display before being rendered.
    df['title'] = df['title'].apply(get_display)
    df['source'] = df['source'].apply(get_display)
    with output:
        import plotly.graph_objects as go
        fig = go.Figure(data=[go.Table(
            header=dict(values=['title', 'date', 'source'],
                        align='left'),
            cells=dict(values=[df.title, df.publishedAt, df.source],
                       align='right'))
        ])
        fig.show()


# Search controls: a button plus the text box read by btn_eventhandler.
btn = widgets.Button(description='search')
search = widgets.Text(
    value='',
    placeholder='searchbox',
    description='Query text:',
    disabled=False,
)

# Dashboard heading.
title = widgets.HTML('</br><h1>News Articles Dashboard</h1></br>')
display(title)

# Top row: article counter next to the per-source pie chart.
numer_widgets = widgets.HBox([number_output, pie_output])
display(numer_widgets)

# Bar chart panel.
display(plot_output)

# Bottom section: search box and button, followed by the results panel.
input_widgets = widgets.HBox([search, btn])
btn.on_click(btn_eventhandler)
display(input_widgets)
display(output)