### Imports

In [72]:
import json
import os
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q

### Elasticsearch Setup

In [73]:
def index(article, search_engine, index_name, article_id):
    """Store a single article document in ``index_name`` under ``article_id``.

    Args:
        article: The document body to store (passed as ``body``).
        search_engine: Client object exposing an ``index(...)`` method.
        index_name: Name of the target index.
        article_id: Identifier the document is stored under.
    """
    request = {
        "index": index_name,
        "doc_type": "article",
        "id": article_id,
        "body": article,
    }
    search_engine.index(**request)

def file_index(file_name):
    """Return the integer that precedes the first '-' in ``file_name``.

    If the name contains no '-', the whole name is converted. Raises
    ``ValueError`` when that leading part is not a valid integer.
    """
    prefix, _sep, _rest = file_name.partition('-')
    return int(prefix)

def build_index(root_path, n, search_engine):
    """Index JSON article files found under ``root_path`` into the "scrapped" index.

    ``root_path`` is walked recursively. Within each directory the ``.json``
    files are taken in sorted name order and at most ``n`` of them are loaded
    and indexed. The document id is the integer prefix of the file name, as
    computed by ``file_index``.

    Args:
        root_path: Directory to walk for article files.
        n: Maximum number of JSON files indexed per directory.
        search_engine: Client object handed through to ``index``.

    Raises:
        ValueError: If a file name has no integer prefix, or a file does not
            contain valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """
    for root, _dirs, files in os.walk(root_path):
        # Filter BEFORE slicing so non-JSON files do not use up the quota, and
        # sort because os.walk yields names in arbitrary order: this makes the
        # indexed subset reproducible across runs.
        json_files = sorted(name for name in files if name.endswith('.json'))
        for name in json_files[:n]:
            # JSON is read as UTF-8 explicitly rather than relying on the
            # platform's locale encoding.
            with open(os.path.join(root, name), 'r', encoding='utf-8') as f:
                article = json.load(f)
            index(article, search_engine, "scrapped", file_index(name))



# Create a client with default connection settings, then index the files
# under "articles", passing 500 as build_index's per-directory limit.
es = Elasticsearch()
build_index(root_path="articles", n=500, search_engine=es)

### Print a sample index entry.

In [74]:
# Fetch the document stored under id 0 and pretty-print its stored source.
sample_article = es.get(index="scrapped", id=0)
sample_source = sample_article['_source']
print(json.dumps(sample_source, indent=2))

{
  "title": "Pope Francis: Mass Is for Lifting Hearts, Not Cellphones",
  "authors": [
    "Megan Specia"
  ],
  "url": "https://www.nytimes.com/2017/11/08/world/europe/pope-phones-mass.html",
  "body": "Photo\n\nPope Francis has a message for Catholics: Put down your smartphone during Mass.\n\nDuring a general audience at St. Peter\u2019s Square in Vatican City on Wednesday morning, Francis chastised Catholics who use their phones during Mass.\n\n\u201cAt some point, the priest during the Mass says, \u2018Lift up your hearts,\u2019\u201d the pontiff said. \u201cHe does not say, \u2018Lift up your cellphones to take pictures.\u2019\u201d\n\nThe remarks drew applause, particularly after Francis called the use of phones during services a \u201cvery ugly thing.\u201d\n\nGeneral audiences, held regularly on Wednesdays, usually consist of short teachings and readings. While the rest of Francis\u2019s message was focused on other matters, he was clear on the use of cellphones at Mass.\n\nAd

## Query Examples

### 1. Search for articles containing a word in their body.

In [75]:
# "match" query on the article body.
s = Search(using=es, index="scrapped")
s = s.query("match", body="supporters")
response = s.execute()

# NOTE(review): len(response) counts the hits in the returned response, which
# is presumably capped by the default page size - it is not the total match count.
print(f"#Hits {len(response)}")
for hit in response:
    numbered_id = hit.meta.id + "."
    print(numbered_id, hit.title)

#Hits 10
14. Marine Le Pen Leads Far-Right Fight to Make France ‘More French’
234. ISIS Claims Responsibility, Calling Paris Attacks ‘First of the Storm’
66. Le Pen Calls Parties in France ‘Completely Rotten’ as They Unite to Fend Her Off
131. Voices From Europe’s Far Right
245. Jeremy Corbyn, Leader of Britain’s Labour Party, Loses No-Confidence Vote
70. After French Vote, a Question: How Were the Polls So Right?
35. François Fillon, French Presidential Candidate, Vows to Run Despite Inquiry
122. With Jeremy Corbyn Elected as New Leader, Britain’s Labour Party Takes a Hard Left Turn
133. Emmanuel Macron, French Economy Minister, Hints at Presidential Run
45. British Elections Pose Test for Jeremy Corbyn, Labour Party Leader


### 2. Search for articles containing a word in their title.

In [76]:
# "match" query on the article title.
s = Search(using=es, index="scrapped")
s = s.query("match", title="Twitter")
response = s.execute()

print(f"#Hits {len(response)}")
for hit in response:
    numbered_id = hit.meta.id + "."
    print(numbered_id, hit.title)

#Hits 2
15. Marine Le Pen Loses Parliamentary Immunity Over Twitter Case
41. Parliament Asks Twitter About Russian Meddling in Brexit Vote


### 3. Search for articles written by a specific author.

In [77]:
# "match" query on the authors field.
# NOTE(review): a match query analyzes its input, so this presumably matches
# either "Adam" or "Nossiter" - confirm whether a phrase match is wanted.
s = Search(using=es, index="scrapped")
s = s.query("match", authors="Adam Nossiter")
response = s.execute()

print(f"#Hits {len(response)}")
for hit in response:
    numbered_id = hit.meta.id + "."
    print(numbered_id, hit.title, "--by--", hit.authors)

#Hits 10
106. An Unlikely Contender Rises in France as the Antithesis of Trump --by-- ['Adam Nossiter']
15. Marine Le Pen Loses Parliamentary Immunity Over Twitter Case --by-- ['Adam Nossiter']
159. In France, the Mood Darkens as a Harsh Reality Sets In --by-- ['Adam Nossiter']
175. A New Generation’s Anger Resounds From a Packed Plaza in Paris --by-- ['Adam Nossiter']
35. François Fillon, French Presidential Candidate, Vows to Run Despite Inquiry --by-- ['Adam Nossiter']
44. Le Pen Loses Luster, Signaling Far Right’s Retreat in France, and Maybe Beyond --by-- ['Adam Nossiter']
48. Le Pen’s Inner Circle Fuels Doubt About Bid to ‘Un-Demonize’ Her Party --by-- ['Adam Nossiter']
130. Nicolas Sarkozy Mounts What He Hopes Is a Comeback in France --by-- ['Adam Nossiter']
14. Marine Le Pen Leads Far-Right Fight to Make France ‘More French’ --by-- ['Adam Nossiter']
29. Marine Le Pen May Get a Lift From an Unlikely Source: The Far Left --by-- ['Adam Nossiter']


### 4. Search using a fuzzy query.

In [78]:
# Fuzzy query: the misspelled term "sapporters" can still match similar
# terms in the article body.
s = Search(using = es, index = "scrapped") \
    .query("fuzzy", body = "sapporters")
response = s.execute()

print("Fuzzy Query")
print("#Hits", len(response))
for hit in response:
    print(hit.meta.id + ".", hit.title)

# Compare it with a normal match query for the same misspelled term.
# Both searches must target the same index ("scrapped"); otherwise the
# difference in hits says nothing about fuzzy vs. match.
s = Search(using = es, index = "scrapped") \
    .query("match", body = "sapporters")
response = s.execute()

print("\nMatch Query")
print("#Hits", len(response))
for hit in response:
    print(hit.meta.id + ".", hit.title)

Fuzzy Query
#Hits 10
122. With Jeremy Corbyn Elected as New Leader, Britain’s Labour Party Takes a Hard Left Turn
106. An Unlikely Contender Rises in France as the Antithesis of Trump
14. Marine Le Pen Leads Far-Right Fight to Make France ‘More French’
234. ISIS Claims Responsibility, Calling Paris Attacks ‘First of the Storm’
66. Le Pen Calls Parties in France ‘Completely Rotten’ as They Unite to Fend Her Off
59. Once in the Shadows, Europe’s Neo-Fascists Are Re-emerging
131. Voices From Europe’s Far Right
245. Jeremy Corbyn, Leader of Britain’s Labour Party, Loses No-Confidence Vote
70. After French Vote, a Question: How Were the Polls So Right?
35. François Fillon, French Presidential Candidate, Vows to Run Despite Inquiry

Match Query
#Hits 0


### 5. Search with a query on two fields.

In [79]:
# Two "match" clauses, one on the body and one on the authors field, added
# to the same search by calling .query() twice.
s = Search(using=es, index="scrapped")
s = s.query("match", body="supporters")
s = s.query("match", authors="Adam Nossiter")
response = s.execute()

print(f"#Hits {len(response)}")
for hit in response:
    numbered_id = hit.meta.id + "."
    print(numbered_id, hit.title, "--by--", hit.authors)

#Hits 7
35. François Fillon, French Presidential Candidate, Vows to Run Despite Inquiry --by-- ['Adam Nossiter']
14. Marine Le Pen Leads Far-Right Fight to Make France ‘More French’ --by-- ['Adam Nossiter']
106. An Unlikely Contender Rises in France as the Antithesis of Trump --by-- ['Adam Nossiter']
29. Marine Le Pen May Get a Lift From an Unlikely Source: The Far Left --by-- ['Adam Nossiter']
66. Le Pen Calls Parties in France ‘Completely Rotten’ as They Unite to Fend Her Off --by-- ['Aurelien Breeden', 'Adam Nossiter']
133. Emmanuel Macron, French Economy Minister, Hints at Presidential Run --by-- ['Adam Nossiter']
152. Emmanuel Macron, Eyeing French Presidency, Resigns as Economy Minister --by-- ['Adam Nossiter']
