### Imports

In [72]:
import json
import os
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q

### Elasticsearch Setup

In [73]:
def index(article, search_engine, index_name, article_id):
    """Send one article to the search engine for indexing.

    Args:
        article: The document passed as the request ``body``.
        search_engine: Object exposing an ``index(...)`` method that accepts
            the ``index``, ``doc_type``, ``id`` and ``body`` keyword arguments.
        index_name: Name of the target index.
        article_id: Identifier to store the document under.

    Returns:
        None. The value returned by ``search_engine.index`` is discarded.
    """
    # Every document is stored under the fixed document type "article".
    request = {
        "index": index_name,
        "doc_type": "article",
        "id": article_id,
        "body": article,
    }
    search_engine.index(**request)

def file_index(file_name):
    """Return the integer found before the first '-' in ``file_name``.

    If the name contains no '-', the whole name is converted instead.

    Raises:
        ValueError: If that leading text is not a valid integer literal.
    """
    leading_text, _dash, _remainder = file_name.partition('-')
    return int(leading_text)

def build_index(root_path, n, search_engine):
    """Index JSON article files found under ``root_path`` into the "nyt" index.

    ``root_path`` is walked recursively with ``os.walk``. In every directory
    visited, the ``.json`` files are taken in sorted name order and at most
    ``n`` of them are parsed and handed to ``index``; the integer returned by
    ``file_index`` for the file name is used as the document id.

    Args:
        root_path: Directory to walk for article files.
        n: Maximum number of JSON files to index per directory visited.
        search_engine: Client object forwarded to ``index``.

    Raises:
        ValueError: If a file is not valid JSON, or if ``file_index`` cannot
            parse an integer from a file name.
        OSError: If a file cannot be opened.
    """
    for root, _dirs, files in os.walk(root_path):
        # Filter before applying the limit so non-JSON files do not use up the
        # quota, and sort so the selected subset does not depend on the order
        # in which the filesystem happens to list the directory.
        json_names = sorted(name for name in files if name.endswith('.json'))
        for name in json_names[:n]:
            # Decode explicitly as UTF-8 rather than relying on the platform's
            # default encoding, which differs between systems.
            with open(os.path.join(root, name), 'r', encoding='utf-8') as f:
                article = json.load(f)
            index(article, search_engine, "nyt", file_index(name))



# Create the client with no arguments (library defaults apply), then load up
# to 500 files from the "articles" directory into the index.
es = Elasticsearch()
build_index(root_path="articles", n=500, search_engine=es)

### 1. Search for articles containing a word in their body.

In [74]:
# "match" query for the word "supporters" in the article body.
body_clause = Q("match", body="supporters")
s = Search(using=es, index="nyt").query(body_clause)
response = s.execute()

# NOTE(review): len(response) appears to count only the hits returned in this
# page of results (default page size is 10), not every matching article --
# confirm against response.hits.total if the total is what should be shown.
print("#Hits", len(response))
for hit in response:
    numbered_id = hit.meta.id + "."
    print(numbered_id, hit.title)

#Hits 10
193. For Marine Le Pen, Migration Is a Ready-Made Issue
154. Emmanuel Macron, French Economy Minister, Hints at Presidential Run
293. ISIS Claims Responsibility, Calling Paris Attacks ‘First of the Storm’
14. Marine Le Pen Leads Far-Right Fight to Make France ‘More French’
495. Big Anti-Immigration Rally in Germany Prompts Counterdemonstrations
139. Divisive Election in Spain’s Catalonia Gives Win to Separatist Parties
245. BBC Journalist Turned Away From Flight to U.S. Because She Was Born in Iran
454. German Who Posed as Hitler Returns to Position in Anti-Immigrant Group Pegida
441. Anti-Immigration Rallies in Germany Defy Calls to Desist
138. Martin Schulz, Merkel Rival, Wins His Party’s Nomination With 100 Percent of Vote


### 2. Search for articles containing a word in their title.

In [75]:
# "match" query for the word "Twitter" in the article title.
title_clause = Q("match", title="Twitter")
s = Search(using=es, index="nyt").query(title_clause)
response = s.execute()

# NOTE(review): len(response) appears to count only the hits returned in this
# page of results (default page size is 10), not every matching article.
print("#Hits", len(response))
for hit in response:
    numbered_id = hit.meta.id + "."
    print(numbered_id, hit.title)

#Hits 4
13. Marine Le Pen Loses Parliamentary Immunity Over Twitter Case
200. ‘Brexit’ Debate Even Divides Cats (and Dogs) on Twitter
220. Twitter Cats to the Rescue in Brussels Lockdown
505. Murdoch and Fox News Mocked on Twitter for Claims About Muslims


### 3. Search for articles written by a specific author.

In [76]:
# Use match_phrase rather than match for the author name: a plain match query
# analyzes "Adam Nossiter" into separate terms and, by default, accepts a
# document containing either one, so it could also return articles by any
# other "Adam" or "Nossiter". match_phrase requires the terms together, in order.
s = Search(using=es, index="nyt").query("match_phrase", authors="Adam Nossiter")
response = s.execute()

# NOTE(review): len(response) appears to count only the hits returned in this
# page of results (default page size is 10), not every matching article.
print("#Hits", len(response))
for hit in response:
    print(hit.meta.id + ".", hit.title, "--by--", hit.authors)

#Hits 10
70. François Fillon Blames ‘Media Lynching’ for His Campaign Crisis --by-- ['Adam Nossiter']
12. Le Pen and Macron Clash in Vicious Presidential Debate in France --by-- ['Adam Nossiter']
142. Emmanuel Macron, French Economy Minister, Hints at Presidential Run --by-- ['Adam Nossiter']
55. François Fillon, French Presidential Candidate, Faces Formal Investigation --by-- ['Adam Nossiter']
106. Nicolas Sarkozy Mounts What He Hopes Is a Comeback in France --by-- ['Adam Nossiter']
210. A Belated Welcome in France Is Drawing Few Migrants --by-- ['Adam Nossiter']
1. Marine Le Pen Loses French Parliamentary Immunity Over Tweets --by-- ['Adam Nossiter']
13. Marine Le Pen Loses Parliamentary Immunity Over Twitter Case --by-- ['Adam Nossiter']
79. A Candidate Rises on Vows to Control Islam and Immigration. This Time in France. --by-- ['Adam Nossiter']
123. Emmanuel Macron, Eyeing French Presidency, Resigns as Economy Minister --by-- ['Adam Nossiter']


### 4. Search using a fuzzy query.

In [77]:
# Fuzzy query on the body with the misspelled term "sapporters".
fuzzy_clause = Q("fuzzy", body="sapporters")
s = Search(using=es, index="nyt").query(fuzzy_clause)
response = s.execute()

print("Fuzzy Query")
print("#Hits", len(response))
for hit in response:
    numbered_id = hit.meta.id + "."
    print(numbered_id, hit.title)

# For comparison, run the same misspelled term through a plain match query.
match_clause = Q("match", body="sapporters")
s = Search(using=es, index="nyt").query(match_clause)
response = s.execute()

print("\nMatch Query")
print("#Hits", len(response))
for hit in response:
    numbered_id = hit.meta.id + "."
    print(numbered_id, hit.title)

Fuzzy Query
#Hits 10
193. For Marine Le Pen, Migration Is a Ready-Made Issue
154. Emmanuel Macron, French Economy Minister, Hints at Presidential Run
293. ISIS Claims Responsibility, Calling Paris Attacks ‘First of the Storm’
14. Marine Le Pen Leads Far-Right Fight to Make France ‘More French’
495. Big Anti-Immigration Rally in Germany Prompts Counterdemonstrations
139. Divisive Election in Spain’s Catalonia Gives Win to Separatist Parties
245. BBC Journalist Turned Away From Flight to U.S. Because She Was Born in Iran
454. German Who Posed as Hitler Returns to Position in Anti-Immigrant Group Pegida
441. Anti-Immigration Rallies in Germany Defy Calls to Desist
138. Martin Schulz, Merkel Rival, Wins His Party’s Nomination With 100 Percent of Vote

Match Query
#Hits 0


### 5. Search with a query on two fields.

In [78]:
# Chaining .query() calls combines the clauses, so a hit must satisfy both.
# The author clause uses match_phrase instead of match: a plain match query
# analyzes "Adam Nossiter" into separate terms and, by default, accepts either
# one, which could let through articles by a different "Adam" or "Nossiter".
s = (
    Search(using=es, index="nyt")
    .query("match", body="supporters")
    .query("match_phrase", authors="Adam Nossiter")
)
response = s.execute()

# NOTE(review): len(response) appears to count only the hits returned in this
# page of results (default page size is 10), not every matching article.
print("#Hits", len(response))
for hit in response:
    print(hit.meta.id + ".", hit.title, "--by--", hit.authors)

#Hits 7
142. Emmanuel Macron, French Economy Minister, Hints at Presidential Run --by-- ['Adam Nossiter']
193. For Marine Le Pen, Migration Is a Ready-Made Issue --by-- ['Adam Nossiter']
154. Emmanuel Macron, French Economy Minister, Hints at Presidential Run --by-- ['Adam Nossiter']
14. Marine Le Pen Leads Far-Right Fight to Make France ‘More French’ --by-- ['Adam Nossiter']
55. François Fillon, French Presidential Candidate, Faces Formal Investigation --by-- ['Adam Nossiter']
79. A Candidate Rises on Vows to Control Islam and Immigration. This Time in France. --by-- ['Adam Nossiter']
123. Emmanuel Macron, Eyeing French Presidency, Resigns as Economy Minister --by-- ['Adam Nossiter']
