## Example Python script for downloading articles from the Event Registry database

As a first step we import all necessary libraries and define a search query.

In [None]:
import csv, os
from eventregistry import *
import datetime

# Please register on eventregistry.org and replace the placeholder with your API key.
er = EventRegistry(apiKey = "PUT YOUR API KEY HERE")

# Two alternative queries are provided below. Only the selected one is built:
# previously both were constructed and the first was immediately overwritten,
# so its concept lookups were performed and then thrown away.
# Set to False to use alternative 1 (concept-based query) instead.
USE_KEYWORD_QUERY = True

if USE_KEYWORD_QUERY:
    # 2) Query based on keywords contained in the article text
    #    (an article must contain ALL of the listed keywords).
    q = QueryArticlesIter(
        keywords=QueryItems.AND(["salvini", "carola"]),
        dateStart=datetime.date(2019, 7, 1),
        dateEnd=datetime.date(2019, 7, 19),
        isDuplicateFilter="skipDuplicates",
        lang="ita",
    )
else:
    # 1) Query based on concepts: each label is resolved to a concept URI with
    #    er.getConceptUri, and an article matches if it is tagged with ANY of them.
    concept_labels = [
        "Politics",
        "Election",
        "Referendum",
        "Elections in Italy",
        "Political party",
        "Political campaign",
        "Referendums in Italy",
        "Politician",
        "General election",
    ]
    q = QueryArticlesIter(
        conceptUri=QueryItems.OR([er.getConceptUri(label) for label in concept_labels]),
        dateStart=datetime.date(2016, 10, 3),
        dateEnd=datetime.date(2016, 12, 4),
        isDuplicateFilter="skipDuplicates",
        lang="ita",
    )
print(q)

Now we can run the search query and save the results to a CSV file.

In [None]:
# Download the articles matching `q` and write one row per article to a CSV file.
# The file uses '|' as the delimiter and '^' as the quote character.

# Translation tables used to sanitise values before they are written:
# double quotes and line breaks become spaces; commas become a space in
# free-text fields and "//" in the category/concept fields.
_TEXT_TABLE = str.maketrans({",": " ", '"': " ", "\r": " ", "\n": " "})
_LIST_TABLE = str.maketrans({",": "//", '"': " ", "\r": " ", "\n": " "})


def _clean(value, table):
    """Return *value* as a single-line string sanitised with *table*.

    None becomes an empty string. Any other non-string value is converted with
    str() first, so a field that is not plain text no longer raises
    AttributeError (the original code called .replace() on it directly).
    """
    if value is None:
        return ""
    return str(value).translate(table)


# The context manager closes the file even if the download or a row fails.
with open("C:/Users/YOUR USER NAME/Desktop/XXX.csv", 'w', encoding='utf8', newline='') as ff:
    wr = csv.writer(ff, delimiter='|', quotechar='^')
    wr.writerow(['Title', 'Body', 'Date', 'Language', 'Source', 'Category', 'Concepts', 'Sentiment', 'IsDuplicate', 'Url'])

    # Articles are sorted by date; maxItems caps how many are downloaded
    # (use multiples of 100).
    for art in q.execQuery(er, sortBy="date", maxItems=10000):
        # 'source' is read as a nested mapping holding the source title; a
        # missing or empty value yields an empty Source column.
        source_info = art.get('source') or {}

        # NOTE(review): 'categories' and 'concepts' look like they are returned
        # as lists rather than strings when requested - confirm against the API
        # response; _clean stringifies them either way.
        wr.writerow([
            _clean(art.get('title'), _TEXT_TABLE),
            _clean(art.get('body'), _TEXT_TABLE),
            art.get('date', ""),
            art.get('lang', ""),
            _clean(source_info.get('title'), _TEXT_TABLE),
            _clean(art.get('categories'), _LIST_TABLE),
            _clean(art.get('concepts'), _LIST_TABLE),
            art.get('sentiment', ""),
            # Missing keys now yield an empty column instead of a KeyError,
            # consistent with how every other field is handled.
            str(art.get('isDuplicate', "")),
            art.get('url', ""),
        ])
        # Flush after every row so rows already fetched reach the file while
        # the download is still running.
        ff.flush()

print("Done!")