In [9]:
from eventregistry import *
import datetime
from time import strftime
import pickle

In [10]:
# Create the Event Registry client used by every API call in this notebook.
# Per the cell output, the API key is picked up from a settings file.
er = EventRegistry()

found apiKey in settings file which will be used for making requests
Event Registry host: http://eventregistry.org
Text analytics host: http://analytics.eventregistry.org


### Get List of URIs Corresponding to Top 20 US News Sites per Alexa Global Rank (as of 1/12/2019)

In [11]:
def getUri(site_list):
    """Look up the news-source URI for every site name in ``site_list``.

    Each name is passed to ``er.getNewsSourceUri`` (``er`` is the module-level
    Event Registry client). The results are returned as a list, in the same
    order as the input names.
    """
    return [er.getNewsSourceUri(site_name) for site_name in site_list]

In [12]:
# Search names for the top-20 US news sites; each one is resolved to an
# Event Registry source URI via er.getNewsSourceUri (see getUri).
site_list = ('CNN.com', 'Nytimes', 'theguardian', 'news.yahoo.com', 'foxnews', 'washingtonpost', 'forbes',
             'huffingtonpost', 'cnbc', 'usatoday', 'bloomberg', 'wsj', 'reuters', 'nbcnews', 'nypost',
             'usnews', 'thehill', 'cbsnews', 'time.com', 'thedailybeast')

In [13]:
# Resolve every site name to its source URI (one lookup per site, same order as site_list).
uri_list = getUri(site_list)

### Get News Articles from the EventRegistry API (Note: API Key in Settings File, See Documentation)

In [15]:
#Function to create dictionary of first and last days of each week

def weekdict(start_date, no_weeks):
    """Build a dictionary of consecutive seven-day windows.

    Args:
        start_date: first day of week 0, as a 'YYYY-MM-DD' string.
        no_weeks: number of consecutive weeks to generate.

    Returns:
        A dict keyed by week index (0 .. no_weeks - 1). Each value is a dict
        with 'first' and 'last' entries holding the first and seventh day of
        that week as 'YYYY-MM-DD' strings.
    """
    date_format = '%Y-%m-%d'
    week_zero = datetime.datetime.strptime(start_date, date_format)
    result = {}
    for week_index in range(no_weeks):
        opening_day = week_zero + datetime.timedelta(weeks=week_index)
        closing_day = opening_day + datetime.timedelta(days=6)
        result[week_index] = {
            'first': opening_day.strftime(date_format),
            'last': closing_day.strftime(date_format),
        }
    return result

In [16]:
#Helper function to save pickle files

def picklesave(obj, file_name, data_dir='../data'):
    """Pickle ``obj`` to ``<data_dir>/<file_name>.pkl``.

    Args:
        obj: any picklable object.
        file_name: base name of the output file, without the '.pkl' extension.
        data_dir: directory to write into. Defaults to '../data', the location
            that was previously hard-coded. The directory is created if it
            does not exist, so a missing folder no longer discards a result
            that has already been fetched.

    Raises:
        OSError: if the directory cannot be created or the file cannot be written.
    """
    import os  # local import: keeps the notebook's import cell untouched

    # An empty data_dir means "current directory"; os.makedirs('') would raise.
    if data_dir:
        os.makedirs(data_dir, exist_ok=True)
    target_path = os.path.join(data_dir, file_name + '.pkl')
    with open(target_path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

In [17]:
#Function to return top weekly news articles for each keyword from the top news sites, ranked by relevance

def top_by_source(week_dict, pages, keyword, source_uri_list):
    """Fetch and pickle the most relevant weekly articles from the given sources.

    For every week in ``week_dict`` and every page from 1 to ``pages``, this
    queries Event Registry for English-language news articles that have
    ``keyword`` in the title, fall in the 'Politics' category, come from a
    source located in the United States and from one of ``source_uri_list``,
    with duplicates skipped. Results are sorted by relevance ("rel") and
    include the social score and sentiment. Each raw response is saved with
    ``picklesave`` as ``<keyword>_<first day of week>_page_<n>_by_Source``.

    Args:
        week_dict: container indexed 0 .. len - 1 whose items have 'first' and
            'last' date strings (the shape produced by ``weekdict``).
        pages: number of result pages to request per week.
        keyword: keyword to search for in article titles.
        source_uri_list: source URIs; an article may match any of them.

    Returns:
        None. The responses are written to disk by ``picklesave``.
    """
    week_count = len(week_dict)
    if week_count == 0 or pages < 1:
        # Nothing to request, so do not spend any lookups either.
        return

    # The category and location URIs are identical for every week and page, so
    # resolve them once rather than repeating both lookups on each iteration.
    # NOTE(review): each lookup is presumably a separate API request - confirm.
    politics_uri = er.getCategoryUri('Politics')
    us_uri = er.getLocationUri('United States')

    for i in range(week_count):
        week = week_dict[i]
        for page_num in range(1, pages+1):
            file_name = keyword+'_'+week['first']+'_page_'+str(page_num)+'_by_Source'
            q = QueryArticles(keywords = keyword, keywordsLoc = 'title', categoryUri = politics_uri,
                              sourceLocationUri = us_uri, lang = 'eng',
                              sourceUri = QueryItems.OR(source_uri_list), dateStart=week['first'],
                              dateEnd=week['last'], dataType = 'news', isDuplicateFilter = 'skipDuplicates')

            q.setRequestedResult(RequestArticlesInfo(page = page_num, sortBy = "rel",
                    returnInfo = ReturnInfo(articleInfo = ArticleInfoFlags(socialScore= True, sentiment=True))))

            ret = er.execQuery(q)
            picklesave(ret, file_name)

In [18]:
#Function to return top weekly news articles for each keyword according to total facebook shares

def top_by_shares(week_dict, keyword):
    """Fetch and pickle each week's most-shared articles for ``keyword``.

    For every week in ``week_dict``, this queries Event Registry for
    English-language news articles that have ``keyword`` in the title, fall
    in the 'Politics' category and come from a source located in the United
    States, with duplicates skipped. Only page 1 is requested, sorted by
    "facebookShares", and the results include the social score and sentiment.
    Each raw response is saved with ``picklesave`` as
    ``<keyword>_<first day of week>_by_Shares``.

    Args:
        week_dict: container indexed 0 .. len - 1 whose items have 'first' and
            'last' date strings (the shape produced by ``weekdict``).
        keyword: keyword to search for in article titles.

    Returns:
        None. The responses are written to disk by ``picklesave``.
    """
    week_count = len(week_dict)
    if week_count == 0:
        # Nothing to request, so do not spend any lookups either.
        return

    # The category and location URIs are identical for every week, so resolve
    # them once rather than repeating both lookups on each iteration.
    # NOTE(review): each lookup is presumably a separate API request - confirm.
    politics_uri = er.getCategoryUri('Politics')
    us_uri = er.getLocationUri('United States')

    for i in range(week_count):
        week = week_dict[i]
        file_name = keyword+'_'+week['first']+'_'+'by_Shares'
        q = QueryArticles(keywords = keyword, keywordsLoc = 'title', categoryUri = politics_uri,
                          sourceLocationUri = us_uri, lang = 'eng',
                          dateStart=week['first'], dateEnd=week['last'], dataType = 'news',
                          isDuplicateFilter = 'skipDuplicates')

        q.setRequestedResult(RequestArticlesInfo(page = 1, sortBy = "facebookShares",
                returnInfo = ReturnInfo(articleInfo = ArticleInfoFlags(socialScore= True, sentiment=True))))

        ret = er.execQuery(q)
        picklesave(ret, file_name)

In [43]:
# 45 consecutive seven-day windows starting 2016-01-03; the last window ends 2016-11-12.
weeks = weekdict('2016-01-03', 45)

In [62]:
#top_by_source(week_dict = weeks, pages = 3, keyword = 'Trump', source_uri_list = uri_list)

In [63]:
#top_by_shares(week_dict = weeks, keyword = 'Trump')

In [64]:
#top_by_source(week_dict = weeks, pages = 3, keyword = 'Clinton', source_uri_list = uri_list)

In [65]:
#top_by_shares(week_dict = weeks, keyword = 'Clinton')