In [1]:
from folium.plugins import MarkerCluster
import datetime
import wikipedia
import folium
import requests

In [1]:
NYTRoot = 'https://api.nytimes.com/svc/'

In [3]:
def newsWire(returnURI=False, default=True, source=False, section=False, timePeriod=False, limit=False, offset=False, printHeadlines=False, debug=False, report=False):
    date = datetime.datetime.strftime(datetime.datetime.today(), '%A %D %H:%m %p')
    if limit!=False:
        if limit > 20:
            limit = 20
    else:
        limit = 20
    if source != False:
        default=False
    if default==True:
        source = 'all'
        section = 'all'
    if source!=False:
        if section==False:
            section = 'all'
    URI = NYTRoot+'news/v3/content/'+source+'/'+section
    if timePeriod !=False:
        assert type(int(limit))==int,'timePeriod must be a number'
        URI+='/{}'.format(timePeriod)
    URI +='.json?'
    if limit!= False:
        assert type(int(limit))==int,'limit must be a number'
        URI+= '&limit={}'.format(limit)
    if offset!=False:
        URI+='&offset={}'.format(offset)
    URI += '&api-key='+NYTkeys['newsWire']
    if debug == True:
        print(URI)
    if returnURI == True:
        return(URI)
    results = requests.get(URI)
    assert results.ok==True, "Could not complete request.  Possibly no results. Enable debug and check URI."
    results = results.json()
    total = results['num_results']
    if report == True:
        print('New York Times News Wire Query')
        print('Date: {}'.format(date))
        print('Status: {}'.format(results['status']))
        print('Number of results: {}'.format(total))
        if offset==False:
            print('Results: 0 - {}'.format(total))
        else:
            if offset+limit < total:
                print('Results: {} - {}'.format(offset, offset+limit))
            else:
                print('Results: {} - {}'.format(offset, total))
        if printHeadlines == True:
            for article in results['results']:
                print(article['title'])
    return results

def followURLSMap(urls, report=False):
    report = {}
    x=0
    targets = [x['url'] for x in urls if 'url' in x.keys()]
    for i in range(len(targets)):
        #print(targets[i])
        try:
            rArticle = specificArticle(targets[i])
            results = rArticle['results'][0]
            report[x] = results
            x += 1
        except Exception as e:
            #print(targets[i], 'download failed')
            pass
    return report

def fetchAllNewsWireresults(isource='all', isection='all', itimePeriod=120):
    idx = 20
    results = newsWire(source=isource, section=isection, timePeriod=itimePeriod, report=False)
    total = int(results['num_results'])
    paper = results['results']
    while idx <= total:
        chunk = newsWire(source=isource, section=isection, timePeriod=itimePeriod, report=False, offset=idx)
        for article in chunk['results']:
            paper.append(article)
        idx+=20
    print('Total Records Retrieved: {}'.format(total))
    return paper

def topStories(section, report=True, debug=False, returnURI=False, printHeadlines=True):
    '''
    sections are: home, arts, automobiles, books, business, fashion,
    food, health, insider, magazine, movies, national, nyregion,
    obituaries, opinion, politics, realestate, science, sports,
    sundayreview, technology, theater, tmagazine, travel, upshot,
    and world.
    '''
    date = datetime.datetime.strftime(datetime.datetime.today(), '%A %D %H:%m %p')
    URI = NYTRoot+'topstories/v1/'+section+'.json?api-key='+NYTkeys['topStories']
    if debug == True:
        print(URI)
    if returnURI == True:
        return(URI)
    results = requests.get(URI)
    assert results.ok==True, "Could not complete request. Possibly no results. Enable debug and check URI."
    results = results.json()
    total = results['num_results']
    if report == True:
        print('New York Times Top Stories Query')
        print('Date: {}'.format(date))
        print('Status: {}'.format(results['status']))
        print('Number of results: {}'.format(total))
        if printHeadlines == True:
            for article in results['results']:
                print(article['title'])
    return results

def mostPopular(section, pType, pTypeTarget='facebook;twitter', timePeriod=1, report=True, debug=False, returnURI=False, printHeadlines=True):
    '''
    sections are: home, arts, automobiles, books, business, fashion,
    food, health, insider, magazine, movies, national, nyregion,
    obituaries, opinion, politics, realestate, science, sports,
    sundayreview, technology, theater, tmagazine, travel, upshot,
    and world.
    supported time periods are 1,7 and 30
    '''
    date = datetime.datetime.strftime(datetime.datetime.today(), '%A %D %H:%m %p')
    if pType != 'mostshared':
        URI = NYTRoot+'mostpopular/v2/'+pType+'/'+section
    else:
        URI = NYTRoot+'mostpopular/v2/'+pType+'/'+section+'/'+pTypeTarget
    URI+= '/{}'.format(timePeriod)
    URI +='.json?apikey='+NYTkeys['mostPopular']
    if debug == True:
        print(URI)
    if returnURI == True:
        return(URI)
    results = requests.get(URI)
    assert results.ok==True, "Could not complete request. Possibly no results. Enable debug and check URI."
    results = results.json()
    total = results['num_results']
    if report == True:
        print('New York Times Most Popular Stories Query')
        print('Date: {}'.format(date))
        print('Status: {}'.format(results['status']))
        print('Number of results: {}'.format(total))
        if printHeadlines == True:
            for article in results['results']:
                print(article['title'])
    return results

def semanticSearchbyType(concept, specific_concept_name, report=True, debug=False, returnURI=False, printHeadlines=True):
    '''
    concept Types =  {
        'Location':'nytd_geo',
        'Person':'nytd_per',
        'Organization':'nytd_org',
        'Descriptor':'nytd_des',
        }
    '''
    date = datetime.datetime.strftime(datetime.datetime.today(), '%A %D %H:%m %p')
    URI = NYTRoot+'semantic/v2/concept/name/'+concept + '/' + specific_concept_name +'.json?fields=all&api-key='+NYTkeys['topStories']
    if debug == True:
        print(URI)
    if returnURI == True:
        return(URI)
    results = requests.get(URI)
    assert results.ok==True, "Could not complete request. Possibly no results. Enable debug and check URI."
    results = results.json()
    total = results['num_results']
    if report == True:
        print('New York Times Semantics Query')
        print('Date: {}'.format(date))
        print('Status: {}'.format(results['status']))
        print('Number of results: {}'.format(total))
        '''if printHeadlines == True:
            for article in results['results']:
                print(article['title'])'''
    return results

def specificArticle(url, debug=False, returnURI=False):
    date = datetime.datetime.strftime(datetime.datetime.today(), '%A %D %H:%m %p')
    URI = NYTRoot+'news/v3/content.json?url={}'.format(url)
    URI += '&api-key='+NYTkeys['topStories']
    if debug == True:
        print(URI)
    if returnURI == True:
        return(URI)
    results = requests.get(URI)
    assert results.ok==True, "Could not complete request. Possibly no results. Enable debug and check URI."
    results = results.json()
    #total = results['num_results']
    return results

def layer3Relations(layer1_relations, layer2):
    i=0
    layer1_urls = []
    targets = [x['url'] for x in layer1_relations if 'url' in x.keys()]
    new_urls = []
    for x in layer2:
        try:
            urls = layer2[x]['related_urls']
            for x in range(1,len(urls), 2):
                #print(urls[x]['url'])
                if urls[x]['url'] not in targets:
                    if urls[x] not in new_urls:
                        new_urls.append(urls[x])
        except:pass
    return new_urls

def dataLayerFromRelated(urls):
    layer_report = {}
    x = 0
    urls = [x['url'] for x in urls if 'url' in x.keys()]
    for article in urls:
        try:
            #print(article)
            layer_report[x] = specificArticle(article)['results'][0]
            x+=1
        except Exception as e:
            #print(e)
            pass
    return layer_report

def makePopUp(results):
    summary = results['abstract']
    article_template = """<b>Title: </b>{}</br>
    <b>Byline:</b> {}</br>
    <b>Abstract</b>: {}</br>
    <b>link:</b> {}""".format(results['title'],
                                   results['byline'],
                                   results['abstract'],
                                   results['url'])
    if 'multimedia' in results and results['multimedia'] != '':
        media = [x for x in results['multimedia'] if x['type'] == 'image'][-1]
        article_template+="""</br>
         <img src="{}" alt={}></br>
         <b>Caption: </b>{}</br>
         """.format(media['url'], media['copyright'],media['caption'])
    return article_template

def addCluster(layer, cluster, color):
    for article in layer:
        for place in layer[article]['geo_facet']:
            try:
                coordinate = wikipedia.page(place).coordinates
            except:
                continue
            sub_article_template = makePopUp(layer[article])
            secondaryPopUp = folium.Html(sub_article_template, script=True)
            articlePopup = folium.Popup(secondaryPopUp, max_width=450,)
            Marker = folium.Marker(location=coordinate,
                                   popup=articlePopup,
                                   icon = folium.Icon(color = color)
                                  ).add_to(cluster)

def relatedURLS(article, report=False):
    if 'related_urls' in article:
        #print(article)
        try:
            related = article['related_urls']
            facets = {}
            urls = []
            for record in related:
                for item in record:
                    urls.append({item:record[item]})
            for record in article:
                if 'facet' in record:
                    facets[record] = article[record]
        except:
            print('No related URLS, use facets instead')
        return urls, facets

def mapArticleRelations(article):
    center = (39, 34)
    primaryPost = (40.45, -73.59)
    layer1 = specificArticle(article)
    if layer1['results'][0]['geo_facet'] != 0:
        try:
            primaryPost = wikipedia.page(layer1['results'][0]['geo_facet']).coordinates
        except:
            pass
    newsMap = folium.Map(location=primaryPost,tiles='Stamen Watercolor', zoom_start=2)
    layer2Cluster = MarkerCluster(name='Layer 2 Links').add_to(newsMap)
    layer3Cluster = MarkerCluster(name='Layer 3 Links').add_to(newsMap)
    print(layer1['results'][0]['title'])
    print('Primary: Blue')
    print('Layer 2 Relations: Red')
    print('Layer 3 Relations: Green')
    layer1 = layer1['results'][0]
    article_template = makePopUp(layer1)
    primaryPopUp = folium.Html(article_template, script=True)
    popup = folium.Popup(primaryPopUp, max_width=450)
    folium.Marker(primaryPost, popup=popup).add_to(newsMap)
    layer1_relations, layer1_facets = relatedURLS(layer1)
    layer2 = followURLSMap(layer1_relations, report=False)
    print('layer2  {}'.format(len(layer2)))
    layer3_urls = layer3Relations(layer1_relations, layer2)
    layer3 = dataLayerFromRelated(layer3_urls)
    print('layer3 {}'.format(len(layer3)))
    addCluster(layer2, layer2Cluster, color='red')
    addCluster(layer3, layer3Cluster, color='green')
    layer2Cluster.add_to(newsMap)
    layer3Cluster.add_to(newsMap)
    folium.TileLayer('openstreetmap').add_to(newsMap)
    folium.TileLayer('Stamen Toner').add_to(newsMap)
    folium.TileLayer('cartodbpositron').add_to(newsMap)
    folium.LayerControl().add_to(newsMap)
    return newsMap

# 1. Pull News for Topic and TimeFrame
Form -> Topic Selector  
Form -> Time Range  
Form -> News Source Selector(all, iht, nyt)

In [10]:
report = newsWire(source='all', section='World', timePeriod=120, report=False)

In [11]:
report.keys()

dict_keys(['status', 'copyright', 'num_results', 'results'])

In [12]:
report['num_results']

73

# Restrict Results to articles with a Geo Facet

In [10]:
report = [x for x in report['results'] if x['geo_facet'] != '' ]

In [11]:
report = [x for x in report if x['related_urls'] != None]

In [12]:
len(report)

15

In [13]:
report[1]

{'slug_name': '10Mali',
 'section': 'World',
 'subsection': 'Africa',
 'title': 'Massacre in Mali Leaves at Least 95 Dead, Government Says',
 'abstract': 'An attack on a village of the Dogon ethnic group in central Mali is the latest in violence that has displaced tens of thousands of civilians.',
 'url': 'https://www.nytimes.com/2019/06/10/world/africa/mali-terrorism-dogon-fulani.html',
 'byline': 'By THE ASSOCIATED PRESS',
 'thumbnail_standard': '',
 'item_type': 'Article',
 'source': 'The New York Times',
 'updated_date': '2019-06-10T16:02:25-04:00',
 'created_date': '2019-06-10T15:31:32-04:00',
 'published_date': '2019-06-09T20:00:00-04:00',
 'first_published_date': '2019-06-10T15:30:02-04:00',
 'material_type_facet': 'News',
 'kicker': None,
 'subheadline': None,
 'des_facet': ['Terrorism'],
 'org_facet': ['Dan Na Ambassagou'],
 'per_facet': '',
 'geo_facet': ['Mali'],
 'related_urls': [{'suggested_link_text': 'Confronting Mali’s New Jihadist Threat',
   'url': 'https://www.nytime

# Check for Related Articles 
Check report entries and store as Map Target if found

In [14]:
mapTargets = []
for article in report:
    if article['related_urls'] not in ['', None, False, "None"]:
        #print(article['slug_name'])
        #print(article['related_urls'])
        mapTargets.append(article)

In [15]:
len(mapTargets)

15

# Pull Layer One Relations and Facets for Map Targets

In [17]:
mapTargets[3]['title']

'Looking for Free Speech in Russia? Try YouTube'

In [19]:
layer1_relations, layer1_facets = relatedURLS(mapTargets[3])

In [20]:
layer1_facets

{'material_type_facet': 'News',
 'des_facet': '',
 'org_facet': ['YouTube.com',
  'News and News Media',
  'Propaganda',
  'Censorship'],
 'per_facet': ['Putin, Vladimir V',
  'Navalny, Aleksei A',
  'Medvedev, Dmitri A',
  'Tolokonnikova, Nadezhda'],
 'geo_facet': ['Russia']}

In [21]:
layer1_relations

[{'suggested_link_text': 'In Russia, Political Criticism Is a 4-Letter Word (and a $470 Fine)'},
 {'url': 'https://www.nytimes.com/2019/04/24/world/europe/russia-putin-criticism-law.html'},
 {'suggested_link_text': 'Kremlin Moves Toward Control of Internet, Raising Censorship Fears'},
 {'url': 'https://www.nytimes.com/2019/04/11/world/europe/russia-internet-censorship.html'},
 {'suggested_link_text': 'Punk Riffs Take on God and Putin'},
 {'url': 'https://www.nytimes.com/2012/03/21/world/europe/21iht-letter21.html'}]

In [194]:
layer1 = []
for article in mapTargets:
    layer1.append(relatedURLS(article))

# Collect Layer Two Data

In [197]:
layer2_data = []

#### TODO - Fix broken URL Downloads

In [203]:
for article_group in layer1:
    layer2_data.append(followURLSMap(article_group[0], report=False))

https://www.nytimes.com/interactive/2018/06/27/world/europe/europe-migrant-crisis-change.html download failed
https://www.nytimes.com/2018/06/29/world/europe/us-migrant-crisis.html download failed
https://www.nytimes.com/2016/02/03/world/middleeast/saudi-court-spares-poets-life-but-gives-him-8-years-and-800-lashes.html download failed
https://www.nytimes.com/2016/12/06/world/middleeast/saudi-arabia-iran-spying-trial.html download failed
https://www.nytimes.com/2016/01/05/world/middleeast/fewer-countries-use-death-penalty-but-death-sentences-surge.html download failed
https://www.nytimes.com/2019/04/11/world/europe/russia-internet-censorship.html download failed
https://www.nytimes.com/2012/03/21/world/europe/21iht-letter21.html download failed
https://www.nytimes.com/2015/05/22/world/europe/moldova-eyes-russias-embrace-as-flirtation-with-europe-fades.html download failed
https://www.nytimes.com/2016/01/26/world/europe/oppositions-groups-in-moldova-unite-to-protest-new-government.html d

In [204]:
layer1[1][0]

[{'suggested_link_text': 'Saudi Court Spares Poet’s Life but Gives Him 8 Years and 800 Lashes'},
 {'url': 'https://www.nytimes.com/2016/02/03/world/middleeast/saudi-court-spares-poets-life-but-gives-him-8-years-and-800-lashes.html'},
 {'suggested_link_text': 'Saudi Arabia Executes 37 in One Day for Terrorism'},
 {'url': 'https://www.nytimes.com/2019/04/23/world/middleeast/saudi-arabia-executions.html'},
 {'suggested_link_text': 'Saudi Court Orders Executions for 15 Accused of Spying for Iran'},
 {'url': 'https://www.nytimes.com/2016/12/06/world/middleeast/saudi-arabia-iran-spying-trial.html'},
 {'suggested_link_text': 'Death Sentences Surge, Even as More Countries Drop Capital Punishment'},
 {'url': 'https://www.nytimes.com/2016/01/05/world/middleeast/fewer-countries-use-death-penalty-but-death-sentences-surge.html'}]

In [205]:
len(layer2_data[0])

2

In [206]:
layer2_data

[{0: {'slug_name': '30central-america',
   'section': 'World',
   'subsection': 'Americas',
   'title': 'Trump Turns U.S. Policy in Central America on Its Head',
   'abstract': 'His plan to cut aid to Guatemala, Honduras and El Salvador is “shooting yourself in the foot,” said one human rights worker. Mexico has urged more funding for development.',
   'url': 'https://www.nytimes.com/2019/03/30/world/americas/trump-turns-us-policy-in-central-america-on-its-head.html',
   'byline': 'By ELISABETH MALKIN',
   'item_type': 'Article',
   'source': 'The New York Times',
   'updated_date': '2019-04-24T16:43:44-04:00',
   'created_date': '2019-03-30T20:35:18-04:00',
   'published_date': '2019-03-29T20:00:00-04:00',
   'first_published_date': '2019-03-30T20:33:48-04:00',
   'material_type_facet': 'News',
   'kicker': None,
   'subheadline': None,
   'des_facet': ['United States Politics and Government',
    'Humanitarian Aid',
    'Gangs',
    'Immigration and Emigration'],
   'org_facet': ['Un

# 3 Collect Layer 3 urls
##### TODO Troubleshoot layer 3 urls to map are not working correctly

In [210]:
layer3_urls = []
for x in range(len(layer1)):
    layer3_urls.append(layer3Relations(layer1[x][0], layer2_data[x]))

In [211]:
layer3_urls

[[{'suggested_link_text': 'Spring Brings Surge of Migrants, Stretching Border Facilities Far Beyond Capacity',
   'url': 'https://www.nytimes.com/2019/03/30/us/border-migrants-el-paso-bridge-spring-surge.html'},
  {'suggested_link_text': 'Trump’s Surprising New Ally in Mexico? The Government',
   'url': 'https://www.nytimes.com/2019/03/01/world/americas/mexico-migration-trump.html'}],
 [{'suggested_link_text': 'Iranian Protesters Ransack Saudi Embassy After Execution of Shiite Cleric',
   'url': 'https://www.nytimes.com/2016/01/03/world/middleeast/saudi-arabia-executes-47-sheikh-nimr-shiite-cleric.html'}],
 [{'suggested_link_text': 'Havoc in Hong Kong Legislature Over Extradition Bill',
   'url': 'https://www.nytimes.com/2019/05/11/world/asia/hong-kong-extradition-law.html'}],
 [{'suggested_link_text': 'New Prison Rule in Russia: No Swearing',
   'url': 'https://www.nytimes.com/2016/01/15/world/europe/russia-bans-foul-language-in-some-prisons-no-word-on-enforcement.html'}],
 [],
 [],
 

# Collect Layer 3 Data

In [218]:
layer3_data = []
for article_group in range(len(layer3_urls)):
    layer3_data.append(dataLayerFromRelated(layer3_urls[article_group]))

https://www.nytimes.com/2019/03/30/us/border-migrants-el-paso-bridge-spring-surge.html
https://www.nytimes.com/2019/03/01/world/americas/mexico-migration-trump.html
https://www.nytimes.com/2016/01/03/world/middleeast/saudi-arabia-executes-47-sheikh-nimr-shiite-cleric.html
https://www.nytimes.com/2019/05/11/world/asia/hong-kong-extradition-law.html
https://www.nytimes.com/2016/01/15/world/europe/russia-bans-foul-language-in-some-prisons-no-word-on-enforcement.html


In [219]:
layer3_data[0]

{0: {'slug_name': '30SPRINGSURGE',
  'section': 'U.S.',
  'subsection': '',
  'title': 'Spring Brings Surge of Migrants, Stretching Border Facilities Far Beyond Capacity',
  'abstract': 'Migration along the southern border typically increases in the spring, but authorities said the current numbers have reached “unmanageable” levels.',
  'url': 'https://www.nytimes.com/2019/03/30/us/border-migrants-el-paso-bridge-spring-surge.html',
  'byline': 'By MIRIAM JORDAN and SIMON ROMERO',
  'item_type': 'Article',
  'source': 'The New York Times',
  'updated_date': '2019-03-31T12:59:26-04:00',
  'created_date': '2019-03-30T11:51:32-04:00',
  'published_date': '2019-03-29T20:00:00-04:00',
  'first_published_date': '2019-03-30T11:50:02-04:00',
  'material_type_facet': 'News',
  'kicker': None,
  'subheadline': None,
  'des_facet': ['Illegal Immigration',
   'Immigration and Emigration',
   'Immigration Detention'],
  'org_facet': ['Border Patrol (US)',
   'Customs and Border Protection (US)',
   

In [220]:
layer3_data[1]

{}

In [221]:
layer3_data

[{0: {'slug_name': '30SPRINGSURGE',
   'section': 'U.S.',
   'subsection': '',
   'title': 'Spring Brings Surge of Migrants, Stretching Border Facilities Far Beyond Capacity',
   'abstract': 'Migration along the southern border typically increases in the spring, but authorities said the current numbers have reached “unmanageable” levels.',
   'url': 'https://www.nytimes.com/2019/03/30/us/border-migrants-el-paso-bridge-spring-surge.html',
   'byline': 'By MIRIAM JORDAN and SIMON ROMERO',
   'item_type': 'Article',
   'source': 'The New York Times',
   'updated_date': '2019-03-31T12:59:26-04:00',
   'created_date': '2019-03-30T11:51:32-04:00',
   'published_date': '2019-03-29T20:00:00-04:00',
   'first_published_date': '2019-03-30T11:50:02-04:00',
   'material_type_facet': 'News',
   'kicker': None,
   'subheadline': None,
   'des_facet': ['Illegal Immigration',
    'Immigration and Emigration',
    'Immigration Detention'],
   'org_facet': ['Border Patrol (US)',
    'Customs and Border 

# Render Map with Article Groups as Layers

In [21]:
import wikipedia

In [26]:
mapTargets[1]

{'slug_name': '10Mali',
 'section': 'World',
 'subsection': 'Africa',
 'title': 'Massacre in Mali Leaves at Least 95 Dead, Government Says',
 'abstract': 'An attack on a village of the Dogon ethnic group in central Mali is the latest in violence that has displaced tens of thousands of civilians.',
 'url': 'https://www.nytimes.com/2019/06/10/world/africa/mali-terrorism-dogon-fulani.html',
 'byline': 'By THE ASSOCIATED PRESS',
 'thumbnail_standard': '',
 'item_type': 'Article',
 'source': 'The New York Times',
 'updated_date': '2019-06-10T16:02:25-04:00',
 'created_date': '2019-06-10T15:31:32-04:00',
 'published_date': '2019-06-09T20:00:00-04:00',
 'first_published_date': '2019-06-10T15:30:02-04:00',
 'material_type_facet': 'News',
 'kicker': None,
 'subheadline': None,
 'des_facet': ['Terrorism'],
 'org_facet': ['Dan Na Ambassagou'],
 'per_facet': '',
 'geo_facet': ['Mali'],
 'related_urls': [{'suggested_link_text': 'Confronting Mali’s New Jihadist Threat',
   'url': 'https://www.nytime

In [33]:
news_map = mapArticleRelations(mapTargets[1]['url'])

Massacre in Mali Leaves at Least 95 Dead, Government Says
Primary: Blue
Layer 2 Relations: Red
Layer 3 Relations: Green
layer2  0
layer3 0


#### TODO Save multiple versions of the map so that the web server can refresh the map to simulate streaming data

In [34]:
news_map