#Project Fletcher
Description: Getting New York Times articles to make personalized article suggestions

Sources:
* http://dlab.berkeley.edu/blog/scraping-new-york-times-articles-python-tutorial
* http://developer.nytimes.com/docs/read/article_search_api_v2
* http://brooksandrew.github.io/simpleblog/articles/new-york-times-api-to-mongodb/
* https://docs.mongodb.org/getting-started/python/insert/
* http://open.blogs.nytimes.com/2015/08/11/building-the-next-new-york-times-recommendation-engine/

KEY: 888f546089cc789d146a2d70b4f2c804:9:74609839

In [3]:
api_key = '888f546089cc789d146a2d70b4f2c804:9:74609839'

####IMPORTS

In [1]:
from nytimesarticle import articleAPI
from pymongo import MongoClient
from gensim import corpora, models, similarities
from gensim.matutils import Sparse2Corpus
from sklearn.feature_extraction.text import CountVectorizer
from datetime import datetime
import requests

In [2]:
import requests
from bs4 import BeautifulSoup

In [3]:
from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals
from sumy.parsers.html import HtmlParser
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer as Summarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words

####FUNCTIONS

Build_query helps build the API query to fetch metadata on news articles from the New York Times

In [4]:
def build_query(year,key,page):
    base  = 'http://api.nytimes.com/svc/search/v2/articlesearch.json?'
    filtr = 'fq=source:("The+New+York+Times")'
    start = '&begin_date=' + str(year) + '0101'
    end = '&end_date=' + str(year) + '1231'
    pages = '&page=' + str(page)
    query = base + filtr + start + end + pages + '&sort=oldest&api-key=' + key
    return query

Get_articles returns in an array all the articles fetched from the New York Times

In [5]:
def get_articles(year):
    '''
    This function accepts a year in string format (e.g.'1980')
    and a query (e.g.'Amnesty International') and it will 
    return a list of parsed articles (in dictionaries)
    for that year.
    '''
    all_articles = []
    for i in range(0,100): #NYT limits pager to first 100 pages. But rarely will you find over 100 pages of results anyway.
        q = build_query(year,api_key,i)
        print("Page #%i, %s" % (i,q)) 
        response = requests.get(q)
        articles = response.json()
        print(response)
        articles = parse_articles(articles)
        all_articles = all_articles + articles
    return(all_articles)

Parse_articles converts each json response from the API into a dictionary with the meta information that I would like to keep from each article

In [6]:
def parse_articles(articles):
    '''
    This function takes in a response to the NYT api and parses
    the articles into a list of dictionaries
    
    * _id
    * author name: byline.person.firstname & byline.person.lastname
    * document_type
    * headline
    * keywords: name & value
    * lead_paragraph
    * news_desk
    * pub_date
    * section_name
    * source
    * subsection_name
    * type_of_material
    * web_url
    * word_count
    '''
    news = []
    v = 0
    for i in articles['response']['docs']:
        dic = {}
        if i['lead_paragraph'] is None or i['section'] == 'Paid Death Notices':
            continue
        dic['id'] = i['_id']
        if i['byline'] is not None and len(i['byline']) > 1 and len(i['byline']['person']) > 1 and 'firstname' and 'lastname' in i['byline']['person'][0]:
            dic['author'] = {'firstname':i['byline']['person'][0]['firstname'],'lastname':i['byline']['person'][0]['lastname']}
        dic['doc_type'] = i['document_type']
        #if i['abstract'] is not None:
            #dic['abstract'] = i['abstract'].encode("utf8")
        dic['headline'] = i['headline']['main']#.encode("utf8")
        dic['lead_paragraph'] = i['lead_paragraph']
        #if i['snippet'] is not None:
        #    dic['snippet'] = i['snippet']
        dic['summary'] = scrape_summary(i['web_url'])
        #dic['desk'] = i['news_desk']
        dic['date'] = i['pub_date'][0:10] # cutting time of day.
        dic['section'] = i['section_name']
        dic['subsection'] = i['subsection_name']
        #if i['snippet'] is not None:
        #    dic['snippet'] = i['snippet'].encode("utf8")
        dic['source'] = i['source']
        dic['type'] = i['type_of_material']
        dic['url'] = i['web_url']
        dic['word_count'] = i['word_count']
        # locations
        #locations = []
        #for x in range(0,len(i['keywords'])):
        #    if 'glocations' in i['keywords'][x]['name']:
        #        locations.append(i['keywords'][x]['value'])
        #dic['locations'] = locations
        # subject
        subjects = []
        for x in range(0,len(i['keywords'])):
            if 'subject' in i['keywords'][x]['name']:
                subjects.append(i['keywords'][x]['value'])
        dic['keywords'] = subjects   
        news.append(dic)
        v += 1
    return(news)

Get_url_soup_content gets the BeautifulSoup content from the requested url

In [7]:
#Get article off of the New York Times
def get_url_soup_content(url):
    response = requests.get(url)
    if response.ok:
        return BeautifulSoup(response.text)

Scrape_summary goes to the url page of the New York Times gets the article content and summarizes it using Sumy. It returns that summary

In [8]:
def scrape_summary(url, language='english', sentences_cnt = 3):
    soup = get_url_soup_content(url)
    paragraphs = soup.body.find_all('p',{'itemprop':'articleBody'})
    text = ''
    for i in range(len(paragraphs)):
        text += paragraphs[i].text + ' '
    parser = PlaintextParser.from_string(text, Tokenizer(language))
    stemmer = Stemmer(language)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(language)
    summary_obj = summarizer(parser.document, sentences_cnt)
    summary = ''
    for sentences in summary_obj:
        summary += sentences._text + ' '
        #print(sentences)
    return summary

get_modeling_summaries_from_db: from database get necessary information to build text content that will be used to build model

In [9]:
def get_modeling_summaries_from_db(collection):
    cursor = collection.find()
    summaries = []
    database_articles = []
    for article in cursor:
        database_articles.append(article)
        lda_text = ''
        for keyword in article['keywords']:
            lda_text += keyword + ' '
        lda_text += article['headline'] + ' ' + article['section'] + ' ' + article['type'] + ' ' + article['summary']
        summaries.append(lda_text)
    return summaries, database_articles

find_article: given an article_id and a list of articles find the article with that id

In [10]:
def find_article(article_id,articles):
    for article in articles:
        if article['id'] == article_id:
            return article

find_most_similar_articles: given an article and a list of articles find similar_cnt articles that are similar to the article we want

In [11]:
def find_most_similar_articles(article,articles,similar_cnt = 5):
    cnt = 1
    suggestions = []
    sorted_similars = ((k, article['similar_docs'][k]) for k in sorted(article['similar_docs'], key = article['similar_docs'].get, reverse=True))
    for k,v in sorted_similars:
        found_article = find_article(k,articles)
        if found_article['section'] != "Paid Death Notices":
            suggestions.append(found_article)
            cnt += 1
        if cnt == 10:
            break
    return suggestions

build_lda_model: build model using text and given number of topics and return model and corpus later used to get similarities between documents

In [12]:
def build_lda_model(text, topics=11):
    count_vectorizer = CountVectorizer(analyzer='word',stop_words='english',token_pattern='\\b[a-z][a-z]+\\b')
    ng_vecs = count_vectorizer.fit_transform(text).transpose()
    #print(ng_vecs.shape)
    corpus = Sparse2Corpus(ng_vecs)
    #count_vectorizer.vocabulary_
    id2word = dict((count_vectorizer.vocabulary_[k],k) for k in count_vectorizer.vocabulary_)
    #print(id2word)
    lda = models.LdaModel(corpus, id2word=id2word, num_topics=topics)
    #lda.print_topics(num_words=10)
    return lda, corpus

get_lda_similarities: use lda_model and corpus to get similarities of doc_num with all other documents and return them

In [13]:
def get_lda_similarities(lda_model, corpus, doc_num):
    lda_corpus = lda_model[corpus]
    lda_docs = [doc for doc in lda_corpus]
    index = similarities.MatrixSimilarity(lda_docs)
    similarities = sorted(enumerate(index[lda_docs[doc_num]]), key=lambda item: -item[1])
    return similarities, lda_docs

________

###SCRAPE

Call the New York Times API to get article metadata, use the url to scrape the article content and build summary. Use all of this information to build an article dictionary and store all the articles into a list of articles.

In [58]:
result = get_articles(2016)

Page #0, http://api.nytimes.com/svc/search/v2/articlesearch.json?fq=source:("The+New+York+Times")&begin_date=20160101&end_date=20161231&page=0&sort=oldest&api-key=888f546089cc789d146a2d70b4f2c804:9:74609839
<Response [200]>
Page #1, http://api.nytimes.com/svc/search/v2/articlesearch.json?fq=source:("The+New+York+Times")&begin_date=20160101&end_date=20161231&page=1&sort=oldest&api-key=888f546089cc789d146a2d70b4f2c804:9:74609839
<Response [200]>
Page #2, http://api.nytimes.com/svc/search/v2/articlesearch.json?fq=source:("The+New+York+Times")&begin_date=20160101&end_date=20161231&page=2&sort=oldest&api-key=888f546089cc789d146a2d70b4f2c804:9:74609839
<Response [200]>
Page #3, http://api.nytimes.com/svc/search/v2/articlesearch.json?fq=source:("The+New+York+Times")&begin_date=20160101&end_date=20161231&page=3&sort=oldest&api-key=888f546089cc789d146a2d70b4f2c804:9:74609839
<Response [200]>
Page #4, http://api.nytimes.com/svc/search/v2/articlesearch.json?fq=source:("The+New+York+Times")&begin_

In [59]:
len(result)

900

In [60]:
result[0:10]

[{'date': '2016-01-01',
  'doc_type': 'article',
  'headline': '50 Comics Walk Into a Club. No, You Haven’t Heard This One.',
  'id': '56842cab79881066fdb9523e',
  'keywords': ['Comedy and Humor', 'New Year'],
  'lead_paragraph': '“50 First Jokes NYC” celebrates its 10th anniversary with a show at the Bell House in Brooklyn on Saturday, with 50 comedians trying for new laughs in the new year.',
  'section': 'Arts',
  'source': 'The New York Times',
  'subsection': None,
  'summary': 'A group of New York comics found that out almost by accident a decade ago, and on Saturday “50 First Jokes NYC” celebrates its 10th anniversary with a show at the Bell House in Gowanus, Brooklyn, that will no doubt generate lots of laughs and perhaps the occasional groan. “They’re really putting it on the line and doing something that’s completely untested.” Mr. O’Donnell, one of the show’s creators and the event’s host, said the idea was born in the middle of the last decade when he and other comics and t

In [17]:
#Check topics
val = []
s = set()
for i in range(len(result)):
    val.append(result[i]['lead_paragraph'])
    s.add(result[i]['subsection'])#used to check how many topics i should define

In [20]:
s

{'Credit and Debit Cards',
 'Media',
 'Personal Tech',
 'Middle East',
 'Soccer',
 'Sunday Review',
 'Room For Debate',
 'Fashion & Beauty',
 'Music',
 'Olympics',
 'Dance',
 'College Football',
 'International Arts',
 'DealBook',
 'Asia Pacific',
 'Sunday Book Review',
 None,
 'Pro Basketball',
 'Hockey',
 'Art & Design',
 'Auto Racing',
 'Skiing',
 'Energy & Environment ',
 'Africa',
 'Economy',
 'Politics',
 'Television',
 'College Basketball',
 'Baseball',
 'New Cars',
 'International Business',
 'Campaign Stops',
 'Pro Football',
 'Tennis',
 'Weddings',
 'Europe',
 'Horse Racing',
 'Americas',
 'Men’s Style',
 'Korean',
 'América'}

In [102]:
cnt = 0
new_val = []
for i in range(len(val)):
    if val[i] is not None:
        new_val.append(val[i])
        cnt += 1
print(cnt)

900


______________

##MongoDB

In [14]:
client = MongoClient('localhost', 27017)
db = client.nytimes
collection = db.articles_info

Insert result into MongoDB database

In [40]:
result[0:10]

[{'date': '2016-01-01',
  'doc_type': 'article',
  'headline': '50 Comics Walk Into a Club. No, You Haven’t Heard This One.',
  'id': '56842cab79881066fdb9523e',
  'keywords': ['Comedy and Humor', 'New Year'],
  'lead_paragraph': '“50 First Jokes NYC” celebrates its 10th anniversary with a show at the Bell House in Brooklyn on Saturday, with 50 comedians trying for new laughs in the new year.',
  'section': 'Arts',
  'snippet': '“50 First Jokes NYC” celebrates its 10th anniversary with a show at the Bell House in Brooklyn on Saturday, with 50 comedians trying for new laughs in the new year.',
  'source': 'The New York Times',
  'subsection': None,
  'summary': 'A group of New York comics found that out almost by accident a decade ago, and on Saturday “50 First Jokes NYC” celebrates its 10th anniversary with a show at the Bell House in Gowanus, Brooklyn, that will no doubt generate lots of laughs and perhaps the occasional groan. “They’re really putting it on the line and doing somethin

In [61]:
#collection.insert(result)

[ObjectId('56e0ebba9d1fa206b01dd128'),
 ObjectId('56e0ebba9d1fa206b01dd129'),
 ObjectId('56e0ebba9d1fa206b01dd12a'),
 ObjectId('56e0ebba9d1fa206b01dd12b'),
 ObjectId('56e0ebba9d1fa206b01dd12c'),
 ObjectId('56e0ebba9d1fa206b01dd12d'),
 ObjectId('56e0ebba9d1fa206b01dd12e'),
 ObjectId('56e0ebba9d1fa206b01dd12f'),
 ObjectId('56e0ebba9d1fa206b01dd130'),
 ObjectId('56e0ebba9d1fa206b01dd131'),
 ObjectId('56e0ebba9d1fa206b01dd132'),
 ObjectId('56e0ebba9d1fa206b01dd133'),
 ObjectId('56e0ebba9d1fa206b01dd134'),
 ObjectId('56e0ebba9d1fa206b01dd135'),
 ObjectId('56e0ebba9d1fa206b01dd136'),
 ObjectId('56e0ebba9d1fa206b01dd137'),
 ObjectId('56e0ebba9d1fa206b01dd138'),
 ObjectId('56e0ebba9d1fa206b01dd139'),
 ObjectId('56e0ebba9d1fa206b01dd13a'),
 ObjectId('56e0ebba9d1fa206b01dd13b'),
 ObjectId('56e0ebba9d1fa206b01dd13c'),
 ObjectId('56e0ebba9d1fa206b01dd13d'),
 ObjectId('56e0ebba9d1fa206b01dd13e'),
 ObjectId('56e0ebba9d1fa206b01dd13f'),
 ObjectId('56e0ebba9d1fa206b01dd140'),
 ObjectId('56e0ebba9d1fa2

In [56]:
summaries, database_articles = get_modeling_summaries_from_db(collection)

In [58]:
summaries[0]

'Comedy and Humor New Year 50 Comics Walk Into a Club. No, You Haven’t Heard This One. Arts News A group of New York comics found that out almost by accident a decade ago, and on Saturday “50 First Jokes NYC” celebrates its 10th anniversary with a show at the Bell House in Gowanus, Brooklyn, that will no doubt generate lots of laughs and perhaps the occasional groan. “They’re really putting it on the line and doing something that’s completely untested.” Mr. O’Donnell, one of the show’s creators and the event’s host, said the idea was born in the middle of the last decade when he and other comics and their friends convened on the Lower East Side for some informal joke-swapping to start the year. Five participating comics agreed to answer five frivolous questions by email:  For Anthony DeVito, who is doing the show for the fifth time:   Singers, athletes, actors all have routines they go through before a concert, a game, a performance to prepare physically or mentally or both. '

###LDA

In [None]:
lda_model, corpus = build_lda_model(summaries,11)

In [None]:
similarities, lda_docs = get_lda_similarities(lda_model,corpus,0)

In [30]:
print("Original doc: ")
print(database_articles[0]['section'])
print(database_articles[0]['url'])
print("Similar documents: ")
cnt = 0
for sim in similarities[1:]:
    #if database_articles[sim[0]]['section'] != 'Paid Death Notices':
    print(database_articles[sim[0]]['section'])
    print(database_articles[sim[0]]['url'])
    print("________________________________")
    cnt += 1
    if cnt >= 10:
        break

Original doc: 
Arts
http://www.nytimes.com/2016/01/01/arts/50-comics-walk-into-a-club-no-you-havent-heard-this-one.html
Similar documents: 
Business Day
http://www.nytimes.com/2016/01/01/business/dealbook/a-roller-coaster-year-ends-with-us-markets-mostly-down.html
________________________________
Sports
http://www.nytimes.com/2016/01/01/sports/chris-mullin-sees-mixed-signs-in-st-johns-loss.html
________________________________
Food
http://www.nytimes.com/2016/01/01/dining/happy-new-year.html
________________________________
Arts
http://www.nytimes.com/2016/01/02/arts/music/natalie-cole-grammy-award-winning-singer-dies-at-65.html
________________________________
Sports
http://www.nytimes.com/2016/01/02/sports/hockey/dallas-stars-visit-new-york-and-theyre-bringing-a-defense.html
________________________________
Your Money
http://www.nytimes.com/2016/01/02/your-money/want-to-keep-new-years-resolutions-consider-the-consequences-of-failing.html
________________________________
Business Day


In [35]:
dic = {}
for i in range(0,len(doc_vecs)):
    sims = sorted(enumerate(index[doc_vecs[i]]), key=lambda item: -item[1])
    #dic[database_articles[i]['_id']] = {}
    dic2 = {}
    for sim in sims[1:]:
        dic2[database_articles[sim[0]]['id']] = str(sim[1])
    dic[database_articles[i]['id']] = dic2

Update database

In [183]:
cnt = 0
for key in dic:
    modified_doc = db.articles_info.update_one({"id": key},{"$set": {"similar_docs": dic[key]}})
    if modified_doc.matched_count == 1:
        cnt += 1
print(cnt)

889


In [174]:
dic

{ObjectId('56e0ebba9d1fa206b01dd128'): {ObjectId('56e0ebba9d1fa206b01dd129'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd12a'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd12b'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd12c'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd12d'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd12e'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd12f'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd130'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd131'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd132'): '0.190837',
  ObjectId('56e0ebba9d1fa206b01dd133'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd134'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd135'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd136'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd137'): '0.748425',
  ObjectId('56e0ebba9d1fa206b01dd138'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd139'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd13a'): '0.146628',
  ObjectId('56e0ebba9d1fa206b01dd13b'): '0.0',
  ObjectId('56e0ebba9d1fa206b01dd13c'): '1.0',
  Objec

In [24]:
result_one = collection.find_one()

In [25]:
result_one['url']

'http://www.nytimes.com/2016/01/01/arts/50-comics-walk-into-a-club-no-you-havent-heard-this-one.html'

In [20]:
result_one['summary']

'A group of New York comics found that out almost by accident a decade ago, and on Saturday “50 First Jokes NYC” celebrates its 10th anniversary with a show at the Bell House in Gowanus, Brooklyn, that will no doubt generate lots of laughs and perhaps the occasional groan. “They’re really putting it on the line and doing something that’s completely untested.” Mr. O’Donnell, one of the show’s creators and the event’s host, said the idea was born in the middle of the last decade when he and other comics and their friends convened on the Lower East Side for some informal joke-swapping to start the year. Five participating comics agreed to answer five frivolous questions by email:  For Anthony DeVito, who is doing the show for the fifth time:   Singers, athletes, actors all have routines they go through before a concert, a game, a performance to prepare physically or mentally or both. '

In [15]:
cursor = collection.find()

In [16]:
new_database_articles = []
for article in cursor:
    new_database_articles.append(article)

In [17]:
new_database_articles[0]

{'_id': ObjectId('56e0ebba9d1fa206b01dd128'),
 'date': '2016-01-01',
 'doc_type': 'article',
 'headline': '50 Comics Walk Into a Club. No, You Haven’t Heard This One.',
 'id': '56842cab79881066fdb9523e',
 'keywords': ['Comedy and Humor', 'New Year'],
 'lead_paragraph': '“50 First Jokes NYC” celebrates its 10th anniversary with a show at the Bell House in Brooklyn on Saturday, with 50 comedians trying for new laughs in the new year.',
 'section': 'Arts',
 'similar_docs': {'55f1d5b379881015b22f3201': '0.0',
  '567c27b479881009570cd242': '0.0',
  '568163cd79881067a393555b': '0.943247',
  '5681677e79881067a3935561': '0.0',
  '568180d97988100cd95e574e': '0.0',
  '5681b0627988100cd95e57a9': '0.0148684',
  '568259a97988100cd95e58ae': '0.0',
  '568275c87988100cd95e58f4': '0.0',
  '568275c97988100cd95e58f5': '0.0',
  '568275ce7988100cd95e58f6': '0.0',
  '5682928c7988100cd95e5934': '0.0',
  '568296287988100cd95e593b': '0.0',
  '568297a07988100cd95e593f': '0.0',
  '5682ac7f7988100cd95e5970': '0.0

In [18]:
similar_articles = find_most_similar_articles(new_database_articles[0],new_database_articles)

In [20]:
similar_articles[0]

{'_id': ObjectId('56e0ebba9d1fa206b01dd17f'),
 'author': {'firstname': 'Eric', 'lastname': 'SCHMITT'},
 'date': '2016-01-01',
 'doc_type': 'article',
 'headline': 'Monitoring of Terrorism Threats Has Risen, Official Says',
 'id': '5685dfcb7988101681c95ce8',
 'keywords': ['Terrorism',
  'Espionage and Intelligence Services',
  'Paris Attacks (November 2015)',
  'New Year'],
 'lead_paragraph': 'As the new year approached on Thursday, the police in Munich evacuated two train stations and warned residents to avoid large groups, citing “concrete hints” about a possible attack.',
 'section': 'World',
 'similar_docs': {'55f1d5b379881015b22f3201': '0.0',
  '567c27b479881009570cd242': '0.0',
  '568163cd79881067a393555b': '0.943247',
  '5681677e79881067a3935561': '0.0',
  '568180d97988100cd95e574e': '0.0',
  '5681b0627988100cd95e57a9': '0.0148684',
  '568259a97988100cd95e58ae': '0.0',
  '568275c87988100cd95e58f4': '0.0',
  '568275c97988100cd95e58f5': '0.0',
  '568275ce7988100cd95e58f6': '0.0',
 

In [19]:
for i in range(0,len(similar_articles)):
    print(similar_articles[i]['url'])

http://www.nytimes.com/2016/01/01/world/europe/monitoring-of-terrorism-threats-has-risen-official-says.html
http://www.nytimes.com/2016/01/03/nyregion/a-different-note-on-race-at-yale.html
http://www.nytimes.com/2016/01/03/fashion/weddings/fabio-monteiro-mitchell-travers.html
http://www.nytimes.com/2016/01/05/opinion/campaign-stops/time-to-talk-robots.html
http://www.nytimes.com/2016/01/04/t-magazine/goji-berry-ginger-tea-recipe-dimes-restaurant-ny.html
http://www.nytimes.com/2016/01/02/nyregion/number-of-traffic-deaths-in-new-york-falls-for-a-second-year-in-a-row.html
http://www.nytimes.com/2016/01/01/nyregion/a-family-in-transition-rebuilds-its-connections.html
http://www.nytimes.com/2016/01/04/nyregion/depression-leaves-a-man-adrift-but-a-stranger-helps-him-find-purpose.html
http://www.nytimes.com/2016/01/03/magazine/letter-of-recommendation-terro-liquid-ant-bait.html
