In [18]:
from bs4 import BeautifulSoup
import requests
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer
from textblob import classifiers
import pprint

We're going to conduct our analysis on a range of news sources, across mediums and political positions. Roughly, we've organized them into three classes:  Newspapers (e.g. USA Today), Networks (e.g. CNN), and primarily online sources (e.g. Reuters).

In [23]:
newspapers = ['http://www.usatoday.com','http://www.wsj.com','http://www.nytimes.com',\
              'http://www.washingtonpost.com', 'http://www.latimes.com', 'http://www.nypost.com',\
             'http://www.newsday.com', 'http://www.chicagotribune.com', 'http://www.nydailynews.com',\
              'http://www.denverpost.com', 'http://www.chron.com', 'http://www.dallasnews.com',\
              'http://www.bostonglobe.com', 'http://www.seattletimes.com', 'http://www.tampabay.com']

networks = ['http://www.cnn.com', 'http://www.msnbc.com', 'http://www.foxnews.com', 'http://abcnews.go.com',\
           'http://www.cbsnews.com']

online_primary = ['http://www.reuters.com','http://news.yahoo.com', 'http://www.news.aol.com','http://www.huffingtonpost.com',\
              'http://www.theatlantic.com']

In order to search for immigration stories, we're going to use a list of immigration-related root words

In [27]:
immig_roots = ['immig', 'sessions', 'border', 'migran']

First, let's analyze the newspapers

In [36]:
newspaper_summary = {}
for paper in newspapers:
    summary ={}
    print "Summary for %s" %paper
    r = requests.get(paper)
    soup = BeautifulSoup(r.text, "html.parser")
    paragraphs = soup.find_all("p")
    stories_to_read = []
    for item in paragraphs:
        stories_to_read.append(item.get_text())
    for story in stories_to_read:
        story_summary = {}
        text_story = TextBlob(story)
        for phrase in immig_roots:
            if str(text_story.noun_phrases).find(phrase) > 0:
                story_summary['nouns'] = text_story.noun_phrases
                story_summary['polarity'] = text_story.sentiment.polarity
                story_summary['subjectivity'] = text_story.sentiment.subjectivity
        if len(story_summary)>0:
            pprint.pprint(story_summary)
    pprint.pprint(summary)
    newspaper_summary[paper] = summary
print "Overall Summary"
pprint.pprint(newspaper_summary)

Summary for http://www.usatoday.com
{'nouns': WordList([u'jeff sessions', u'don \u2019 t', u'separate parents']),
 'polarity': 0.0,
 'subjectivity': 0.0}
{'nouns': WordList([u'facebook', u'campaign raises $ 5m', u'reunite immigrant families']),
 'polarity': 0.0,
 'subjectivity': 0.0}
{}
Summary for http://www.wsj.com
{'nouns': WordList([u'trump', u'broad bill legislation', u'migrant children']),
 'polarity': -0.012500000000000002,
 'subjectivity': 0.3375000000000001}
{}
Summary for http://www.nytimes.com
{'nouns': WordList([u'trump', u'\u2019 s policy', u'immigrant children', u'mr. trump']),
 'polarity': 0.033333333333333326,
 'subjectivity': 0.5}
{'nouns': WordList([u'trump', u'border policy']),
 'polarity': 0.5,
 'subjectivity': 0.5}
{}
Summary for http://www.washingtonpost.com
{}
Summary for http://www.latimes.com
{}
Summary for http://www.nypost.com
{}
Summary for http://www.newsday.com
{'nouns': WordList([u'spin cycle cuomo', u'ny', u'immigrant policy']),
 'polarity': 0.0,
 'subje

In [8]:
#Filter to find stories on immigration

overall_summary = {}
for story in stories_to_read:
    text_story = TextBlob(story)
    summary = {}
    for phrase in immig_roots:
        if str(text_story.noun_phrases).find(phrase) >0:
            summary['nouns'] = text_story.noun_phrases
            summary['polarity'] = text_story.sentiment.polarity
            summary['subjectivity'] = text_story.sentiment.subjectivity
            overall_summary[story] = summary
pprint.pprint(overall_summary)

In [11]:
sentiments_list = []
for story in stories_to_read:
    analysis_subject = TextBlob(story, analyzer = NaiveBayesAnalyzer())
    sentiments_list.append(analysis_subject.sentiment)

In [12]:
print sentiments_list

[Sentiment(classification='pos', p_pos=0.9999461870755826, p_neg=5.3812924417559616e-05), Sentiment(classification='pos', p_pos=0.9734438181980501, p_neg=0.02655618180194581), Sentiment(classification='pos', p_pos=0.9034787497095463, p_neg=0.09652125029045362), Sentiment(classification='pos', p_pos=0.92256505125051, p_neg=0.07743494874948785), Sentiment(classification='pos', p_pos=0.9852377166378377, p_neg=0.014762283362160752), Sentiment(classification='pos', p_pos=0.9754756368027491, p_neg=0.02452436319724911), Sentiment(classification='pos', p_pos=0.5903471817508893, p_neg=0.4096528182491081), Sentiment(classification='pos', p_pos=0.6258378387367102, p_neg=0.3741621612632887), Sentiment(classification='pos', p_pos=0.9897096742168187, p_neg=0.010290325783181883), Sentiment(classification='pos', p_pos=0.9976993795974743, p_neg=0.002300620402526608), Sentiment(classification='pos', p_pos=0.8846630318682114, p_neg=0.11533696813179116), Sentiment(classification='pos', p_pos=0.99882778211

In [14]:
for story in stories_to_read:
    analysis_subject = TextBlob(story, analyzer = NaiveBayesAnalyzer())
    for item in analysis_subject.noun_phrases:
        print item

trump
’ s policy
immigrant children
mr. trump
requiring
federal court system
new category
kirstjen nielsen
homeland security
’ s
ms. nielsen
’ s
global criticism
prominent creators
tv
modern family ”
“ family
guy
recent days
news channel
trump
’ s threat
impose tariffs
chinese product
china
u.s.
’ s
american companies
commerce secretary
wilbur l. ross jr.
kremlin-linked
forthcoming news articles
job market
billionaire conservatives
sophisticated fight
new public rail projects
bus routes
ms. nixon
harsher scrutiny
’ s
iran
korea
eritrea
world ’ s
important human rights body
european officials
greece
comeback story
italy
euro ’ s issues
nepal
’ s
trump
border policy
trump
administration ’ s practice
top aides
stephen miller
federal program
consumer advocates
great gift ideas
wirecutter
dismissive. ”
warwick
n.y.
world cup fans
winter olympics
joel mesler
successful gallery
lower
side
new chapter
buried
n.i.h
disturbing examples
alcohol industry
america
’ s
bill
blasio
certain kinds
crimi

In [15]:
for story in stories_to_read:
    text_story = TextBlob(story)
    print text_story.noun_phrases

[u'trump', u'\u2019 s policy', u'immigrant children', u'mr. trump']
[u'requiring', u'federal court system', u'new category']
[u'kirstjen nielsen', u'homeland security', u'\u2019 s']
[u'ms. nielsen', u'\u2019 s', u'global criticism']
[u'prominent creators', u'tv', u'modern family \u201d', u'\u201c family', u'guy', u'recent days', u'news channel']
[u'trump', u'\u2019 s threat', u'impose tariffs', u'chinese product', u'china', u'u.s.', u'\u2019 s', u'american companies']
[u'commerce secretary', u'wilbur l. ross jr.', u'kremlin-linked', u'forthcoming news articles']
[u'job market']
[u'billionaire conservatives', u'sophisticated fight', u'new public rail projects', u'bus routes']
[u'ms. nixon', u'harsher scrutiny', u'\u2019 s']
[u'iran', u'korea', u'eritrea', u'world \u2019 s', u'important human rights body']
[u'european officials', u'greece', u'comeback story', u'italy', u'euro \u2019 s issues']
[u'nepal']
[u'\u2019 s']
[u'trump', u'border policy']
[u'trump', u'administration \u2019 s prac

In [16]:
#Filter to find stories on immigration
immig_roots = ['immig', 'sessions', 'border', 'migran']
overall_summary = {}
for story in stories_to_read:
    text_story = TextBlob(story)
    summary = {}
    for phrase in immig_roots:
        if str(text_story.noun_phrases).find(phrase) >0:
            summary['nouns'] = text_story.noun_phrases
            summary['polarity'] = text_story.sentiment.polarity
            summary['subjectivity'] = text_story.sentiment.subjectivity
            overall_summary[story] = summary
pprint.pprint(overall_summary)

{u'\n            Plus: The Trump administration defends its border policy, and more.        ': {'nouns': WordList([u'trump', u'border policy']),
                                                                                                'polarity': 0.5,
                                                                                                'subjectivity': 0.5},
 u'Congressional Republicans tried to defuse an escalating crisis over President Trump\u2019s policy of separating immigrant children from parents who cross illegally into the United States. \n\nBut the Senate had one plan, the House another, and Mr. Trump remained defiant, refusing to act on his own.': {'nouns': WordList([u'trump', u'\u2019 s policy', u'immigrant children', u'mr. trump']),
                                                                                                                                                                                                                                      

In [17]:
#Want to find the links to the stories above
search_phrases = []
for key in overall_summary:
    search_phrases.append(key)
print search_phrases

[u'\n            Plus: The Trump administration defends its border policy, and more.        ', u'Congressional Republicans tried to defuse an escalating crisis over President Trump\u2019s policy of separating immigrant children from parents who cross illegally into the United States. \n\nBut the Senate had one plan, the House another, and Mr. Trump remained defiant, refusing to act on his own.']
