In [45]:
from bs4 import BeautifulSoup
import requests
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer
from textblob import classifiers
import pprint
from numpy import mean

We're going to conduct our analysis on a range of news sources, across mediums and political positions. Roughly, we've organized them into three classes:  Newspapers (e.g. USA Today), Networks (e.g. CNN), and primarily online sources (e.g. Reuters).

In [23]:
newspapers = ['http://www.usatoday.com','http://www.wsj.com','http://www.nytimes.com',\
              'http://www.washingtonpost.com', 'http://www.latimes.com', 'http://www.nypost.com',\
             'http://www.newsday.com', 'http://www.chicagotribune.com', 'http://www.nydailynews.com',\
              'http://www.denverpost.com', 'http://www.chron.com', 'http://www.dallasnews.com',\
              'http://www.bostonglobe.com', 'http://www.seattletimes.com', 'http://www.tampabay.com']

networks = ['http://www.cnn.com', 'http://www.msnbc.com', 'http://www.foxnews.com', 'http://abcnews.go.com',\
           'http://www.cbsnews.com']

online_primary = ['http://www.reuters.com','http://news.yahoo.com', 'http://www.news.aol.com','http://www.huffingtonpost.com',\
              'http://www.theatlantic.com']

In order to search for immigration stories, we're going to use a list of immigration-related root words

In [38]:
immigration_roots = ['immig', 'sessions', 'border', 'migran', 'nielsen']

First, let's analyze the newspapers

In [36]:
newspaper_summary = {}
for paper in newspapers:
    summary ={}
    print "Summary for %s" %paper
    r = requests.get(paper)
    soup = BeautifulSoup(r.text, "html.parser")
    paragraphs = soup.find_all("p")
    stories_to_read = []
    for item in paragraphs:
        stories_to_read.append(item.get_text())
    for story in stories_to_read:
        story_summary = {}
        text_story = TextBlob(story)
        for phrase in immig_roots:
            if str(text_story.noun_phrases).find(phrase) > 0:
                story_summary['nouns'] = text_story.noun_phrases
                story_summary['polarity'] = text_story.sentiment.polarity
                story_summary['subjectivity'] = text_story.sentiment.subjectivity
        if len(story_summary)>0:
            pprint.pprint(story_summary)
    pprint.pprint(summary)
    newspaper_summary[paper] = summary
print "Overall Summary"
pprint.pprint(newspaper_summary)

Summary for http://www.usatoday.com
{'nouns': WordList([u'jeff sessions', u'don \u2019 t', u'separate parents']),
 'polarity': 0.0,
 'subjectivity': 0.0}
{'nouns': WordList([u'facebook', u'campaign raises $ 5m', u'reunite immigrant families']),
 'polarity': 0.0,
 'subjectivity': 0.0}
{}
Summary for http://www.wsj.com
{'nouns': WordList([u'trump', u'broad bill legislation', u'migrant children']),
 'polarity': -0.012500000000000002,
 'subjectivity': 0.3375000000000001}
{}
Summary for http://www.nytimes.com
{'nouns': WordList([u'trump', u'\u2019 s policy', u'immigrant children', u'mr. trump']),
 'polarity': 0.033333333333333326,
 'subjectivity': 0.5}
{'nouns': WordList([u'trump', u'border policy']),
 'polarity': 0.5,
 'subjectivity': 0.5}
{}
Summary for http://www.washingtonpost.com
{}
Summary for http://www.latimes.com
{}
Summary for http://www.nypost.com
{}
Summary for http://www.newsday.com
{'nouns': WordList([u'spin cycle cuomo', u'ny', u'immigrant policy']),
 'polarity': 0.0,
 'subje

In [42]:
newspaper_summary = {}
overall_polarity = []
for paper in newspapers:
    subjectivity = 0
    polarity = 0
    num_stories = 0
    print "Working on %s" %paper
    r = requests.get(paper)
    soup = BeautifulSoup(r.text, "html.parser")
    paragraphs = soup.find_all("p")
    stories_to_read = []
    for item in paragraphs:
        stories_to_read.append(item.get_text())
    for story in stories_to_read:
        text_story = TextBlob(story)
        words_found = 0
        words_checked = 0
        while words_found < 1 and words_checked < len(immigration_roots):
            for phrase in immig_roots:
                if str(text_story.noun_phrases).find(phrase) > 0:
                    polarity += text_story.sentiment.polarity
                    subjectivity += text_story.sentiment.subjectivity
                    num_stories +=1
                    words_found +=1
                words_checked +=1
    if num_stories > 0:
        paper_subjectivity = subjectivity/num_stories
        paper_polarity = polarity/num_stories
        overall_polarity.append(paper_polarity)
    else:
        paper_subjectivity = "Not enough data"
        paper_polarity = "Not enough data"
    print "Subjectivity Score for %s is %s" %(paper, paper_subjectivity)
    print "Polarity Score for %s is %s" %(paper, paper_polarity)

Working on http://www.usatoday.com
Subjectivity Score for http://www.usatoday.com is 0.0
Polarity Score for http://www.usatoday.com is 0.0
Working on http://www.wsj.com
Subjectivity Score for http://www.wsj.com is 0.1125
Polarity Score for http://www.wsj.com is -0.00416666666667
Working on http://www.nytimes.com
Subjectivity Score for http://www.nytimes.com is 0.207792207792
Polarity Score for http://www.nytimes.com is 0.0194805194805
Working on http://www.washingtonpost.com
Subjectivity Score for http://www.washingtonpost.com is Not enough data
Polarity Score for http://www.washingtonpost.com is Not enough data
Working on http://www.latimes.com
Subjectivity Score for http://www.latimes.com is Not enough data
Polarity Score for http://www.latimes.com is Not enough data
Working on http://www.nypost.com
Subjectivity Score for http://www.nypost.com is Not enough data
Polarity Score for http://www.nypost.com is Not enough data
Working on http://www.newsday.com
Subjectivity Score for http:/

In [49]:
grand_polarity = mean(overall_polarity)
print "Overall polarity score for this topic is %s" %grand_polarity

Overall polarity score for this topic is 0.002870690817119389
