Week 7 - Distinguishing Sentiments

Pull last 100 tweets from BBC, CBS, CNN, Fox, and NY Times
Run Vader analysis to determine positive, negative, or neutral sentiment
Pull into DataFrame the tweet's source account, tex, date, and Vader sentiment scores
Export Data from DataFrame into CSV File
Create Scatter Plot and Bar Plot of sentiment data
Save PNG images from each plot
Write written description of three observable trends

Citation: Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text. Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.

In [1]:
# Dependencies

import json
import time
import tweepy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style

from config import (consumer_key, consumer_secret, access_token, access_token_secret)

In [2]:
# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

In [3]:
# Import and Initialize Sentiment Analyzer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

In [4]:
# Tweet Texts & Timestamps
target_terms = ("BBC", "CBS", "CNN", "FoxNews", "nytimes")

In [5]:
# List to hold results
results_list = []    
    
# Counter to keep track of the number of tweets retrieved
counter = 0

In [6]:
# Loop through all target users
for target in target_terms:

    # Variable list for holding Vader Sentiment Results    
    compound_list = []
    positive_list = []
    negative_list = []
    neutral_list = []
    for x in range (5):
    
        # Get all tweets from home feed
        public_tweets = api.user_timeline(target, recent=True, page=x)
        
        # Loop through all tweets
        for tweet in public_tweets:
        
            # Run Vader Analysis on each tweet
            compound = analyzer.polarity_scores(tweet["text"])["compound"]
            pos = analyzer.polarity_scores(tweet["text"])["pos"]
            neu = analyzer.polarity_scores(tweet["text"])["neu"]
            neg = analyzer.polarity_scores(tweet["text"])["neg"]
        
            compound_list.append(compound)
            positive_list.append(pos)
            negative_list.append(neg)
            neutral_list.append(neu)
        
            # Add sentiments for each tweet into an array
            results_list.append({"User": target, 
                           "Source account": tweet["source"],
                           "Date": tweet["created_at"],        
                           "Text": tweet["text"],
                           "Compound": compound,
                           "Positive": pos,
                           "Negative": neu,
                           "Neutral": neg,
                           "Tweets Ago": counter})
     
            # Add to counter 
            counter = counter + 1
    
print(results_list)
print("")

[{'User': 'BBC', 'Source account': '<a href="http://www.socialflow.com" rel="nofollow">SocialFlow</a>', 'Date': 'Mon Sep 17 18:04:04 +0000 2018', 'Text': 'University Challenge icons @e_monkman and @Bobby_Seagull road trip around Britain in search of hidden gems of Briti… https://t.co/ehTKqStAoJ', 'Compound': 0.0772, 'Positive': 0.071, 'Negative': 0.929, 'Neutral': 0.0, 'Tweets Ago': 0}, {'User': 'BBC', 'Source account': '<a href="http://www.socialflow.com" rel="nofollow">SocialFlow</a>', 'Date': 'Mon Sep 17 17:04:05 +0000 2018', 'Text': 'Australia has ordered an investigation into the discovery of sewing needles hidden in strawberries.… https://t.co/4VUFTkoOKt', 'Compound': 0.0, 'Positive': 0.0, 'Negative': 1.0, 'Neutral': 0.0, 'Tweets Ago': 1}, {'User': 'BBC', 'Source account': '<a href="http://www.socialflow.com" rel="nofollow">SocialFlow</a>', 'Date': 'Mon Sep 17 16:04:02 +0000 2018', 'Text': 'The finalists of the Comedy Wildlife Photography Awards have been announced! 🐿😂\n https://

In [7]:
#Convert to DataFrame

results_list = pd.DataFrame.from_dict(results_list)
results_list


Unnamed: 0,Compound,Date,Negative,Neutral,Positive,Source account,Text,Tweets Ago,User
0,0.0772,Mon Sep 17 18:04:04 +0000 2018,0.929,0.000,0.071,"<a href=""http://www.socialflow.com"" rel=""nofol...",University Challenge icons @e_monkman and @Bob...,0,BBC
1,0.0000,Mon Sep 17 17:04:05 +0000 2018,1.000,0.000,0.000,"<a href=""http://www.socialflow.com"" rel=""nofol...",Australia has ordered an investigation into th...,1,BBC
2,0.6996,Mon Sep 17 16:04:02 +0000 2018,0.674,0.000,0.326,"<a href=""http://www.socialflow.com"" rel=""nofol...",The finalists of the Comedy Wildlife Photograp...,2,BBC
3,0.0000,Mon Sep 17 15:03:04 +0000 2018,1.000,0.000,0.000,"<a href=""http://www.socialflow.com"" rel=""nofol...",Who knew Paddington had such a foul mouth?! 😱🐻...,3,BBC
4,-0.5574,Mon Sep 17 14:03:05 +0000 2018,0.854,0.146,0.000,"<a href=""http://www.socialflow.com"" rel=""nofol...",The UK's oldest person has seen the outbreak a...,4,BBC
5,0.4404,Mon Sep 17 13:04:04 +0000 2018,0.791,0.000,0.209,"<a href=""http://www.socialflow.com"" rel=""nofol...",This is truly magical: Sue and her underwater ...,5,BBC
6,0.7184,Mon Sep 17 12:02:06 +0000 2018,0.708,0.000,0.292,"<a href=""http://www.socialflow.com"" rel=""nofol...",The Duchess of Sussex has launched her first s...,6,BBC
7,0.0000,Mon Sep 17 11:27:23 +0000 2018,1.000,0.000,0.000,"<a href=""http://twitter.com"" rel=""nofollow"">Tw...",RT @bbcpress: 🏴󠁧󠁢󠁷󠁬󠁳󠁿 @larrylamb47 leads cast ...,7,BBC
8,0.0000,Mon Sep 17 11:03:04 +0000 2018,1.000,0.000,0.000,"<a href=""http://www.socialflow.com"" rel=""nofol...",🌟🌠✨ The International Dark Sky Association has...,8,BBC
9,0.0000,Mon Sep 17 10:03:05 +0000 2018,1.000,0.000,0.000,"<a href=""http://www.socialflow.com"" rel=""nofol...","Meet Vera, a 102-year-old Manchester City masc...",9,BBC


In [8]:
#Set News source as User Index

results_list = results_list.set_index("User")
results_list.head()

Unnamed: 0_level_0,Compound,Date,Negative,Neutral,Positive,Source account,Text,Tweets Ago
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BBC,0.0772,Mon Sep 17 18:04:04 +0000 2018,0.929,0.0,0.071,"<a href=""http://www.socialflow.com"" rel=""nofol...",University Challenge icons @e_monkman and @Bob...,0
BBC,0.0,Mon Sep 17 17:04:05 +0000 2018,1.0,0.0,0.0,"<a href=""http://www.socialflow.com"" rel=""nofol...",Australia has ordered an investigation into th...,1
BBC,0.6996,Mon Sep 17 16:04:02 +0000 2018,0.674,0.0,0.326,"<a href=""http://www.socialflow.com"" rel=""nofol...",The finalists of the Comedy Wildlife Photograp...,2
BBC,0.0,Mon Sep 17 15:03:04 +0000 2018,1.0,0.0,0.0,"<a href=""http://www.socialflow.com"" rel=""nofol...",Who knew Paddington had such a foul mouth?! 😱🐻...,3
BBC,-0.5574,Mon Sep 17 14:03:05 +0000 2018,0.854,0.146,0.0,"<a href=""http://www.socialflow.com"" rel=""nofol...",The UK's oldest person has seen the outbreak a...,4


In [9]:
# Save to DataFrame
results_list.to_csv("../twitter_sentiment_recent.csv", index=False)

In [16]:
# Target News Twitter Accounts
target = ("BBC", "CBS", "CNN", "FoxNews", "nytimes")

# List to hold results
results_list = []

# Loop through all target users
for target in target:

    # Variable for holding the oldest tweet
    oldest_tweet = None

    # Variables for holding sentiments
    compound_list = []
    positive_list = []
    negative_list = []
    neutral_list = []

    # Loop through 10 times
    for x in range(1):

        # Run search around each tweet
        public_tweets = api.search(
            target, count=100, result_type="recent")

        # Loop through all tweets
        for tweet in public_tweets["statuses"]:

                # Run Vader Analysis on each tweet
                results = analyzer.polarity_scores(tweet["text"])
                compound = results["compound"]
                pos = results["pos"]
                neu = results["neu"]
                neg = results["neg"]

                # Add each value to the appropriate list
                compound_list.append(compound)
                positive_list.append(pos)
                negative_list.append(neg)
                neutral_list.append(neu)
                
        # Set the new oldest_tweet value
        oldest_tweet = tweet["id"] - 1

    # Store the Average Sentiments
    sentiment_overall = {
        "News": target,
        "Compound": np.mean(compound_list),
        "Positive": np.mean(positive_list),
        "Neutral": np.mean(negative_list),
        "Negative": np.mean(neutral_list),
    }

    # Print the Sentiments
    print(sentiment_overall)
    print()
    
    # Append twitter to 'results_list'
    results_list.append(sentiment_overall)

{'News': 'BBC', 'Compound': 0.021844999999999993, 'Positive': 0.07412999999999999, 'Neutral': 0.062349999999999996, 'Negative': 0.86351}

{'News': 'CBS', 'Compound': 0.172075, 'Positive': 0.12394999999999999, 'Neutral': 0.04938, 'Negative': 0.8266899999999999}

{'News': 'CNN', 'Compound': 0.07276400000000001, 'Positive': 0.07793, 'Neutral': 0.048440000000000004, 'Negative': 0.8736199999999998}

{'News': 'FoxNews', 'Compound': -0.16028900000000001, 'Positive': 0.0516, 'Neutral': 0.11134000000000001, 'Negative': 0.83702}

{'News': 'nytimes', 'Compound': 0.007576, 'Positive': 0.06727999999999999, 'Neutral': 0.060090000000000005, 'Negative': 0.8726200000000002}



In [20]:
#Convert to DataFrame

sentiment_overall = {"News": target,
            "Compound": compound_list,
            "Positive": positive_list,
            "Neutral": neutral_list,
            "Negative": negative_list
          }
pd.DataFrame(results_list)

Unnamed: 0,Compound,Negative,Neutral,News,Positive
0,0.021845,0.86351,0.06235,BBC,0.07413
1,0.172075,0.82669,0.04938,CBS,0.12395
2,0.072764,0.87362,0.04844,CNN,0.07793
3,-0.160289,0.83702,0.11134,FoxNews,0.0516
4,0.007576,0.87262,0.06009,nytimes,0.06728
