In [1]:
"""
Three findings:
1. There is no significant relationship between tweet polarity and time. For a given user, the polarity of its tweets does not
depend on time.
2. Although many tweets have positive or negative polarity, there is still a considerable number of tweets with a polarity of
zero.
3. Different users have very different levels of tweet polarity. The overall polarity of some users (e.g., CBS) is very positive,
while other users (e.g., CNN) tend to tweet messages with negative polarity.

"""

'\nThree findings:\n1. There is no significant relationship between tweet polarity and time. For a given user, the polarity of its tweets does not\ndepend on time.\n2. Although many tweets have positive or negative polarity, there is still a considerable number of tweets with a polarity of\nzero.\n3. Different users have very different levels of tweet polarity. The overall polarity of some users (e.g., CBS) is very positive,\nwhile other users (e.g., CNN) tend to tweet messages with negative polarity.\n\n'

In [1]:
%matplotlib notebook

In [2]:
# Dependencies
import json
import tweepy 
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [3]:
# Import Twitter API Keys
from config import consumer_key, consumer_secret, access_token, access_token_secret

In [4]:
# Setup Tweepy API Authentication
# Build an OAuth handler from the consumer key/secret imported from config,
# then attach the access token and its secret to the same handler.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# A JSONParser is supplied as the parser; later cells index the returned tweets
# like dictionaries (e.g. tweet["text"], tweet["user"]["name"]).
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

In [5]:
# Import and initialize the VADER sentiment analyzer.
# The single `analyzer` instance is reused to score every tweet in the collection loop.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

In [6]:
# Accounts whose timelines are fetched; each entry is passed to api.user_timeline.
# NOTE(review): these look like Twitter screen names (handles) — confirm; the rows
# collected later are labelled with tweet["user"]["name"], not with these strings.
target_user = ["BBC", "CBS", "CNN", "FoxNews", "nytimes"]

In [7]:
# List that accumulates the raw text of every fetched tweet (across all accounts)
tweet_text = []

In [8]:
# List that accumulates one result dict per tweet (source, text, date, VADER scores);
# it is converted to a DataFrame after the collection loop.
sentiments = []

In [9]:
# For every account, loop through 5 pages of its timeline and score each tweet with VADER.
# In the recorded run this yielded 100 tweets per account (500 rows in total).

# Empty the accumulators in place first: they are created in earlier cells, so without
# this reset re-running the cell would append a second copy of every tweet.
del tweet_text[:]
del sentiments[:]

for user in target_user:
    # "Tweets Ago" restarts at 1 for each account and increases in the order the API returns tweets
    counter = 1
    for page in range(1, 6):
        public_tweets = api.user_timeline(user, page=page)
        for tweet in public_tweets:
            text = tweet["text"]
            tweet_text.append(text)

            # Run Vader Analysis on each tweet
            results = analyzer.polarity_scores(text)

            # Add analysis results into the list (one dict per tweet)
            sentiments.append({"Tweet_Source": tweet["user"]["name"],
                               "Tweet_text": text,
                               "Tweet_Date": tweet["created_at"],
                               "Tweet_Vader_Score": results["compound"],
                               "Tweet_Pos_Score": results["pos"],
                               "Tweet_Neu_Score": results["neu"],
                               "Tweet_Neg_Score": results["neg"],
                               "Tweets Ago": counter})
            counter += 1
            

In [10]:
# Sanity check: total number of tweets collected across all accounts
len(tweet_text)

500

In [11]:
# Convert sentiments to DataFrame: one row per tweet, one column per key of the result dicts
sentiments_df = pd.DataFrame(sentiments)
# Preview the first rows
sentiments_df.head()

Unnamed: 0,Tweet_Date,Tweet_Neg_Score,Tweet_Neu_Score,Tweet_Pos_Score,Tweet_Source,Tweet_Vader_Score,Tweet_text,Tweets Ago
0,Sat Jul 14 19:03:07 +0000 2018,0.0,1.0,0.0,BBC,0.0,"Tonight, @reginalddhunter takes a 2,000-mile m...",1
1,Sat Jul 14 18:03:04 +0000 2018,0.0,0.843,0.157,BBC,0.3818,There's a clear difference in the way boys and...,2
2,Sat Jul 14 17:32:50 +0000 2018,0.0,0.628,0.372,BBC,0.8271,🎾 Angelique Kerber beat seven-time champion Se...,3
3,Sat Jul 14 16:23:10 +0000 2018,0.0,0.848,0.152,BBC,0.6688,RT @BBCSport: Angelique Kerber is the #wimbled...,4
4,Sat Jul 14 16:03:05 +0000 2018,0.111,0.527,0.362,BBC,0.6476,"""We'll definitely keep in touch!"" 👋 *Immediat...",5


In [12]:
# Put the columns in reading order: source and text first, then the date,
# the VADER scores, and finally the per-account tweet counter.
sentiments_df = sentiments_df[
    [
        "Tweet_Source",
        "Tweet_text",
        "Tweet_Date",
        "Tweet_Vader_Score",
        "Tweet_Pos_Score",
        "Tweet_Neu_Score",
        "Tweet_Neg_Score",
        "Tweets Ago",
    ]
]
sentiments_df.head()

Unnamed: 0,Tweet_Source,Tweet_text,Tweet_Date,Tweet_Vader_Score,Tweet_Pos_Score,Tweet_Neu_Score,Tweet_Neg_Score,Tweets Ago
0,BBC,"Tonight, @reginalddhunter takes a 2,000-mile m...",Sat Jul 14 19:03:07 +0000 2018,0.0,0.0,1.0,0.0,1
1,BBC,There's a clear difference in the way boys and...,Sat Jul 14 18:03:04 +0000 2018,0.3818,0.157,0.843,0.0,2
2,BBC,🎾 Angelique Kerber beat seven-time champion Se...,Sat Jul 14 17:32:50 +0000 2018,0.8271,0.372,0.628,0.0,3
3,BBC,RT @BBCSport: Angelique Kerber is the #wimbled...,Sat Jul 14 16:23:10 +0000 2018,0.6688,0.152,0.848,0.0,4
4,BBC,"""We'll definitely keep in touch!"" 👋 *Immediat...",Sat Jul 14 16:03:05 +0000 2018,0.6476,0.362,0.527,0.111,5


In [13]:
# Export the DataFrame to a CSV file (UTF-8 encoded) in the current working directory
sentiments_df.to_csv("sentiments.csv", encoding='utf-8')

In [15]:
# Split the combined frame into one sub-frame per media source.
# The labels are the values stored in "Tweet_Source" (taken from tweet["user"]["name"]),
# which is why they read "Fox News" / "The New York Times" rather than the account handles.
df_bbc, df_cbs, df_cnn, df_fox, df_nytimes = (
    sentiments_df[sentiments_df["Tweet_Source"] == source_name]
    for source_name in ("BBC", "CBS", "CNN", "Fox News", "The New York Times")
)

In [16]:
# Scatter plot of the VADER compound score against "Tweets Ago", one series per media source
sns.set_style("darkgrid")

fig = plt.figure(figsize=(10, 5))
ax = plt.subplot(111)

# (sub-frame, marker colour, legend label) for each source, drawn in this order
scatter_series = [
    (df_bbc, "lightskyblue", "BBC"),
    (df_cbs, "green", "CBS"),
    (df_cnn, "red", "CNN"),
    (df_fox, "blue", "Fox"),
    (df_nytimes, "yellow", "New York Times"),
]
for source_df, marker_color, legend_label in scatter_series:
    ax.scatter(
        x=source_df["Tweets Ago"],
        y=source_df["Tweet_Vader_Score"],
        c=marker_color,
        label=legend_label,
        edgecolor="black",
        alpha=0.6,
    )

ax.set_xlabel("Tweets Ago")
ax.set_ylabel("Tweets Polarity")
ax.set_title("Sentiments Analysis of Media Tweets (07/14/2018)")
ax.grid(True)

# Shrink the axes to 80% of their width so the legend can sit to the right of the plot area
axes_box = ax.get_position()
ax.set_position([axes_box.x0, axes_box.y0, axes_box.width * 0.8, axes_box.height])
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Save before show so the file is written from the fully drawn figure
plt.savefig("Sentiments Analysis of Media Tweets.png")
plt.show()

<IPython.core.display.Javascript object>

In [17]:
# Average the compound sentiments analyzed by VADER, per media source.
# as_index = False keeps Tweet_Source as a regular column, so the result has one row
# per source with its mean Tweet_Vader_Score.
avg_compound = sentiments_df.groupby("Tweet_Source", as_index = False)["Tweet_Vader_Score"].mean()
avg_compound

Unnamed: 0,Tweet_Source,Tweet_Vader_Score
0,BBC,0.16933
1,CBS,0.348781
2,CNN,-0.024081
3,Fox News,0.085452
4,The New York Times,0.011757


In [18]:
# Plot the bar chart to visualize the overall sentiments of the last 100 tweets from each organization
sns.set_style("dark")
a = avg_compound.set_index("Tweet_Source")
my_colors = ['b', 'r', 'g', 'y', 'm']

# Draw the bars with matplotlib directly. DataFrame.plot.bar maps a colour list to
# *columns*, so with a single score column every bar would get the first colour;
# Axes.bar applies the list bar by bar, which is what my_colors is meant for.
ax = plt.figure().add_subplot(111)
positions = list(range(len(a)))
ax.bar(positions, a["Tweet_Vader_Score"], width=1, color=my_colors,
       edgecolor="gray", linewidth=1)
ax.set_xticks(positions)
ax.set_xticklabels(a.index, rotation=0)

# Label every bar with its value, centred on the bar; the label goes above positive
# bars and below negative ones so it never overlaps the bar itself.
for p in ax.patches:
    height = p.get_height()
    val = '{num:0.2f}'.format(num=height)
    ax.annotate(val,
                (p.get_x() + p.get_width() / 2.0, height),
                ha="center",
                va="bottom" if height >= 0 else "top")

ax.set_xlabel("")
ax.set_ylabel("Tweet Polarity")
ax.set_title("Overall Media Sentiment based on Twitter (07/14/18)")

plt.savefig("Overall Media Sentiment based on Twitter.png")
plt.show()

<IPython.core.display.Javascript object>