In [14]:
# Dependencies
import tweepy
import json
import numpy as np
import pandas as pd
import seaborn as sns
from config import consumer_key, consumer_secret, access_token, access_token_secret
import matplotlib.pyplot as plt


# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

# Import and Initialize Sentiment Analyzer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

In [15]:
# Target User
target_users = ("APTA_Transit", "BART", "MTA", "CTA", "MARTA", "RTCSNV")

# Tweet Texts
tweet_texts = []
tweet_date = []

#variables for holding sentiments
compound_list = []
positive_list = []
negative_list = []
neutral_list = []
counter_list = []
target_list = []


for target in target_users:
    #Counter
    counter = 1
    # Create a loop to iteratively run API requests/ 20 tweets per page/ 1000 tweets equals 51 pages
    for x in range(1, 51):

        # Get all tweets from home feed (for each page specified)
        public_tweets = api.user_timeline(target, page=x)

        # Loop through all tweets
        for tweet in public_tweets:

            # Print Tweet
            #print(tweet["text"])

            # Store Tweet in Array
            tweet_texts.append(tweet["text"])

            # Store Tweet Date in Array
            tweet_date.append(tweet["created_at"])

            # Run Vader Analysis on each tweet
            results = analyzer.polarity_scores(tweet["text"])
            compound = results["compound"]
            pos = results["pos"]
            neu = results["neu"]
            neg = results["neg"]

            # Add each value to the appropriate list
            compound_list.append(compound)
            positive_list.append(pos)
            negative_list.append(neg)
            neutral_list.append(neu)
            counter_list.append(counter)
            target_list.append(target)
            # Store the data in dictionary
            sentiment = {
                "User": target_list,
                "Tweet": tweet_texts,
                "Date": tweet_date,
                "Compound": compound_list,
                "Positive": positive_list,
                "Neutral": negative_list,
                "Negative": neutral_list,
                "Tweets Ago": counter_list
            }
            #add to counter
            #print(target)
            counter += 1

#df = pd.DataFrame(sentiment)
sentiment_df = pd.DataFrame(sentiment)
        


In [16]:
sentiment_df.head(1000)

Unnamed: 0,Compound,Date,Negative,Neutral,Positive,Tweet,Tweets Ago,User
0,0.8553,Wed Apr 11 21:22:08 +0000 2018,0.516,0.000,0.484,Congratulations to @RideKCTransit on their new...,1,APTA_Transit
1,0.0000,Tue Apr 10 21:36:02 +0000 2018,1.000,0.000,0.000,"RT @APTA_info: “In late March, Congress approp...",2,APTA_Transit
2,0.7717,Tue Apr 10 13:23:30 +0000 2018,0.675,0.000,0.325,Public transportation supports jobs in more wa...,3,APTA_Transit
3,0.0000,Mon Apr 09 21:03:27 +0000 2018,1.000,0.000,0.000,https://t.co/iNuedb2jYw,4,APTA_Transit
4,0.0000,Fri Apr 06 21:21:35 +0000 2018,1.000,0.000,0.000,https://t.co/zxMKT7Ulqs,5,APTA_Transit
5,0.4588,Thu Apr 05 18:59:20 +0000 2018,0.870,0.000,0.130,We applaud Congress for standing up for public...,6,APTA_Transit
6,0.7003,Wed Apr 04 20:59:16 +0000 2018,0.734,0.000,0.266,#Publictransportation has long operated in com...,7,APTA_Transit
7,0.4199,Tue Apr 03 16:30:17 +0000 2018,0.823,0.000,0.177,#PublicTransit keeps America moving! Our publi...,8,APTA_Transit
8,0.0000,Mon Apr 02 21:26:03 +0000 2018,1.000,0.000,0.000,https://t.co/1OBAhsKXmk,9,APTA_Transit
9,0.2732,Fri Mar 30 19:15:13 +0000 2018,0.884,0.000,0.116,DYK: Every $10 million in capital investment i...,10,APTA_Transit


In [19]:
max_tweets = sentiment_df.loc[sentiment_df['Tweets Ago'] == 1000]
max_tweets

Unnamed: 0,Compound,Date,Negative,Neutral,Positive,Tweet,Tweets Ago,User
999,0.296,Tue May 19 22:03:56 +0000 2015,0.784,0.0,0.216,Federal funding is vital to our national infra...,1000,APTA_Transit
1999,0.0,Tue Jan 09 10:25:23 +0000 2018,1.0,0.0,0.0,Gast-Huis of Zieken-Huis? @MarcNoppen https:/...,1000,BART
2999,-0.7003,Sat Jul 22 01:31:49 +0000 2017,0.766,0.234,0.0,"@Mad_Reggie Regrets, there was an earlier sick...",1000,MTA
3999,0.296,Thu Mar 08 18:24:48 +0000 2018,0.879,0.0,0.121,95th-bound Red Line trains are standing at 79t...,1000,CTA
5985,-0.5859,Sat Mar 03 01:39:20 +0000 2018,0.758,0.242,0.0,"#FASTALERT 02-Mar-18 5:37 pm,\nCrash on Windmi...",1000,RTCSNV


In [20]:
#Using Pandas create dataframes for each media company
apta_sentiment_df = sentiment_df.loc[sentiment_df['User'] == 'APTA_Transit']
bart_sentiment_df = sentiment_df.loc[sentiment_df['User'] == 'BART']
mta_sentiment_df = sentiment_df.loc[sentiment_df['User'] == 'MTA']
cta_sentiment_df = sentiment_df.loc[sentiment_df['User'] == 'CTA']
marta_sentiment_df = sentiment_df.loc[sentiment_df['User'] == 'MARTA']
rtcsnv_sentiment_df = sentiment_df.loc[sentiment_df['User'] == 'RTCSNV']

#Average the compound vader analysis for each broadcasting company
apta_compound_avg = (f"{np.mean(apta_sentiment_df['Compound']):.3f}")
bart_compound_avg = (f"{np.mean(bart_sentiment_df['Compound']):.3f}")
mta_compound_avg = (f"{np.mean(mta_sentiment_df['Compound']):.3f}")
cta_compound_avg = (f"{np.mean(cta_sentiment_df['Compound']):.3f}")
marta_compound_avg = (f"{np.mean(marta_sentiment_df['Compound']):.3f}")
rtcsnv_compound_avg = (f"{np.mean(rtcsnv_sentiment_df['Compound']):.3f}")

#create DF with all compounded averages
compound_df = [apta_compound_avg, bart_compound_avg, mta_compound_avg, cta_compound_avg, marta_compound_avg, rtcsnv_compound_avg ]
compound_df

['0.269', '0.124', '0.038', '-0.068', '0.099', '-0.295']

In [21]:
#--------SAVE TO .CSV----------#
sentiment_df.to_csv('sentiment_df.csv')

In [None]:
# Create a plot  
x_axis = np.arange(len(compund_df))

plt.bar(x_axis, compound_df, color=('red', 'cyan', 'blue', 'yellow', 'green'), alpha=0.5, align="edge")

# Tell matplotlib where we would like to place each of our x axis headers
tick_locations = [value+0.4 for value in x_axis]
plt.xticks(tick_locations, target_users)

# Give our chart some labels and a tile
plt.title("Overall Media Sentiment Based on Twitter (4/10/17)")
plt.xlabel("Media Company")
plt.ylabel("Tweet Populalrity")

# Sets the y limits of the current chart
#plt.ylim([-0.20,1])
# Print our chart to the screen
plt.show()
#plt.savefig("Avg_Media Sentiment_Bar_Plot_4-10-18.png")
#Sentiment values left to right
#['0.118', '0.367', '0.002', '0.189', '0.015']