In [4]:
# Dependencies
import tweepy
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from matplotlib import style

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

# Twitter API Keys
from config import (consumer_key, 
                    consumer_secret, 
                    access_token, 
                    access_token_secret)

# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

In [8]:
# Search for People Tweeting about Mark Hamill
search_terms = ("@SFBART","@WMATA","@nyctsubway","@CTA","@MARTASERVICE","@RTCSNV","@UBER","@LYFT")
# Create variable for holding the oldest tweet
oldest_tweet = None

# List to hold unique IDs
sentiments = []

# Counter to keep track of the number of tweets retrieved
counter = 0

for search_term in search_terms:

    counter = 0
    oldest_tweet = None 
# Loop through 5 times (total of 500 tweets)
    for x in range(10):

        # Retrieve 100 most recent tweets -- specifying a max_id
        public_tweets = api.search(search_term, 
                                   count=100, 
                                   result_type="recent", 
                                   max_id=oldest_tweet)
        # Print Tweets
        for tweet in public_tweets["statuses"]:

            # Print the username
            results = analyzer.polarity_scores(tweet["text"])
            compound = results["compound"]
            pos = results["pos"]
            neu = results["neu"]
            neg = results["neg"]
            tweets_ago = counter

            # Get Tweet ID, subtract 1, and assign to oldest_tweet
            oldest_tweet = tweet['id'] - 1

            # Add sentiments for each tweet into a list
            sentiments.append({"Date": tweet["created_at"],
                               "Subject": search_term,
                               "Compound": compound,
                               "Positive": pos,
                               "Negative": neu,
                               "Neutral": neg,
                               "Tweets Ago": counter})

            # Add to counter 
            counter += 1

In [9]:
# Convert sentiments to DataFrame
sentiments_pd = pd.DataFrame.from_dict(sentiments)
sentiments_pd.to_csv('twitter.csv', encoding='utf-8')
sentiments_pd.head(50)
sentiments_pd.count()

Compound      6816
Date          6816
Negative      6816
Neutral       6816
Positive      6816
Subject       6816
Tweets Ago    6816
dtype: int64

In [None]:
# Dependencies
import tweepy
import json
import numpy as np
import pandas as pd
import seaborn as sns
from config import consumer_key, consumer_secret, access_token, access_token_secret
import matplotlib.pyplot as plt


# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

# Import and Initialize Sentiment Analyzer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

In [3]:
# Target User
target_users = ("APTA_Transit", "BART", "MTA", "CTA", "MARTA", "RTCSNV")

# Tweet Texts
tweet_texts = []
tweet_date = []

#variables for holding sentiments
compound_list = []
positive_list = []
negative_list = []
neutral_list = []
counter_list = []
target_list = []


for target in target_users:
    #Counter
    counter = 1
    # Create a loop to iteratively run API requests/ 20 tweets per page/ 1000 tweets equals 51 pages
    for x in range(1, 51):

        # Get all tweets from home feed (for each page specified)
        public_tweets = api.user_timeline(target, page=x)

        # Loop through all tweets
        for tweet in public_tweets:

            # Print Tweet
            #print(tweet["text"])

            # Store Tweet in Array
            tweet_texts.append(tweet["text"])

            # Store Tweet Date in Array
            tweet_date.append(tweet["created_at"])

            # Run Vader Analysis on each tweet
            results = analyzer.polarity_scores(tweet["text"])
            compound = results["compound"]
            pos = results["pos"]
            neu = results["neu"]
            neg = results["neg"]

            # Add each value to the appropriate list
            compound_list.append(compound)
            positive_list.append(pos)
            negative_list.append(neg)
            neutral_list.append(neu)
            counter_list.append(counter)
            target_list.append(target)
            # Store the data in dictionary
            sentiment = {
                "User": target_list,
                "Tweet": tweet_texts,
                "Date": tweet_date,
                "Compound": compound_list,
                "Positive": positive_list,
                "Neutral": negative_list,
                "Negative": neutral_list,
                "Tweets Ago": counter_list
            }
            #add to counter
            #print(target)
            counter += 1

#CREATE DATAFRAME
sentiment_users_df = pd.DataFrame(sentiment)

In [5]:
#Check Data Frame
sentiment_users_df.head(1000)

Unnamed: 0,Compound,Date,Negative,Neutral,Positive,Tweet,Tweets Ago,User
0,0.0000,Fri Apr 13 13:02:35 +0000 2018,1.000,0.000,0.000,"From coast to coast, public transportation is ...",1,APTA_Transit
1,0.8553,Wed Apr 11 21:22:08 +0000 2018,0.516,0.000,0.484,Congratulations to @RideKCTransit on their new...,2,APTA_Transit
2,0.0000,Tue Apr 10 21:36:02 +0000 2018,1.000,0.000,0.000,"RT @APTA_info: “In late March, Congress approp...",3,APTA_Transit
3,0.7717,Tue Apr 10 13:23:30 +0000 2018,0.675,0.000,0.325,Public transportation supports jobs in more wa...,4,APTA_Transit
4,0.0000,Mon Apr 09 21:03:27 +0000 2018,1.000,0.000,0.000,https://t.co/iNuedb2jYw,5,APTA_Transit
5,0.0000,Fri Apr 06 21:21:35 +0000 2018,1.000,0.000,0.000,https://t.co/zxMKT7Ulqs,6,APTA_Transit
6,0.4588,Thu Apr 05 18:59:20 +0000 2018,0.870,0.000,0.130,We applaud Congress for standing up for public...,7,APTA_Transit
7,0.7003,Wed Apr 04 20:59:16 +0000 2018,0.734,0.000,0.266,#Publictransportation has long operated in com...,8,APTA_Transit
8,0.4199,Tue Apr 03 16:30:17 +0000 2018,0.823,0.000,0.177,#PublicTransit keeps America moving! Our publi...,9,APTA_Transit
9,0.0000,Mon Apr 02 21:26:03 +0000 2018,1.000,0.000,0.000,https://t.co/1OBAhsKXmk,10,APTA_Transit


In [6]:
#CHECK 10000 TWEETS WERE POLLED --- ACCOUNTS -ATL/MARTA DID NOT HAVE ENOUGH----
max_tweets = sentiment_users_df.loc[sentiment_users_df['Tweets Ago'] == 1000]
max_tweets

Unnamed: 0,Compound,Date,Negative,Neutral,Positive,Tweet,Tweets Ago,User
999,0.0772,Wed May 20 21:19:59 +0000 2015,0.515,0.236,0.249,"It helps you lose weight, save time, stress, a...",1000,APTA_Transit
1999,0.0,Tue Jan 09 19:47:30 +0000 2018,1.0,0.0,0.0,@wintermute314 gelukkig zijn er nog tokens om ...,1000,BART
2999,-0.7003,Sat Jul 22 01:31:49 +0000 2017,0.766,0.234,0.0,"@Mad_Reggie Regrets, there was an earlier sick...",1000,MTA
3999,-0.4019,Fri Mar 09 12:34:31 +0000 2018,0.863,0.137,0.0,Some Linden-bound Purple Line trains are oper...,1000,CTA
5985,-0.5859,Sat Mar 03 21:56:23 +0000 2018,0.73,0.27,0.0,"#FASTALERT 03-Mar-18 1:54 pm,\nCrash on Durang...",1000,RTCSNV


In [7]:
#--------SAVE TO .CSV----------#
sentiment_users_df.to_csv('sentiment_df.csv')

In [9]:
#Using Pandas create dataframes for each TRANSIT company
apta_sentiment_df = sentiment_users_df.loc[sentiment_users_df['User'] == 'APTA_Transit']
bart_sentiment_df = sentiment_users_df.loc[sentiment_users_df['User'] == 'BART']
mta_sentiment_df = sentiment_users_df.loc[sentiment_users_df['User'] == 'MTA']
cta_sentiment_df = sentiment_users_df.loc[sentiment_users_df['User'] == 'CTA']
marta_sentiment_df = sentiment_users_df.loc[sentiment_users_df['User'] == 'MARTA']
rtcsnv_sentiment_df = sentiment_users_df.loc[sentiment_users_df['User'] == 'RTCSNV']

#Average the compound vader analysis for each TRANSIT company
apta_compound_avg = (f"{np.mean(apta_sentiment_df['Compound']):.3f}")
bart_compound_avg = (f"{np.mean(bart_sentiment_df['Compound']):.3f}")
mta_compound_avg = (f"{np.mean(mta_sentiment_df['Compound']):.3f}")
cta_compound_avg = (f"{np.mean(cta_sentiment_df['Compound']):.3f}")
marta_compound_avg = (f"{np.mean(marta_sentiment_df['Compound']):.3f}")
rtcsnv_compound_avg = (f"{np.mean(rtcsnv_sentiment_df['Compound']):.3f}")

#create DF with all compounded averages
compound_df = [apta_compound_avg, bart_compound_avg, mta_compound_avg, cta_compound_avg, marta_compound_avg, rtcsnv_compound_avg ]
compound_df

['0.268', '0.123', '0.038', '-0.069', '0.099', '-0.298']