In [2]:
import pandas as pd
from twitter import *

In [3]:
from config import PUBLIC_KEY, PRIVATE_KEY, ACCESS_TOKEN, SECRET_TOKEN

In [4]:
twitter_api = Twitter(auth = OAuth(ACCESS_TOKEN, SECRET_TOKEN, PUBLIC_KEY, PRIVATE_KEY))

In [5]:
# test connection
twitter_api.statuses.home_timeline()

[{'created_at': 'Sun May 17 02:41:30 +0000 2020',
  'id': 1261849294220529664,
  'id_str': '1261849294220529664',
  'text': 'Someone started talking to me tn in terms of coefficient of variation instead of Std. Dev &amp; my brain short-circuite… https://t.co/GPbBJHPVfa',
  'truncated': True,
  'entities': {'hashtags': [],
   'symbols': [],
   'user_mentions': [],
   'urls': [{'url': 'https://t.co/GPbBJHPVfa',
     'expanded_url': 'https://twitter.com/i/web/status/1261849294220529664',
     'display_url': 'twitter.com/i/web/status/1…',
     'indices': [121, 144]}]},
  'source': '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>',
  'in_reply_to_status_id': None,
  'in_reply_to_status_id_str': None,
  'in_reply_to_user_id': None,
  'in_reply_to_user_id_str': None,
  'in_reply_to_screen_name': None,
  'user': {'id': 389584283,
   'id_str': '389584283',
   'name': 'Dan Leisman',
   'screen_name': 'danleisman',
   'location': 'New York, NY',
   'description'

In [6]:
# fetch a list of tweets based on a keyword

def buildTestSet(keyword):
    try:
        tweets_retrieved = twitter_api.search.tweets(q=keyword)
        print("fetched " + str(len(tweets_retrieved)) + " tweets about " + keyword)
        return [{"text":tweet.get('text')} for tweet in tweets_retrieved['statuses']]    
    except:
        print("Something went wrong...")
        return None

In [62]:
test_word = "tesla"

In [63]:
testDataSet = buildTestSet(test_word)
print(testDataSet[0:4])

fetched 2 tweets about tesla
[{'text': "@KaivanShroff it's OK not to like Musk but why make stuff up? https://t.co/PQvNBGPXRi\nhttps://t.co/hKb8mhidZs\nhttps://t.co/rCvwC21gkP"}, {'text': 'RT @ChemistryReacts: Tesla coil lighting a lightbulb https://t.co/zy1y0CIt8A'}, {'text': 'RT @erickussalas: Tesla descarta a México para construir su próxima fábrica https://t.co/jOFBXxu3ri'}, {'text': 'RT @GreenlakeRun: @DonaldJTrumpJr #TakeTheRedPill! #KAG #ElonMusk #Tesla #Jobs #OpenUpAmerica @elonmusk @realDonaldTrump  @HorseShort @cact…'}]


In [65]:
# API to extract sentiment of tweets

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [66]:
analyzer = SentimentIntensityAnalyzer()

In [67]:
analyzer.polarity_scores(testDataSet[0])

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

In [60]:
# function to request tweets within the limits set by Twitter (180 tweets/15 min)
# build a list of tweets and write them to a csv file

def buildTrainingSet(keyword, tweetDataFile):
    import csv
    import time

    sleep_time = 500
    
    trainingDataSet = []
    
    for i in range(2):
        try:
            tweets_retrieved = twitter_api.search.tweets(q=keyword, count = 90)
            print(str(len(tweets_retrieved['statuses'])) + " Tweets fetched")
            [trainingDataSet.append(tweet.get('text')) for tweet in tweets_retrieved['statuses']] 
            time.sleep(sleep_time) 
        except: 
            continue
            
    with open(tweetDataFile,'w') as csvfile:
        linewriter = csv.writer(csvfile,delimiter=',',quotechar="\"")
        for tweet in trainingDataSet:
            try:
                linewriter.writerow([tweet])
            except Exception as e:
                print(e)
    return trainingDataSet

In [15]:
tweetDataFile = '/Users/evanstroh/Documents/Personal/Data_science/nlp_analysis/tweetDataFile.csv'

In [38]:
trainingData = buildTrainingSet('remdesivir',tweetDataFile)

In [72]:
# run through the list of tweets to get sentiment analysis on them

positive_sentiment = 0
negative_sentiment = 0
neutral_sentiment = 0

for tweet in trainingData:
    sentiment_dict = analyzer.polarity_scores(tweet)
    print(sentiment_dict)
    if sentiment_dict.get('pos') > sentiment_dict.get('neg') and sentiment_dict.get('pos') > sentiment_dict.get('neu'):
        positive_sentiment += 1
    elif sentiment_dict.get('neg') > sentiment_dict.get('pos') and sentiment_dict.get('neg') > sentiment_dict.get('neu'):
        negative_sentiment += 1
    else: 
        neutral_sentiment += 1

{'neg': 0.126, 'neu': 0.777, 'pos': 0.097, 'compound': -0.1531}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.878, 'pos': 0.122, 'compound': 0.3612}
{'neg': 0.0, 'neu': 0.83, 'pos': 0.17, 'compound': 0.6249}
{'neg': 0.0, 'neu': 0.944, 'pos': 0.056, 'compound': 0.0772}
{'neg': 0.0, 'neu': 0.83, 'pos': 0.17, 'compound': 0.6249}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.272, 'neu': 0.728, 'pos': 0.0, 'compound': -0.8625}
{'neg': 0.0, 'neu': 0.827, 'pos': 0.173, 'compound': 0.6581}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.155, 'neu': 0.845, 'pos': 0.0, 'compound': -0.5106}
{'neg': 0.092, 'neu': 0.83, 'pos': 0.079, 'compound': -0.0772}
{'neg': 0.0, 'neu': 0.878, 'pos': 0.122, 'compound': 0.1007}
{'neg': 0

In [73]:
# all tweet identified as having a majority of neutral sentiment

print('positive sentiment: ' + str(positive_sentiment) + '\n' + 'negative sentiment: ' + str(negative_sentiment) + '\n'
      + 'neutral sentiment: ' + str(neutral_sentiment))

positive sentiment: 0
negative sentiment: 0
neutral sentiment: 180
