# Twitter Sentiment Analysis
Ming Fong

Linguistics 55AC

Useful reference (tutorial this notebook draws on): https://medium.com/swlh/coronavirus-python-tutorial-1-520cc960aac1

In [1]:
import tweepy #https://github.com/tweepy/tweepy
import csv
from secrets import *
import pandas as pd
import numpy as np
import re
from textblob import TextBlob

In [2]:
# Alias the credential names brought in by `from secrets import *` to the names
# that get_all_tweets() passes to tweepy.
# NOTE(review): `secrets` here is presumably a project-local secrets.py kept out
# of version control (the stdlib module of that name exports none of these) -- confirm.
consumer_key = api_key
consumer_secret = api_secret
access_key = access_token
# `access_secret` already arrives under that exact name via the star import, so the
# former `access_secret = access_secret` self-assignment was a no-op and is omitted.

In [3]:
def get_all_tweets(screen_name):
    """Download as many of a user's most recent tweets as the timeline API returns.

    Pages backwards through the timeline 200 tweets at a time, using ``max_id``
    so that each request starts just below the oldest tweet already collected.

    Parameters
    ----------
    screen_name : str
        Twitter handle (without the leading ``@``) whose timeline is fetched.

    Returns
    -------
    list of list
        One ``[id_str, created_at, text]`` row per tweet, newest first.
        An empty list when the first request returns no tweets.

    Notes
    -----
    Reads the module-level ``consumer_key``, ``consumer_secret``, ``access_key``
    and ``access_secret`` credentials and prints progress after every page.
    """
    # Twitter only exposes a user's most recent tweets through this method
    # (the original note here said 3240; Twitter's documentation states 3,200).

    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # initialize a list to hold all the tweepy Tweets
    alltweets = []

    # make initial request for most recent tweets (200 is the maximum allowed count)
    new_tweets = api.user_timeline(screen_name = screen_name, count = 200)

    # An empty first page means there is nothing to page through; returning here
    # avoids the IndexError that alltweets[-1] would raise on an empty list.
    if len(new_tweets) == 0:
        return []

    # save most recent tweets
    alltweets.extend(new_tweets)

    # save the id of the oldest tweet less one
    oldest = alltweets[-1].id - 1

    # keep grabbing tweets until there are no tweets left to grab
    while len(new_tweets) > 0:
        print(f"getting tweets before {oldest}")

        # all subsequent requests use the max_id param to prevent duplicates
        new_tweets = api.user_timeline(screen_name = screen_name, count=200, max_id = oldest)

        # save most recent tweets (a no-op on the final, empty page)
        alltweets.extend(new_tweets)

        # update the id of the oldest tweet less one
        oldest = alltweets[-1].id - 1

        print(f"...{len(alltweets)} tweets downloaded so far")

    # transform the tweepy tweets into a 2D array that will populate the csv
    outtweets = [[tweet.id_str, tweet.created_at, tweet.text] for tweet in alltweets]

    # Optional persistence step, currently disabled: write the rows to a csv.
    # with open(f'data/{screen_name}_tweets.csv', 'w') as f:
    #     writer = csv.writer(f)
    #     writer.writerow(["id","created_at","text"])
    #     writer.writerows(outtweets)

    return outtweets


In [4]:
# Fetch the timeline and load the [id, created_at, text] rows into a frame;
# no column names are given, so the columns carry the integer labels 0, 1 and 2.
df = pd.DataFrame(data=get_all_tweets(screen_name="realDonaldTrump"))

getting tweets before 1335032599283109889
...400 tweets downloaded so far
getting tweets before 1332173996134195201
...599 tweets downloaded so far
getting tweets before 1329298668403499008
...798 tweets downloaded so far
getting tweets before 1327319294057848831
...997 tweets downloaded so far
getting tweets before 1324541519873847295
...1197 tweets downloaded so far
getting tweets before 1322746973443760127
...1397 tweets downloaded so far
getting tweets before 1321414554346160129
...1597 tweets downloaded so far
getting tweets before 1320224763491602432
...1797 tweets downloaded so far
getting tweets before 1318270451882192898
...1995 tweets downloaded so far
getting tweets before 1316711285871042561
...2192 tweets downloaded so far
getting tweets before 1315663084791435263
...2392 tweets downloaded so far
getting tweets before 1313842652731838463
...2590 tweets downloaded so far
getting tweets before 1311512518800470015
...2790 tweets downloaded so far
getting tweets before 1310027

In [5]:
# Remove links: operate on the text column (integer label 2) as strings, strip
# URLs with a vectorized regex, and trim the whitespace the removed links leave
# behind so that link-only tweets end up as "" rather than " ".
tweet_text = df[2].astype(str).str.replace(r'http\S+', '', regex=True).str.strip()
# Remove retweets and tweets that have no text left once the links are gone.
keep_mask = ~tweet_text.str.startswith("RT @") & (tweet_text != "")
# .copy() gives an independent frame, so the column assignment below does not
# raise pandas' SettingWithCopyWarning the way writing into a filtered slice does.
df = df.loc[keep_mask].copy()
df[2] = tweet_text[keep_mask]

In [7]:
# Preview the first 20 rows that survive cleaning (index gaps are the dropped rows).
df.head(n=20)

Unnamed: 0,0,1,2
3,1337526606445809665,2020-12-11 22:36:07,"Georgia, where is signature verification appro..."
4,1337494507756072961,2020-12-11 20:28:34,If the Supreme Court shows great Wisdom and Co...
5,1337494429221916674,2020-12-11 20:28:16,"If the two Senators from Georgia should lose, ..."
8,1337420100375339010,2020-12-11 15:32:54,“Donald Trump must get the credit for the vacc...
9,1337400706551123969,2020-12-11 14:15:50,Now it turns out that the Democrats want the P...
10,1337385736530780161,2020-12-11 13:16:21,Now that the Biden Administration will be a sc...
11,1337379719872974852,2020-12-11 12:52:27,Now it turns out that my phone call to the Pre...
12,1337373146652020736,2020-12-11 12:26:20,I just want to stop the world from killing its...
13,1337372201662746625,2020-12-11 12:22:34,"The Swine Flu (H1N1), and the attempt for a va..."
14,1337369403638362114,2020-12-11 12:11:27,While my pushing the money drenched but heavil...
