In [1]:
# HYPOTHESYS
# We expect that mentions of Trumps will have 50/50 positive and negative
# We expect more positive tweets volume from republican states
# We expect Trump's tweets to be majority negative
# What are the main keywords Trump uses and what are the sentiment about those subjects

In [2]:
# import dependencies 
import os
import pandas as pd
from matplotlib import pyplot as plt
import time
import csv
import json
import requests
from config import api_key, api_secret_key
import re 
import twitter
import tweepy 
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
from textblob import TextBlob 
import base64
import string
from collections import defaultdict 

# request limit per second (max_number_of_requests = 180 per 15 min)
# https://towardsdatascience.com/creating-the-twitter-sentiment-analysis-program-in-python-with-naive-bayes-classification-672e5589a7ed


# Reference the url for the sentiment analysis code 'https://www.geeksforgeeks.org/twitter-sentiment-analysis-using-python/''

# Repository where we got Trump's tweets from 2018: https://github.com/bpb27/trump_tweet_data_archive


In [3]:
# Authenticate and Make the first API request for Twitter

client_key = api_key
client_secret = api_secret_key

key_secret = '{}:{}'.format(client_key, client_secret).encode('ascii')
b64_encoded_key = base64.b64encode(key_secret)
b64_encoded_key = b64_encoded_key.decode('ascii')

base_url = 'https://api.twitter.com/'
auth_url = '{}oauth2/token'.format(base_url)

auth_headers = {
    'Authorization': 'Basic {}'.format(b64_encoded_key),
    'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'
}

auth_data = {
    'grant_type': 'client_credentials'
}

auth_resp = requests.post(auth_url, headers=auth_headers, data=auth_data)

auth_resp.json().keys()

access_token = auth_resp.json()['access_token']

search_headers = {
    'Authorization': 'Bearer {}'.format(access_token)    
}


In [4]:
# MENTIONS @realDonalTrump - define parameters for api call
search_params = {
    'q': '@realDonaldTrump',
    'lang': 'en',
    'count': 450
}


search_url = '{}1.1/search/tweets.json'.format(base_url)


search_resp = requests.get(search_url, headers=search_headers, params=search_params)

tweet_data = search_resp.json()

tweet_data_filter = defaultdict(list)

statuses = tweet_data['statuses']
processed_tweet_ids = set()


In [5]:
# MENTIONS @realDonalTrump - dataframe

for status in statuses:
    tweet_id = status['id_str']
    if tweet_id in processed_tweet_ids:
        continue
    
    processed_tweet_ids.add(tweet_id)

    tweet_data_filter["UserName"].append(status['user']['screen_name'])
    tweet_data_filter["text"].append(status['text'])
    tweet_data_filter["created_at"].append(status['created_at'])
    tweet_data_filter["followers_count"].append(status['user']['followers_count'])
    tweet_data_filter["retweet_count"].append(status['retweet_count'])
    tweet_data_filter["favorite_count"].append(status['favorite_count'])
    tweet_data_filter["TweetID"].append(tweet_id)
    tweet_data_filter["Retweeted"].append(status['retweeted'])
    tweet_data_filter["UserID"].append(status['user']['id_str'])
    tweet_data_filter["language"].append(status['user']['lang'])
    tweet_data_filter['verified'].append(status['user']['verified'])

tweet_data_filter_pdf = pd.DataFrame(tweet_data_filter)
tweet_data_filter_pdf

Unnamed: 0,UserName,text,created_at,followers_count,retweet_count,favorite_count,TweetID,Retweeted,UserID,language,verified
0,RealEarlMilk,RT @tedlieu: Dear @realDonaldTrump: You are st...,Sat Jun 29 19:18:58 +0000 2019,19040,1867,0,1145049036329750528,False,234566636,en,False
1,rcj3725,RT @realDonaldTrump: After some very important...,Sat Jun 29 19:18:58 +0000 2019,2700,33069,0,1145049035369263105,False,3094576717,en,False
2,cakes_lane,@DonaldJTrumpJr @realDonaldTrump He is so dumb,Sat Jun 29 19:18:58 +0000 2019,23,0,0,1145049033838215169,False,2276731603,en,False
3,BarbCrofts,@realDonaldTrump Did you rape any women today?,Sat Jun 29 19:18:58 +0000 2019,4958,0,0,1145049033808814080,False,4215907939,en,False
4,rolandburns,"@realDonaldTrump is such a fucking moron, it's...",Sat Jun 29 19:18:58 +0000 2019,70,0,0,1145049033657896960,False,84369859,en,False
5,us2020president,RT @realDonaldTrump: After some very important...,Sat Jun 29 19:18:58 +0000 2019,8359,33069,0,1145049032949207042,False,1057868292398841857,en,False
6,khann470,@letyflores1 @realDonaldTrump Owsm what is ur job,Sat Jun 29 19:18:58 +0000 2019,4,0,0,1145049032936644610,False,996062665339494401,en,False
7,alllibertynews,RT @mrCLEANqs: @cofs2 @stuartHWD68 @SenCoryGar...,Sat Jun 29 19:18:57 +0000 2019,19456,1,0,1145049031011459073,False,4521330855,en,False
8,cayce0312,@rickfos38250406 @tedlieu @realDonaldTrump It'...,Sat Jun 29 19:18:57 +0000 2019,5786,0,0,1145049030424240129,False,985603645248032769,en,False
9,HeavensMesseng1,and he should be holding YOU and PELOSI accoun...,Sat Jun 29 19:18:57 +0000 2019,1087,0,0,1145049029438529537,False,1116004371865665536,en,False


In [9]:
# TRUMP TWEETS - import and read json file from 2018 tweets

filepath = os.path.join("condensed_2018.json")
with open(filepath) as jsonfile:
    trump_2018_json = json.load(jsonfile)

# print(json.dumps(trump_2018_json, sort_keys=True, indent=4)) 

trump_2018 = defaultdict(list)

In [7]:
# TRUMP TWEETS - creade a data frame with tweets and columns we want from Trump

for data in trump_2018_json:

    trump_2018["UserName"].append('realDonaldTrump')
    trump_2018["text"].append(data['text'])
    trump_2018["created_at"].append(data['created_at'])
    trump_2018["followers_count"].append('61506048')
    trump_2018["retweet_count"].append(data['retweet_count'])
    trump_2018["favorite_count"].append(data['favorite_count'])
    trump_2018["TweetID"].append(data['id_str'])
    trump_2018["Retweeted"].append('False')
    trump_2018["UserID"].append('25073877')
    trump_2018["language"].append('None')
    trump_2018['verified'].append('True')
    
trump_2018_pdf = pd.DataFrame(trump_2018)
trump_2018_pdf

Unnamed: 0,UserName,text,created_at,followers_count,retweet_count,favorite_count,TweetID,Retweeted,UserID,language,verified
0,realDonaldTrump,HAPPY NEW YEAR! https://t.co/bHoPDPQ7G6,Mon Dec 31 23:53:06 +0000 2018,61506048,33548,136012,1079888205351145472,False,25073877,,True
1,realDonaldTrump,"....Senator Schumer, more than a year longer t...",Mon Dec 31 20:02:52 +0000 2018,61506048,17456,65069,1079830268708556800,False,25073877,,True
2,realDonaldTrump,Heads of countries are calling wanting to know...,Mon Dec 31 20:02:52 +0000 2018,61506048,21030,76721,1079830267274108930,False,25073877,,True
3,realDonaldTrump,It’s incredible how Democrats can all use thei...,Mon Dec 31 15:39:15 +0000 2018,61506048,29610,127485,1079763923845419009,False,25073877,,True
4,realDonaldTrump,"I’m in the Oval Office. Democrats, come back f...",Mon Dec 31 15:37:14 +0000 2018,61506048,30957,132439,1079763419908243456,False,25073877,,True
5,realDonaldTrump,"I’m in the Oval Office. Democrats, come back f...",Mon Dec 31 15:33:15 +0000 2018,61506048,1123,4217,1079762413589807104,False,25073877,,True
6,realDonaldTrump,I am the only person in America who could say ...,Mon Dec 31 14:38:52 +0000 2018,61506048,25463,112735,1079748730058870789,False,25073877,,True
7,realDonaldTrump,"I campaigned on Border Security, which you can...",Mon Dec 31 13:29:32 +0000 2018,61506048,22079,91523,1079731279032172545,False,25073877,,True
8,realDonaldTrump,.....Except the results are FAR BETTER than I ...,Mon Dec 31 13:19:13 +0000 2018,61506048,15152,72758,1079728684427341825,False,25073877,,True
9,realDonaldTrump,...I campaigned on getting out of Syria and ot...,Mon Dec 31 13:12:40 +0000 2018,61506048,22119,101470,1079727034673311746,False,25073877,,True


In [10]:
# TRUMP TWEETS - get data 2019/recent 7 days using API call

trump_search_params = {
    'screen_name': 'realDonaldTrump',
    'count': 1000
}


user_url = '{}1.1/statuses/user_timeline.json'.format(base_url)


search_resp2 = requests.get(user_url, headers=search_headers, params=trump_search_params)

trump_tweet_data = search_resp2.json()

trump_tweet_data_filter = defaultdict(list)

processed_trump_tweet_ids = set()

# trump_tweet_data

In [11]:
# TRUMP TWEETS - create dataframe

for tweet in trump_tweet_data:
    trump_tweet_id = tweet['id_str']
    if trump_tweet_id in processed_tweet_ids:
        continue
        
    processed_trump_tweet_ids.add(trump_tweet_id)
    
    trump_tweet_data_filter["UserName"].append(tweet['user']['screen_name'])
    trump_tweet_data_filter["text"].append(tweet['text'])
    trump_tweet_data_filter["created_at"].append(tweet['created_at'])
    trump_tweet_data_filter["followers_count"].append(tweet['user']['followers_count'])
    trump_tweet_data_filter["retweet_count"].append(tweet['retweet_count'])
    trump_tweet_data_filter["favorite_count"].append(tweet['favorite_count'])
    trump_tweet_data_filter["TweetID"].append(tweet['id_str'])
    trump_tweet_data_filter["Retweeted"].append(tweet['retweeted'])
    trump_tweet_data_filter["UserID"].append(tweet['user']['id_str'])
    trump_tweet_data_filter["language"].append(tweet['user']['lang'])
    trump_tweet_data_filter['verified'].append(tweet['user']['verified'])


trump_data_filter_pdf = pd.DataFrame(trump_tweet_data_filter)

trump_data_filter_pdf

Unnamed: 0,UserName,text,created_at,followers_count,retweet_count,favorite_count,TweetID,Retweeted,UserID,language,verified
0,realDonaldTrump,Thank you #G20OsakaSummit https://t.co/9FCqSuR5Bp,Sat Jun 29 10:11:32 +0000 2019,61510607,18158,81737,1144911267641135109,False,25073877,,True
1,realDonaldTrump,"After some very important meetings, including ...",Fri Jun 28 22:51:41 +0000 2019,61510607,33117,139079,1144740178948493314,False,25073877,,True
2,realDonaldTrump,54% in Poll! I would be at 75% (with our great...,Fri Jun 28 22:41:17 +0000 2019,61510607,24518,120646,1144737559786020864,False,25073877,,True
3,realDonaldTrump,"I am in Japan at the G-20, representing our Co...",Fri Jun 28 09:26:34 +0000 2019,61510607,29372,152900,1144537564944228352,False,25073877,,True
4,realDonaldTrump,The Stock Market went up massively from the da...,Fri Jun 28 09:12:18 +0000 2019,61510607,21125,110651,1144533973428842496,False,25073877,,True
5,realDonaldTrump,All Democrats just raised their hands for givi...,Fri Jun 28 01:37:04 +0000 2019,61510607,70015,280857,1144419410729242625,False,25073877,,True
6,realDonaldTrump,Great to be back in Japan for the #G20OsakaSum...,Fri Jun 28 00:46:37 +0000 2019,61510607,15264,76813,1144406713165963264,False,25073877,,True
7,realDonaldTrump,Bipartisan Humanitarian Aid Bill for the South...,Thu Jun 27 22:22:39 +0000 2019,61510607,24252,104088,1144370485783867392,False,25073877,,True
8,realDonaldTrump,.....United States Supreme Court is given addi...,Thu Jun 27 17:37:32 +0000 2019,61510607,24999,113359,1144298734311878657,False,25073877,,True
9,realDonaldTrump,"Seems totally ridiculous that our government, ...",Thu Jun 27 17:37:32 +0000 2019,61510607,28805,130656,1144298731887628288,False,25073877,,True


In [None]:
# TRUMP TWEETS store in excel just in case. we can get rid of this later
trump_data_filter_pdf.to_excel("TrumpFile3.xlsx", engine='xlsxwriter', encoding='utf8')

In [12]:
# TRUMP TWEETS combine the 2 data frames with 2018 and 2019 recent trump tweets
frames =[trump_2018_pdf, trump_data_filter_pdf]
combined_trump_tweets = pd.concat(frames)
combined_trump_tweets

Unnamed: 0,UserName,text,created_at,followers_count,retweet_count,favorite_count,TweetID,Retweeted,UserID,language,verified
0,realDonaldTrump,HAPPY NEW YEAR! https://t.co/bHoPDPQ7G6,Mon Dec 31 23:53:06 +0000 2018,61506048,33548,136012,1079888205351145472,False,25073877,,True
1,realDonaldTrump,"....Senator Schumer, more than a year longer t...",Mon Dec 31 20:02:52 +0000 2018,61506048,17456,65069,1079830268708556800,False,25073877,,True
2,realDonaldTrump,Heads of countries are calling wanting to know...,Mon Dec 31 20:02:52 +0000 2018,61506048,21030,76721,1079830267274108930,False,25073877,,True
3,realDonaldTrump,It’s incredible how Democrats can all use thei...,Mon Dec 31 15:39:15 +0000 2018,61506048,29610,127485,1079763923845419009,False,25073877,,True
4,realDonaldTrump,"I’m in the Oval Office. Democrats, come back f...",Mon Dec 31 15:37:14 +0000 2018,61506048,30957,132439,1079763419908243456,False,25073877,,True
5,realDonaldTrump,"I’m in the Oval Office. Democrats, come back f...",Mon Dec 31 15:33:15 +0000 2018,61506048,1123,4217,1079762413589807104,False,25073877,,True
6,realDonaldTrump,I am the only person in America who could say ...,Mon Dec 31 14:38:52 +0000 2018,61506048,25463,112735,1079748730058870789,False,25073877,,True
7,realDonaldTrump,"I campaigned on Border Security, which you can...",Mon Dec 31 13:29:32 +0000 2018,61506048,22079,91523,1079731279032172545,False,25073877,,True
8,realDonaldTrump,.....Except the results are FAR BETTER than I ...,Mon Dec 31 13:19:13 +0000 2018,61506048,15152,72758,1079728684427341825,False,25073877,,True
9,realDonaldTrump,...I campaigned on getting out of Syria and ot...,Mon Dec 31 13:12:40 +0000 2018,61506048,22119,101470,1079727034673311746,False,25073877,,True


In [14]:
# MENTIONS @realDonaldTrump - sentiment analysis for Everyone's tweets that mention Trump
# Updates df to append one column with results of sentiment analyses 

tweet_data_filter_pdf["TweetTone"] = ""
for index, row in tweet_data_filter_pdf.iterrows():
    tweettext = row['text']
    sent_analysis = TextBlob(tweettext)
       # set sentiment
    if sent_analysis.sentiment.polarity > 0:
        tweet_data_filter_pdf.loc[index, "TweetTone"] = "Positive"
    elif sent_analysis.sentiment.polarity == 0:
        tweet_data_filter_pdf.loc[index, "TweetTone"] = "Neutral"
    else:
        tweet_data_filter_pdf.loc[index, "TweetTone"] = "Negative"

tweet_data_filter_pdf

Unnamed: 0,UserName,text,created_at,followers_count,retweet_count,favorite_count,TweetID,Retweeted,UserID,language,verified,TweetTone
0,RealEarlMilk,RT @tedlieu: Dear @realDonaldTrump: You are st...,Sat Jun 29 19:18:58 +0000 2019,19040,1867,0,1145049036329750528,False,234566636,en,False,Negative
1,rcj3725,RT @realDonaldTrump: After some very important...,Sat Jun 29 19:18:58 +0000 2019,2700,33069,0,1145049035369263105,False,3094576717,en,False,Positive
2,cakes_lane,@DonaldJTrumpJr @realDonaldTrump He is so dumb,Sat Jun 29 19:18:58 +0000 2019,23,0,0,1145049033838215169,False,2276731603,en,False,Negative
3,BarbCrofts,@realDonaldTrump Did you rape any women today?,Sat Jun 29 19:18:58 +0000 2019,4958,0,0,1145049033808814080,False,4215907939,en,False,Neutral
4,rolandburns,"@realDonaldTrump is such a fucking moron, it's...",Sat Jun 29 19:18:58 +0000 2019,70,0,0,1145049033657896960,False,84369859,en,False,Negative
5,us2020president,RT @realDonaldTrump: After some very important...,Sat Jun 29 19:18:58 +0000 2019,8359,33069,0,1145049032949207042,False,1057868292398841857,en,False,Positive
6,khann470,@letyflores1 @realDonaldTrump Owsm what is ur job,Sat Jun 29 19:18:58 +0000 2019,4,0,0,1145049032936644610,False,996062665339494401,en,False,Neutral
7,alllibertynews,RT @mrCLEANqs: @cofs2 @stuartHWD68 @SenCoryGar...,Sat Jun 29 19:18:57 +0000 2019,19456,1,0,1145049031011459073,False,4521330855,en,False,Negative
8,cayce0312,@rickfos38250406 @tedlieu @realDonaldTrump It'...,Sat Jun 29 19:18:57 +0000 2019,5786,0,0,1145049030424240129,False,985603645248032769,en,False,Positive
9,HeavensMesseng1,and he should be holding YOU and PELOSI accoun...,Sat Jun 29 19:18:57 +0000 2019,1087,0,0,1145049029438529537,False,1116004371865665536,en,False,Neutral


In [None]:
#  MENTIONS @realDonaldTrump - store data frame to excel just in case
tweet_data_filter_pdf.to_excel("TweetFile2.xlsx", engine='xlsxwriter', encoding='utf8')

In [15]:
# TRUMP TWEETS - sentiment analysis for Trump's tweets
# Updates df to append one column with results of sentiment analyses 

combined_trump_tweets["TweetTone"] = ""
for index, row in combined_trump_tweets.iterrows():
    trumptweettext = row['text']
    sent_analysis = TextBlob(trumptweettext)
       # set sentiment
    if sent_analysis.sentiment.polarity > 0:
        combined_trump_tweets.loc[index, "TweetTone"] = "Positive"
    elif sent_analysis.sentiment.polarity == 0:
        combined_trump_tweets.loc[index, "TweetTone"] = "Neutral"
    else:
        combined_trump_tweets.loc[index, "TweetTone"] = "Negative"
combined_trump_tweets  

Unnamed: 0,UserName,text,created_at,followers_count,retweet_count,favorite_count,TweetID,Retweeted,UserID,language,verified,TweetTone
0,realDonaldTrump,HAPPY NEW YEAR! https://t.co/bHoPDPQ7G6,Mon Dec 31 23:53:06 +0000 2018,61506048,33548,136012,1079888205351145472,False,25073877,,True,Neutral
1,realDonaldTrump,"....Senator Schumer, more than a year longer t...",Mon Dec 31 20:02:52 +0000 2018,61506048,17456,65069,1079830268708556800,False,25073877,,True,Positive
2,realDonaldTrump,Heads of countries are calling wanting to know...,Mon Dec 31 20:02:52 +0000 2018,61506048,21030,76721,1079830267274108930,False,25073877,,True,Positive
3,realDonaldTrump,It’s incredible how Democrats can all use thei...,Mon Dec 31 15:39:15 +0000 2018,61506048,29610,127485,1079763923845419009,False,25073877,,True,Negative
4,realDonaldTrump,"I’m in the Oval Office. Democrats, come back f...",Mon Dec 31 15:37:14 +0000 2018,61506048,30957,132439,1079763419908243456,False,25073877,,True,Neutral
5,realDonaldTrump,"I’m in the Oval Office. Democrats, come back f...",Mon Dec 31 15:33:15 +0000 2018,61506048,1123,4217,1079762413589807104,False,25073877,,True,Negative
6,realDonaldTrump,I am the only person in America who could say ...,Mon Dec 31 14:38:52 +0000 2018,61506048,25463,112735,1079748730058870789,False,25073877,,True,Positive
7,realDonaldTrump,"I campaigned on Border Security, which you can...",Mon Dec 31 13:29:32 +0000 2018,61506048,22079,91523,1079731279032172545,False,25073877,,True,Positive
8,realDonaldTrump,.....Except the results are FAR BETTER than I ...,Mon Dec 31 13:19:13 +0000 2018,61506048,15152,72758,1079728684427341825,False,25073877,,True,Neutral
9,realDonaldTrump,...I campaigned on getting out of Syria and ot...,Mon Dec 31 13:12:40 +0000 2018,61506048,22119,101470,1079727034673311746,False,25073877,,True,Negative


In [16]:
#create function that cleans up punctuations

def remove_punctuation(input_string):
    out_string = ''
    for char in input_string:
        if char not in string.punctuation and char != '\u2026' and char!='”' and char!='“':
#             out_string = out_string + char
#         if char !='...':
            out_string = out_string + char
    return out_string

In [17]:
# TRUMP TWEETS - frequently used words List

key_words=[]
for text in combined_trump_tweets['text']:
    key_words.append(text.split())

keylist = []
for sublist in key_words:
    for item in sublist:
        keylist.append(item)

#  cleaning up article words and others that we don't want. cleaning up punctuation and setting all to lowercasw       
keylistclean = []
unwanted = ['and', 'to', 'a', 'an', 'for', 'the', 'in', 'of', 'is', 'at', 'that', 'on', 'it', 'are', 'is']
    

for item in keylist:
    cleanitem = remove_punctuation(item).lower()
    if cleanitem not in unwanted: 
        keylistclean.append(cleanitem)
        
c = dict((word, keylistclean.count(word)) for word in keylistclean)

key_word_trend=pd.DataFrame(c.items(), columns=['key_word', 'occurance'])

# Sort list by top used KW

key_word_trend.sort_values(by=['occurance'], ascending=False)    


Unnamed: 0,key_word,occurance
117,our,1040
224,will,918
110,i,897
104,with,890
52,great,882
84,be,859
341,we,839
74,they,696
16,have,647
196,was,593


In [18]:
# TRUMP TWEETS - store data frame to excel just in case
key_word_trend.to_excel("TrumpKW-CLEAN-2018_2019.xlsx", engine='xlsxwriter', encoding='utf8')

In [None]:
# DATA CLEANING

In [None]:
# VISUALIZATION REQUEIREMENTS - 6-8

# What is the distribution of sentiment (positive/negative/neutral) per mention of handle?
# Piechart/bar chart per handle mention

# How many (positive/negative/neutral) retweets?


# Scatter plot - size of bubles is number of followers

# Most commom used words by Trump - top keywords
# Use of exclamation points
# Use of Caps lock?
# Emoji symbols he uses

# Sentiment analysis of Trump Tweets
# Sentiment analysis of his tweets for top 5 keyqords he uses (sort and select top 5 based on total count)


# Scatter plot of Trumps tweets colored by sentiment

# Save png versions of our visuals


In [None]:
# OBSERVATIONS/TRENDS

# Implications - can we predict if he will be elected or not