# Twitter Analysis to determine election sentiments in Hudson County

* Firstly, we import the essential libraries
* Tweepy is the Twitter client that helps with the retrieval and manipulation of Twitter posts
* nltk and textblob are text processing libraries

In [2]:
import os
import pandas as pd
import tweepy as tw
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re
import textblob
from textblob import TextBlob
import numpy as np

* We obtain the authorization parameters from our Twitter developer account
* Then we initiate the API object direclty to interact with Twitter using the tweepy

In [4]:
consumer_key="..."
consumer_secret="..."
access_token="..."
access_token_secret="..."

auth=tw.OAuthHandler(consumer_key,consumer_secret)
auth.set_access_token(access_token,access_token_secret)

api=tw.API(auth,wait_on_rate_limit=True)

* We put filter words "Republicans" and "Democrats" to find the sentiments associated with them
* We further use cursor to iterate through the API and fetch the results

In [5]:
search_words_1="Republicans"
search_words_2="Democrats"
new_search_1=search_words_1+" -filter:retweets"
new_search_2=search_words_2+" -filter:retweets"

date_since = "2020-7-1"

tweets_1 = tw.Cursor(api.search,q=new_search_1,lang="en",since=date_since,location=(74.0535,40.7453,62.31)).items(200)
tweets_2 = tw.Cursor(api.search,q=new_search_2,lang="en",since=date_since,location=(74.0535,40.7453,62.31)).items(200)

user_locs_1 = [[tweet.user.screen_name, tweet.text] for tweet in tweets_1]
user_locs_2 = [[tweet.user.screen_name, tweet.text] for tweet in tweets_2]

In [6]:
tweet_text_1=pd.DataFrame(data=user_locs_1,columns=["user","Tweets"])
tweet_text_2=pd.DataFrame(data=user_locs_2,columns=["user","Tweets"])

First we consider the Republicans searchword. We start the process of Natural Language Processing and break the sentence in corresponding word tokens

In [7]:
trial = []
for i in tweet_text_1["Tweets"]:
    trial.append(word_tokenize(i))

word_tokens = word_tokenize(str(tweet_text_1["Tweets"])) 
stop_words = set(stopwords.words('english'))
# stop_words = list(stop_words)
stop_words
print(trial[0])

['@', 'SimplyMargolous', 'Stupid', '@', 'SenateGOP', 'you', 'could', 'have', 'impeached', 'him', 'and', 'Pence', 'could', 'have', 'won', '2nd', 'term', '.....', 'Republicans…', 'https', ':', '//t.co/HY8pJkGgD0']


We filter out the stop words here and append them to an array

In [8]:
array = []

for w in trial:
    array1 = []
    for j in w:
        if j.lower() not in stop_words:
            array1.append(j)
    array.append(array1)
len(array)

200

In [9]:
a=[]
j=0
for i in array:
    a.append(" ".join(array[j]))
    j=j+1

In the following part we make use of the Regular Expression library to remove noise from the data

In [10]:
i=0
pattern=[]
for j in a:
    pattern.append(re.sub(r"(\@\s\w+)|(\s\.)|(\'\w+)|https?[A-Za-z0-9]+|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|\d|\b[a-zA-Z][a-zA-Z]\b|\b[a-zA-Z]\b","",a[i]))
    i=i+1
pattern[128]



In [11]:
def analyse_emotion(tweets):
    analysis=TextBlob(tweets)
    if analysis.sentiment.polarity>0:
        return 1
    elif analysis.sentiment.polarity==0:
        return 0
    else:
        return -1

z=np.array([analyse_emotion(tweet) for tweet in pattern])

In [12]:
pattern=np.array(pattern)
z=pd.DataFrame(z)
pattern=pd.DataFrame(pattern)

pattern["polarity"]=z

In [13]:
pattern["polarity"].value_counts()

 0    80
 1    79
-1    41
Name: polarity, dtype: int64

In [14]:
sum1 = sum(pattern["polarity"])
scale_of_positivity = sum1/2

if sum1>0:
    print("Sentiment is positive!")
    print("The positivity index out of 100 is :",scale_of_positivity)
elif sum1<0:
    print("Sentiment is negative!")
    print("The negativity index out of 100 is :",scale_of_positivity)
else:
    print("Sentiment is neutral!")

Sentiment is positive!
The positivity index out of 100 is : 19.0


Similarly, we can run the code for Democrats as well

In [18]:
trial2 = []
for i in tweet_text_2["Tweets"]:
    trial2.append(word_tokenize(i))
word_tokens2 = word_tokenize(str(tweet_text_2["Tweets"])) 



array = []
for w in trial2:
    array1 = []
    for j in w:
        if j.lower() not in stop_words:
            array1.append(j)
    array.append(array1)
 

a=[]
j=0
for i in array:
    a.append(" ".join(array[j]))
    j=j+1
   


i=0
pattern2=[]
for j in a:
    pattern2.append(re.sub(r"(\@\s\w+)|(\s\.)|(\'\w+)|https?[A-Za-z0-9]+|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|\d|\b[a-zA-Z][a-zA-Z]\b|\b[a-zA-Z]\b","",a[i]))
    i=i+1



z=np.array([analyse_emotion(tweet) for tweet in pattern2])



pattern2=np.array(pattern2)
z=pd.DataFrame(z)
pattern2=pd.DataFrame(pattern2)
pattern2["polarity"]=z



sum2 = sum(pattern2["polarity"])
scale_of_positivity = sum2/2

if sum1>0:
    print("Sentiment is positive!")
    print("The positivity index out of 100 is :",scale_of_positivity)
elif sum1<0:
    print("Sentiment is negative!")
    print("The negativity index out of 100 is :",scale_of_positivity)
else:
    print("Sentiment is neutral!")

Sentiment is positive!
The positivity index out of 100 is : 14.5


# We conclude that the overall sentiment is positive for both. But the positivity index for Republicans is slightly higher than that of Democrats