In [82]:
import sys,tweepy,csv,re, requests, json
import matplotlib.pyplot as plt
from dotenv import dotenv_values
import pandas as pd
import numpy as np


from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

In [62]:
import warnings
warnings.filterwarnings('ignore') # We can suppress the warnings

# Getting Twitter Data

In [27]:
# --- Credentials ---
# The bearer token is loaded from a local ".env" file instead of being
# written into the notebook.
config = dotenv_values(".env")
bearer_token = config['BEARER_TOKEN']

# --- Tweepy client ---
# OAuth2BearerHandler takes the token string itself. The earlier
# `{bearer_token}` was a set literal, which handed tweepy a one-element set
# rather than the token.
auth = tweepy.OAuth2BearerHandler(bearer_token)
api = tweepy.API(auth)

# --- Raw REST endpoint used by connect_to_endpoint() ---
search_url = 'https://api.twitter.com/2/tweets/search/recent'
#search_url = 'https://api.twitter.com/1.1/search/tweets.json?'


def bearer_oauth(r):
    """Attach bearer-token authentication headers to an outgoing request.

    Meant to be passed as the ``auth`` callable of ``requests.get``; the token
    is read from the notebook-level ``bearer_token``.

    Args:
        r: Request object whose ``headers`` mapping is updated in place.

    Returns:
        The same request object, with ``Authorization`` and ``User-Agent`` set.
    """
    r.headers.update({
        "Authorization": f"Bearer {bearer_token}",
        "User-Agent": "v2RecentSearchPython",
    })
    return r

def connect_to_endpoint(url, params, timeout=30):
    """GET ``url`` with bearer-token auth and return the decoded JSON body.

    Args:
        url: Endpoint to query.
        params: Query-string parameters forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may wait indefinitely on a stalled
            connection, which freezes the notebook.

    Returns:
        The JSON payload of the response, as returned by ``response.json()``.

    Raises:
        Exception: When the status code is not 200; the arguments are the
            status code and the response text.
        requests.exceptions.Timeout: When the server does not answer within
            ``timeout`` seconds.
    """
    response = requests.get(url, auth=bearer_oauth, params=params, timeout=timeout)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [29]:
# Number of tweets requested from the search endpoint.
NoOfTerms = 10

# Parameters for the recent-search request: the query itself plus the
# expansions and the tweet / user / place fields to include in the response.
# Earlier query kept for reference: 'q' : 'beef OR milk place:ea679934779f45c7'
query_params = {
    'query': 'Irish Farmers agriculture -is:retweet',
    'max_results': str(NoOfTerms),
    'expansions': ','.join(['author_id', 'in_reply_to_user_id', 'geo.place_id']),
    'tweet.fields': ','.join([
        'id', 'text', 'author_id', 'in_reply_to_user_id', 'geo',
        'conversation_id', 'created_at', 'lang', 'public_metrics',
        'referenced_tweets', 'reply_settings', 'source',
    ]),
    'user.fields': ','.join([
        'id', 'name', 'username', 'created_at', 'description',
        'public_metrics', 'verified',
    ]),
    'place.fields': ','.join([
        'full_name', 'id', 'country', 'country_code', 'geo', 'name',
        'place_type',
    ]),
}



In [30]:
# Fetch the recent-search results for the configured query.
json_response = connect_to_endpoint(url=search_url, params=query_params)

# Pretty-printed copy of the payload (sorted keys, non-ASCII characters kept
# as-is) for inspecting the raw response by eye.
parsedRes = json.dumps(
    json_response,
    ensure_ascii=False,
    sort_keys=True,
    indent=4,
)
# print(parsedRes)

In [236]:
# One row per tweet record found under the "data" key of the API response.
tweets_dt = pd.DataFrame(json_response['data'])

In [237]:
# Work on an independent copy: later cells add columns (text_c, TextBlob,
# Vader) to `tweets`, and a plain alias would silently change the raw API
# frame `tweets_dt` as well.
tweets = tweets_dt.copy()
tweets.sample()

Unnamed: 0,in_reply_to_user_id,public_metrics,id,lang,author_id,source,text,conversation_id,edit_history_tweet_ids,referenced_tweets,reply_settings,created_at
6,2589328316,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",1603354556380889089,en,1113016486321405952,Twitter Web App,@agriculture_ie @McConalogue @UKRinIRL @Gerask...,1603033104960167936,[1603354556380889089],"[{'type': 'replied_to', 'id': '160303310496016...",everyone,2022-12-15T11:41:29.000Z


# Clean Your Text Data

In [63]:
import string

import nltk
# Fetch the stop-word corpus (nltk reports when it is already up to date).
nltk.download('stopwords')

from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/gustavo/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Word extraction

In [215]:

# keyword extraction from tweets
def keyword_list(x, sink=None):
    """Collect the hashtags that appear in one tweet.

    A hashtag is a ``#`` followed by word characters, so punctuation glued to
    a tag (``#beef,``) is not captured, and a tag that directly follows
    punctuation (``(#beef)``) is still found. A ``#`` in the middle of a word
    is ignored.

    Args:
        x: Tweet text.
        sink: List that receives this tweet's hashtags as one sub-list.
            When omitted, the notebook-level ``list_of_lists`` is used, which
            must already exist at call time.

    Returns:
        The hashtags of ``x`` in order of appearance, with the leading ``#``
        and the original casing kept.
    """
    list_of_keywords = re.findall(r'(?<!\w)#\w+', x)
    target = list_of_lists if sink is None else sink
    target.append(list_of_keywords)
    return list_of_keywords

    
# username extraction from tweets
def usernames_list(x, sink=None):
    """Collect the @-mentioned usernames that appear in one tweet.

    A mention is an ``@`` followed by word characters, so ``@name,`` and
    ``@name:`` yield ``name`` without the trailing punctuation. An ``@`` in
    the middle of a word (for example in an e-mail address) is ignored.

    Args:
        x: Tweet text.
        sink: List that receives this tweet's usernames as one sub-list.
            When omitted, the notebook-level ``list_of_lists`` is used, which
            must already exist at call time.

    Returns:
        The mentioned usernames of ``x`` in order of appearance, lower-cased
        and without the leading ``@``.
    """
    list_of_usernames = [handle.lower() for handle in re.findall(r'(?<!\w)@(\w+)', x)]
    target = list_of_lists if sink is None else sink
    target.append(list_of_usernames)
    return list_of_usernames


In [238]:
# Start from an empty collector so re-running this cell does not accumulate
# the usernames gathered by a previous run.
list_of_lists = []

#tweets['text'].apply( lambda tweet : keyword_list(tweet) )
# usernames_list() records its result in list_of_lists; a plain loop makes
# that side effect explicit.
for tweet in tweets['text']:
    usernames_list(tweet)

# Flattened list of every username mentioned in any tweet.
rem_list = [item for sublist in list_of_lists for item in sublist]
# Set copy for constant-time membership tests while filtering words.
rem_lookup = set(rem_list)

# Split each tweet on runs of non-word characters and drop every word that
# matches a collected username (case-insensitively). The raw string avoids the
# invalid "\W" escape of a normal string literal.
tweets['text_c'] = tweets['text'].apply(
    lambda tweet: ' '.join(
        word for word in re.split(r"\W+", tweet) if word.lower() not in rem_lookup
    )
)

In [239]:
# First tweet before and after mention removal, separated by a blank line.
print(tweets['text'][0], end=' \n\n')
print(tweets['text_c'][0])

@MarkBehan4 @McConalogue @GeraskoLarysa @FAO @agriculture_ie @FAODG @fiannafailparty @OgraFiannaFail OK this I agree with, their duty firstly should be to irish farmers, plenty of them struggling. 

 OK this I agree with their duty firstly should be to irish farmers plenty of them struggling 


## PorterStemmer

In [240]:
# Porter stemmer instance used to reduce each word to its stem.
stemmer = PorterStemmer()

# All ASCII punctuation characters, as a single string.
punct = string.punctuation

# NLTK's English stop-word list.
stop_words = stopwords.words('english')

In [241]:
import re

# Tweets with the @-mentions already removed.
X = tweets['text_c']


def _normalise(raw_text):
    """Keep ASCII letters only, lower-case, drop stop words, then stem."""
    tokens = re.sub('[^a-zA-Z]', ' ', raw_text).lower().split()
    stems = [
        stemmer.stem(token)
        for token in tokens
        if not (token in stop_words or token in punct)
    ]
    return ' '.join(stems)


# One cleaned string per tweet, in the same order as X.
cleaned_data = [_normalise(raw_text) for raw_text in X]

In [242]:
# First tweet in its original form and after cleaning, separated by a blank line.
print(tweets['text'][0], end=' \n\n')
print(cleaned_data[0])

@MarkBehan4 @McConalogue @GeraskoLarysa @FAO @agriculture_ie @FAODG @fiannafailparty @OgraFiannaFail OK this I agree with, their duty firstly should be to irish farmers, plenty of them struggling. 

ok agre duti firstli irish farmer plenti struggl


# Sentiment Analysis

In [243]:
# Build the VADER analyzer once instead of constructing a new one for every
# tweet inside the loop.
vader = SentimentIntensityAnalyzer()

# cleaned_data holds one string per row of `tweets`, in row order, so whole
# columns are assigned positionally. The earlier `tweets.loc[i, ...]` used the
# position as an index label, which would append new rows if the frame's index
# were not 0..n-1.
tweets['TextBlob'] = [TextBlob(text).sentiment.polarity for text in cleaned_data]
tweets['Vader'] = [vader.polarity_scores(text)['compound'] for text in cleaned_data]

tweets.sample(2)

Unnamed: 0,in_reply_to_user_id,public_metrics,id,lang,author_id,source,text,conversation_id,edit_history_tweet_ids,referenced_tweets,reply_settings,created_at,text_c,TextBlob,Vader
6,2589328316.0,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",1603354556380889089,en,1113016486321405952,Twitter Web App,@agriculture_ie @McConalogue @UKRinIRL @Gerask...,1603033104960167936,[1603354556380889089],"[{'type': 'replied_to', 'id': '160303310496016...",everyone,2022-12-15T11:41:29.000Z,Great any chance you might do something for i...,0.4,0.6249
1,,"{'retweet_count': 1, 'reply_count': 0, 'like_c...",1603760301194739712,en,1321217184459272193,Twitter Web App,Celebrating🌍Africa’s food and farmers | Joan B...,1603760301194739712,[1603760301194739712],,everyone,2022-12-16T14:33:46.000Z,Celebrating Africa s food and farmers Joan Bax...,0.0,0.0


In [256]:
def get_polarity(df, col):
    """Summarise the polarity scores stored in one column of a DataFrame.

    Args:
        df: DataFrame with one row per scored text.
        col: Name of the column holding the polarity scores.

    Returns:
        dict with the sum and mean of the scores and the number of rows that
        fall in each band:

            neutral          score == 0
            weak_positive    0    <  score <= 0.3
            positive         0.3  <  score <= 0.6
            strong_positive  0.6  <  score <= 1
            weak_negative    -0.3 <  score <  0
            negative         -0.6 <  score <= -0.3
            strong_negative  -1   <= score <= -0.6

        Scores outside [-1, 1] (or NaN) are added to the sum but counted in no
        band. ``polarity_mean`` is NaN when ``df`` has no rows.
    """
    polarity = 0
    neutral = 0
    wpositive = 0
    positive = 0
    spositive = 0
    wnegative = 0
    negative = 0
    snegative = 0

    # Iterate over the values directly: looking rows up by index label breaks
    # when the index contains duplicate labels.
    for v in df[col]:
        polarity += v  # running total, used for the mean below

        if v == 0:
            neutral += 1
        elif 0 < v <= 0.3:
            wpositive += 1
        elif 0.3 < v <= 0.6:
            positive += 1
        elif 0.6 < v <= 1:
            spositive += 1
        elif -0.3 < v < 0:
            wnegative += 1
        elif -0.6 < v <= -0.3:
            negative += 1
        elif -1 <= v <= -0.6:
            # Lower bound is inclusive so a score of exactly -1 is counted.
            snegative += 1

    n_rows = len(df)
    # An empty frame has no meaningful average; avoid dividing by zero.
    polarity_mean = polarity / n_rows if n_rows else float('nan')

    return {'polarity_sum': polarity,
            'polarity_mean': polarity_mean,
            'neutral': neutral,
            'strong_positive': spositive,
            'positive': positive,
            'weak_positive': wpositive,
            'weak_negative': wnegative,
            'negative': negative,
            'strong_negative': snegative}




In [257]:
# Band counts and average of the TextBlob polarity scores.
get_polarity(df=tweets, col='TextBlob')

{'polarity_sum': 0.9783333333333334,
 'polarity_mean': 0.09783333333333334,
 'neutral': 5,
 'strong_positive': 0,
 'positive': 1,
 'weak_positive': 4,
 'weak_negative': 0,
 'negative': 0,
 'strong_negative': 0}

In [258]:
# Band counts and average of the VADER compound scores.
get_polarity(df=tweets, col='Vader')

{'polarity_sum': 3.9924999999999997,
 'polarity_mean': 0.39925,
 'neutral': 1,
 'strong_positive': 2,
 'positive': 5,
 'weak_positive': 1,
 'weak_negative': 1,
 'negative': 0,
 'strong_negative': 0}

In [264]:
# TextBlob needs the text to analyse as its first argument. The sentence is
# the one scored with VADER in the next cell, so the two results can be
# compared side by side.
TextBlob('Im too sad today so i become happy and started to cry of hapness in a dark room, ive lost the love of my life').sentiment.polarity

0.1625

In [265]:
# Score one sample sentence with VADER and show its compound value.
sample_sentence = 'Im too sad today so i become happy and started to cry of hapness in a dark room, ive lost the love of my life'
sample_scores = SentimentIntensityAnalyzer().polarity_scores(sample_sentence)
sample_scores['compound']

0.1689