# Settings

## Imports

In [439]:
import sys,tweepy,csv,re, requests, json
import matplotlib.pyplot as plt
from dotenv import dotenv_values
import pandas as pd
import numpy as np
import os.path


from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

In [62]:
import warnings
warnings.filterwarnings('ignore') # We can suppress the warnings

# Getting Twitter

In [440]:
#token
config = dotenv_values(".env")
bearer_token = config['BEARER_TOKEN']


#connections
auth = tweepy.OAuth2BearerHandler({bearer_token})
api = tweepy.API(auth)




def bearer_oauth(r):
    r.headers["Authorization"] = f"Bearer {bearer_token}"
    r.headers["User-Agent"] = "v2RecentSearchPython"
    return r

def connect_to_endpoint(url, params):
    response = requests.get(url, auth=bearer_oauth, params=params)
    #print(response.status_code)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

def get_tweets(query,file_name):
    search_url = 'https://api.twitter.com/2/tweets/search/recent'
    #search_url = 'https://api.twitter.com/1.1/search/tweets.json?'

    file_name = f'{file_name}.bz2'
    
    if os.path.exists(file_name) == False: #First checking if database exists
        print(f'Getting tweets...')
        
        # Querying the API
        json_response = connect_to_endpoint(search_url, query)
        
        '''
        It's necessary to complement with next twitters
        '''
 
        tweets_dt = pd.DataFrame.from_dict(json_response['data'])
        tweets_dt.to_csv(file_name, index=False,compression='bz2')
        
    else:
        create_dt = time.strftime("%d/%m/%Y %H:%M:%S",time.strptime(time.ctime(os.path.getmtime(file_name))))
        print(f'Reading {file_name}, created at {create_dt}')
        tweets_dt = pd.read_csv(file_name)
        
    return tweets_dt

In [441]:
NoOfTerms = int(10)

query_params = {
    #'q' : 'beef OR milk place:ea679934779f45c7',
    'query': 'Irish Farmers agriculture -is:retweet',
    'max_results': f'{NoOfTerms}',
    'expansions': 'author_id,in_reply_to_user_id,geo.place_id',
    'tweet.fields': 'id,text,author_id,in_reply_to_user_id,geo,conversation_id,created_at,lang,public_metrics,referenced_tweets,reply_settings,source',
    'user.fields': 'id,name,username,created_at,description,public_metrics,verified',
    'place.fields': 'full_name,id,country,country_code,geo,name,place_type',
}


In [443]:
tweets = get_tweets(query_params,'tweets')
tweets.sample()

Getting tweets...


Unnamed: 0,id,in_reply_to_user_id,conversation_id,reply_settings,edit_history_tweet_ids,public_metrics,created_at,source,author_id,referenced_tweets,text,lang
0,1604028300376014848,1110183756,1603038191728365568,everyone,[1604028300376014848],"{'retweet_count': 0, 'reply_count': 0, 'like_c...",2022-12-17T08:18:42.000Z,Twitter Web App,1326607228363804672,"[{'type': 'replied_to', 'id': '160347111924741...",@MarkBehan4 @McConalogue @GeraskoLarysa @FAO @...,en


# Clean Your Text Data

In [63]:
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')

import string
from nltk.stem import PorterStemmer

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/gustavo/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Worlds extractions

In [414]:

# keyword extraction from tweets
def links_list(x):
    list_of_links = []
    words = x.split(' ')
    for word in words:
        if re.search('http', word):
            list_of_links.append(re.split("\W+",word.lower()))
    if len(list_of_links) > 0:
        list_of_lists.append(list_of_links[0])
    
# keyword extraction from tweets
def keywords_list(x):
    list_of_keywords = []
    words = x.split()
    for word in words:
        if word.startswith('#'):
            list_of_keywords.append(word)
    if len(list_of_keywords) > 0:
        list_of_lists.append(list_of_keywords)

    
# username extraction from tweets
def usernames_list(x):
    list_of_usernames = []
    words = x.split()
    for word in words:
        if word.startswith('@'):
            list_of_usernames.append(word.lower().replace('@',''))
    if len(list_of_usernames) > 0:
        list_of_lists.append(list_of_usernames)


In [415]:
list_of_lists = []

#tweets['text'].apply( lambda tweet : keywords_list(tweet) )
tweets['text'].apply( lambda tweet : links_list(tweet) )
tweets['text'].apply( lambda tweet : usernames_list(tweet) )

rem_list = [item for sublist in list_of_lists for item in sublist]

tweets['text_c'] = tweets['text'].apply( lambda tweet : ' '.join([word for word in re.split("\W+",tweet) if word.lower() not in rem_list]))

In [416]:
list_of_lists

[['', 'joan_baxter', 'https', 't', 'co', 'kno3bokfkk'],
 ['https', 't', 'co', 'w9dkvshpcx'],
 ['', 'https', 't', 'co', 'lvrevhs0fq'],
 ['https', 't', 'co', 'rfnl2bouio'],
 ['https', 't', 'co', 'mp9cc4lxqb'],
 ['markbehan4',
  'mcconalogue',
  'geraskolarysa',
  'fao',
  'agriculture_ie',
  'faodg',
  'fiannafailparty',
  'ografiannafail'],
 ['joan_baxter',
  'comhlamh',
  'farmersjournal',
  'agriaware',
  'macranafeirme',
  'irelanddsa',
  'teagasc',
  'irish_aid',
  'irishschsusty',
  'tcbotanicgarden',
  'nbgglasnevin',
  'agriculture_ie',
  'ecowas_agric',
  'biaamachanseo'],
 ['mcconalogue',
  'geraskolarysa',
  'fao',
  'agriculture_ie',
  'faodg',
  'fiannafailparty',
  'ografiannafail'],
 ['mcconalogue',
  'geraskolarysa',
  'fao',
  'agriculture_ie',
  'faodg',
  'fiannafailparty',
  'ografiannafail'],
 ['gameonclimate'],
 ['agriculture_ie', 'mcconalogue', 'ukrinirl', 'geraskolarysa', 'un', 'fao'],
 ['agriculture_ie', 'deptofscisetu', 'natorgskill', 'teagascorganics'],
 ['pmcc

In [418]:
n = 1
print(tweets['text'][n], '\n')
print(tweets['text_c'][n])

Celebrating🌍Africa’s food and farmers | Joan Baxter 🍁🎤📻✍️📰📚🇨🇦 @joan_baxter
https://t.co/Kno3bOkfKK @Comhlamh @farmersjournal @AgriAware @MacranaFeirme @IrelandDSA @teagasc @Irish_Aid @IrishSchSusty @TCBotanicGarden @NBGGlasnevin @agriculture_ie @ecowas_agric @BiaAmachAnseo 

Celebrating Africa s food and farmers Joan Baxter


# Sentiment Analyzes

TextBlob is a Python (2 and 3) library for processing textual data. It provides a simple API for diving into common natural language processing (NLP) tasks such as part-of-speech tagging, noun phrase extraction, sentiment analysis, classification, translation, and more. [link](https://textblob.readthedocs.io/en/dev/index.html)

In [423]:
for i in range(len(cleaned_data)):
    text = cleaned_data[i]
    tweets.loc[i, 'TextBlob'] = TextBlob(text).sentiment.polarity
    #print(TextBlob(text).sentiment.polarity)
    tweets.loc[i, 'Vader'] = SentimentIntensityAnalyzer().polarity_scores(text)['compound']
    #print(SentimentIntensityAnalyzer().polarity_scores(text)['compound'], '\n')
    
tweets.sample(2)

Unnamed: 0,in_reply_to_user_id,public_metrics,id,lang,author_id,source,text,conversation_id,edit_history_tweet_ids,referenced_tweets,reply_settings,created_at,text_c,TextBlob,Vader
6,2589328316,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",1603354556380889089,en,1113016486321405952,Twitter Web App,@agriculture_ie @McConalogue @UKRinIRL @Gerask...,1603033104960167936,[1603354556380889089],"[{'type': 'replied_to', 'id': '160303310496016...",everyone,2022-12-15T11:41:29.000Z,Great any chance you might do something for ir...,0.4,0.6249
2,197897890,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",1603707005147152385,en,1469965454865321985,Twitter for Android,@McConalogue @GeraskoLarysa @FAO @agriculture_...,1603038191728365568,[1603707005147152385],"[{'type': 'replied_to', 'id': '160303819172836...",everyone,2022-12-16T11:02:00.000Z,Irish farmers We want to cull the herd and put...,0.0,0.4588


In [424]:
def get_polarity(df, col):

    polarity = 0
    neutral = 0
    wpositive = 0
    positive = 0
    spositive = 0
    wnegative = 0
    negative = 0
    snegative = 0
     
    for t in df.index:
        
        v = df.loc[t, col]
        polarity += v  # adding up polarities to find the average later

        if (v == 0):  # adding reaction of how people are reacting to find average later
            neutral += 1
            desc = 'neutral'
        elif (v > 0 and v <= 0.3):
            wpositive += 1
            desc ='weak_positive'
        elif (v > 0.3 and v <= 0.6):
            positive += 1
            desc = 'positive'
        elif (v > 0.6 and v <= 1):
            spositive += 1
            desc = 'strong_positive'
        elif (v > -0.3 and v <= 0):
            wnegative += 1
            desc = 'weak_negative'
        elif (v > -0.6 and v <= -0.3):
            negative += 1
            desc = 'negative'
        elif (v > -1 and v <= -0.6):
            snegative += 1
            desc = 'strong_negative'
         
        df.loc[t, f'{col}_desc'] = desc
        

    return {'polarity_sum':polarity,
            'polarity_mean':(polarity / len(df)),
            'neutral':neutral,
            'strong_positive':spositive,
            'positive':positive,
            'weak_positive':wpositive,
            'weak_negative':wnegative,
            'negative':negative,
            'strong_negative':snegative}




In [425]:
get_polarity(tweets,'TextBlob')

{'polarity_sum': 0.9783333333333334,
 'polarity_mean': 0.09783333333333334,
 'neutral': 5,
 'strong_positive': 0,
 'positive': 1,
 'weak_positive': 4,
 'weak_negative': 0,
 'negative': 0,
 'strong_negative': 0}

In [426]:
get_polarity(tweets,'Vader')

{'polarity_sum': 3.9924999999999997,
 'polarity_mean': 0.39925,
 'neutral': 1,
 'strong_positive': 2,
 'positive': 5,
 'weak_positive': 1,
 'weak_negative': 1,
 'negative': 0,
 'strong_negative': 0}

In [428]:
tweets.sample()

Unnamed: 0,in_reply_to_user_id,public_metrics,id,lang,author_id,source,text,conversation_id,edit_history_tweet_ids,referenced_tweets,reply_settings,created_at,text_c,TextBlob,Vader,TextBlob_desc,Vader_desc
8,,"{'retweet_count': 1, 'reply_count': 0, 'like_c...",1603104787578458112,en,1193124290,TweetDeck,Irish Farmers Journal Northern correspondent @...,1603104787578458112,[1603104787578458112],,everyone,2022-12-14T19:09:00.000Z,Irish Farmers Journal Northern correspondent w...,0.125,0.34,weak_positive,positive


In [435]:
n = 9
print('Text Original:', tweets.loc[n, 'text'], '\n',
     '-------------------------------------------------------------------------------------------')
print('Text Clear:', tweets.loc[n, 'text_c'], '\n',
     '-------------------------------------------------------------------------------------------')
print('TextBlob: ',tweets.loc[n, 'TextBlob'], tweets.loc[n, 'TextBlob_desc'])
print('Vader: ', tweets.loc[n, 'Vader'], tweets.loc[n, 'Vader_desc'])

Text Original: Diarmuid is passionate about the success of Irish #agriculture. He brings a wealth of knowledge &amp; experience to his interactions with #farmers &amp; other #agricultural stakeholders. https://t.co/mP9CC4LXQB 
 -------------------------------------------------------------------------------------------
Text Clear: Diarmuid is passionate about the success of Irish agriculture He brings a wealth of knowledge amp experience to his interactions with farmers amp other agricultural stakeholders 
 -------------------------------------------------------------------------------------------
TextBlob:  0.15 weak_positive
Vader:  0.872 strong_positive


In [430]:
tweets.iloc[:, -5:]

Unnamed: 0,text_c,TextBlob,Vader,TextBlob_desc,Vader_desc
0,OK this I agree with their duty firstly should...,0.25,0.296,weak_positive,weak_positive
1,Celebrating Africa s food and farmers Joan Baxter,0.0,0.0,neutral,neutral
2,Irish farmers We want to cull the herd and put...,0.0,0.4588,neutral,positive
3,Screw the Irish farmers,0.0,-0.1027,neutral,weak_negative
4,Most farmers accept that changes are needed bu...,0.053333,0.4588,weak_positive,positive
5,After a turbulent year of input prices and out...,0.0,0.5859,neutral,positive
6,Great any chance you might do something for ir...,0.4,0.6249,positive,strong_positive
7,Interesting times ahead for the Irish organic ...,0.0,0.4588,neutral,positive
8,Irish Farmers Journal Northern correspondent w...,0.125,0.34,weak_positive,positive
9,Diarmuid is passionate about the success of Ir...,0.15,0.872,weak_positive,strong_positive


## PoterStemmer

In [419]:
# Store the stopwords into the object named as "stop_words"
stop_words = stopwords.words('english')

# Store the string.punctuation into an object punct
punct = string.punctuation

# Initialise an object using a method PorterStemmer
stemmer = PorterStemmer()

In [420]:
import re

# Store the column of the dataframe named as "text"
X = tweets['text_c']

cleaned_data=[]

# For loop from first value to length(X), ^a-zA-Z means include small and capital case letters

for i in range(len(X)):
    tweet = re.sub('[^a-zA-Z]', ' ', X.iloc[i])
    tweet = tweet.lower().split()
    tweet = [stemmer.stem(word) for word in tweet if (word not in stop_words) and (word not in punct)]
    tweet = ' '.join(tweet)
    cleaned_data.append(tweet)

In [421]:
print(tweets['text'][0], '\n')
print(cleaned_data[0])

@MarkBehan4 @McConalogue @GeraskoLarysa @FAO @agriculture_ie @FAODG @fiannafailparty @OgraFiannaFail OK this I agree with, their duty firstly should be to irish farmers, plenty of them struggling. 

ok agre duti firstli irish farmer plenti struggl
