# Settings

## Imports

In [1]:
import sys,tweepy,csv,re, requests, json
import matplotlib.pyplot as plt
from dotenv import dotenv_values
import pandas as pd
import numpy as np
import os.path


from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

In [2]:
import warnings
warnings.filterwarnings('ignore') # We can suppress the warnings

# Getting Twitter

In [3]:
#token
config = dotenv_values(".env")
bearer_token = config['BEARER_TOKEN']


#connections
auth = tweepy.OAuth2BearerHandler({bearer_token})
api = tweepy.API(auth)




def bearer_oauth(r):
    r.headers["Authorization"] = f"Bearer {bearer_token}"
    r.headers["User-Agent"] = "v2RecentSearchPython"
    return r

def connect_to_endpoint(url, params):
    response = requests.get(url, auth=bearer_oauth, params=params)
    #print(response.status_code)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

def get_tweets(query,file_name):
    search_url = 'https://api.twitter.com/2/tweets/search/recent'
    #search_url = 'https://api.twitter.com/1.1/search/tweets.json?'

    file_name = f'{file_name}.bz2'
    
    if os.path.exists(file_name) == False: #First checking if database exists
        print(f'Getting tweets...')
        
        # Querying the API
        json_response = connect_to_endpoint(search_url, query)
        
        '''
        It's necessary to complement with next twitters
        '''
 
        tweets_dt = pd.DataFrame.from_dict(json_response['data'])
        tweets_dt.to_csv(file_name, index=False,compression='bz2')
        
    else:
        create_dt = time.strftime("%d/%m/%Y %H:%M:%S",time.strptime(time.ctime(os.path.getmtime(file_name))))
        print(f'Reading {file_name}, created at {create_dt}')
        tweets_dt = pd.read_csv(file_name)
        
    return tweets_dt

In [4]:
NoOfTerms = int(10)

query_params = {
    #'q' : 'beef OR milk place:ea679934779f45c7',
    'query': 'Irish Farmers agriculture -is:retweet',
    'max_results': f'{NoOfTerms}',
    'expansions': 'author_id,in_reply_to_user_id,geo.place_id',
    'tweet.fields': 'id,text,author_id,in_reply_to_user_id,geo,conversation_id,created_at,lang,public_metrics,referenced_tweets,reply_settings,source',
    'user.fields': 'id,name,username,created_at,description,public_metrics,verified',
    'place.fields': 'full_name,id,country,country_code,geo,name,place_type',
}


In [5]:
tweets = get_tweets(query_params,'tweets')
tweets.sample()

Getting tweets...


Unnamed: 0,author_id,text,public_metrics,id,created_at,in_reply_to_user_id,source,conversation_id,lang,referenced_tweets,edit_history_tweet_ids,reply_settings
1,1321217184459272193,Celebrating🌍Africa’s food and farmers | Joan B...,"{'retweet_count': 2, 'reply_count': 0, 'like_c...",1603760301194739712,2022-12-16T14:33:46.000Z,,Twitter Web App,1603760301194739712,en,,[1603760301194739712],everyone


# Clean Your Text Data

In [6]:
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')

import string
from nltk.stem import PorterStemmer

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/gustavo/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Worlds extractions

In [7]:

# keyword extraction from tweets
def links_list(x):
    list_of_links = []
    words = x.split(' ')
    for word in words:
        if re.search('http', word):
            list_of_links.append(re.split("\W+",word.lower()))
    if len(list_of_links) > 0:
        list_of_lists.append(list_of_links[0])
    
# keyword extraction from tweets
def keywords_list(x):
    list_of_keywords = []
    words = x.split()
    for word in words:
        if word.startswith('#'):
            list_of_keywords.append(word)
    if len(list_of_keywords) > 0:
        list_of_lists.append(list_of_keywords)

    
# username extraction from tweets
def usernames_list(x):
    list_of_usernames = []
    words = x.split()
    for word in words:
        if word.startswith('@'):
            list_of_usernames.append(word.lower().replace('@',''))
    if len(list_of_usernames) > 0:
        list_of_lists.append(list_of_usernames)
        
# keyword extraction from tweets
def get_keywords(x):
    list_of_keywords = []
    words = x.split()
    for word in words:
        if word.startswith('#'):
            list_of_keywords.append(word)
    return list_of_keywords


In [8]:
list_of_lists = []

#tweets['text'].apply( lambda tweet : keywords_list(tweet) )
tweets['text'].apply( lambda tweet : links_list(tweet) )
tweets['text'].apply( lambda tweet : usernames_list(tweet) )

rem_list = [item for sublist in list_of_lists for item in sublist]

tweets['text_c'] = tweets['text'].apply( lambda tweet : ' '.join([word for word in re.split("\W+",tweet) if word.lower() not in rem_list]))

In [9]:
tweets['keywords'] = tweets['text'].apply( lambda tweet : get_keywords(tweet) )

In [10]:
n = 1
print(tweets['text'][n], '\n')
print(tweets['text_c'][n])

Celebrating🌍Africa’s food and farmers | Joan Baxter 🍁🎤📻✍️📰📚🇨🇦 @joan_baxter
https://t.co/Kno3bOkfKK @Comhlamh @farmersjournal @AgriAware @MacranaFeirme @IrelandDSA @teagasc @Irish_Aid @IrishSchSusty @TCBotanicGarden @NBGGlasnevin @agriculture_ie @ecowas_agric @BiaAmachAnseo 

Celebrating Africa s food and farmers Joan Baxter


## PoterStemmer

In [11]:
# Store the stopwords into the object named as "stop_words"
stop_words = stopwords.words('english')

# Store the string.punctuation into an object punct
punct = string.punctuation

# Initialise an object using a method PorterStemmer
stemmer = PorterStemmer()

In [12]:
import re

# Store the column of the dataframe named as "text"
X = tweets['text_c']

cleaned_data=[]

# For loop from first value to length(X), ^a-zA-Z means include small and capital case letters

for i in range(len(X)):
    tweet = re.sub('[^a-zA-Z]', ' ', X.iloc[i])
    tweet = tweet.lower().split()
    tweet = [stemmer.stem(word) for word in tweet if (word not in stop_words) and (word not in punct)]
    tweet = ' '.join(tweet)
    tweets.loc[ i ,'text_ps'] = tweet

In [13]:
print(tweets['text'][0], '\n')
print(tweets['text_ps'][0], '\n')

@MarkBehan4 @McConalogue @GeraskoLarysa @FAO @agriculture_ie @FAODG @fiannafailparty @OgraFiannaFail OK this I agree with, their duty firstly should be to irish farmers, plenty of them struggling. 

ok agre duti firstli irish farmer plenti struggl 



# Sentiment Analyzes

TextBlob is a Python (2 and 3) library for processing textual data. It provides a simple API for diving into common natural language processing (NLP) tasks such as part-of-speech tagging, noun phrase extraction, sentiment analysis, classification, translation, and more. [link](https://textblob.readthedocs.io/en/dev/index.html)

In [14]:
for i in tweets.index:
    text = tweets.loc[i,'text_ps']
    tweets.loc[i, 'TextBlob'] = TextBlob(text).sentiment.polarity
    #print(TextBlob(text).sentiment.polarity)
    tweets.loc[i, 'Vader'] = SentimentIntensityAnalyzer().polarity_scores(text)['compound']
    #print(SentimentIntensityAnalyzer().polarity_scores(text)['compound'], '\n')
    
tweets.sample(2)

Unnamed: 0,author_id,text,public_metrics,id,created_at,in_reply_to_user_id,source,conversation_id,lang,referenced_tweets,edit_history_tweet_ids,reply_settings,text_c,keywords,text_ps,TextBlob,Vader
5,8973142,After a turbulent year of input prices and out...,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",1603391104132464642,2022-12-15T14:06:43.000Z,,Twitter Web App,1603391104132464642,en,,[1603391104132464642],everyone,After a turbulent year of input prices and out...,[],turbul year input price output return dairi fa...,0.0,0.5859
7,194513294,Interesting times ahead for the Irish #organic...,"{'retweet_count': 2, 'reply_count': 0, 'like_c...",1603322521692307456,2022-12-15T09:34:12.000Z,,Twitter Web App,1603322521692307456,en,,[1603322521692307456],everyone,Interesting times ahead for the Irish organic ...,"[#organic, #organicfarming]",interest time ahead irish organ sector organic...,0.0,0.4588


In [15]:
def get_polarity(df, col):

    polarity = 0
    neutral = 0
    wpositive = 0
    positive = 0
    spositive = 0
    wnegative = 0
    negative = 0
    snegative = 0
     
    for t in df.index:
        
        v = df.loc[t, col]
        polarity += v  # adding up polarities to find the average later

        if (v == 0):  # adding reaction of how people are reacting to find average later
            neutral += 1
            desc = 'neutral'
        elif (v > 0 and v <= 0.3):
            wpositive += 1
            desc ='weak_positive'
        elif (v > 0.3 and v <= 0.6):
            positive += 1
            desc = 'positive'
        elif (v > 0.6 and v <= 1):
            spositive += 1
            desc = 'strong_positive'
        elif (v > -0.3 and v <= 0):
            wnegative += 1
            desc = 'weak_negative'
        elif (v > -0.6 and v <= -0.3):
            negative += 1
            desc = 'negative'
        elif (v > -1 and v <= -0.6):
            snegative += 1
            desc = 'strong_negative'
         
        df.loc[t, f'{col}_desc'] = desc
        

    return {'polarity_sum':polarity,
            'polarity_mean':(polarity / len(df)),
            'neutral':neutral,
            'strong_positive':spositive,
            'positive':positive,
            'weak_positive':wpositive,
            'weak_negative':wnegative,
            'negative':negative,
            'strong_negative':snegative}




In [16]:
get_polarity(tweets,'TextBlob')

{'polarity_sum': 0.9783333333333334,
 'polarity_mean': 0.09783333333333334,
 'neutral': 5,
 'strong_positive': 0,
 'positive': 1,
 'weak_positive': 4,
 'weak_negative': 0,
 'negative': 0,
 'strong_negative': 0}

In [17]:
get_polarity(tweets,'Vader')

{'polarity_sum': 3.9924999999999997,
 'polarity_mean': 0.39925,
 'neutral': 1,
 'strong_positive': 2,
 'positive': 5,
 'weak_positive': 1,
 'weak_negative': 1,
 'negative': 0,
 'strong_negative': 0}

In [18]:
tweets.sample()

Unnamed: 0,author_id,text,public_metrics,id,created_at,in_reply_to_user_id,source,conversation_id,lang,referenced_tweets,edit_history_tweet_ids,reply_settings,text_c,keywords,text_ps,TextBlob,Vader,TextBlob_desc,Vader_desc
6,1113016486321405952,@agriculture_ie @McConalogue @UKRinIRL @Gerask...,"{'retweet_count': 0, 'reply_count': 1, 'like_c...",1603354556380889089,2022-12-15T11:41:29.000Z,2589328316,Twitter Web App,1603033104960167936,en,"[{'type': 'replied_to', 'id': '160303310496016...",[1603354556380889089],everyone,Great any chance you might do something for ir...,[],great chanc might someth irish farmer see cont...,0.4,0.6249,positive,strong_positive


In [19]:
n = 9
print('Text Original:', tweets.loc[n, 'text'], '\n',
     '-------------------------------------------------------------------------------------------')
print('Text Clear:', tweets.loc[n, 'text_c'], '\n',
     '-------------------------------------------------------------------------------------------')

print('Text Steammed:', tweets.loc[n, 'text_ps'], '\n',
     '-------------------------------------------------------------------------------------------')

print('KeyWords:', tweets.loc[n, 'keywords'], '\n',
     '-------------------------------------------------------------------------------------------')

print('TextBlob: ',tweets.loc[n, 'TextBlob'], tweets.loc[n, 'TextBlob_desc'])
print('Vader: ', tweets.loc[n, 'Vader'], tweets.loc[n, 'Vader_desc'])

Text Original: Diarmuid is passionate about the success of Irish #agriculture. He brings a wealth of knowledge &amp; experience to his interactions with #farmers &amp; other #agricultural stakeholders. https://t.co/mP9CC4LXQB 
 -------------------------------------------------------------------------------------------
Text Clear: Diarmuid is passionate about the success of Irish agriculture He brings a wealth of knowledge amp experience to his interactions with farmers amp other agricultural stakeholders 
 -------------------------------------------------------------------------------------------
Text Steammed: diarmuid passion success irish agricultur bring wealth knowledg amp experi interact farmer amp agricultur stakehold 
 -------------------------------------------------------------------------------------------
KeyWords: ['#agriculture.', '#farmers', '#agricultural'] 
 -------------------------------------------------------------------------------------------
TextBlob:  0.15 weak

In [20]:
tweets.iloc[:, -6:]

Unnamed: 0,keywords,text_ps,TextBlob,Vader,TextBlob_desc,Vader_desc
0,[],ok agre duti firstli irish farmer plenti struggl,0.25,0.296,weak_positive,weak_positive
1,[],celebr africa food farmer joan baxter,0.0,0.0,neutral,neutral
2,[],irish farmer want cull herd put busi ukrainian...,0.0,0.4588,neutral,positive
3,[],screw irish farmer,0.0,-0.1027,neutral,weak_negative
4,[],farmer accept chang need agricultur alway evol...,0.053333,0.4588,weak_positive,positive
5,[],turbul year input price output return dairi fa...,0.0,0.5859,neutral,positive
6,[],great chanc might someth irish farmer see cont...,0.4,0.6249,positive,strong_positive
7,"[#organic, #organicfarming]",interest time ahead irish organ sector organic...,0.0,0.4588,neutral,positive
8,[],irish farmer journal northern correspond one j...,0.125,0.34,weak_positive,positive
9,"[#agriculture., #farmers, #agricultural]",diarmuid passion success irish agricultur brin...,0.15,0.872,weak_positive,strong_positive


## Irony and Sarcasm

Minimizing the impact of irony and sarcasm through hashing tags

In [None]:
word_bag =["#sarcasm",
"#sarcastic",
"#not",
"#notsarcasm",
"#notsarcastic",
"#irony",
"#ironic",
"#joke",
"#humour",
"#funny"]

In [24]:
import json

with open('sarcasm_headlines.json', 'r') as f:
  news = json.load(f)
  
news

#news = pd.read_json("Sarcasm_Headlines_Dataset_v2.json", lines=True, orient = "column")


FileNotFoundError: [Errno 2] No such file or directory: 'sarcasm_headlines.json'