In [2]:
import pandas as pd # for data manipulation and analysis
import numpy as np # for working with arrays and carrying out mathematical operations. Pandas is built on Numpy
import csv # to read and write csv files
import re # In-built regular expressions library
import string # Inbuilt string library
import glob # to retrieve files/pathnames matching a specified pattern. 
import random # generating random numbers
import requests # to send HTTP requests
from PIL import Image # for opening, manipulating, and saving many different image file f
import matplotlib.pyplot as plt # for plotting
import string

# Natural Language Processing Toolkit
from nltk.corpus import stopwords, words # get stopwords from NLTK library & get all words in english language
from nltk.tokenize import word_tokenize # to create word tokens
# from nltk.stem import PorterStemmer (I played around with Stemmer and decided to use Lemmatizer instead)
from nltk.stem import WordNetLemmatizer # to reduce words to orginal form
from nltk import pos_tag # For Parts of Speech tagging



In [4]:
# Load the merged tweet export. The column listing printed further down shows
# 'Unnamed: 0', Datetime, Tweet Id, Text and Username
# ('Unnamed: 0' is presumably an index saved with the file — TODO confirm).
df = pd.read_csv('Tweets_Merged.csv')

In [6]:
# Number of rows and columns in the raw data
df.shape

(15716, 5)

In [7]:
# Count the rows whose 'Tweet Id' repeats an earlier row (i.e. duplicated tweets)
df.duplicated(subset='Tweet Id').sum()

30

In [8]:
# Drop the duplicated tweets, keeping the first row seen for each 'Tweet Id'.
# drop_duplicates returns a new frame, so the raw `df` is left untouched.
New_df=df.drop_duplicates(subset=['Tweet Id'])

In [9]:
# Confirm that the duplicated rows have been dropped by re-checking the shape
New_df.shape

(15686, 5)

In [10]:
# Check, column by column, whether any NaN values are present
New_df.isna().any()

Unnamed: 0    False
Datetime      False
Tweet Id      False
Text          False
Username      False
dtype: bool

In [11]:
# Drop the unwanted 'Unnamed: 0' column; `drop` returns a new frame, so New_df keeps it
tweets_df = New_df.drop(columns=['Unnamed: 0'])

In [12]:
# Display the cleaned frame (the middle rows are elided in the rendered output)
tweets_df

Unnamed: 0,Datetime,Tweet Id,Text,Username
0,2022-01-30 23:38:14+00:00,1487933203809714177,"Dear Mr president Muhammadu Buhari, please han...",ismailmsani4
1,2022-01-30 22:34:47+00:00,1487917234106339330,@yush_belle Muhammadu Buhari: Challenges of Le...,alliiyyu
2,2022-01-30 22:23:50+00:00,1487914477521674244,@fash360degree @MuhammaduBuhari The so called ...,4real009494661
3,2022-01-30 22:14:08+00:00,1487912038026420225,@fash360degree @MuhammaduBuhari He should go a...,lukemonowolabi
4,2022-01-30 22:02:11+00:00,1487909028588142596,President Muhammadu Buhari has joined the ente...,pmparrotng
...,...,...,...,...
15711,2021-10-01 09:26:58+00:00,1443870040529555488,- @atiku Abubakar \n\n#October1st \n#Atikukawai,Abdl_Rahmane
15712,2021-10-01 07:49:07+00:00,1443845418614358023,H.E Alh @atiku Abubakar former Vice President ...,WasiluSalihu
15713,2021-10-01 07:37:44+00:00,1443842551455223812,Independence Day: Nigerians Still Living in Ex...,LagosEyeNews2
15714,2021-10-01 04:07:04+00:00,1443789538619494401,"By zoning the presidency to the south, PDP has...",AkeliciousMedia


In [13]:
# Preview the first ten rows of the dataframe
tweets_df.head(10)

Unnamed: 0,Datetime,Tweet Id,Text,Username
0,2022-01-30 23:38:14+00:00,1487933203809714177,"Dear Mr president Muhammadu Buhari, please han...",ismailmsani4
1,2022-01-30 22:34:47+00:00,1487917234106339330,@yush_belle Muhammadu Buhari: Challenges of Le...,alliiyyu
2,2022-01-30 22:23:50+00:00,1487914477521674244,@fash360degree @MuhammaduBuhari The so called ...,4real009494661
3,2022-01-30 22:14:08+00:00,1487912038026420225,@fash360degree @MuhammaduBuhari He should go a...,lukemonowolabi
4,2022-01-30 22:02:11+00:00,1487909028588142596,President Muhammadu Buhari has joined the ente...,pmparrotng
5,2022-01-30 21:56:44+00:00,1487907659059810308,All these are entrepreneurs in Nigeria except ...,ibraheemgarrba
6,2022-01-30 21:42:24+00:00,1487904049773039618,Many Nigerians were left bewildered last week ...,atndaily
7,2022-01-30 21:41:33+00:00,1487903836899397634,President Muhammadu Buhari has joined the ente...,StrawberryNG
8,2022-01-30 21:27:00+00:00,1487900176467566592,"country because of the tenure of the leader, P...",letisbsbsbs
9,2022-01-30 21:19:53+00:00,1487898386795839491,@erigganewmoney President Muhammadu Buhari,Bob_A_Salam


In [18]:
# Function to remove links, mentions, digits, stop words, non-dictionary words and punctuation
def preprocessTweets(tweet):
    """Clean one raw tweet and return its remaining words as a single string.

    Steps: lower-case the text, strip URLs, strip @mentions / '#' signs /
    digits, tokenize, then keep only the tokens that are not in ``stop_words``,
    are in ``word_list`` and are not punctuation.

    Parameters
    ----------
    tweet : str
        Raw tweet text. Any non-string value (e.g. ``None`` or the NaN of a
        missing cell) is treated as an empty tweet.

    Returns
    -------
    str
        The kept tokens joined by single spaces; ``""`` when nothing is kept.

    Notes
    -----
    Relies on the notebook-level names ``stop_words`` and ``word_list``, which
    must be defined before this function is called.
    """
    # Missing values have no .lower(); treat them as empty instead of raising.
    if not isinstance(tweet, str):
        return ""

    text = tweet.lower()  # lower-case first so tokens can match the lower-case stop words

    # Remove urls ("http\S+" already covers https links, so no separate branch is needed)
    text = re.sub(r"http\S+|www\S+", '', text)
    # Remove user @ references, '#' signs and digits
    text = re.sub(r'\@\w+|\#|\d+', '', text)

    # Keep a token only if it passes all three filters: not a stop word,
    # a known word, and not punctuation.
    kept_words = [
        token
        for token in word_tokenize(text)  # convert string to tokens
        if token not in stop_words
        and token in word_list
        and token not in string.punctuation
    ]

    return ' '.join(kept_words)  # join words with a space in between them

In [61]:
# Defining my NLTK stop words and my user-defined stop words
stop_words = list(stopwords.words('english'))
user_stop_words = ['2021', 'year', 'many', 'much', 'amp', 'next', 'cant', 'wont', 'hadnt',
                    'havent', 'hasnt', 'isnt', 'shouldnt', 'couldnt', 'wasnt', 'werent',
                    'mustnt', '’', '...', '..', '.', '.....', '....', 'been…', 'one', 'two',
                    'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'aht',
                    've', 'next']
alphabets = list(string.ascii_lowercase)  # single letters are treated as stop words too
stop_words = stop_words + user_stop_words + alphabets
# All words in the NLTK English word corpus, stored as a set: preprocessTweets tests every
# token with `in word_list`, which is a hash lookup on a set but a full scan on a list.
word_list = set(words.words())

In [62]:
# Clean every raw tweet in 'Text' and store the result in a new 'Processed_Tweets' column
tweets_df['Processed_Tweets'] = tweets_df['Text'].apply(preprocessTweets)

In [63]:
# Display the frame with the new 'Processed_Tweets' column.
# NOTE(review): the saved output already lists Tweets_Sentiments, Subjectivity, Polarity and
# Sentiment, which are only assigned in later cells — looks like the cells were run out of
# order; confirm with Restart Kernel -> Run All.
tweets_df

Unnamed: 0,Datetime,Tweet Id,Text,Username,Processed_Tweets,Tweets_Adjectives,Tweets_Sentiments,Subjectivity,Polarity,Sentiment
0,2022-01-30 23:38:14+00:00,1487933203809714177,"Dear Mr president Muhammadu Buhari, please han...",ismailmsani4,dear president please power professor need som...,,dear president please power professor need som...,0.00000,0.000000,Neutral
1,2022-01-30 22:34:47+00:00,1487917234106339330,@yush_belle Muhammadu Buhari: Challenges of Le...,alliiyyu,leadership,,leadership,0.00000,0.000000,Neutral
2,2022-01-30 22:23:50+00:00,1487914477521674244,@fash360degree @MuhammaduBuhari The so called ...,4real009494661,done look blessing honestly,,done look blessing honestly,0.90000,0.600000,Positive
3,2022-01-30 22:14:08+00:00,1487912038026420225,@fash360degree @MuhammaduBuhari He should go a...,lukemonowolabi,go try soapbox excited mien disposition corrup...,,go try soapbox excited mien disposition corrup...,0.46875,0.131250,Positive
4,2022-01-30 22:02:11+00:00,1487909028588142596,President Muhammadu Buhari has joined the ente...,pmparrotng,president entertainment industry renowned sing...,,president entertainment industry renowned sing...,0.00000,0.000000,Neutral
...,...,...,...,...,...,...,...,...,...,...
15711,2021-10-01 09:26:58+00:00,1443870040529555488,- @atiku Abubakar \n\n#October1st \n#Atikukawai,Abdl_Rahmane,,,,0.00000,0.000000,Neutral
15712,2021-10-01 07:49:07+00:00,1443845418614358023,H.E Alh @atiku Abubakar former Vice President ...,WasiluSalihu,former vice president urge good people unity c...,,former vice president urge good people unity c...,0.58750,0.458333,Positive
15713,2021-10-01 07:37:44+00:00,1443842551455223812,Independence Day: Nigerians Still Living in Ex...,LagosEyeNews2,independence day still living extreme poverty ...,,independence day still living extreme poverty ...,1.00000,-0.125000,Negative
15714,2021-10-01 04:07:04+00:00,1443789538619494401,"By zoning the presidency to the south, PDP has...",AkeliciousMedia,zoning presidency south ambition joe,,zoning presidency south ambition joe,0.00000,0.000000,Neutral


In [64]:
# Reduce every word of a cleaned tweet to its base form with the WordNet lemmatizer
def preprocessTweetsSentiments(tweet):
    """Tokenize ``tweet``, lemmatize each token and return them joined by single spaces."""
    tokens = word_tokenize(tweet)
    wordnet = WordNetLemmatizer()  # one lemmatizer instance per call
    return " ".join(map(wordnet.lemmatize, tokens))

In [65]:
# Apply the preprocessTweetsSentiments function to the 'Processed_Tweets' column to generate
# a new column called 'Tweets_Sentiments'
tweets_df['Tweets_Sentiments'] = tweets_df['Processed_Tweets'].apply(preprocessTweetsSentiments)

In [66]:
# Count the missing values in each column
tweets_df.isnull().sum()

Datetime             0
Tweet Id             0
Text                 0
Username             0
Processed_Tweets     0
Tweets_Adjectives    0
Tweets_Sentiments    0
Subjectivity         0
Polarity             0
Sentiment            0
dtype: int64

In [67]:
# Display the frame again, now including the lemmatized 'Tweets_Sentiments' column
tweets_df

Unnamed: 0,Datetime,Tweet Id,Text,Username,Processed_Tweets,Tweets_Adjectives,Tweets_Sentiments,Subjectivity,Polarity,Sentiment
0,2022-01-30 23:38:14+00:00,1487933203809714177,"Dear Mr president Muhammadu Buhari, please han...",ismailmsani4,dear president please power professor need som...,,dear president please power professor need som...,0.00000,0.000000,Neutral
1,2022-01-30 22:34:47+00:00,1487917234106339330,@yush_belle Muhammadu Buhari: Challenges of Le...,alliiyyu,leadership,,leadership,0.00000,0.000000,Neutral
2,2022-01-30 22:23:50+00:00,1487914477521674244,@fash360degree @MuhammaduBuhari The so called ...,4real009494661,done look blessing honestly,,done look blessing honestly,0.90000,0.600000,Positive
3,2022-01-30 22:14:08+00:00,1487912038026420225,@fash360degree @MuhammaduBuhari He should go a...,lukemonowolabi,go try soapbox excited mien disposition corrup...,,go try soapbox excited mien disposition corrup...,0.46875,0.131250,Positive
4,2022-01-30 22:02:11+00:00,1487909028588142596,President Muhammadu Buhari has joined the ente...,pmparrotng,president entertainment industry renowned sing...,,president entertainment industry renowned sing...,0.00000,0.000000,Neutral
...,...,...,...,...,...,...,...,...,...,...
15711,2021-10-01 09:26:58+00:00,1443870040529555488,- @atiku Abubakar \n\n#October1st \n#Atikukawai,Abdl_Rahmane,,,,0.00000,0.000000,Neutral
15712,2021-10-01 07:49:07+00:00,1443845418614358023,H.E Alh @atiku Abubakar former Vice President ...,WasiluSalihu,former vice president urge good people unity c...,,former vice president urge good people unity c...,0.58750,0.458333,Positive
15713,2021-10-01 07:37:44+00:00,1443842551455223812,Independence Day: Nigerians Still Living in Ex...,LagosEyeNews2,independence day still living extreme poverty ...,,independence day still living extreme poverty ...,1.00000,-0.125000,Negative
15714,2021-10-01 04:07:04+00:00,1443789538619494401,"By zoning the presidency to the south, PDP has...",AkeliciousMedia,zoning presidency south ambition joe,,zoning presidency south ambition joe,0.00000,0.000000,Neutral


In [68]:
from textblob import TextBlob # TextBlob - Python library for processing textual data

In [78]:
# Subjectivity score of a tweet, as computed by TextBlob
def getSubjectivity(tweet):
    """Return the subjectivity component of TextBlob's sentiment for ``tweet``."""
    blob = TextBlob(tweet)
    return blob.sentiment.subjectivity

# Polarity score of a tweet, as computed by TextBlob
def getPolarity(tweet):
    """Return the polarity component of TextBlob's sentiment for ``tweet``."""
    blob = TextBlob(tweet)
    return blob.sentiment.polarity

# Map a polarity score onto a sentiment category
def getSentimentTextBlob(polarity):
    """Label a polarity score as "Negative", "Neutral" or "Positive".

    Scores below zero are "Negative", a score of exactly zero is "Neutral",
    and every other score is "Positive".
    """
    if polarity < 0:
        return "Negative"
    return "Neutral" if polarity == 0 else "Positive"

# Convert a sentiment category into its numeric value
def SentimentValues(Sentiment):
    """Return -1 for "Negative", 1 for "Positive" and 0 for "Neutral".

    Any other value yields ``None``.
    """
    for label, value in (("Negative", -1), ("Positive", 1), ("Neutral", 0)):
        if Sentiment == label:
            return value
    return None

In [79]:
# Score the lemmatized tweets, then derive a label and a numeric code from the score.
# Order matters: 'Sentiment' is computed from 'Polarity', and 'Sentiment Values' from 'Sentiment'.
tweets_df['Subjectivity']=tweets_df['Tweets_Sentiments'].apply(getSubjectivity)
tweets_df['Polarity']=tweets_df['Tweets_Sentiments'].apply(getPolarity)
tweets_df['Sentiment']=tweets_df['Polarity'].apply(getSentimentTextBlob)  # Negative / Neutral / Positive
tweets_df['Sentiment Values'] = tweets_df['Sentiment'].apply(SentimentValues)  # -1 / 0 / 1

In [83]:
# Display the frame with the sentiment columns added
tweets_df

Unnamed: 0,Datetime,Tweet Id,Text,Username,Processed_Tweets,Tweets_Adjectives,Tweets_Sentiments,Subjectivity,Polarity,Sentiment,Sentiment Values
0,2022-01-30 23:38:14+00:00,1487933203809714177,"Dear Mr president Muhammadu Buhari, please han...",ismailmsani4,dear president please power professor need som...,,dear president please power professor need som...,0.00000,0.000000,Neutral,0
1,2022-01-30 22:34:47+00:00,1487917234106339330,@yush_belle Muhammadu Buhari: Challenges of Le...,alliiyyu,leadership,,leadership,0.00000,0.000000,Neutral,0
2,2022-01-30 22:23:50+00:00,1487914477521674244,@fash360degree @MuhammaduBuhari The so called ...,4real009494661,done look blessing honestly,,done look blessing honestly,0.90000,0.600000,Positive,1
3,2022-01-30 22:14:08+00:00,1487912038026420225,@fash360degree @MuhammaduBuhari He should go a...,lukemonowolabi,go try soapbox excited mien disposition corrup...,,go try soapbox excited mien disposition corrup...,0.46875,0.131250,Positive,1
4,2022-01-30 22:02:11+00:00,1487909028588142596,President Muhammadu Buhari has joined the ente...,pmparrotng,president entertainment industry renowned sing...,,president entertainment industry renowned sing...,0.00000,0.000000,Neutral,0
...,...,...,...,...,...,...,...,...,...,...,...
15711,2021-10-01 09:26:58+00:00,1443870040529555488,- @atiku Abubakar \n\n#October1st \n#Atikukawai,Abdl_Rahmane,,,,0.00000,0.000000,Neutral,0
15712,2021-10-01 07:49:07+00:00,1443845418614358023,H.E Alh @atiku Abubakar former Vice President ...,WasiluSalihu,former vice president urge good people unity c...,,former vice president urge good people unity c...,0.58750,0.458333,Positive,1
15713,2021-10-01 07:37:44+00:00,1443842551455223812,Independence Day: Nigerians Still Living in Ex...,LagosEyeNews2,independence day still living extreme poverty ...,,independence day still living extreme poverty ...,1.00000,-0.125000,Negative,-1
15714,2021-10-01 04:07:04+00:00,1443789538619494401,"By zoning the presidency to the south, PDP has...",AkeliciousMedia,zoning presidency south ambition joe,,zoning presidency south ambition joe,0.00000,0.000000,Neutral,0


In [84]:
# Remove the 'Tweets_Adjectives' column before exporting.
# errors='ignore' keeps this cell working when the column is absent: none of the cells
# shown above assigns it, so a fresh Restart & Run All would otherwise stop with a KeyError.
# NOTE: this rebinds `df`, which earlier held the raw CSV data.
df = tweets_df.drop(columns=['Tweets_Adjectives'], errors='ignore')

In [85]:
# Save the scored tweets to CSV.
# NOTE(review): the row index is written as well (pandas default), so reading this file back
# produces an extra unnamed index column unless index=False / index_col is used — confirm intent.
df.to_csv('Trained Twitter Sentimental Analysis Data.csv')

In [86]:
# Display the final frame that was written to disk
df

Unnamed: 0,Datetime,Tweet Id,Text,Username,Processed_Tweets,Tweets_Sentiments,Subjectivity,Polarity,Sentiment,Sentiment Values
0,2022-01-30 23:38:14+00:00,1487933203809714177,"Dear Mr president Muhammadu Buhari, please han...",ismailmsani4,dear president please power professor need som...,dear president please power professor need som...,0.00000,0.000000,Neutral,0
1,2022-01-30 22:34:47+00:00,1487917234106339330,@yush_belle Muhammadu Buhari: Challenges of Le...,alliiyyu,leadership,leadership,0.00000,0.000000,Neutral,0
2,2022-01-30 22:23:50+00:00,1487914477521674244,@fash360degree @MuhammaduBuhari The so called ...,4real009494661,done look blessing honestly,done look blessing honestly,0.90000,0.600000,Positive,1
3,2022-01-30 22:14:08+00:00,1487912038026420225,@fash360degree @MuhammaduBuhari He should go a...,lukemonowolabi,go try soapbox excited mien disposition corrup...,go try soapbox excited mien disposition corrup...,0.46875,0.131250,Positive,1
4,2022-01-30 22:02:11+00:00,1487909028588142596,President Muhammadu Buhari has joined the ente...,pmparrotng,president entertainment industry renowned sing...,president entertainment industry renowned sing...,0.00000,0.000000,Neutral,0
...,...,...,...,...,...,...,...,...,...,...
15711,2021-10-01 09:26:58+00:00,1443870040529555488,- @atiku Abubakar \n\n#October1st \n#Atikukawai,Abdl_Rahmane,,,0.00000,0.000000,Neutral,0
15712,2021-10-01 07:49:07+00:00,1443845418614358023,H.E Alh @atiku Abubakar former Vice President ...,WasiluSalihu,former vice president urge good people unity c...,former vice president urge good people unity c...,0.58750,0.458333,Positive,1
15713,2021-10-01 07:37:44+00:00,1443842551455223812,Independence Day: Nigerians Still Living in Ex...,LagosEyeNews2,independence day still living extreme poverty ...,independence day still living extreme poverty ...,1.00000,-0.125000,Negative,-1
15714,2021-10-01 04:07:04+00:00,1443789538619494401,"By zoning the presidency to the south, PDP has...",AkeliciousMedia,zoning presidency south ambition joe,zoning presidency south ambition joe,0.00000,0.000000,Neutral,0
