## Ukraine Analysis

In [1]:
from textblob import TextBlob
import sys
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import nltk
import re
import string
import seaborn as sns
from PIL import Image
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.stem import SnowballStemmer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
## Load DataFrame 
tweets = pd.read_csv(r'.\..\Data\Finalized_DataFrame_All_Data.csv')

In [3]:
## Load Tweet
tweets[['Tweet']].head()

Unnamed: 0,Tweet
0,Can’t seem to find any serious buyers tonight ...
1,Gabsus solusi masalah striker Arsenal kah?? ht...
2,@el3estuvodemas @m_delprado_7 @agus____1974 @j...
3,Payah nih yang ngancem bom. Jadi ga maen dah d...
4,If this happens the arsenal are acc back. http...


In [4]:
#Removing RT, Punctuation etc
remove_rt = lambda x: re.sub('RT @\w+: '," ",x)
rt = lambda x: re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)"," ",x)
tweets["Tweet"] = tweets.Tweet.map(remove_rt).map(rt)
tweets["Tweet"] = tweets.Tweet.str.lower()
tweets.head(10)

Unnamed: 0.1,Unnamed: 0,Query,Date,User,Tweet,TweetLikes,TweetReplies,RetweetCount,Result,Team
0,0,Arsenal until:2022-04-23,2022-04-22 23:59:56+00:00,RoweThings,can t seem to find any serious buyers tonight ...,0,3,0,1,Arsenal
1,1,Arsenal until:2022-04-23,2022-04-22 23:59:54+00:00,Ancurancuran01,gabsus solusi masalah striker arsenal kah,0,0,0,1,Arsenal
2,2,Arsenal until:2022-04-23,2022-04-22 23:59:54+00:00,celsky07,delprado 7 1974 agregar que arsen...,1,1,0,1,Arsenal
3,3,Arsenal until:2022-04-23,2022-04-22 23:59:53+00:00,fachry4,payah nih yang ngancem bom jadi ga maen dah d...,0,0,0,1,Arsenal
4,4,Arsenal until:2022-04-23,2022-04-22 23:59:51+00:00,GunnersDXB,if this happens the arsenal are acc back,1,0,0,1,Arsenal
5,5,Arsenal until:2022-04-23,2022-04-22 23:59:50+00:00,Mikeeafcldn12,manchester united have just four wins in their...,13,2,0,1,Arsenal
6,6,Arsenal until:2022-04-23,2022-04-22 23:59:37+00:00,Gibeto8a,se lo trajeron de la banca del arsenal obv...,0,0,0,1,Arsenal
7,7,Arsenal until:2022-04-23,2022-04-22 23:59:35+00:00,portalcamaquars,brigada militar e corpo de bombeiros militar d...,0,0,0,1,Arsenal
8,8,Arsenal until:2022-04-23,2022-04-22 23:59:33+00:00,JxOxOxArsenal,arsenal 0,0,0,0,1,Arsenal
9,9,Arsenal until:2022-04-23,2022-04-22 23:59:27+00:00,arsenaltweeta,arsenal fans arsenal followtrain ...,0,0,1,1,Arsenal


In [5]:
## Cleaned Tweets
tweets[["Tweet"]].head(5)

Unnamed: 0,Tweet
0,can t seem to find any serious buyers tonight ...
1,gabsus solusi masalah striker arsenal kah
2,delprado 7 1974 agregar que arsen...
3,payah nih yang ngancem bom jadi ga maen dah d...
4,if this happens the arsenal are acc back


In [6]:
nltk.download('vader_lexicon') 

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\benja\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [7]:
#Calculating Negative, Positive, Neutral and Compound values
tweets[['polarity', 'subjectivity']] = tweets['Tweet'].apply(lambda Text: pd.Series(TextBlob(Text).sentiment))
for index, row in tweets['Tweet'].iteritems():
    score = SentimentIntensityAnalyzer().polarity_scores(row)
    neg = score['neg']
    neu = score['neu']
    pos = score['pos']
    comp = score['compound']
    if neg > pos:
        tweets.loc[index, 'sentiment'] = "negative"
    elif pos > neg:
        tweets.loc[index, 'sentiment'] = "positive"
    else:
        tweets.loc[index, 'sentiment'] = "neutral"
    tweets.loc[index, 'neg'] = neg
    tweets.loc[index, 'neu'] = neu
    tweets.loc[index, 'pos'] = pos
    tweets.loc[index, 'compound'] = comp
tweets.head(10)

Unnamed: 0.1,Unnamed: 0,Query,Date,User,Tweet,TweetLikes,TweetReplies,RetweetCount,Result,Team,polarity,subjectivity,sentiment,neg,neu,pos,compound
0,0,Arsenal until:2022-04-23,2022-04-22 23:59:56+00:00,RoweThings,can t seem to find any serious buyers tonight ...,0,3,0,1,Arsenal,0.015606,0.434242,neutral,0.195,0.61,0.195,-0.0
1,1,Arsenal until:2022-04-23,2022-04-22 23:59:54+00:00,Ancurancuran01,gabsus solusi masalah striker arsenal kah,0,0,0,1,Arsenal,0.0,0.0,neutral,0.0,1.0,0.0,0.0
2,2,Arsenal until:2022-04-23,2022-04-22 23:59:54+00:00,celsky07,delprado 7 1974 agregar que arsen...,1,1,0,1,Arsenal,0.0,0.0,neutral,0.0,1.0,0.0,0.0
3,3,Arsenal until:2022-04-23,2022-04-22 23:59:53+00:00,fachry4,payah nih yang ngancem bom jadi ga maen dah d...,0,0,0,1,Arsenal,0.0,0.0,neutral,0.0,1.0,0.0,0.0
4,4,Arsenal until:2022-04-23,2022-04-22 23:59:51+00:00,GunnersDXB,if this happens the arsenal are acc back,1,0,0,1,Arsenal,0.0,0.0,neutral,0.0,1.0,0.0,0.0
5,5,Arsenal until:2022-04-23,2022-04-22 23:59:50+00:00,Mikeeafcldn12,manchester united have just four wins in their...,13,2,0,1,Arsenal,0.256667,0.293333,positive,0.095,0.633,0.272,0.7722
6,6,Arsenal until:2022-04-23,2022-04-22 23:59:37+00:00,Gibeto8a,se lo trajeron de la banca del arsenal obv...,0,0,0,1,Arsenal,0.0,0.0,neutral,0.0,1.0,0.0,0.0
7,7,Arsenal until:2022-04-23,2022-04-22 23:59:35+00:00,portalcamaquars,brigada militar e corpo de bombeiros militar d...,0,0,0,1,Arsenal,0.0,0.0,negative,0.062,0.937,0.0,-0.296
8,8,Arsenal until:2022-04-23,2022-04-22 23:59:33+00:00,JxOxOxArsenal,arsenal 0,0,0,0,1,Arsenal,0.0,0.0,neutral,0.0,1.0,0.0,0.0
9,9,Arsenal until:2022-04-23,2022-04-22 23:59:27+00:00,arsenaltweeta,arsenal fans arsenal followtrain ...,0,0,1,1,Arsenal,1.0,0.3,positive,0.0,0.682,0.318,0.7906


In [8]:
tweets.to_csv("tweets_clean.csv") #saving the CSV file with cleaned tweets
#tweets=pd.read_csv("tweets_clean.csv")