# Twitter Data Sentiment Analysis

## Importing Modules

In [223]:
import pandas as pd
import numpy as np
import re
import string
from textblob import TextBlob
from nltk.corpus import stopwords

## Loading the Twitter Posts CSV Dataset

In [224]:
# Read the raw tweets (path is relative to the notebook's working directory)
# and preview the first ten rows.
csv_path = './cyberbullying_tweets.csv'
twitter_post = pd.read_csv(csv_path)
twitter_post.head(n=10)

Unnamed: 0,tweet_text,cyberbullying_type
0,"In other words #katandandre, your food was cra...",not_cyberbullying
1,Why is #aussietv so white? #MKR #theblock #ImA...,not_cyberbullying
2,@XochitlSuckkks a classy whore? Or more red ve...,not_cyberbullying
3,"@Jason_Gio meh. :P thanks for the heads up, b...",not_cyberbullying
4,@RudhoeEnglish This is an ISIS account pretend...,not_cyberbullying
5,"@Raja5aab @Quickieleaks Yes, the test of god i...",not_cyberbullying
6,Itu sekolah ya bukan tempat bully! Ga jauh kay...,not_cyberbullying
7,Karma. I hope it bites Kat on the butt. She is...,not_cyberbullying
8,@stockputout everything but mostly my priest,not_cyberbullying
9,Rebecca Black Drops Out of School Due to Bully...,not_cyberbullying


In [225]:
# Class balance: number of tweets per cyberbullying label.
label_column = twitter_post['cyberbullying_type']
label_column.value_counts()

religion               7998
age                    7992
gender                 7973
ethnicity              7961
not_cyberbullying      7945
other_cyberbullying    7823
Name: cyberbullying_type, dtype: int64

## Check The Missing Data - Null Values

In [226]:
# Count missing values per column.
# Use the DataFrame's own column-wise sum instead of np.sum(df): NumPy forwards
# axis=None to DataFrame.sum, which newer pandas versions deprecate in favour of
# reducing over both axes (a single scalar rather than one count per column).
twitter_post.isnull().sum()

tweet_text            0
cyberbullying_type    0
dtype: int64

## TEXT PREPROCESSING

* CONVERT INTO LOWERCASE

In [227]:
# Start the cleaned-text column from a lowercased copy of the raw tweet,
# leaving the original 'tweet_text' column untouched.
lowercased_tweets = twitter_post['tweet_text'].str.lower()
twitter_post['cleen_text'] = lowercased_tweets
twitter_post.head()

Unnamed: 0,tweet_text,cyberbullying_type,cleen_text
0,"In other words #katandandre, your food was cra...",not_cyberbullying,"in other words #katandandre, your food was cra..."
1,Why is #aussietv so white? #MKR #theblock #ImA...,not_cyberbullying,why is #aussietv so white? #mkr #theblock #ima...
2,@XochitlSuckkks a classy whore? Or more red ve...,not_cyberbullying,@xochitlsuckkks a classy whore? or more red ve...
3,"@Jason_Gio meh. :P thanks for the heads up, b...",not_cyberbullying,"@jason_gio meh. :p thanks for the heads up, b..."
4,@RudhoeEnglish This is an ISIS account pretend...,not_cyberbullying,@rudhoeenglish this is an isis account pretend...


* REMOVE URLS

In [228]:
def remove_URL(text):
    """Remove URLs from a piece of text.

    Two forms are stripped:
      * tokens starting with ``www.`` (at a word boundary), up to the next
        whitespace character;
      * tokens starting with ``http`` (covers ``http://`` and ``https://``),
        up to the next whitespace character.

    Parameters
    ----------
    text : str
        Input text.

    Returns
    -------
    str
        ``text`` with every matched URL replaced by the empty string.
        Surrounding whitespace is left as-is.
    """
    # The dot after "www" is escaped and the tail is \S+ (non-whitespace).
    # The previous pattern "www.[^s]+" matched *any* character after "www"
    # and then consumed everything, spaces included, up to the next letter
    # "s", which deleted ordinary words that followed the URL.
    return re.sub(r"(\bwww\.\S+)|(http\S+)", "", text)

# Strip URLs from every tweet (the function is passed directly; no lambda needed).
twitter_post['cleen_text'] = twitter_post['cleen_text'].apply(remove_URL)

# Fixed seed so the preview shows the same rows on every run of the notebook.
twitter_post.sample(5, random_state=42)

Unnamed: 0,tweet_text,cyberbullying_type,cleen_text
31867,I feel like the American school system has thi...,age,i feel like the american school system has thi...
3458,Kat and Andre do not deserve to be in the comp...,not_cyberbullying,kat and andre do not deserve to be in the comp...
20747,no one's laughing with you idiot. Just because...,religion,no one's laughing with you idiot. just because...
18119,Which idiot told you about oppression of Musli...,religion,which idiot told you about oppression of musli...
12984,RT @bigballzrevilla: I'm not sexist but every ...,gender,rt @bigballzrevilla: i'm not sexist but every ...


* REMOVE Mentions @

In [229]:
def remove_users(text):
    """Delete @mentions from ``text``.

    A mention is an ``@`` followed by one or more ASCII letters, digits or
    underscores. Each match is replaced by the empty string; the text around
    it (including spaces) is returned unchanged.
    """
    mention_pattern = r"@[A-Za-z0-9_]+"
    without_mentions = re.sub(mention_pattern, "", text)
    return without_mentions

# Strip @mentions from every tweet (the function is passed directly; no lambda needed).
twitter_post['cleen_text'] = twitter_post['cleen_text'].apply(remove_users)

# Fixed seed so the preview shows the same rows on every run of the notebook.
twitter_post.sample(5, random_state=42)

Unnamed: 0,tweet_text,cyberbullying_type,cleen_text
37178,we all had that one bully that was cute in hig...,age,we all had that one bully that was cute in hig...
37115,I remember being bullied by this girl in high ...,age,i remember being bullied by this girl in high ...
33479,I was bullied by a girl Jordan all through our...,age,i was bullied by a girl jordan all through our...
24972,@nolanwhyte @mistaphill ...K. Thanks for demo...,other_cyberbullying,...k. thanks for demonstrating a firm under...
13400,"I'm tired of all the blase fat jokes, rape jok...",gender,"i'm tired of all the blase fat jokes, rape jok..."


* Store the hashtags in a separate column, then remove them from the text

In [230]:
# Keep the hashtags before they are stripped from the text: every "#word" is
# captured (without the leading '#') into a per-tweet list in a new column.
hashtag_pattern = r'#(\w+)'
twitter_post['hashtag'] = twitter_post['cleen_text'].str.findall(hashtag_pattern)

#Removing The Hashtag from text

def remove_hashtag(text):
    """Remove hashtags (``#`` followed by word characters) from ``text``.

    Parameters
    ----------
    text : str
        Input text.

    Returns
    -------
    str
        ``text`` with each hashtag replaced by the empty string.
    """
    # Use \w+ so a tag such as "#café" is removed whole. An ASCII-only class
    # ([A-Za-z0-9_]) would stop at the first non-ASCII letter and leave the
    # tail of the tag behind in the text.
    return re.sub(r"#\w+", "", text)
# Drop the hashtags from the working text column and preview the result.
text_without_hashtags = twitter_post['cleen_text'].apply(remove_hashtag)
twitter_post['cleen_text'] = text_without_hashtags

twitter_post.head()

Unnamed: 0,tweet_text,cyberbullying_type,cleen_text,hashtag
0,"In other words #katandandre, your food was cra...",not_cyberbullying,"in other words , your food was crapilicious!","[katandandre, mkr]"
1,Why is #aussietv so white? #MKR #theblock #ImA...,not_cyberbullying,why is so white?,"[aussietv, mkr, theblock, imacelebrityau, toda..."
2,@XochitlSuckkks a classy whore? Or more red ve...,not_cyberbullying,a classy whore? or more red velvet cupcakes?,[]
3,"@Jason_Gio meh. :P thanks for the heads up, b...",not_cyberbullying,"meh. :p thanks for the heads up, but not too...",[]
4,@RudhoeEnglish This is an ISIS account pretend...,not_cyberbullying,this is an isis account pretending to be a ku...,[]


* Remove Punctuations

In [231]:
# Characters to strip from the tweets: the standard library's
# string.punctuation constant.
english_puctuations = string.punctuation
# Display the characters for reference.
english_puctuations

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [232]:
def remove_puctuations(text):
    """Return ``text`` with every character in ``english_puctuations`` deleted.

    The characters are removed outright (not replaced by a space), so
    neighbouring characters are joined together.
    """
    return text.translate(str.maketrans('', '', english_puctuations))
# Strip punctuation from the working text column and preview the result.
text_without_punctuation = twitter_post['cleen_text'].apply(remove_puctuations)
twitter_post['cleen_text'] = text_without_punctuation
twitter_post.head()

Unnamed: 0,tweet_text,cyberbullying_type,cleen_text,hashtag
0,"In other words #katandandre, your food was cra...",not_cyberbullying,in other words your food was crapilicious,"[katandandre, mkr]"
1,Why is #aussietv so white? #MKR #theblock #ImA...,not_cyberbullying,why is so white,"[aussietv, mkr, theblock, imacelebrityau, toda..."
2,@XochitlSuckkks a classy whore? Or more red ve...,not_cyberbullying,a classy whore or more red velvet cupcakes,[]
3,"@Jason_Gio meh. :P thanks for the heads up, b...",not_cyberbullying,meh p thanks for the heads up but not too co...,[]
4,@RudhoeEnglish This is an ISIS account pretend...,not_cyberbullying,this is an isis account pretending to be a ku...,[]


* Removing StopWords

In [233]:
# Load the English stop-word list.
# The corpus reader is imported under an alias here so that binding the list
# to the name `stopwords` (which the filtering cell uses) does not overwrite
# the object `.words()` is called on. Without the alias, re-running this cell
# fails because `stopwords` is already a list and has no `.words` attribute.
from nltk.corpus import stopwords as nltk_stopwords

stopwords = nltk_stopwords.words('english')
print(stopwords)

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

In [234]:
# Drop stop words from every tweet.
# Membership is tested against a set built once, instead of scanning the
# stop-word list for every token of every tweet; the output is identical.
# NOTE(review): punctuation was stripped in an earlier step, so contraction
# entries such as "you're" can no longer match their cleaned form ("youre").
stopword_set = set(stopwords)
twitter_post['cleen_text'] = twitter_post['cleen_text'].apply(
    lambda x: ' '.join(word for word in x.split() if word not in stopword_set)
)
twitter_post.head(10)

Unnamed: 0,tweet_text,cyberbullying_type,cleen_text,hashtag
0,"In other words #katandandre, your food was cra...",not_cyberbullying,words food crapilicious,"[katandandre, mkr]"
1,Why is #aussietv so white? #MKR #theblock #ImA...,not_cyberbullying,white,"[aussietv, mkr, theblock, imacelebrityau, toda..."
2,@XochitlSuckkks a classy whore? Or more red ve...,not_cyberbullying,classy whore red velvet cupcakes,[]
3,"@Jason_Gio meh. :P thanks for the heads up, b...",not_cyberbullying,meh p thanks heads concerned another angry dud...,[]
4,@RudhoeEnglish This is an ISIS account pretend...,not_cyberbullying,isis account pretending kurdish account like i...,[]
5,"@Raja5aab @Quickieleaks Yes, the test of god i...",not_cyberbullying,yes test god good bad indifferent weird whatev...,[]
6,Itu sekolah ya bukan tempat bully! Ga jauh kay...,not_cyberbullying,itu sekolah ya bukan tempat bully ga jauh kaya...,[]
7,Karma. I hope it bites Kat on the butt. She is...,not_cyberbullying,karma hope bites kat butt nasty,[mkr]
8,@stockputout everything but mostly my priest,not_cyberbullying,everything mostly priest,[]
9,Rebecca Black Drops Out of School Due to Bully...,not_cyberbullying,rebecca black drops school due bullying,[]


* Handling Emojis 

In [235]:
# Compiled once at definition time rather than on every call.
# Only emoji / pictograph ranges are listed. A single wide span such as
# U+24C2..U+1F251 must not be used here: it also contains CJK ideographs,
# Hangul, kana and most other non-Latin scripts, so it would delete ordinary
# text written in those scripts.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # regional indicators (flags)
    "\U0001F900-\U0001F9FF"  # supplemental symbols & pictographs
    "\U0001FA70-\U0001FAFF"  # symbols & pictographs extended-A
    "\U00002600-\U000027BF"  # miscellaneous symbols + dingbats
    "\U00002B05-\U00002B07"  # arrows used as emoji
    "\U00002B1B\U00002B1C"   # large squares
    "\U00002B50\U00002B55"   # star, heavy circle
    "\U000024C2"             # circled M
    "\U0001F170-\U0001F251"  # enclosed alphanumeric / ideographic supplement
    "\U0000FE0F"             # variation selector-16 (emoji presentation)
    "]+",
    flags=re.UNICODE,
)


def remove_emoji(text):
    """Remove emoji and pictographic symbols from ``text``.

    Parameters
    ----------
    text : str
        Input text.

    Returns
    -------
    str
        ``text`` with each run of emoji characters replaced by the empty
        string. Letters of any script (Latin, CJK, Hangul, ...) are kept.
    """
    return _EMOJI_PATTERN.sub(r'', text)


print(remove_emoji("Hilarious 😂!"))

Hilarious !


In [236]:
# Strip emoji from the working text column.
text_without_emoji = twitter_post['cleen_text'].apply(remove_emoji)
twitter_post['cleen_text'] = text_without_emoji