In [28]:
# Library for View Images in Google Colab
from IPython.display import Image
#Library for Data Preprocessing Analysing Visualizing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
#Library for Text Data Preprocessing 
import nltk
import re
import string
# Library for Splitting Data into Training and Testing
from sklearn.model_selection import train_test_split
# Library for converting text into vectors
from sklearn.feature_extraction.text import TfidfVectorizer
# Library for Machine Learning Models/ Estimators
# Logisitic Regression
from sklearn.linear_model import LogisticRegression
# Support Vector Machine
from sklearn import svm
# Naive Bayes
from sklearn.naive_bayes import MultinomialNB
# Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier
# Library for Machine Learning Models/ Estimators Evaluation Pattern
from sklearn.metrics import classification_report, confusion_matrix

In [29]:
data = pd.read_csv("/content/drive/MyDrive/Suicidal_Sentiment_Analysis/Tweets_Mix.csv")

In [30]:
data.head(10)

Unnamed: 0,Sentiment,Tweets
0,1,you dont have to be crazy to work here serious...
1,0,Paw pawing my ass off smh...im starting to fee...
2,1,and ppl better not act like threatening suicid...
3,0,@juliaroy you are just a tumbling fool. Love it.
4,0,Just gonna go shopping up Fosse Park with mate...
5,1,â youâ€™re stuck with me now iâ€™m not leav...
6,1,i wake up and just want to sleep forever
7,0,@dotmariusz I'm a late bird Mariusz - you fro...
8,1,our friendly purge there is not a better way t...
9,1,this what you a saying many thieves and looter...


In [31]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Sentiment  10000 non-null  int64 
 1   Tweets     10000 non-null  object
dtypes: int64(1), object(1)
memory usage: 156.4+ KB


In [32]:
data['Sentiment'].value_counts()

1    5000
0    5000
Name: Sentiment, dtype: int64

### Data Preprocessing

In [33]:
# Writing Function to remove the mentions  URL's  and String with @
def removeURL(text):
    tweet_out = re.sub(r'@[A-Za-z0-9]+','',text)
    re.sub('https?://[A-zA-z0-9]+','',text)
    return tweet_out

# Writing function to remove the non-numeric characters
def removeNonAlphanumeric(text):
    text_out = "".join([char for char in text if char not in string.punctuation])
    return text_out

In [35]:
data["Tweet_No_URL"]  = data["Tweets"].apply(lambda x:removeURL(x))
data["Tweet_No_Punc"] = data["Tweet_No_URL"].apply(lambda x:removeNonAlphanumeric(x))

In [36]:
data.head()

Unnamed: 0,Sentiment,Tweets,Tweet_No_URL,Tweet_No_Punc
0,1,you dont have to be crazy to work here serious...,you dont have to be crazy to work here serious...,you dont have to be crazy to work here serious...
1,0,Paw pawing my ass off smh...im starting to fee...,Paw pawing my ass off smh...im starting to fee...,Paw pawing my ass off smhim starting to feel b...
2,1,and ppl better not act like threatening suicid...,and ppl better not act like threatening suicid...,and ppl better not act like threatening suicid...
3,0,@juliaroy you are just a tumbling fool. Love it.,you are just a tumbling fool. Love it.,you are just a tumbling fool Love it
4,0,Just gonna go shopping up Fosse Park with mate...,Just gonna go shopping up Fosse Park with mate...,Just gonna go shopping up Fosse Park with mate...


#### Tokenization

In [37]:
def tokenization(text):
    token = re.split('\W+',text)
    return token

data ["Tokens"] = data["Tweet_No_Punc"].apply(lambda x:tokenization(x))

In [38]:
data

Unnamed: 0,Sentiment,Tweets,Tweet_No_URL,Tweet_No_Punc,Tokens
0,1,you dont have to be crazy to work here serious...,you dont have to be crazy to work here serious...,you dont have to be crazy to work here serious...,"[you, dont, have, to, be, crazy, to, work, her..."
1,0,Paw pawing my ass off smh...im starting to fee...,Paw pawing my ass off smh...im starting to fee...,Paw pawing my ass off smhim starting to feel b...,"[Paw, pawing, my, ass, off, smhim, starting, t..."
2,1,and ppl better not act like threatening suicid...,and ppl better not act like threatening suicid...,and ppl better not act like threatening suicid...,"[and, ppl, better, not, act, like, threatening..."
3,0,@juliaroy you are just a tumbling fool. Love it.,you are just a tumbling fool. Love it.,you are just a tumbling fool Love it,"[, you, are, just, a, tumbling, fool, Love, it, ]"
4,0,Just gonna go shopping up Fosse Park with mate...,Just gonna go shopping up Fosse Park with mate...,Just gonna go shopping up Fosse Park with mate...,"[Just, gonna, go, shopping, up, Fosse, Park, w..."
...,...,...,...,...,...
9995,0,@valxx http://twitpic.com/2w1uj - And the artw...,http://twitpic.com/2w1uj - And the artwork's ...,httptwitpiccom2w1uj And the artworks by Bill...,"[, httptwitpiccom2w1uj, And, the, artworks, by..."
9996,1,i just love reading that im worthless \r\n\r\...,i just love reading that im worthless \r\n\r\...,i just love reading that im worthless \r\n\r\...,"[, i, just, love, reading, that, im, worthless..."
9997,0,Disneyland was great! ... Got to go and pick ...,Disneyland was great! ... Got to go and pick ...,Disneyland was great Got to go and pick up t...,"[, Disneyland, was, great, Got, to, go, and, p..."
9998,1,i hope i die in my sleep,i hope i die in my sleep,i hope i die in my sleep,"[i, hope, i, die, in, my, sleep]"


#### Stemming

In [39]:
ps = nltk.PorterStemmer()

def stemming (text):
    out_text = [ps.stem(word) for word in text]
    return out_text

data['Stem'] = data['Tokens'].apply(lambda x:stemming(x))

In [40]:
data.head()

Unnamed: 0,Sentiment,Tweets,Tweet_No_URL,Tweet_No_Punc,Tokens,Stem
0,1,you dont have to be crazy to work here serious...,you dont have to be crazy to work here serious...,you dont have to be crazy to work here serious...,"[you, dont, have, to, be, crazy, to, work, her...","[you, dont, have, to, be, crazi, to, work, her..."
1,0,Paw pawing my ass off smh...im starting to fee...,Paw pawing my ass off smh...im starting to fee...,Paw pawing my ass off smhim starting to feel b...,"[Paw, pawing, my, ass, off, smhim, starting, t...","[paw, paw, my, ass, off, smhim, start, to, fee..."
2,1,and ppl better not act like threatening suicid...,and ppl better not act like threatening suicid...,and ppl better not act like threatening suicid...,"[and, ppl, better, not, act, like, threatening...","[and, ppl, better, not, act, like, threaten, s..."
3,0,@juliaroy you are just a tumbling fool. Love it.,you are just a tumbling fool. Love it.,you are just a tumbling fool Love it,"[, you, are, just, a, tumbling, fool, Love, it, ]","[, you, are, just, a, tumbl, fool, love, it, ]"
4,0,Just gonna go shopping up Fosse Park with mate...,Just gonna go shopping up Fosse Park with mate...,Just gonna go shopping up Fosse Park with mate...,"[Just, gonna, go, shopping, up, Fosse, Park, w...","[just, gonna, go, shop, up, foss, park, with, ..."


#### Lemmatizing

In [41]:
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [42]:
wn = nltk.WordNetLemmatizer()

def lemmatize(text):
    out_text = [wn.lemmatize(word) for word in text]
    return out_text

data['Lem'] =data['Tokens'].apply(lambda x:lemmatize(x))

data['Lem'].head()

0    [you, dont, have, to, be, crazy, to, work, her...
1    [Paw, pawing, my, as, off, smhim, starting, to...
2    [and, ppl, better, not, act, like, threatening...
3    [, you, are, just, a, tumbling, fool, Love, it, ]
4    [Just, gonna, go, shopping, up, Fosse, Park, w...
Name: Lem, dtype: object

In [43]:
data.head()

Unnamed: 0,Sentiment,Tweets,Tweet_No_URL,Tweet_No_Punc,Tokens,Stem,Lem
0,1,you dont have to be crazy to work here serious...,you dont have to be crazy to work here serious...,you dont have to be crazy to work here serious...,"[you, dont, have, to, be, crazy, to, work, her...","[you, dont, have, to, be, crazi, to, work, her...","[you, dont, have, to, be, crazy, to, work, her..."
1,0,Paw pawing my ass off smh...im starting to fee...,Paw pawing my ass off smh...im starting to fee...,Paw pawing my ass off smhim starting to feel b...,"[Paw, pawing, my, ass, off, smhim, starting, t...","[paw, paw, my, ass, off, smhim, start, to, fee...","[Paw, pawing, my, as, off, smhim, starting, to..."
2,1,and ppl better not act like threatening suicid...,and ppl better not act like threatening suicid...,and ppl better not act like threatening suicid...,"[and, ppl, better, not, act, like, threatening...","[and, ppl, better, not, act, like, threaten, s...","[and, ppl, better, not, act, like, threatening..."
3,0,@juliaroy you are just a tumbling fool. Love it.,you are just a tumbling fool. Love it.,you are just a tumbling fool Love it,"[, you, are, just, a, tumbling, fool, Love, it, ]","[, you, are, just, a, tumbl, fool, love, it, ]","[, you, are, just, a, tumbling, fool, Love, it, ]"
4,0,Just gonna go shopping up Fosse Park with mate...,Just gonna go shopping up Fosse Park with mate...,Just gonna go shopping up Fosse Park with mate...,"[Just, gonna, go, shopping, up, Fosse, Park, w...","[just, gonna, go, shop, up, foss, park, with, ...","[Just, gonna, go, shopping, up, Fosse, Park, w..."


#### Stop Words

In [44]:
nltk.download('stopwords')
stopwords = nltk.corpus.stopwords.words('english')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [45]:
def remove_stopWords(token_list):
    text_out = [word for word in token_list if word not in stopwords]
    return text_out

data['StopRemove'] = data['Lem'].apply(lambda x:remove_stopWords(x))

In [46]:
data.head()

Unnamed: 0,Sentiment,Tweets,Tweet_No_URL,Tweet_No_Punc,Tokens,Stem,Lem,StopRemove
0,1,you dont have to be crazy to work here serious...,you dont have to be crazy to work here serious...,you dont have to be crazy to work here serious...,"[you, dont, have, to, be, crazy, to, work, her...","[you, dont, have, to, be, crazi, to, work, her...","[you, dont, have, to, be, crazy, to, work, her...","[dont, crazy, work, seriously, illegal, requir..."
1,0,Paw pawing my ass off smh...im starting to fee...,Paw pawing my ass off smh...im starting to fee...,Paw pawing my ass off smhim starting to feel b...,"[Paw, pawing, my, ass, off, smhim, starting, t...","[paw, paw, my, ass, off, smhim, start, to, fee...","[Paw, pawing, my, as, off, smhim, starting, to...","[Paw, pawing, smhim, starting, feel, better, ]"
2,1,and ppl better not act like threatening suicid...,and ppl better not act like threatening suicid...,and ppl better not act like threatening suicid...,"[and, ppl, better, not, act, like, threatening...","[and, ppl, better, not, act, like, threaten, s...","[and, ppl, better, not, act, like, threatening...","[ppl, better, act, like, threatening, suicide,..."
3,0,@juliaroy you are just a tumbling fool. Love it.,you are just a tumbling fool. Love it.,you are just a tumbling fool Love it,"[, you, are, just, a, tumbling, fool, Love, it, ]","[, you, are, just, a, tumbl, fool, love, it, ]","[, you, are, just, a, tumbling, fool, Love, it, ]","[, tumbling, fool, Love, ]"
4,0,Just gonna go shopping up Fosse Park with mate...,Just gonna go shopping up Fosse Park with mate...,Just gonna go shopping up Fosse Park with mate...,"[Just, gonna, go, shopping, up, Fosse, Park, w...","[just, gonna, go, shop, up, foss, park, with, ...","[Just, gonna, go, shopping, up, Fosse, Park, w...","[Just, gonna, go, shopping, Fosse, Park, mate,..."


In [47]:
def final_join(token):
    document = " ".join([word for word in token if not word.isdigit()])
    return document

data['FinalJoin'] = data['StopRemove'].apply(lambda x:final_join(x))

In [48]:
data.head()

Unnamed: 0,Sentiment,Tweets,Tweet_No_URL,Tweet_No_Punc,Tokens,Stem,Lem,StopRemove,FinalJoin
0,1,you dont have to be crazy to work here serious...,you dont have to be crazy to work here serious...,you dont have to be crazy to work here serious...,"[you, dont, have, to, be, crazy, to, work, her...","[you, dont, have, to, be, crazi, to, work, her...","[you, dont, have, to, be, crazy, to, work, her...","[dont, crazy, work, seriously, illegal, requir...",dont crazy work seriously illegal require dont...
1,0,Paw pawing my ass off smh...im starting to fee...,Paw pawing my ass off smh...im starting to fee...,Paw pawing my ass off smhim starting to feel b...,"[Paw, pawing, my, ass, off, smhim, starting, t...","[paw, paw, my, ass, off, smhim, start, to, fee...","[Paw, pawing, my, as, off, smhim, starting, to...","[Paw, pawing, smhim, starting, feel, better, ]",Paw pawing smhim starting feel better
2,1,and ppl better not act like threatening suicid...,and ppl better not act like threatening suicid...,and ppl better not act like threatening suicid...,"[and, ppl, better, not, act, like, threatening...","[and, ppl, better, not, act, like, threaten, s...","[and, ppl, better, not, act, like, threatening...","[ppl, better, act, like, threatening, suicide,...",ppl better act like threatening suicide result...
3,0,@juliaroy you are just a tumbling fool. Love it.,you are just a tumbling fool. Love it.,you are just a tumbling fool Love it,"[, you, are, just, a, tumbling, fool, Love, it, ]","[, you, are, just, a, tumbl, fool, love, it, ]","[, you, are, just, a, tumbling, fool, Love, it, ]","[, tumbling, fool, Love, ]",tumbling fool Love
4,0,Just gonna go shopping up Fosse Park with mate...,Just gonna go shopping up Fosse Park with mate...,Just gonna go shopping up Fosse Park with mate...,"[Just, gonna, go, shopping, up, Fosse, Park, w...","[just, gonna, go, shop, up, foss, park, with, ...","[Just, gonna, go, shopping, up, Fosse, Park, w...","[Just, gonna, go, shopping, Fosse, Park, mate,...",Just gonna go shopping Fosse Park mate Got ï ½...


# Model Building

#### Splitting of Data Set

In [50]:
X = data['FinalJoin']
y= data['Sentiment']
cv = TfidfVectorizer(min_df=1,stop_words='english')

In [51]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.20,random_state=0)

In [53]:
len(X_train), len(X_test), len(y_train), len(y_test)

(8000, 2000, 8000, 2000)

In [54]:
X_train = cv.fit_transform(X_train)
X_test = cv.transform(X_test)

#### Logistic Regression

In [67]:
logreg = LogisticRegression()

logreg = logreg.fit(X_train,y_train)

y_pred = logreg.predict(X_test)

In [68]:
logreg.score(X_train,y_train)

0.97625

In [69]:
logreg.score(X_test,y_test)

0.955

In [70]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.96      0.95      0.95       976
           1       0.95      0.96      0.96      1024

    accuracy                           0.95      2000
   macro avg       0.96      0.95      0.95      2000
weighted avg       0.96      0.95      0.95      2000



In [71]:
print(confusion_matrix(y_test,y_pred))

[[927  49]
 [ 41 983]]


#### Random Forest

In [72]:
clf = RandomForestClassifier(n_estimators=100)

In [73]:
clf.fit(X_train,y_train);

In [74]:
y_pred = clf.predict(X_test)

In [75]:
clf.score(X_train,y_train)

0.99975

In [76]:
clf.score(X_test,y_test)

0.9635

In [77]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.98      0.94      0.96       976
           1       0.95      0.98      0.97      1024

    accuracy                           0.96      2000
   macro avg       0.96      0.96      0.96      2000
weighted avg       0.96      0.96      0.96      2000



In [78]:
print(confusion_matrix(y_test,y_pred))

[[ 919   57]
 [  16 1008]]


#### Support Vector Machine

In [96]:
class_linear = svm.SVC(kernel='linear')
class_linear.fit(X_train,y_train);

In [97]:
y_pred = class_linear.predict(X_test)

In [98]:
class_linear.score(X_train,y_train)

0.991875

In [99]:
class_linear.score(X_test,y_test)

0.9575

In [100]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.97      0.94      0.96       976
           1       0.94      0.98      0.96      1024

    accuracy                           0.96      2000
   macro avg       0.96      0.96      0.96      2000
weighted avg       0.96      0.96      0.96      2000



In [101]:
print(confusion_matrix(y_test,y_pred))

[[ 915   61]
 [  24 1000]]


#### Navie Bayes

In [86]:
mnb = MultinomialNB()

In [87]:
mnb.fit(X_train,y_train);

In [88]:
y_pred = mnb.predict(X_test);

In [89]:
mnb.score(X_train,y_train)

0.961125

In [90]:
mnb.score(X_test,y_test)

0.8825

In [93]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.98      0.78      0.87       976
           1       0.82      0.98      0.90      1024

    accuracy                           0.88      2000
   macro avg       0.90      0.88      0.88      2000
weighted avg       0.90      0.88      0.88      2000



In [94]:
print(confusion_matrix(y_test,y_pred))

[[ 758  218]
 [  17 1007]]


**Accuracy of All 4**

1. Logistic Regression: 95.50%
2. Random Forest: 96.35%
3. Support Vector Machine: 95.75%
4. Naive Bayes: 88.25%