In [8]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import re
import nltk
import nltk

## For cleaning the text.
nltk.download('stopwords')
stemmer = nltk.SnowballStemmer("english")
from nltk.corpus import stopwords
import string
stopword = set(stopwords.words('english'))

## To calculate the sentimental scores and assign labels (Positive, Negative or Neutral)
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\moade\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\moade\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [2]:
datatwit = pd.read_excel("twitt.xlsx")

In [3]:
datatwit

Unnamed: 0.1,Unnamed: 0,count,hate_speech,offensive_language,neither,class,tweet
0,0,3,0,0,3,2,!!! RT @mayasolovely: As a woman you shouldn't...
1,1,3,0,3,0,1,!!!!! RT @mleew17: boy dats cold...tyga dwn ba...
2,2,3,0,3,0,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...
3,3,3,0,2,1,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...
4,4,6,0,6,0,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...
...,...,...,...,...,...,...,...
24778,25291,3,0,2,1,1,you's a muthaf***in lie &#8220;@LifeAsKing: @2...
24779,25292,3,0,1,2,2,"you've gone and broke the wrong heart baby, an..."
24780,25294,3,0,3,0,1,young buck wanna eat!!.. dat nigguh like I ain...
24781,25295,6,0,6,0,1,youu got wild bitches tellin you lies


# Data Cleaning

In [6]:
def clean(datatwit_text):
    datatwit_text = str(datatwit_text).lower()
    datatwit_text = re.sub('\[.*?\]', '', datatwit_text)
    datatwit_text = re.sub('https?://\S+|www\.\S+', '', datatwit_text)
    datatwit_text = re.sub('<.*?>+', '', datatwit_text)
    datatwit_text = re.sub('[%s]' % re.escape(string.punctuation), '', datatwit_text)
    datatwit_text = re.sub('\n', '', datatwit_text)
    datatwit_text = re.sub('\w*\d\w*', '', datatwit_text)
    datatwit_text = [word for word in datatwit_text.split(' ') if word not in stopword]
    datatwit_text =" ".join(datatwit_text)
    datatwit_text = [stemmer.stem(word) for word in datatwit_text.split(' ')]
    datatwit_text =" ".join(datatwit_text)
    return datatwit_text


datatwit["tweet"] = datatwit["tweet"].apply(clean)


In [7]:
datatwit

Unnamed: 0.1,Unnamed: 0,count,hate_speech,offensive_language,neither,class,tweet
0,0,3,0,0,3,2,rt mayasolov woman shouldnt complain clean ho...
1,1,3,0,3,0,1,rt boy dat coldtyga dwn bad cuffin dat hoe ...
2,2,3,0,3,0,1,rt urkindofbrand dawg rt ever fuck bitch sta...
3,3,3,0,2,1,1,rt cganderson vivabas look like tranni
4,4,6,0,6,0,1,rt shenikarobert shit hear might true might f...
...,...,...,...,...,...,...,...
24778,25291,3,0,2,1,1,yous muthafin lie coreyemanuel right tl tras...
24779,25292,3,0,1,2,2,youv gone broke wrong heart babi drove redneck...
24780,25294,3,0,3,0,1,young buck wanna eat dat nigguh like aint fuck...
24781,25295,6,0,6,0,1,youu got wild bitch tellin lie


# Calculating Sentimental scores

In [9]:
datatwit_sentiment = SentimentIntensityAnalyzer()
datatwit["Positive"] = [datatwit_sentiment.polarity_scores(i)["pos"] for i in datatwit["tweet"]]
datatwit["Negative"] = [datatwit_sentiment.polarity_scores(i)["neg"] for i in datatwit["tweet"]]
datatwit["Neutral"] = [datatwit_sentiment.polarity_scores(i)["neu"] for i in datatwit["tweet"]]

In [10]:

datatwit = datatwit[["tweet", "Positive","Negative", "Neutral"]]
datatwit.head()

Unnamed: 0,tweet,Positive,Negative,Neutral
0,rt mayasolov woman shouldnt complain clean ho...,0.147,0.157,0.696
1,rt boy dat coldtyga dwn bad cuffin dat hoe ...,0.0,0.28,0.72
2,rt urkindofbrand dawg rt ever fuck bitch sta...,0.0,0.577,0.423
3,rt cganderson vivabas look like tranni,0.333,0.0,0.667
4,rt shenikarobert shit hear might true might f...,0.154,0.407,0.44


# Assigning labels to the most frequent tweets according to the sentiment scores:

In [11]:
x = sum(datatwit["Positive"])
y = sum(datatwit["Negative"])
z = sum(datatwit["Neutral"])

def datatwit_sentiment_score(a, b, c):
    if (a>b) and (a>c):
        print("Positive 😊 ")
    elif (b>a) and (b>c):
        print("Negative 😠 ")
    else:
        print("Neutral 🙂 ")
datatwit_sentiment_score(x, y, z)

Neutral 🙂 


* Most of the tweets are neutral, which means they are neither positive nor negative.

# Total Sentimental scores

In [12]:
print("Positive: ", x)
print("Negative: ", y)
print("Neutral: ", z)

Positive:  2879.6310000000085
Negative:  7201.020999999922
Neutral:  14696.342999999732


# Summary

* The total of neutral tweets is way higher than the total of negative and positive tweets, but out of all the tweets, the negative tweets are more than the positive tweets, so we can say that most of the opinions are negative.