#### AUTHOR : VAISHNAV KRISHNA P
#### SENTIMENT ANALYSIS
- Sentiment analysis experiment

In [1]:
! pip install vadersentiment

Collecting vadersentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/126.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vadersentiment
Successfully installed vadersentiment-3.3.2


#### SAMPLE TEXT

In [2]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# One analyzer instance, created once and reused by the scoring cells below.
SIA = SentimentIntensityAnalyzer()

In [4]:
# sample text
# Ten short English sentences kept in one multi-line string; the string is
# scored below as a single document rather than sentence by sentence.
sample_text = '''They are playing football nicely.
Atiqua is a good girl.
He rides the bike perfectly.
Rahul was sent back home as he was ill.
Priya cooks well.
My mother was reading the book silently.
The children were going to their aunt's house.
The officer arrested the culprits.
The elephant and mouse were good friends.
The sun shines brightly in the morning.'''

# The result is a dict with 'neg', 'neu', 'pos' and 'compound' entries.
SIA.polarity_scores(sample_text) # quick sanity check that the analyzer runs

{'neg': 0.074, 'neu': 0.643, 'pos': 0.282, 'compound': 0.9382}

#### TEXT FROM A FILE

In [6]:
# Function to get the lines of text from a file
def getTextFromFile(file_name):
  """Read a text file and return its lines without newline characters.

  Parameters
  ----------
  file_name : path of the file to read. It is opened in text mode with the
    platform default encoding.

  Returns
  -------
  list of str
    One entry per line of the file, in file order, with the '\\n' removed.
    An empty file yields an empty list.

  Raises
  ------
  OSError
    (e.g. FileNotFoundError) if the file cannot be opened.
  """
  # Plain `with open(...) as f:` rather than the parenthesised form, which
  # is a SyntaxError on Python versions older than 3.9.
  with open(file_name) as f:
    return [line.replace('\n', '') for line in f]

In [13]:
# sample input
# Load the review texts and their labels, one list entry per physical line
# of each file.
# NOTE(review): both files are .csv but are read as plain text lines, so CSV
# quoting is kept verbatim and a quoted field containing a newline would be
# split across entries — confirm the files hold exactly one record per line.
# NOTE(review): presumably line i of sample_output.csv is the label for line i
# of sample_input.csv — verify the two files are aligned and equally long.
input_data = getTextFromFile("sample_input.csv")
label = getTextFromFile('sample_output.csv')

In [19]:
# Preview the first five texts next to their labels
for idx in range(5):
  review, rating = input_data[idx], label[idx]
  print(f"Text : {review} \nLabel: {rating}")

Text : I can't say anything bad about this dress. It was gorgeous. The fabric was thick and excellent quality. The bow was my favorite part. I had to pin it up otherwise it flopped down but as mentioned previously the fabric was sturdy enough the safety pin did the trick! I would rent this again or buy it without hesitation! (My profile won't update but I am 136 lbs and the 2 was perfect!) I got so many compliments. It was exactly what I wanted. Sophisticated but so much fun! The only reason I gave it 4/5 stars is because it was a little worn. 
Label: 4
Text : "Overall I enjoyed the dress, however I did feel like it was quite long... especially in the front. I was anticipating a length about 3-4 inches above the knee, and the dress came to my knees. " 
Label: 4
Text : This is my favorite designer and love almost all of her dresses.  I love the fit and cut of this dress.  The black top w/spaghetti straps with the floral high low bottom was perfect!  I got multiple compliments on how bea

#### PREDICTION + LABELING TARGET

In [21]:
# function for the prediction and labelling
def predict_sentiment(text, analyzer=None):
  """Turn each text into a rating label '1'..'5' from its compound score.

  Parameters
  ----------
  text : iterable of str
    The texts to score; each element is scored independently.
  analyzer : optional
    Any object exposing ``polarity_scores(str)`` that returns a mapping with
    a 'compound' entry. Defaults to the notebook-level ``SIA`` analyzer, so
    existing calls ``predict_sentiment(text)`` behave as before.

  Returns
  -------
  list of str
    One label per input text, in input order.
  """
  if analyzer is None:
    # Fall back to the shared analyzer created earlier in the notebook.
    analyzer = SIA

  # (inclusive lower bound on the compound score, label), checked from the
  # highest band down; anything below the last bound is labelled '1'.
  bands = ((0.95, '5'), (0.6, '4'), (-0.6, '3'), (-0.95, '2'))

  predict_label = []
  for line in text:
    compound = analyzer.polarity_scores(line)['compound']
    predict_label.append(
      next((band_label for bound, band_label in bands if compound >= bound), '1')
    )

  return predict_label

In [23]:
# prediction
# Run predict_sentiment over every loaded text; the result is a list of
# label strings in the same order as input_data.
predict_label = predict_sentiment(input_data)

In [28]:
# Accuracy Scores
from sklearn.metrics import accuracy_score,cohen_kappa_score

# Exact-match accuracy of the VADER-based labels against the reference labels.
print(f"Accuracy score : {accuracy_score(predict_label, label)}")
# Unweighted kappa: every disagreement counts the same.
print(f"Cohen Kappa Score(unweight): {cohen_kappa_score(predict_label, label, weights=None)}")
# Quadratic-weighted kappa: the explicit `labels` list fixes the rating order
# that the weights are computed from. This line was previously also printed
# as "(unweight)", which mislabelled the metric.
print(f"Cohen Kappa Score(quadratic weight): {cohen_kappa_score(predict_label, label, weights='quadratic', labels=['5','4','3','2','1'])}")

Accuracy score : 0.55
Cohen Kappa Score(unweight): 0.25
Cohen Kappa Score(unweight): 0.14634146341463417


### PREDICTION USING NAIVE BAYES

In [42]:
# Hold-out split: the first 12 samples train the model, the rest test it
X_train, X_test = input_data[:12], input_data[12:]
y_train, y_test = label[:12], label[12:]

In [43]:
# importing necessary libraries
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer

In [44]:
# applying count vectorizer
# Bag-of-words counts: fit the vectorizer on the training texts only, then
# reuse that fitted vectorizer to encode the test texts.
vectorizer = CountVectorizer()
trainXBow = vectorizer.fit_transform(X_train)
testXBow = vectorizer.transform(X_test)

In [45]:
# model training
# Fit a multinomial Naive Bayes classifier on the training bag-of-words
# matrix and the matching training labels.
model = MultinomialNB()
model.fit(trainXBow,y_train)

In [48]:
# prediction
# Predict a label for every row of the encoded test set.
predictions = model.predict(testXBow)

In [49]:
# Accuracy scores
from sklearn.metrics import accuracy_score,cohen_kappa_score

# Exact-match accuracy of the Naive Bayes predictions on the held-out labels.
print(f"Accuracy score : {accuracy_score(predictions,y_test)}")
# Unweighted kappa: every disagreement counts the same.
print(f"Cohen Kappa Score(unweight): {cohen_kappa_score(predictions,y_test, weights=None)}")
# Quadratic-weighted kappa: the explicit `labels` list fixes the rating order
# that the weights are computed from. This line was previously also printed
# as "(unweight)", which mislabelled the metric.
print(f"Cohen Kappa Score(quadratic weight): {cohen_kappa_score(predictions,y_test, weights='quadratic', labels=['5','4','3','2','1'])}")

Accuracy score : 0.75
Cohen Kappa Score(unweight): 0.3846153846153846
Cohen Kappa Score(unweight): 0.3846153846153846
