# LEXICON SCORING ALGORITHM - 2/7/24

## Importing all necessary libraries

In [1]:
# Import all the necessary libraries
import os

import pandas as pd

## Reading the data

In [2]:
# Reading the data
# The CSV location can be overridden through the REVIEWS_CSV_PATH environment
# variable so the notebook is not tied to one machine; when the variable is
# unset, the original local path is used unchanged.
DATA_PATH = os.environ.get(
    "REVIEWS_CSV_PATH",
    "C:/Users/LENOVO/OneDrive/Desktop/TextBasedAnalysis/Arts_Crafts_and_sewing_5.csv",
)
df = pd.read_csv(DATA_PATH)
# Last expression of the cell: shows the first 10 rows
df.head(10)

Unnamed: 0,overall,verified,reviewerID,asin,reviewText,summary
0,4,True,AIE8N9U317ZBM,449819906,Contains some interesting stitches.,Four Stars
1,5,True,A3ECOW0TWLH9V6,449819906,I'm a fairly experienced knitter of the one-co...,My current favorite go-to guide for inspiration
2,4,True,A278N8QX9TY2OS,449819906,Great book but the index is terrible. Had to w...,"lots of great examples, good instructions, col..."
3,5,True,A123W8HIK76XCN,449819906,I purchased the Kindle edition which is incred...,Another little gem by Melissa Leapman
4,5,True,A2A6MZ2QB4AE0L,449819906,Very well laid out and very easy to read.\n\nT...,Very comprehensive
5,5,True,A2OIU84NPVKIWN,449819906,"Beginning her career as a freelance knitter, M...",Great Addition to Any Stitch Library
6,5,True,AKIV5VMRUZK1K,449819906,This is a terrific stitch handbook (and I have...,Count on Melissa
7,4,True,A2BQ7YGPNCQSO4,449819906,The book needs to be coil bound. The content i...,good book
8,5,True,A2JNO9OR8FGNR4,449819906,I really am enjoying this book! I like the siz...,Excellent!
9,5,True,A1IFF9F3XA9X1I,449819906,Just received this book and looked over it cov...,Great addition to my library


## Exploring Opinion Lexicon in NLTK Library

In [3]:
# Import necessary Libraries
from sklearn import preprocessing
import nltk
nltk.download('opinion_lexicon')
# Importing the opinion_lexicon corpus, which contains positive and negative opinion words
from nltk.corpus import opinion_lexicon
# Importing word_tokenize function, which is used for tokenizing words
from nltk.tokenize import word_tokenize

# Quick look at the lexicon: its overall size and the first ten words of each polarity
lexicon_size = len(opinion_lexicon.words())
positive_sample = opinion_lexicon.positive()[:10]
negative_sample = opinion_lexicon.negative()[:10]

print(f'Total number of words in opinion lexicon {lexicon_size}')
print(f'Examples of positive words in opinion lexicon {positive_sample}')
print(f'Examples of negative words in opinion lexicon {negative_sample}')

Total number of words in opinion lexicon 6789
Examples of positive words in opinion lexicon ['a+', 'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'accessible', 'acclaim', 'acclaimed', 'acclamation']
Examples of negative words in opinion lexicon ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably', 'abominate', 'abomination', 'abort', 'aborted']


[nltk_data] Downloading package opinion_lexicon to
[nltk_data]     C:\Users\LENOVO\AppData\Roaming\nltk_data...
[nltk_data]   Package opinion_lexicon is already up-to-date!


## Creation of Dictionary for Sentiment Analysis

In [4]:
# Build the word -> score lookup that is used to score the review text
nltk.download('punkt')
df.rename(columns={"reviewText": "text"}, inplace=True)

pos_score = 1
neg_score = -1

# Positive words are inserted first; the negative words are applied second, so
# a word that appears in both lists ends up with the negative score.
word_dict = dict.fromkeys(opinion_lexicon.positive(), pos_score)
word_dict.update(dict.fromkeys(opinion_lexicon.negative(), neg_score))


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\LENOVO\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Calculating Sentiment Score with Bing Liu Lexicon

In [5]:
# Scoring function: net lexicon score of a piece of text
def bing_liu_score(text):
    """Return the summed lexicon score of ``text``.

    The text is lower-cased and split into tokens with ``word_tokenize``.
    Every token that is a key of ``word_dict`` contributes its stored score;
    tokens that are not in the dictionary contribute nothing.

    Parameters
    ----------
    text : str
        The text to score. A value that is not a string (for example ``None``
        or the float NaN that pandas uses for missing values) is treated as
        carrying no sentiment instead of raising ``AttributeError``.

    Returns
    -------
    int
        Sum of the ``word_dict`` scores of the matching tokens; 0 when no
        token matches or when ``text`` is not a string.
    """
    # Missing / non-text values cannot be lower-cased or tokenized
    if not isinstance(text, str):
        return 0
    # Lower-case before tokenizing so lookups match the dictionary keys
    tokens = word_tokenize(text.lower())
    # Tokens absent from the dictionary count as 0
    return sum(word_dict.get(token, 0) for token in tokens)

In [6]:
# Fill NaN values in the 'text' column.
# The result is assigned back to the column: calling fillna(inplace=True) on
# df['text'] is chained assignment, which emits a warning in recent pandas and
# does not update df when Copy-on-Write is enabled.
df['text'] = df['text'].fillna('no review')
# New column 'Bing_Liu_Score': the lexicon score of each review text
df['Bing_Liu_Score'] = df['text'].apply(bing_liu_score)

In [7]:
# Show the rating, the review text and its lexicon score for the first 10 rows
preview_columns = ['overall', 'text', 'Bing_Liu_Score']
df.loc[:, preview_columns].head(10)

Unnamed: 0,overall,text,Bing_Liu_Score
0,4,Contains some interesting stitches.,1
1,5,I'm a fairly experienced knitter of the one-co...,22
2,4,Great book but the index is terrible. Had to w...,0
3,5,I purchased the Kindle edition which is incred...,4
4,5,Very well laid out and very easy to read.\n\nT...,5
5,5,"Beginning her career as a freelance knitter, M...",15
6,5,This is a terrific stitch handbook (and I have...,9
7,4,The book needs to be coil bound. The content i...,1
8,5,I really am enjoying this book! I like the siz...,12
9,5,Just received this book and looked over it cov...,6


## Calculating mean sentiment score

In [8]:
# Average Bing Liu lexicon score for each value of the 'overall' rating
df.groupby('overall')[['Bing_Liu_Score']].mean()

Unnamed: 0_level_0,Bing_Liu_Score
overall,Unnamed: 1_level_1
1,-0.255049
2,0.566098
3,1.158796
4,2.027999
5,2.129986
