## Read the Dataset

In [5]:
import os

import pandas as pd

# Location of the review CSV. Set the AMAZON_REVIEWS_CSV environment variable
# to point at your own copy of the file; when it is unset, the original local
# path below is used as the fallback.
DATA_PATH = os.environ.get(
    "AMAZON_REVIEWS_CSV",
    "C:\\Users\\pbhar\\Downloads\\new\\Amazonnn.csv",
)

df = pd.read_csv(DATA_PATH)

# Display a random sample of 10 rows; the fixed seed keeps the preview
# identical across re-runs of the notebook.
df.sample(10, random_state=42)


Unnamed: 0,Text,label
16297,Could care less about the game just like readi...,1
19241,"I was looking for a free currency converter, w...",1
12941,this is very easy to follow and understand. ma...,1
19420,"This is a simple bubble popper game, but with ...",1
7870,It was nice to revisit my palm pilot days. And...,1
8108,"Until now,all the good songs in the Kindle Fir...",1
11082,very cool to see what peaple are doing.in sted...,1
8048,I use You Tube quite often so to have access t...,1
18851,worst app ever. not for kindle fire. if you ha...,0
7769,I use this app all the time. It is outstandin...,1


## Get the Bing Liu lexicon

In [8]:
import nltk
from nltk.corpus import opinion_lexicon
from nltk.tokenize import word_tokenize

# Fetch the opinion lexicon corpus data through the NLTK downloader
nltk.download('opinion_lexicon')

# Collect a quick overview: total word count plus the first ten entries of
# the positive and negative word lists
lexicon_size = len(opinion_lexicon.words())
positive_preview = opinion_lexicon.positive()[:10]
negative_preview = opinion_lexicon.negative()[:10]

print('Total number of words in opinion lexicon:', lexicon_size)
print('Examples of positive words in opinion lexicon:', positive_preview)
print('Examples of negative words in opinion lexicon:', negative_preview)


Total number of words in opinion lexicon 6789
Examples of positive words in opinion lexicon ['a+', 'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'accessible', 'acclaim', 'acclaimed', 'acclamation']
Examples of negative words in opinion lexicon ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably', 'abominate', 'abomination', 'abort', 'aborted']


[nltk_data] Downloading package opinion_lexicon to
[nltk_data]     C:\Users\pbhar\AppData\Roaming\nltk_data...
[nltk_data]   Package opinion_lexicon is already up-to-date!


## Create the "Dictionary"

In [25]:
# Build the word -> score lookup used to score the review text.

# Tokenizer data for word_tokenize, which the scoring function calls.
# NOTE(review): newer NLTK releases may look for 'punkt_tab' instead of
# 'punkt' - confirm against the installed NLTK version.
nltk.download('punkt')

# No column rename is needed here: the review column of `df` is 'Text', and
# that is the name the scoring cells read.
pos_score = 1
neg_score = -1

# Positive words are inserted first and negative words second, so a word that
# appears in both lexicon lists ends up with the negative score.
word_dict = {word: pos_score for word in opinion_lexicon.positive()}
word_dict.update({word: neg_score for word in opinion_lexicon.negative()})


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\pbhar\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Define the scoring function

In [26]:
# Function to calculate sentiment score using Bing Liu lexicon
def bing_liu_score(text):
    """Return the lexicon-based sentiment score of a piece of text.

    The text is lower-cased and split into tokens with ``word_tokenize``.
    Every token found in ``word_dict`` adds its stored score to the total;
    tokens that are not in ``word_dict`` contribute nothing.

    Parameters
    ----------
    text : str
        Text to score. Non-string values (for example ``None`` or the NaN
        of a missing cell) are treated as empty text instead of raising.

    Returns
    -------
    Sum of the ``word_dict`` scores of all matched tokens; 0 when no token
    matches or when ``text`` is not a string.
    """
    # Missing values reach this function as None/NaN, which have no .lower().
    if not isinstance(text, str):
        return 0
    tokens = word_tokenize(text.lower())
    return sum(word_dict.get(token, 0) for token in tokens)


## Fill missing values in 'Text' and score each review


In [29]:
# Replace missing values in the 'Text' column with the placeholder 'no review'.
# The result is assigned back to the column: calling fillna(inplace=True) on
# df['Text'] acts on the selected column object, which is not guaranteed to
# update `df` itself.
df['Text'] = df['Text'].fillna('no review')

# Score every review with bing_liu_score and store the result in a new column
df['Bing_Liu_Score'] = df['Text'].apply(bing_liu_score)


In [30]:
# Preview the label, the review text and its computed score for the first 10 rows
preview_columns = ['label', 'Text', 'Bing_Liu_Score']
df.loc[:, preview_columns].head(10)

Unnamed: 0,label,Text,Bing_Liu_Score
0,1,This is the best apps acording to a bunch of ...,-1
1,1,This is a pretty good version of the game for ...,4
2,1,this is a really . there are a bunch of levels...,3
3,1,"This is a silly game and can be frustrating, b...",1
4,1,This is a terrific game on any pad. Hrs of fun...,4
5,1,This is a very entertaining game! You don't h...,4
6,1,this is awesome and you don't need wi ti to pl...,2
7,1,this is awesome I bet no one even reads the re...,2
8,1,This is basicly the free version but with ads....,2
9,1,this is by far the best free app that is avail...,3


## Output

In [31]:
# Average Bing_Liu_Score within each 'label' group
df.groupby('label')[['Bing_Liu_Score']].mean()


Unnamed: 0_level_0,Bing_Liu_Score
label,Unnamed: 1_level_1
0,-0.065044
1,2.067892
