## Read the Dataset

In [2]:
# Import the pandas library
import pandas as pd 

# Location of the line-delimited JSON reviews file (one JSON object per line).
# NOTE(review): this is an absolute path on one machine — adjust before running elsewhere.
DATA_PATH = "C:\\Users\\pbhar\\Downloads\\Arts_Crafts_and_Sewing_5.json"

# Read the JSON file into a pandas DataFrame, treating each line as a separate JSON object
df = pd.read_json(DATA_PATH, lines=True)

# Display a random sample of 10 rows; the fixed seed makes the preview
# identical on every Restart & Run All instead of changing on each execution.
df.sample(10, random_state=42)


Unnamed: 0,overall,verified,reviewTime,reviewerID,asin,style,reviewerName,reviewText,summary,unixReviewTime,vote,image
187692,5,True,"01 12, 2014",A3PHDOTOTV258W,B002WE3VJO,,gmatweety,I got this to add to my colors as I am making ...,a nice buy,1389484800,,
397724,4,True,"11 10, 2017",AMAJGJKNPKD0O,B0197PWJR0,{'Color:': ' Black'},God&#039;s Cowgirl,I haven't used these brushes yet but from the ...,GREAT PAINT BRUSHES,1510272000,,
453128,5,True,"12 18, 2017",A3DBD5M5T66273,B00874ASQM,{'Color:': ' White'},acceberyag,Perfect,Perfect,1513555200,,
72228,5,True,"06 16, 2016",A1BM8X6G9M0A2L,B0013ZAJG6,"{'Size:': ' 12""x12""', 'Color:': ' Apple Red'}",S. Vanburen,I absolutely LOVE this album! I purchased my f...,LOVE this album!,1466035200,,
12209,5,True,"09 3, 2013",A2D2AORHOMU0CV,B0009RRTA8,{'Size:': ' 25 lb'},B. Barber,this clay is perfect for what we ordered it fo...,perfect,1378166400,,
443603,5,True,"02 20, 2015",A2G6X3AZ0ALCRT,B0050L4ZDC,,Liliian Hernandez,Love it!,Five Stars,1424390400,,
163596,5,True,"12 5, 2016",A2W1F51T0F3RZZ,B001VNNKPM,{'Color:': ' Copper'},MelissaF,Beautiful color and glitter detail,Beautiful warm color,1480896000,,
335496,5,False,"10 4, 2014",A3GOPVHU4KN0K4,B00JX10MMS,,Lita Norsworthy,So looking forward to using this great product...,Five Stars,1412380800,,
394260,5,False,"05 2, 2016",A1ING7W0IVGKQS,B017U0OB9I,,MissyBissy,I am obsessed with finding the perfect pencils...,Nice pencils for drawing,1462147200,7.0,[https://images-na.ssl-images-amazon.com/image...
43920,5,True,"06 22, 2016",A176JLEJK6E34L,B000WWGF2S,{'Color:': ' Poppyfield'},VALERIE,Awesome,Five Stars,1466553600,,


## Get the Bing Liu lexicon

In [3]:
import nltk
from nltk.corpus import opinion_lexicon
from nltk.tokenize import word_tokenize

# Fetch the opinion lexicon corpus (the log below shows it is skipped when already up-to-date)
nltk.download('opinion_lexicon')

# Summarise the lexicon: overall size plus the first ten entries of each polarity list
lexicon_summary = [
    ('Total number of words in opinion lexicon:', len(opinion_lexicon.words())),
    ('Examples of positive words in opinion lexicon:', opinion_lexicon.positive()[:10]),
    ('Examples of negative words in opinion lexicon:', opinion_lexicon.negative()[:10]),
]
for label, value in lexicon_summary:
    print(label, value)


Total number of words in opinion lexicon: 6789
Examples of positive words in opinion lexicon: ['a+', 'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'accessible', 'acclaim', 'acclaimed', 'acclamation']
Examples of negative words in opinion lexicon: ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably', 'abominate', 'abomination', 'abort', 'aborted']


[nltk_data] Downloading package opinion_lexicon to
[nltk_data]     C:\Users\pbhar\AppData\Roaming\nltk_data...
[nltk_data]   Package opinion_lexicon is already up-to-date!


## Creating the dictionary

In [4]:
# Build the word -> score lookup used to score the review text
nltk.download('punkt')
df.rename(columns={"reviewText": "text"}, inplace=True)
pos_score = 1
neg_score = -1

# Positive words are inserted first and negative words second, so any word
# present in both lists ends up with the negative score.
word_dict = dict.fromkeys(opinion_lexicon.positive(), pos_score)
word_dict.update(dict.fromkeys(opinion_lexicon.negative(), neg_score))


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\pbhar\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


## Function to compute the Bing Liu sentiment score

In [6]:
def bing_liu_score(text):
    """Compute the Bing Liu lexicon sentiment score of a piece of text.

    The text is lower-cased and tokenized with ``word_tokenize``; each token
    found in ``word_dict`` contributes its stored score and every other token
    contributes nothing.

    Args:
        text: The review text to score. Non-string values (for example
            ``None`` or a float ``NaN`` coming from a missing DataFrame cell)
            are treated as carrying no sentiment.

    Returns:
        The sum of the ``word_dict`` scores of the tokens in ``text``;
        ``0`` when no token matches or when ``text`` is not a string.
    """
    # Missing values have no .lower(); score them as neutral instead of raising.
    if not isinstance(text, str):
        return 0
    tokens = word_tokenize(text.lower())
    return sum(word_dict.get(token, 0) for token in tokens)


## Fill missing values and score each review

In [8]:
# Replace missing review text with a placeholder so every row holds a string.
# The result is assigned back to the column rather than calling
# fillna(..., inplace=True) on the df['text'] selection: that chained in-place
# form emits a FutureWarning in pandas 2.x and does not update df under
# Copy-on-Write.
df['text'] = df['text'].fillna('no review')

# Score every review with the lexicon-based scorer.
df['Bing_Liu_Score'] = df['text'].apply(bing_liu_score)


In [9]:
# Preview rating, review text and lexicon score for the first ten rows
df.head(10)[['overall', 'text', 'Bing_Liu_Score']]


Unnamed: 0,overall,text,Bing_Liu_Score
0,4,Contains some interesting stitches.,1
1,5,I'm a fairly experienced knitter of the one-co...,22
2,4,Great book but the index is terrible. Had to w...,0
3,5,I purchased the Kindle edition which is incred...,4
4,5,Very well laid out and very easy to read.\n\nT...,5
5,5,"Beginning her career as a freelance knitter, M...",15
6,5,This is a terrific stitch handbook (and I have...,9
7,4,The book needs to be coil bound. The content i...,1
8,5,I really am enjoying this book! I like the siz...,12
9,5,Just received this book and looked over it cov...,6


## Output: mean Bing Liu score by star rating

In [10]:
# Average lexicon score within each value of the 'overall' rating column
df.groupby('overall')[['Bing_Liu_Score']].mean()


Unnamed: 0_level_0,Bing_Liu_Score
overall,Unnamed: 1_level_1
1,-0.255049
2,0.566098
3,1.158796
4,2.028146
5,2.130005
