In [4]:
import pandas as pd
import spacy
# Load spaCy's small English pipeline once; the cells below reuse this `nlp` object
# for every review they parse.
# NOTE(review): the small model is understood to ship without static word vectors, so
# Doc.similarity scores computed with it may be unreliable — confirm against the spaCy model docs.
nlp = spacy.load('en_core_web_sm')

In [5]:
from textblob import TextBlob

In [6]:
# Load only the review text column from the Amazon product reviews CSV.
# The column is selected by name instead of by position (it was index 20): the cells
# below all look it up as 'reviews.text', so selecting by name keeps the load consistent
# with them and keeps it working if the CSV's column order ever changes.
# dtype=str makes pandas read the values as text rather than inferring a type.
df = pd.read_csv('amazon_product_reviews.csv', usecols=['reviews.text'], dtype=str)
print(df.head())  # Show the first rows to confirm the file was read correctly.

                                        reviews.text
0  I thought it would be as big as small paper bu...
1  This kindle is light and easy to use especiall...
2  Didnt know how much i'd use a kindle so went f...
3  I am 100 happy with my purchase. I caught it o...
4  Solid entry level Kindle. Great for kids. Gift...


In [7]:
# Keep a handle on just the review text column (taken from df before any rows are dropped).
reviews_data = df.loc[:, 'reviews.text']


In [8]:
# Drop every row whose review text is missing. dropna returns a new frame, so df is unchanged.
clean_data = df.dropna(subset=['reviews.text'])

# Check the cleaned frame itself. The original printed df here, which is the frame that
# still contains any missing rows, so it could not confirm the clean-up had worked.
assert clean_data['reviews.text'].notna().all(), "clean_data still contains missing reviews"
print(clean_data.head(10))

                                        reviews.text
0  I thought it would be as big as small paper bu...
1  This kindle is light and easy to use especiall...
2  Didnt know how much i'd use a kindle so went f...
3  I am 100 happy with my purchase. I caught it o...
4  Solid entry level Kindle. Great for kids. Gift...
5  This make an excellent ebook reader. Don't exp...
6  I ordered this for my daughter, as I have the ...
7  I bought my Kindle about 2 months ago and the ...
8  amazon kindle is always the best ebook, upgrad...
9  It's beyond my expectation, and it can even sh...


In [9]:
# Count the non-missing entries per column; this is the upper limit for any sample
# taken from the data set in the cells below.
clean_data.notna().sum()

reviews.text    5000
dtype: int64

In [10]:
def analyse_polarity(text, remove_stopwords=False):
    """Return the TextBlob polarity score for a piece of review text.

    Parameters
    ----------
    text : str
        The review text to score.
    remove_stopwords : bool, default False
        When True, the text is lower-cased, parsed with spaCy, and its stop words
        are removed before scoring. When False (the default) the raw text is scored
        as-is, which is what this function has always returned.

    Returns
    -------
    float
        The value of ``TextBlob(...).sentiment.polarity`` for the scored text.
        Callers in this notebook treat > 0 as positive, < 0 as negative and
        0 as neutral.

    Notes
    -----
    The previous version always ran the spaCy parse and built a stop-word-filtered
    token list, but then scored the untouched input, so the parse only cost time.
    The preprocessing is now opt-in so the default path skips that wasted work
    while producing the same scores.
    """
    if remove_stopwords:
        # Caution: spaCy's stop-word list may include negations, so filtering can
        # change the sentiment of a sentence — compare results before relying on it.
        doc = nlp(text.lower())
        text = " ".join(token.text for token in doc if not token.is_stop)

    return TextBlob(text).sentiment.polarity
    

In [11]:
# Pick two individual reviews, addressed by their index labels, to compare with each other.

my_review_choice1 = clean_data.loc[50, 'reviews.text']
print(my_review_choice1)
my_review_choice2 = clean_data.loc[1500, 'reviews.text']
print(my_review_choice2)

# Parse the second review with spaCy; the next cell compares the first review against it.
query_doc = nlp(my_review_choice2)


This Kindle doesn't have a lighted screen like the pricier models but it has a higher dpi screen than the lower model. It's exactly what I needed, nothing more, nothing less. I've had no problems with the reader at all so far so I'll give it 5 stars.
I absolutely love Alexa! This device is loud, easy to install, and does exactly what I tell her to do.


In [12]:
# Compares the similarity between the two selected reviews from the data set, rounded to 4 d.p.
#
# The original wrapped this in `for text in my_review_choice1`. Because my_review_choice1
# is a string, that loop ran once per character and re-parsed the very same review each
# time, always producing the same score (and leaving `scores` undefined for an empty
# string). The comparison is therefore done exactly once here.
#
# NOTE(review): the saved output shows a warning was emitted by the similarity call —
# presumably spaCy's "no word vectors loaded" warning for the small model; confirm, and
# consider a model with vectors if the score needs to be meaningful.
review = nlp(my_review_choice1)
similarity = query_doc.similarity(review)
scores = round(similarity, 4)

print(scores)

  similarity = query_doc.similarity(review)


0.5345


In [13]:
# Take the first 1000 reviews (by position) as the sample for the sentiment count.
my_review_choice_range = clean_data['reviews.text'].iloc[:1000]

In [14]:
# Tallies for the number of negative, positive and neutral reviews; each starts at zero.

neg_count, pos_count, neut_count = 0, 0, 0

In [15]:
# Classify every review in the sample by the sign of its polarity score and tally the
# positive, negative and neutral reviews.
#
# The tallies are reset at the top of this cell. Previously they were only initialised in
# a separate cell, so running this cell a second time added to the totals already in the
# kernel and silently doubled the counts.
pos_count = 0
neg_count = 0
neut_count = 0

for text in my_review_choice_range:

    # Polarity score for this individual review.
    polarity_score = analyse_polarity(text)

    # `sentiment` holds the label of the review just processed; it is handy when
    # inspecting individual reviews while debugging.
    if polarity_score > 0:
        sentiment = "Positive"
        pos_count += 1
    elif polarity_score < 0:
        sentiment = "Negative"
        neg_count += 1
    else:
        # A score of exactly zero is counted as neutral.
        sentiment = "Neutral"
        neut_count += 1

In [16]:
# Overall breakdown of positive, negative and neutral reviews in the selected sample.
print(
    f"Positive:  {pos_count}",
    f"Negative: {neg_count}",
    f"Neutral: {neut_count}",
    sep="\n",
)

Positive:  904
Negative: 32
Neutral: 64
