# API Handler

## Importing CSV to SQL

In [2]:
import sqlalchemy as alch
import pandas as pd
import os
import requests

password = "password"
dbName = "amazon_reviews"
connectionData = f"mysql+pymysql://root:{password}@localhost/{dbName}"
engine = alch.create_engine(connectionData)
df = pd.read_csv('data/amazon_reviews.csv', encoding='latin')
#df.to_sql("reviews", con=engine)

## GET (from Amazon Review Sentiment Analysis API)

In [None]:
# Get using an API decorator (no params)
response = requests.get(f"http://127.0.0.1:5000/db").json()

In [None]:
response[0]

In [None]:
# Get using an API decorator (with params)
params = {"reviewerName":"Ryan S."}
reviewerName = "Ryan%20S." #also try Sessue
response = requests.get(f"http://127.0.0.1:5000/db/{reviewerName}", params=params).json()

In [None]:
response[0]

In [None]:
# Get using an API decorator (no params)
response = requests.get(f"http://127.0.0.1:5000/db/sentiment").json()

In [None]:
response[0]

## POST (to Amazon Review Sentiment Analysis API)

In [None]:
url = f"http://127.0.0.1:5000/db/newReview"
data = {"reviewerName":"Joe R.", "helpful":"[1:1]", 
        "reviewText":"It was pretty good, but I expected more. I do not like the product.", 
        "overall":"2.0", "summary":"Less than expected."}
requests.post(url, params=params, data=data)

In [None]:
# If I wanted to add a new set of Amazon reviews...
#for index, row in df:
#    requests.post(url, params=params, data=row)

## Sentiment Analysis API – GET Function (already moved to .py file)

In [54]:
# libraries for NLP
#from nltk.corpus import stopwords
import spacy
from textblob import TextBlob
from statistics import mean

In [72]:
# Use NLP to find the sentiment score of review text

# SQL -> DF
df = pd.read_sql_query(
"""
SELECT reviewerName, helpful, reviewText, overall, summary FROM reviews ORDER BY helpful DESC LIMIT 100;
""", engine)

# init vars to use in function
word_list = []
sentiment_ready = []
lemmatized = []
nlp = spacy.load("en_core_web_sm")
stop = nlp.Defaults.stop_words # all stop words, for removing stop words from reviews

# find the sentiment of each review
for review in df['reviewText']: # for every review
    review_to_split = review
    for word in review_to_split.split(" "): # for every word in selected review
        if word not in stop:
            word_list.append(word)  # add split words from each review into a list of words (word_list)

    string_without_stop = " ".join(word_list) # turn back into a string
    tokens = nlp(string_without_stop) # tokenize each review

    for token in tokens:
        lemmatized.append(token.lemma_) # lemmatize each review
    sentiment_ready.append(TextBlob(" ".join(lemmatized)).sentiment.polarity) #store the sentiment of each review in a list

# compare to overall (overall-sentiment*5) & add to df
sentiment_comparison = []
for index, rows in df.iterrows():
    # print(rows['overall'], sentiment_ready[index]) # a test print, to visualize the difference before outputing a sum
    sentiment_comparison.append(rows['overall']-sentiment_ready[index]*5) # original review score (out of 5) - new score from NLP analysis of review

# store comparison and new value back into df for later visualization
dict_to_merge = {'sentiment polarity':sentiment_ready, 
                 'sentiment comparison':sentiment_comparison} # Add all lists to a new (temp) dataframe.

df2 = pd.DataFrame(dict_to_merge)
df_new = pd.concat([df, df2], axis=1)

# export df to new csv
df_new.to_csv('output/amazon_review_sentiment.csv', index="False")

print('average difference between user\'s review score (out of 5) vs review sentiment score: ',
      mean(sentiment_comparison))
#return sentiment_comparison.mean()

average difference between user's review score (out of 5) vs review sentiment score:  3.4785190488766182
