In [None]:
# Notebook from https://github.com/kendall-bug/DoorDash_Reviews/blob/main/DoorDash_Reviews.ipynb

import google_play_scraper
from google_play_scraper import app, Sort, reviews #reviews_all can also be used instead of reviews, but beware - has a limit of 19K

# domo
import domojupyter as domo 

#pandas
import pandas as pd
from pandas import json_normalize

#!pip install requests
import requests
import time
import json

#!pip install authlib
import authlib
from authlib.jose import jwt

import os
os.getcwd()

# date manipulations
from datetime import datetime
import calendar as calendar
#!pip install holidays
import holidays

import matplotlib.pyplot as plt
import numpy as np

# Tutorial: https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment?text=1st+time+using+the+app...+everything+seemed+to+be+working+fine.+when+the+app+said+my+order+was+ready%2C+i+drove+to+the+restaurant.+when+i+got+there%2C+they+said+they+were+taking+care+of+the+drive+through+orders+1st.+well%2C+thats+fine%2C+but+the+app+said+my+order+was+ready%3F+ready+is+ready...+right%3F%3F%3F+why+did+i+have+to+wait%3F+i+still+had+to+wait+over+20+minutes+to+get+the+order+filled.+i+don%27t+think+i+will+be+using+this+app+again.+there+is+no+point+if+the+local+store+is+this+inept.+%F0%9F%91%8E%F0%9F%91%8E%F0%9F%91%8E
#!pip install torch==1.10.2+cu102 torchvision==0.11.3+cu102 torchaudio===0.10.2+cu102 -f https://download.pytorch.org/whl/cu102/torch_stable.html
#!pip install transformers
#!pip install torch
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import AutoModel, TFAutoModel
from transformers import pipeline
#import numpy as np
import scipy
from scipy.special import softmax
import csv
import urllib.request


In [None]:
# Pull the newest Google Play reviews for the DoorDash Android app.
# `reviews` hands back (list_of_review_dicts, continuation_token); `reviews_all`
# returns the reviews only and is limited to about 19K, which is why `count`
# is used here to bound the pull explicitly.
# https://github.com/JoMingyu/google-play-scraper
# https://python.plainenglish.io/scraping-storing-google-play-app-reviews-with-python-5640c933c476
android_reviews, token = reviews(
    'com.dd.doordash',
    #sleep_milliseconds=0, # defaults to 0
    lang='en', # defaults to 'en'
    country='us', # defaults to 'us'
    sort=Sort.NEWEST, # defaults to Sort.MOST_RELEVANT
    count=5000, # limits number of reviews pulled
    continuation_token=None
)

# Scraper field -> output column name. Only these fields are kept, in this order.
# NOTE: the Android data has no review-title field; 'userImage' is carried along
# purely as a placeholder for 'Review Title' and must not be used as a real title.
ANDROID_COLUMN_NAMES = {
    'reviewId': 'URI',
    'userName': 'Author Username',
    'at': 'Date Review Submitted',
    'score': 'Overall App Star Rating',
    'reviewCreatedVersion': 'App Release Version',
    'userImage': 'Review Title',
    'content': 'Text',
}

# A list of dicts converts straight to one row per review / one column per key.
# Selecting + renaming in a single chain (no inplace=True) yields an independent
# frame, so later cells can add columns to it without SettingWithCopyWarning.
androiddf = (
    pd.DataFrame(android_reviews)[list(ANDROID_COLUMN_NAMES)]
    .rename(columns=ANDROID_COLUMN_NAMES)
)

androiddf.tail()
#androiddf.info()

In [None]:
# Work on an independent copy so this cell never mutates `androiddf`
# (a plain assignment would only create a second name for the same frame).
reviewsdf = androiddf.copy()

# Force the review text to str, then lowercase it so the analysis does not
# treat "The" and "the" as different words.
reviewsdf['Text'] = reviewsdf['Text'].astype(str).str.lower()

# Keep only the calendar date: the first 10 characters of the string form
# ("YYYY-MM-DD") are parsed back into a datetime column.
reviewsdf['Date Review Submitted'] = pd.to_datetime(
    reviewsdf['Date Review Submitted'].astype(str).str[:10]
)

### Extract month, day, year, quarter and weekday from the date into separate columns
review_dates = reviewsdf['Date Review Submitted'].dt
reviewsdf['Month'] = review_dates.month
reviewsdf['Day_of_Month'] = review_dates.day
reviewsdf['Year'] = review_dates.year
reviewsdf['Quarter'] = review_dates.quarter

# English weekday name regardless of the process locale.
reviewsdf['Day of Week'] = review_dates.day_name()

# 1 when the date falls on Saturday (5) or Sunday (6), else 0.
# Friday could be counted as weekend too by lowering the threshold to 4.
reviewsdf['Is_Weekend'] = np.where(review_dates.dayofweek >= 5, 1, 0)

# TODO: optionally keep only the past month of reviews (filter on
# 'Date Review Submitted' against today's date minus one month).

# Snapshot consumed by the downstream cells.
df = reviewsdf.copy()
df.tail(10)
#df.dtypes

In [None]:
# Reviews whose text is this many characters or longer are discarded because
# the emotion model cannot handle inputs beyond 512:
# https://github.com/huggingface/transformers/issues/1791
# NOTE(review): the model limit is presumably counted in tokens rather than
# characters; character length is used here as a simple proxy - confirm.
MAX_REVIEW_CHARS = 512

# Longest-first view of the reviews that survive the cut (handy for inspection).
# The helper 'length' column only exists inside this chain.
reviewsdf = (
    reviewsdf.assign(length=reviewsdf['Text'].str.len())
    .sort_values(by=['length'], ascending=False)
    .loc[lambda frame: frame['length'] < MAX_REVIEW_CHARS]
    .drop(columns='length')
)

# `df` was copied from `reviewsdf` BEFORE this filter ran, and `df` is what the
# classifier and the export cells read - so the same cut has to be applied to
# it, otherwise the filter above has no effect on the results. Row order of
# `df` is left untouched.
df = df.loc[df['Text'].str.len() < MAX_REVIEW_CHARS].copy()

reviewsdf.tail(20)

In [None]:
# Hugging Face checkpoint used to label each review with an emotion.
EMOTION_MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"

# Build the text-classification pipeline once; later cells reuse `classifier`.
classifier = pipeline(task="sentiment-analysis", model=EMOTION_MODEL_NAME)

In [None]:
def classifier_emotion(text):
    """Return the label of the first prediction `classifier` gives for `text`.

    `truncation=True` asks the pipeline's tokenizer to clip inputs that exceed
    the model's maximum sequence length, so an over-long review is classified
    on its leading part instead of making inference fail.
    """
    classifier_results = classifier(text, truncation=True)
    return classifier_results[0]['label']


def classifier_score(text):
    """Return the score of the first prediction `classifier` gives for `text`.

    `truncation=True` asks the pipeline's tokenizer to clip inputs that exceed
    the model's maximum sequence length, so an over-long review is scored on
    its leading part instead of making inference fail.
    """
    classifier_results = classifier(text, truncation=True)
    return classifier_results[0]['score']

In [None]:
# Run the model ONCE per review and keep its first prediction; label and score
# are both read from that single result instead of classifying every review
# twice. truncation=True clips inputs longer than the model's maximum length.
top_predictions = [classifier(text, truncation=True)[0] for text in df['Text']]

df['Emotion'] = [prediction['label'] for prediction in top_predictions]
df['Score'] = [prediction['score'] for prediction in top_predictions]

# convert the emotion column to title case
df['Emotion'] = df['Emotion'].str.title()

# Preview only - avoid rendering the whole frame.
df.head()

In [None]:
# show which operating system the review comes from 
df['OS'] = np.where(df.URI.str.startswith('https://itunes.apple.com'), 'Apple', 'Android')
df.head()

In [None]:
# changes data types before going back into domo
df['Overall App Star Rating']= df['Overall App Star Rating'].astype('float')
df['Overall App Star Rating']= df['Overall App Star Rating'].astype('int64')

In [None]:
# Some app store reviews show up more than once (cause unknown); keep only the
# first occurrence of each fully identical row.
df = df.drop_duplicates(keep='first')

In [None]:
# Final look at the column dtypes before the frame is written out.
df.dtypes

In [None]:
# Hand the finished frame to Domo under this dataset name.
# NOTE(review): presumably this writes to the Domo dataset of that name - confirm
# against the domojupyter documentation.
DOMO_OUTPUT_DATASET = 'doordash jupyter'
domo.write_dataframe(df, DOMO_OUTPUT_DATASET)