# Import necessary modules

In [102]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 

import gensim
import gensim.corpora as corpora
from gensim.models.coherencemodel import CoherenceModel
from sklearn.feature_extraction.text import TfidfVectorizer

import nltk
#nltk.download()
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
from nltk.stem.wordnet import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

nltk.download('wordnet')
dictionary_words = set(nltk.corpus.words.words())

from pprint import pprint

from textblob import TextBlob, Word

import spacy
import spacy_legacy
#loading the english language small model of spacy
en = spacy.load('en_core_web_sm')
sw_spacy = en.Defaults.stop_words

from transformers import pipeline
sentiment_pipeline = pipeline("sentiment-analysis")

import pickle
import re 
import pyLDAvis
import pyLDAvis.gensim_models

import itertools 
# #custom list of stop words
# stop_words_english = []
# with open('stop_words_english.txt', encoding="utf8") as my_file:
#     for line in my_file:
#         stop_words_english.append(line.replace("\n", ""))

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Wahbeh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Wahbeh\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)


#  Read the dataset

In [103]:
df = pd.read_csv ('SampleProcessedFinal.csv')

In [104]:
df.head()

Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,SegmentedReview
0,10608,10609,review,First time apple watch purchase. No complaints.,5,"""First time apple watch purchase .', 'No compl..."
1,11193,11194,review,I love the new features this Apple Watch has. ...,5,"""I love the new this Apple Watch ha .', 'Easy ..."
2,11475,11476,review,Easy to put on and protects the Apple Watch gr...,5,"""Easy to put on and protects the Apple Watch g..."
3,12084,12085,review,Great Apple Watch with wonderful features. Eas...,5,"""Great Apple Watch with wonderful .', 'Easy to..."
4,13081,13082,review,Love my Apple Watch series 6 44mm best watch s...,5,"""Love my Apple Watch series 6 44mm best watch ..."


In [105]:
df.shape

(750, 6)

# Change reviews into lowercase, and join reviews

In [106]:
df= df.apply(lambda x: x.astype(str).str.lower())
df.head()

Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,SegmentedReview
0,10608,10609,review,first time apple watch purchase. no complaints.,5,"""first time apple watch purchase .', 'no compl..."
1,11193,11194,review,i love the new features this apple watch has. ...,5,"""i love the new this apple watch ha .', 'easy ..."
2,11475,11476,review,easy to put on and protects the apple watch gr...,5,"""easy to put on and protects the apple watch g..."
3,12084,12085,review,great apple watch with wonderful features. eas...,5,"""great apple watch with wonderful .', 'easy to..."
4,13081,13082,review,love my apple watch series 6 44mm best watch s...,5,"""love my apple watch series 6 44mm best watch ..."


# Preprocess reviews

In [107]:
def lemmatizeStr(sentence):
    # Tokenize: Split the sentence into words
    word_list = nltk.word_tokenize(sentence)
    # Lemmatize list of words and join
    lemmatized_output = ' '.join([lemmatizer.lemmatize(w) for w in word_list])
    return lemmatized_output

In [108]:
def clean_text(docs):
    for i in range(len(docs)):
        #remove hashtags and mentions
        docs[i] = " ".join(filter(lambda x:x[0]!='#', docs[i].split())) #remove hashtags
        docs[i] = " ".join(filter(lambda x:x[0]!='@', docs[i].split())) #remove mentiones
        
        #Lemmatize text
        docs[i] = lemmatizeStr(docs[i])
       
    
    
        #Remove brands related keywords    
        brands5kw = ["Samsung Galaxy Watch 4 Classic" , "Samsung Galaxy Watch 5 Pro" , "Samsung Galaxy Watch Active 2"]

        resultwords  = [word for word in docs[i].split() if word.lower() not in brands5kw]
        docs[i] = ' '.join(resultwords)
        
        
        brands4kw = ["Apple watch series 1" , "Apple watch series 2" , "Apple watch series 3" , "Apple watch series 4",
                     "Apple watch series 5" , "Apple watch series 6" , "Apple watch series 7" , "Apple watch series 8" , 
                     "fossil hybrid smartwatch"]

        resultwords  = [word for word in docs[i].split() if word.lower() not in brands4kw]
        docs[i] = ' '.join(resultwords)

        brands3kw = ["Forerunner 245 Music" , "Forerunner 255 Music" , "Forerunner 645 Music" , 
                     "Forerunner 945 LTE" , "Forerunner 955 Solar" , "Apple watch SE" , "Apple watch Ultra" , 
                     "Fitbit Alta HR" , "Fitbit Aria Air" , "Fitbit Charge 2" , "Fitbit Charge 3" , "Fitbit Charge 4" , 
                     "Fitbit Charge 5" , "Fitbit Charge HR" , "Fitbit Flex 2" , "Fitbit Inspire 2" , 
                     "Fitbit Inspire 3" , "Fitbit Inspire HR" , "Fitbit Sense 2" , "Fitbit Versa 2" , 
                     "Fitbit Versa 3" , "Fitbit Versa 4" , "fitbit versa lite" , "fossil sport smartwatch" , 
                     "Galaxy Watch 2" , "Galaxy Watch 3" , "Galaxy Watch 4" , "Galaxy Watch 5" , 
                     "Galaxy Watch Active" , "garmin captain marvel" , "garmin darth vader" , "garmin Forerunner 10" , 
                     "garmin Forerunner 101" , "garmin Forerunner 110" , "garmin Forerunner 15" , 
                     "garmin Forerunner 201" , "garmin Forerunner 205" , "garmin Forerunner 210" , 
                     "garmin Forerunner 220" , "garmin Forerunner 225" , "garmin Forerunner 230" , 
                     "garmin Forerunner 235" , "garmin Forerunner 245" , "garmin Forerunner 25" , 
                     "garmin Forerunner 255" , "garmin Forerunner 30" , "garmin Forerunner 301" , 
                     "garmin Forerunner 305" , "garmin Forerunner 310XT" , "garmin Forerunner 35" , 
                     "garmin Forerunner 405" , "garmin Forerunner 405CX" , "garmin Forerunner 410" , 
                     "garmin Forerunner 45" , "garmin Forerunner 45S" , "garmin Forerunner 50" , 
                     "garmin Forerunner 55" , "garmin Forerunner 60" , "garmin Forerunner 610" , 
                     "garmin Forerunner 620" , "garmin Forerunner 630" , "garmin Forerunner 645" , 
                     "garmin Forerunner 735XT" , "garmin Forerunner 745" , "garmin Forerunner 910XT" , 
                     "garmin Forerunner 920XT" , "garmin Forerunner 935" , "garmin Forerunner 945" , 
                     "garmin Forerunner 955" , "letsfit fitness racker" , "polar grit x" , "samsung galaxy fit" , 
                     "Samsung Galaxy Gear" , "Samsung Galaxy Watch " , "Samsung Gear 2" , "Samsung Gear Live" , 
                     "Samsung Gear S" , "Samsung Gear S2" , "Samsung Gear S3" , "Samsung Gear Sport" , 
                     "Samsung Watch 2" , "Samsung Watch 3" , "Samsung Watch 4" , "Samsung Watch 5" , 
                     "Samsung Watch Active" , "striiv apex hr" , "striiv dash hr" , "withings move ecg" , 
                     "xiaomi mi band"]

        resultwords  = [word for word in docs[i].split() if word.lower() not in brands3kw]
        docs[i] = ' '.join(resultwords)
        
        brands2kw = ["Forerunner 10" , "Forerunner 101" , "Forerunner 110" , "Forerunner 15" , "Forerunner 201" , 
                     "Forerunner 205" , "Forerunner 210" , "Forerunner 220" , "Forerunner 225" , 
                     "Forerunner 230" , "Forerunner 235" , "Forerunner 245" , "Forerunner 25" , 
                     "Forerunner 255" , "Forerunner 30" , "Forerunner 301" , "Forerunner 305" , 
                     "Forerunner 310XT" , "Forerunner 35" , "Forerunner 405" , "Forerunner 405CX" , 
                     "Forerunner 410" , "Forerunner 45" , "Forerunner 45S" , "Forerunner 50" , 
                     "Forerunner 55" , "Forerunner 60" , "Forerunner 610" , "Forerunner 620" , 
                     "Forerunner 630" , "Forerunner 645" , "Forerunner 735XT" , "Forerunner 745" , 
                     "Forerunner 910XT" , "Forerunner 920XT" , " Forerunner 935" , "Forerunner 945" , 
                     "Forerunner 955" , "Inspire 2" , "Sense 2" , "amazon halo" , "Aria Air" , "Charge 2" , 
                     "Charge 3" , "Charge 4" , "Charge 5" , "coros apex" , "coros pace" , "Fitbit Ace" , 
                     "Fitbit Alta" , "Fitbit Aria" , "Fitbit Blaze" , "Fitbit Charge" , "Fitbit Flex" , 
                     "Fitbit Flyer" , "Fitbit Force" , "Fitbit Ionic" , "fitbit ionic" , "Fitbit Luxe" , 
                     "Fitbit One" , "Fitbit Sense" , "Fitbit Surge" , "Fitbit Ultra" , "fitbit verca" , 
                     "Fitbit Versa" , "Fitbit Zip" , "Flex 2" , "fossil gen" , "Galaxy Watch" , "garmin approach" , 
                     "garmin fēnix" , "garmin instinct" , "garmin lily" , "garmin luxe" , "garmin swim" , 
                     "garmin venu" , "garmin vívoactive" , "garmin vívofit" , "garmin vívoki" , "garmin vívomove" , 
                     "garmin vívosmart" , "garmin vívosport" , "Gear 2" , "Gear Live" , "Gear S" , "Gear S2" , 
                     "Gear S3" , "Gear Sport" , "huawei band" , "huawei talkband" , "huawei watch" , 
                     "ihealth watch" , "Inspire 3" , "misfit command" , "misfit path" , "misfit ray" , 
                     "misfit vap" , "moov hr" , "moov now" , "mykronoz zefit" , "mykronoz zefit" , "mykronoz zeneo" , 
                     "mykronoz zeround" , "mykronoz zesport" , "mykronoz zetime" , "mykronoz zetrack" , 
                     "mykronoz zewatch" , "polar a360" , "polar a370" , "polar h10" , "polar h9" , "polar ignite" , 
                     "polar m200" , "polar m430" , "polar oh1" , "polar titan" , "polar vantage" , "Samsung Gear" , 
                     "Samsung Watch " , "striiv fusion" , "suunto peak" , "timex ironman" , "Versa 2" , "Versa 3" , 
                     "Versa 4" , "wahoo fitness" , "withings move" , "withings pulds" , "withings scanwatch" , 
                     "withings steel" , "wyze band"]

        resultwords  = [word for word in docs[i].split() if word.lower() not in brands2kw]
        docs[i] = ' '.join(resultwords)        

        brands1kw = ["Forerunner" , "Forerunner" , "fossil" , "Nüvi " , "Quest"]

        resultwords  = [word for word in docs[i].split() if word.lower() not in brands1kw]
        docs[i] = ' '.join(resultwords)
        
        #Remove feature-related words
        features = ["ability", "quality", "feature", "aspect", "abilities", "qualities", "features", "aspects"]
        resultwords  = [word for word in docs[i].split() if word.lower() not in features]
        docs[i] = ' '.join(resultwords)

    return docs

In [109]:
reviews = np.array(df['SegmentedReview'])
SegmentedReview_processed_reviews = clean_text(reviews)
SegmentedReview_processed_reviews
df['SegmentedReview_processed_reviews']=pd.Series(SegmentedReview_processed_reviews)

In [110]:
df.head()

Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,SegmentedReview,SegmentedReview_processed_reviews
0,10608,10609,review,first time apple watch purchase. no complaints.,5,"""first time apple watch purchase .', 'no compl...","`` first time apple watch purchase . ' , 'no c..."
1,11193,11194,review,i love the new features this apple watch has. ...,5,"""i love the new this apple watch ha .', 'easy ...","`` i love the new this apple watch ha . ' , 'e..."
2,11475,11476,review,easy to put on and protects the apple watch gr...,5,"""easy to put on and protects the apple watch g...",`` easy to put on and protects the apple watch...
3,12084,12085,review,great apple watch with wonderful features. eas...,5,"""great apple watch with wonderful .', 'easy to...","`` great apple watch with wonderful . ' , 'eas..."
4,13081,13082,review,love my apple watch series 6 44mm best watch s...,5,"""love my apple watch series 6 44mm best watch ...",`` love my apple watch series 6 44mm best watc...


In [111]:
df.shape

(750, 7)

# Functions to check sentiment, quality terms, and context terms 

In [112]:
############## Check Sentiment ####################

def checkSent(string):
    #print(string)
    return sentiment_pipeline(string)[0].get('label') #returns POSITIVE or NEGATIVE as a string

data = "I love you"
checkSent(data)

'POSITIVE'

In [113]:
############## Check Terms ####################

terms = ['easy to use', 'simple', 'straightforward', 'user-friendly', 'intuitive']

def checkterms(string, termsvalues):
    checkterm = False
    for i in range(len(termsvalues)): #now can do exact matching
        #print(string)
        if ((re.search(r"\b"+termsvalues[i]+r"\b",string) != None)): 
            checkterm = True
    return checkterm
 
checkterms('So easy to use .', terms)   # return true if a quality term was found, otherwise, false

True

In [114]:
############## Check Context ####################

def checkcontext(string, contextvalues):
    checkcontext = False
    for i in range(len(contextvalues)):
        #print(string)
        if ((re.search(r"\b"+contextvalues[i]+r"\b",string) != None)):
            checkcontext = True
    return checkcontext
 
#terms = ['health activities','target']   
#checkterms('I use wearables to achieve target goals', terms)
#return true iof a context term was found, otherwise, false.

# Define fucntions to encode reviews


In [115]:
############## Encode Reviews v1 without Context terms  'encodereviews' ####################

In [116]:
#myArray = [ 'love my fitbit charge 4 . ' , 'so easy to use . ' ]

#ease_of_use_terms = ['simple', 'easy to use',  'straightforward', 'user-friendly', 'intuitive']

# ['this replacement band for the charge 3 seemed great at the start', ' but the two loops broke within 6 months of use', ' in this case', " the cheap price wasn't worth it"]
def encodereviews(reviewarray, qualityterms):
    encode = 0
    for i in reviewarray:
        if((checkSent(i) == 'POSITIVE') and # no negatives, not negative sentiment
           (checkterms(i, qualityterms) == True)):                                   # has no irrelevant terms
            encode = 1
        else:
            continue
    return encode   

#encodereviews(myArray, ease_of_use_terms)

In [117]:
############## Encode Reviews v2 including Context terms  'encodereviewswc' ####################


In [118]:
def encodereviewswc(reviewarray, qualityterms, contextterms):
    encode = 0
    for i in reviewarray:
        if((checkSent(i) == 'POSITIVE') and   # no negatives, not negative sentiment
           (checkterms(i, qualityterms) == True) and                           # has a quality dimension term
           (checkcontext(i, contextterms) == True)):                                     # has no irrelevant terms
            encode = 1
        else:
            continue
    return encode   

# Encode Reviews

In [119]:
df["Hedonic Motivation"] = ""

terms = ['enjoyable', 'enjoyment', 'enjoy', 'pleasant', 'flow experience', 'pleasure', 'fun', 
         'interesting', 'satisfied', 'satisfaction', 'feel good', 'happiness', 'happy', 'happier', 
         'entertainment', 'entertain', 'exciting', 'playfulness', 'playful',  'feeling of accomplishment']

for i in range(len(df['SegmentedReview_processed_reviews'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['SegmentedReview_processed_reviews'][i].split(","), terms) == 1):
        df.at[i,'Hedonic Motivation'] = 1
    else:
        df.at[i,'Hedonic Motivation'] = 0
df.head()

0
250
500


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,SegmentedReview,SegmentedReview_processed_reviews,Hedonic Motivation
0,10608,10609,review,first time apple watch purchase. no complaints.,5,"""first time apple watch purchase .', 'no compl...","`` first time apple watch purchase . ' , 'no c...",0
1,11193,11194,review,i love the new features this apple watch has. ...,5,"""i love the new this apple watch ha .', 'easy ...","`` i love the new this apple watch ha . ' , 'e...",0
2,11475,11476,review,easy to put on and protects the apple watch gr...,5,"""easy to put on and protects the apple watch g...",`` easy to put on and protects the apple watch...,0
3,12084,12085,review,great apple watch with wonderful features. eas...,5,"""great apple watch with wonderful .', 'easy to...","`` great apple watch with wonderful . ' , 'eas...",0
4,13081,13082,review,love my apple watch series 6 44mm best watch s...,5,"""love my apple watch series 6 44mm best watch ...",`` love my apple watch series 6 44mm best watc...,0


In [120]:
df["Connectivity Support"] = ""

terms = ['pair', 'sync', 'connect']

for i in range(len(df['SegmentedReview_processed_reviews'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['SegmentedReview_processed_reviews'][i].split(","), terms) == 1):
        df.at[i,'Connectivity Support'] = 1
    else:
        df.at[i,'Connectivity Support'] = 0
    
df.head()

0
250
500


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,SegmentedReview,SegmentedReview_processed_reviews,Hedonic Motivation,Connectivity Support
0,10608,10609,review,first time apple watch purchase. no complaints.,5,"""first time apple watch purchase .', 'no compl...","`` first time apple watch purchase . ' , 'no c...",0,0
1,11193,11194,review,i love the new features this apple watch has. ...,5,"""i love the new this apple watch ha .', 'easy ...","`` i love the new this apple watch ha . ' , 'e...",0,0
2,11475,11476,review,easy to put on and protects the apple watch gr...,5,"""easy to put on and protects the apple watch g...",`` easy to put on and protects the apple watch...,0,0
3,12084,12085,review,great apple watch with wonderful features. eas...,5,"""great apple watch with wonderful .', 'easy to...","`` great apple watch with wonderful . ' , 'eas...",0,0
4,13081,13082,review,love my apple watch series 6 44mm best watch s...,5,"""love my apple watch series 6 44mm best watch ...",`` love my apple watch series 6 44mm best watc...,0,0


In [121]:
df["Customizability"] = ""

terms = ['customize', 'custom', 'personalize', 'personalization']

for i in range(len(df['SegmentedReview_processed_reviews'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['SegmentedReview_processed_reviews'][i].split(","), terms) == 1):
        df.at[i,'Customizability'] = 1
    else:
        df.at[i,'Customizability'] = 0
        
df.head()

0
250
500


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,SegmentedReview,SegmentedReview_processed_reviews,Hedonic Motivation,Connectivity Support,Customizability
0,10608,10609,review,first time apple watch purchase. no complaints.,5,"""first time apple watch purchase .', 'no compl...","`` first time apple watch purchase . ' , 'no c...",0,0,0
1,11193,11194,review,i love the new features this apple watch has. ...,5,"""i love the new this apple watch ha .', 'easy ...","`` i love the new this apple watch ha . ' , 'e...",0,0,0
2,11475,11476,review,easy to put on and protects the apple watch gr...,5,"""easy to put on and protects the apple watch g...",`` easy to put on and protects the apple watch...,0,0,0
3,12084,12085,review,great apple watch with wonderful features. eas...,5,"""great apple watch with wonderful .', 'easy to...","`` great apple watch with wonderful . ' , 'eas...",0,0,0
4,13081,13082,review,love my apple watch series 6 44mm best watch s...,5,"""love my apple watch series 6 44mm best watch ...",`` love my apple watch series 6 44mm best watc...,0,0,0


In [123]:
df["Perceived Ease of Use"] = ""

terms = ['easy', 'clear', 'understandable', 'easy to use', 'mental effort', 
         'trouble free', 'simple', 'controllable', 'easy to learn']

for i in range(len(df['SegmentedReview_processed_reviews'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['SegmentedReview_processed_reviews'][i].split(","), terms) == 1):
        df.at[i,'Perceived Ease of Use'] = 1
    else:
        df.at[i,'Perceived Ease of Use'] = 0
        
df.head()

0
250
500


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,SegmentedReview,SegmentedReview_processed_reviews,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use
0,10608,10609,review,first time apple watch purchase. no complaints.,5,"""first time apple watch purchase .', 'no compl...","`` first time apple watch purchase . ' , 'no c...",0,0,0,0
1,11193,11194,review,i love the new features this apple watch has. ...,5,"""i love the new this apple watch ha .', 'easy ...","`` i love the new this apple watch ha . ' , 'e...",0,0,0,1
2,11475,11476,review,easy to put on and protects the apple watch gr...,5,"""easy to put on and protects the apple watch g...",`` easy to put on and protects the apple watch...,0,0,0,1
3,12084,12085,review,great apple watch with wonderful features. eas...,5,"""great apple watch with wonderful .', 'easy to...","`` great apple watch with wonderful . ' , 'eas...",0,0,0,1
4,13081,13082,review,love my apple watch series 6 44mm best watch s...,5,"""love my apple watch series 6 44mm best watch ...",`` love my apple watch series 6 44mm best watc...,0,0,0,0


In [124]:
df["Appeal"] = ""

terms = ['cool', 'appearance', 'aesthetic', 'stylish', 'fashion', 'accessory', 'pleasing aesthetics', 'color', 
         'design', 'texture', 'uniqueness', 'unique', 'size', 'look']

for i in range(len(df['SegmentedReview_processed_reviews'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['SegmentedReview_processed_reviews'][i].split(","), terms) == 1):
        df.at[i,'Appeal'] = 1
    else:
        df.at[i,'Appeal'] = 0
        
df.head()

0
250
500


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,SegmentedReview,SegmentedReview_processed_reviews,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal
0,10608,10609,review,first time apple watch purchase. no complaints.,5,"""first time apple watch purchase .', 'no compl...","`` first time apple watch purchase . ' , 'no c...",0,0,0,0,0
1,11193,11194,review,i love the new features this apple watch has. ...,5,"""i love the new this apple watch ha .', 'easy ...","`` i love the new this apple watch ha . ' , 'e...",0,0,0,1,0
2,11475,11476,review,easy to put on and protects the apple watch gr...,5,"""easy to put on and protects the apple watch g...",`` easy to put on and protects the apple watch...,0,0,0,1,0
3,12084,12085,review,great apple watch with wonderful features. eas...,5,"""great apple watch with wonderful .', 'easy to...","`` great apple watch with wonderful . ' , 'eas...",0,0,0,1,0
4,13081,13082,review,love my apple watch series 6 44mm best watch s...,5,"""love my apple watch series 6 44mm best watch ...",`` love my apple watch series 6 44mm best watc...,0,0,0,0,0


In [125]:
df["Perceived Value"] = ""

terms = ['price', 'value', 'cost', 'monetary', 'money']

for i in range(len(df['SegmentedReview_processed_reviews'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['SegmentedReview_processed_reviews'][i].split(","), terms) == 1):
        df.at[i,'Perceived Value'] = 1
    else:
        df.at[i,'Perceived Value'] = 0

df.head()

0
250
500


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,SegmentedReview,SegmentedReview_processed_reviews,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal,Perceived Value
0,10608,10609,review,first time apple watch purchase. no complaints.,5,"""first time apple watch purchase .', 'no compl...","`` first time apple watch purchase . ' , 'no c...",0,0,0,0,0,0
1,11193,11194,review,i love the new features this apple watch has. ...,5,"""i love the new this apple watch ha .', 'easy ...","`` i love the new this apple watch ha . ' , 'e...",0,0,0,1,0,0
2,11475,11476,review,easy to put on and protects the apple watch gr...,5,"""easy to put on and protects the apple watch g...",`` easy to put on and protects the apple watch...,0,0,0,1,0,0
3,12084,12085,review,great apple watch with wonderful features. eas...,5,"""great apple watch with wonderful .', 'easy to...","`` great apple watch with wonderful . ' , 'eas...",0,0,0,1,0,0
4,13081,13082,review,love my apple watch series 6 44mm best watch s...,5,"""love my apple watch series 6 44mm best watch ...",`` love my apple watch series 6 44mm best watc...,0,0,0,0,0,0


In [127]:
# sleep tracking	648       # track of	488    # does everything	439   # rate monitor	382
# ability to	370    # working out	359     # blood pressure	326     # keep track	284
# fitness tracking	269    It tracks	155    everything I need	202    #keep track of	267
# my heart rate	405      # to keep track	135    # to track my	411     # use it for	181
# use it to	134  # was able to	317 # and heart rate	69 # everything I need it	79 #has all the features	56
# heart rate	57   #it to track my	64  #keep track of my	134  # steps	71   #helps me keep track of	25   
#I use it every day	25    #I use it to track	24   track my heart rate	24    track my steps and	33

df["Perceived Usefulness"] = ""

terms = ['rejuvenated', 'simplify', 'control over', 'reduce', 'accomplish more', 'save time', 'work more quickly',
        'accomplish','accomplished','accomplishes','accomplishing','achieve','achieved','achieves','achieving',
        'advantageous ','beneficial','check','checked','checking','checks','complete','completes','completing',
        'completing','convenient','conveniently','develop','developed','developing','develops','effective',
        'effectively','effectiveness','enable','enables','enhance','enhanced','enhances','enhancing','handle',
        'handled','handles','help','helpful','helps','improve','improved','improves ','improving','increase',
        'increased','increases','increasing','maintain','maintained ','maintaining','manage','managed','manages',
        'managing','monitor','monitored','monitoring','monitors','obtain','obtained','obtaining','obtains',
        'perform','performed','performing','performs','provide','provided','provides','providing','reduce',
        'reduced','reduces','reducing','save','saved','saves','saving','useful', 'track', 'does']


context = ['act','activities','concerns','convenience','daily activities','daily health','daily healthcare',
           'daily life','efficiency','effort','exercise','fitness','goals','good features','health condition',
           'health information','health management','health monitoring','healthcare','healthcare activities',
           'healthy habits','healthy status','information','job','learning achievement','life',
           'life more convenient ','life more effective','managing my health','managing my health',
           'meeting my needs','meeting my needs','performance','physical activities','physical health',
           'productivity','quality','quality of my life','safety','service','stress','task','tasks','things',
           'things more quickly','time','work satisfaction','activity','acts','concern','exercises','goal',
           'good feature','health conditions','healthcare activity','healthy habit','jobs','manage health',
           'manage my heakth','managing health','meeting need','meeting needs','physical activity','quality of life',
           'services','task','tasks','manages my health','meet needs','manages health','meet need', 'steps','calories',
           'distance','floors','active minutes','sleep time','sleep quality', 'heart rate', 'everything']



for i in range(len(df['SegmentedReview_processed_reviews'])):
    if(i%250 == 0):
        print(i)
    if(encodereviewswc(df['SegmentedReview_processed_reviews'][i].split(","), terms, context) == 1):
        df.at[i,'Perceived Usefulness'] = 1
    else:
        df.at[i,'Perceived Usefulness'] = 0
        
df.head()

Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,SegmentedReview,SegmentedReview_processed_reviews,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal,Perceived Value,Perceived Usefulness
0,10608,10609,review,first time apple watch purchase. no complaints.,5,"""first time apple watch purchase .', 'no compl...","`` first time apple watch purchase . ' , 'no c...",0,0,0,0,0,0,0
1,11193,11194,review,i love the new features this apple watch has. ...,5,"""i love the new this apple watch ha .', 'easy ...","`` i love the new this apple watch ha . ' , 'e...",0,0,0,1,0,0,0
2,11475,11476,review,easy to put on and protects the apple watch gr...,5,"""easy to put on and protects the apple watch g...",`` easy to put on and protects the apple watch...,0,0,0,1,0,0,0
3,12084,12085,review,great apple watch with wonderful features. eas...,5,"""great apple watch with wonderful .', 'easy to...","`` great apple watch with wonderful . ' , 'eas...",0,0,0,1,0,0,0
4,13081,13082,review,love my apple watch series 6 44mm best watch s...,5,"""love my apple watch series 6 44mm best watch ...",`` love my apple watch series 6 44mm best watc...,0,0,0,0,0,0,0


In [128]:
df["Device Quality"] = ""

terms = ['quality', 'system quality', 'service quality', 'battery life', 'charge', 'long time', 'battery last']

for i in range(len(df['SegmentedReview_processed_reviews'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['SegmentedReview_processed_reviews'][i].split(","), terms) == 1):
        df.at[i,'Device Quality'] = 1
    else:
        df.at[i,'Device Quality'] = 0
        
df.head()

0
250
500


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,SegmentedReview,SegmentedReview_processed_reviews,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal,Perceived Value,Perceived Usefulness,Device Quality
0,10608,10609,review,first time apple watch purchase. no complaints.,5,"""first time apple watch purchase .', 'no compl...","`` first time apple watch purchase . ' , 'no c...",0,0,0,0,0,0,0,0
1,11193,11194,review,i love the new features this apple watch has. ...,5,"""i love the new this apple watch ha .', 'easy ...","`` i love the new this apple watch ha . ' , 'e...",0,0,0,1,0,0,0,0
2,11475,11476,review,easy to put on and protects the apple watch gr...,5,"""easy to put on and protects the apple watch g...",`` easy to put on and protects the apple watch...,0,0,0,1,0,0,0,0
3,12084,12085,review,great apple watch with wonderful features. eas...,5,"""great apple watch with wonderful .', 'easy to...","`` great apple watch with wonderful . ' , 'eas...",0,0,0,1,0,0,0,0
4,13081,13082,review,love my apple watch series 6 44mm best watch s...,5,"""love my apple watch series 6 44mm best watch ...",`` love my apple watch series 6 44mm best watc...,0,0,0,0,0,0,0,0


In [129]:
df["Credibility Support"] = ""

terms = ['accuracy', 'accurate']

for i in range(len(df['SegmentedReview_processed_reviews'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['SegmentedReview_processed_reviews'][i].split(","), terms) == 1):
        df.at[i,'Credibility Support'] = 1
    else:
        df.at[i,'Credibility Support'] = 0
        
df.head()

0
250
500


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,SegmentedReview,SegmentedReview_processed_reviews,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal,Perceived Value,Perceived Usefulness,Device Quality,Credibility Support
0,10608,10609,review,first time apple watch purchase. no complaints.,5,"""first time apple watch purchase .', 'no compl...","`` first time apple watch purchase . ' , 'no c...",0,0,0,0,0,0,0,0,0
1,11193,11194,review,i love the new features this apple watch has. ...,5,"""i love the new this apple watch ha .', 'easy ...","`` i love the new this apple watch ha . ' , 'e...",0,0,0,1,0,0,0,0,0
2,11475,11476,review,easy to put on and protects the apple watch gr...,5,"""easy to put on and protects the apple watch g...",`` easy to put on and protects the apple watch...,0,0,0,1,0,0,0,0,0
3,12084,12085,review,great apple watch with wonderful features. eas...,5,"""great apple watch with wonderful .', 'easy to...","`` great apple watch with wonderful . ' , 'eas...",0,0,0,1,0,0,0,0,0
4,13081,13082,review,love my apple watch series 6 44mm best watch s...,5,"""love my apple watch series 6 44mm best watch ...",`` love my apple watch series 6 44mm best watc...,0,0,0,0,0,0,0,0,0


# Remove irrelevant reviews

In [130]:
df1 = df

In [131]:
############## Remove irrelevant words ####################
filterwords = ["band", "bands", "case", "cases", "cover", "protective", "protector", "shield", 
               "appltv", "gear vr", "tv", "shoe", "mountain", "mountains", "beach", "appltv", 
               "accessory", "accessories", "charger"]

# checking if string contains list element
for i in range(len(df1['Full Text'])):
    #print(i)
    if((any(ele in df1['Full Text'][i] for ele in filterwords) == True)):
        df.drop([i], axis=0, inplace=True)

In [132]:
df1.shape

(632, 16)

In [133]:
df1.to_csv('ProcessedReviewsv6.csv')