# Import necessary modules

In [1]:
import warnings
import pickle
import re 
import pyLDAvis
import pyLDAvis.gensim_models
import itertools 
import pandas as pd
import numpy as np
import nltk
import spacy
import spacy_legacy

warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt 
import gensim
import gensim.corpora as corpora
from gensim.models.coherencemodel import CoherenceModel
from sklearn.feature_extraction.text import TfidfVectorizer
#nltk.download()
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
from nltk.stem.wordnet import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
nltk.download('wordnet')
dictionary_words = set(nltk.corpus.words.words())
from pprint import pprint
from textblob import TextBlob, Word
#loading the english language small model of spacy
en = spacy.load('en_core_web_sm')
sw_spacy = en.Defaults.stop_words
from transformers import pipeline
sentiment_pipeline = pipeline("sentiment-analysis")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Wahbeh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Wahbeh\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)


#  Read the dataset

In [2]:
df = pd.read_csv ('Sample37kSegmented031223.csv')

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,Segments
0,18168,18169,review,Great quality authentic Apple Watch Band.\n\nT...,5,['Great quality authentic Apple Watch Band.\\n...
1,18377,18378,review,Love the Apple Watch...not as user-friendly as...,5,['Love the Apple Watch ... not as user-friendl...
2,18542,18543,review,Up graded my wife Apple Watch from a 2 to a 5!...,5,['Up graded my wife Apple Watch from a 2 to a ...
3,18610,18611,review,I bought this watch about a year ago and reall...,5,"['I bought this watch about a year ago', 'and ..."
4,18859,18860,review,I can say I'm very satisfied with my Fitbit ch...,5,"['I can say', ""I 'm very satisfied with my Fit..."


In [4]:
df.shape

(2000, 6)

# Change reviews into lowercase, and join reviews

In [5]:
df= df.apply(lambda x: x.astype(str).str.lower())
df.head()

Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,Segments
0,18168,18169,review,great quality authentic apple watch band.\n\nt...,5,['great quality authentic apple watch band.\\n...
1,18377,18378,review,love the apple watch...not as user-friendly as...,5,['love the apple watch ... not as user-friendl...
2,18542,18543,review,up graded my wife apple watch from a 2 to a 5!...,5,['up graded my wife apple watch from a 2 to a ...
3,18610,18611,review,i bought this watch about a year ago and reall...,5,"['i bought this watch about a year ago', 'and ..."
4,18859,18860,review,i can say i'm very satisfied with my fitbit ch...,5,"['i can say', ""i 'm very satisfied with my fit..."


# Preprocess reviews

In [6]:
def lemmatizeStr(sentence):
    # Tokenize: Split the sentence into words
    word_list = nltk.word_tokenize(sentence)
    # Lemmatize list of words and join
    lemmatized_output = ' '.join([lemmatizer.lemmatize(w) for w in word_list])
    return lemmatized_output

In [7]:
def cleanBrands(text, brands):
    for brand in sorted(brands, reverse = True):
        text = text.replace(brand, "")
    return text

In [8]:
def clean_text(docs):
    for i in range(len(docs)):
        #remove hashtags and mentions
        docs[i] = " ".join(filter(lambda x:x[0]!='#', docs[i].split())) #remove hashtags
        docs[i] = " ".join(filter(lambda x:x[0]!='@', docs[i].split())) #remove mentiones
        
        #Lemmatize text
        docs[i] = lemmatizeStr(docs[i])
    
        #Remove brands related keywords    
        brands = ["samsung galaxy watch 4 classic" , "samsung galaxy watch 5 pro" , "samsung galaxy watch active 2",        
                  "apple watch series 1" , "apple watch series 2" , "apple watch series 3" , "apple watch series 4",
                  "apple watch series 5" , "apple watch series 6" , "apple watch series 7" , "apple watch series 8" , 
                  "fossil hybrid smartwatch", "forerunner 245 music" , "forerunner 255 music" , "forerunner 645 music" , 
                     "forerunner 945 lte" , "forerunner 955 solar" , "apple watch se" , "apple watch ultra" , 
                     "fitbit alta hr" , "fitbit aria air" , "fitbit charge 2" , "fitbit charge 3" , "fitbit charge 4" , 
                     "fitbit charge 5" , "fitbit charge hr" , "fitbit flex 2" , "fitbit inspire 2" , 
                     "fitbit inspire 3" , "fitbit inspire hr" , "fitbit sense 2" , "fitbit versa 2" , 
                     "fitbit versa 3" , "fitbit versa 4" , "fitbit versa lite" , "fossil sport smartwatch" , 
                     "galaxy watch 2" , "galaxy watch 3" , "galaxy watch 4" , "galaxy watch 5" , 
                     "galaxy watch active" , "garmin captain marvel" , "garmin darth vader" , "garmin forerunner 10" , 
                     "garmin forerunner 101" , "garmin forerunner 110" , "garmin forerunner 15" , 
                     "garmin forerunner 201" , "garmin forerunner 205" , "garmin forerunner 210" , 
                     "garmin forerunner 220" , "garmin forerunner 225" , "garmin forerunner 230" , 
                     "garmin forerunner 235" , "garmin forerunner 245" , "garmin forerunner 25" , 
                     "garmin forerunner 255" , "garmin forerunner 30" , "garmin forerunner 301" , 
                     "garmin forerunner 305" , "garmin forerunner 310xt" , "garmin forerunner 35" , 
                     "garmin forerunner 405" , "garmin forerunner 405cx" , "garmin forerunner 410" , 
                     "garmin forerunner 45" , "garmin forerunner 45s" , "garmin forerunner 50" , 
                     "garmin forerunner 55" , "garmin forerunner 60" , "garmin forerunner 610" , 
                     "garmin forerunner 620" , "garmin forerunner 630" , "garmin forerunner 645" , 
                     "garmin forerunner 735xt" , "garmin forerunner 745" , "garmin forerunner 910xt" , 
                     "garmin forerunner 920xt" , "garmin forerunner 935" , "garmin forerunner 945" , 
                     "garmin forerunner 955" , "letsfit fitness racker" , "polar grit x" , "samsung galaxy fit" , 
                     "samsung galaxy gear" , "samsung galaxy watch " , "samsung gear 2" , "samsung gear live" , 
                     "samsung gear s" , "samsung gear s2" , "samsung gear s3" , "samsung gear sport" , 
                     "samsung watch 2" , "samsung watch 3" , "samsung watch 4" , "samsung watch 5" , 
                     "samsung watch active" , "striiv apex hr" , "striiv dash hr" , "withings move ecg" , 
                     "xiaomi mi band", "forerunner 10" , "forerunner 101" , "forerunner 110" , "forerunner 15" , "forerunner 201" , 
                     "forerunner 205" , "forerunner 210" , "forerunner 220" , "forerunner 225" , 
                     "forerunner 230" , "forerunner 235" , "forerunner 245" , "forerunner 25" , 
                     "forerunner 255" , "forerunner 30" , "forerunner 301" , "forerunner 305" , 
                     "forerunner 310xt" , "forerunner 35" , "forerunner 405" , "forerunner 405cx" , 
                     "forerunner 410" , "forerunner 45" , "forerunner 45s" , "forerunner 50" , 
                     "forerunner 55" , "forerunner 60" , "forerunner 610" , "forerunner 620" , 
                     "forerunner 630" , "forerunner 645" , "forerunner 735xt" , "forerunner 745" , 
                     "forerunner 910xt" , "forerunner 920xt" , " forerunner 935" , "forerunner 945" , 
                     "forerunner 955" , "inspire 2" , "sense 2" , "amazon halo" , "aria air" , "charge 2" , 
                     "charge 3" , "charge 4" , "charge 5" , "coros apex" , "coros pace" , "fitbit ace" , 
                     "fitbit alta" , "fitbit aria" , "fitbit blaze" , "fitbit charge" , "fitbit flex" , 
                     "fitbit flyer" , "fitbit force" , "fitbit ionic" , "fitbit ionic" , "fitbit luxe" , 
                     "fitbit one" , "fitbit sense" , "fitbit surge" , "fitbit ultra" , "fitbit verca" , 
                     "fitbit versa" , "fitbit zip" , "flex 2" , "fossil gen" , "galaxy watch" , "garmin approach" , 
                     "garmin fēnix" , "garmin instinct" , "garmin lily" , "garmin luxe" , "garmin swim" , 
                     "garmin venu" , "garmin vívoactive" , "garmin vívofit" , "garmin vívoki" , "garmin vívomove" , 
                     "garmin vívosmart" , "garmin vívosport" , "gear 2" , "gear live" , "gear s" , "gear s2" , 
                     "gear s3" , "gear sport" , "huawei band" , "huawei talkband" , "huawei watch" , 
                     "ihealth watch" , "inspire 3" , "misfit command" , "misfit path" , "misfit ray" , 
                     "misfit vap" , "moov hr" , "moov now" , "mykronoz zefit" , "mykronoz zefit" , "mykronoz zeneo" , 
                     "mykronoz zeround" , "mykronoz zesport" , "mykronoz zetime" , "mykronoz zetrack" , 
                     "mykronoz zewatch" , "polar a360" , "polar a370" , "polar h10" , "polar h9" , "polar ignite" , 
                     "polar m200" , "polar m430" , "polar oh1" , "polar titan" , "polar vantage" , "samsung gear" , 
                     "samsung watch " , "striiv fusion" , "suunto peak" , "timex ironman" , "versa 2" , "versa 3" , 
                     "versa 4" , "wahoo fitness" , "withings move" , "withings pulds" , "withings scanwatch" , 
                     "withings steel" , "wyze band", "forerunner", "fossil", "nüvi", "nuvi", "quest"]

        docs[i] = cleanBrands(docs[i], brands)
    return docs

In [10]:
reviews = np.array(df['Segments'])
SegmentedReview_processed_reviews = clean_text(reviews)
SegmentedReview_processed_reviews
df['Segments']=pd.Series(SegmentedReview_processed_reviews)

In [11]:
df.head()

Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,Segments
0,18168,18169,review,great quality authentic apple watch band.\n\nt...,5,[ 'great quality authentic apple watch band.\\...
1,18377,18378,review,love the apple watch...not as user-friendly as...,5,[ 'love the apple watch ... not a user-friendl...
2,18542,18543,review,up graded my wife apple watch from a 2 to a 5!...,5,[ 'up graded my wife apple watch from a 2 to a...
3,18610,18611,review,i bought this watch about a year ago and reall...,5,"[ ' i bought this watch about a year ago ' , '..."
4,18859,18860,review,i can say i'm very satisfied with my fitbit ch...,5,"[ ' i can say ' , `` i 'm very satisfied with ..."


In [12]:
df.shape

(2000, 6)

# Functions to check sentiment, quality terms, and context terms 

In [13]:
############## Check Sentiment ####################

def checkSent(string):
    #print(string)
    return sentiment_pipeline(string)[0].get('label') #returns POSITIVE or NEGATIVE as a string

#data = "love my fitbit charge 4"
data = "so great"
#data = "I love you"
checkSent(data)

'POSITIVE'

In [14]:
############## Check Terms ####################

terms = ['easy to use', 'simple', 'straightforward', 'user-friendly', 'intuitive']

def checkterms(string, termsvalues):
    for term in termsvalues:
        pattern = r"\b" + re.escape(term) + r"\b"  # match whole words
        if re.search(pattern, string, re.IGNORECASE):
            return True
    return False
 
checkterms('love my fitbit simplecharge 4 ', terms)   # return true if a quality term was found, otherwise, false

False

In [15]:
############## Check Context ####################

def checkcontext(string, contextvalues):
    checkcontext = False
    for term in contextvalues:
        pattern = r"\b" + re.escape(term) + r"\b"  # match whole words
        if re.search(pattern, string, re.IGNORECASE):
            return True
    return False
 
terms = ['health activities','target']   
checkterms('health activities I use wearables to achieve  goals ', terms)
#return true iof a context term was found, otherwise, false.

True

# Define fucntions to encode reviews


In [16]:
############## Encode Reviews v1 without Context terms  'encodereviews' ####################

In [17]:
#myArray = [ 'love my fitbit charge 4 . ' , 'so easy to use . ' ]

#ease_of_use_terms = ['simple', 'easy to use',  'straightforward', 'user-friendly', 'intuitive']

# ['this replacement band for the charge 3 seemed great at the start', ' but the two loops broke within 6 months of use', ' in this case', " the cheap price wasn't worth it"]
def encodereviews(reviewarray, qualityterms):
    encode = 0
    for i in reviewarray:
        if((checkSent(i) == 'POSITIVE') and # no negatives, not negative sentiment
           (checkterms(i, qualityterms) == True)):                                   # has no irrelevant terms
            encode = 1
        else:
            continue
    return encode   

#encodereviews(myArray, ease_of_use_terms)

In [18]:
myArray = [ 'love my fitbit charge 4 . ' , 'so great . ' ]

ease_of_use_terms = ['simple', 'easy to use',  'straightforward', 'user-friendly', 'intuitive']

# ['this replacement band for the charge 3 seemed great at the start', ' but the two loops broke within 6 months of use', ' in this case', " the cheap price wasn't worth it"]
def encodereviews(reviewarray, qualityterms):
    encode = 0
    for i in reviewarray:
        if((checkSent(i) == 'POSITIVE') and # no negatives, not negative sentiment
           (checkterms(i, qualityterms) == True)):                                   # has no irrelevant terms
            encode = 1
        else:
            continue
    return encode   

#encodereviews(myArray, ease_of_use_terms)

In [19]:
############## Encode Reviews v2 including Context terms  'encodereviewswc' ####################


In [20]:
def encodereviewswc(reviewarray, qualityterms, contextterms):
    encode = 0
    pattern = r"\b" + re.escape('useful') + r"\b"  # match whole words
    for i in reviewarray:
        if((checkSent(i) == 'POSITIVE') and   # no negatives, not negative sentiment
           ((checkterms(i, qualityterms) == True) or (re.search(pattern, i, re.IGNORECASE)!= None)) and                           # has a quality dimension term
           (checkcontext(i, contextterms) == True)): # has no irrelevant terms
            encode = 1
        else:
            continue
    return encode   

In [21]:
def check_useful(sentence):
    #return " useful " in " " + sentence + " "
    return " useful " in " " + sentence + " " or " helpful " in " " + sentence + " "

def encodereviewswc(reviewarray, qualityterms, contextterms):
    encode = 0
    pattern = r"\b" + re.escape('useful') + r"\b"  # match whole words
    for i in reviewarray:
        if (
            checkSent(i) == 'POSITIVE' and
            (checkterms(i, qualityterms) or check_useful(i)) and                          # has a quality dimension term
            (checkcontext(i, contextterms) or check_useful(i))
        ): # has no irrelevant terms
            encode = 1
        else:
            continue
    return encode  

# Encode Reviews

In [23]:
df["Hedonic Motivation"] = ""

terms = ['enjoyable', 'enjoyment', 'enjoy', 'enjoys pleasant', 'flow experience', 'pleasure', 'fun', 'interesting', 'satisfied', 'satisfaction', 'feel good', 'happiness', 'happy', 'happier', 'entertainment', 'entertain', 'exciting', 'playfulness', 'playful', 'feeling of accomplishment']

for i in range(len(df['Segments'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['Segments'][i].split(","), terms) == 1):
        df.at[i,'Hedonic Motivation'] = 1
    else:
        df.at[i,'Hedonic Motivation'] = 0
df.head()

0
250
500
750
1000
1250
1500
1750


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,Segments,Hedonic Motivation
0,18168,18169,review,great quality authentic apple watch band.\n\nt...,5,[ 'great quality authentic apple watch band.\\...,0
1,18377,18378,review,love the apple watch...not as user-friendly as...,5,[ 'love the apple watch ... not a user-friendl...,0
2,18542,18543,review,up graded my wife apple watch from a 2 to a 5!...,5,[ 'up graded my wife apple watch from a 2 to a...,0
3,18610,18611,review,i bought this watch about a year ago and reall...,5,"[ ' i bought this watch about a year ago ' , '...",0
4,18859,18860,review,i can say i'm very satisfied with my fitbit ch...,5,"[ ' i can say ' , `` i 'm very satisfied with ...",1


In [24]:
df["Connectivity Support"] = ""

terms = ['pair with', 'pairs with', 'pairs', 'pairing', 'sync', 'syncs', 'syncing', 'synchronize', 'synchronization', 'connect']

for i in range(len(df['Segments'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['Segments'][i].split(","), terms) == 1):
        df.at[i,'Connectivity Support'] = 1
    else:
        df.at[i,'Connectivity Support'] = 0
    
df.head()

0
250
500
750
1000
1250
1500
1750


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,Segments,Hedonic Motivation,Connectivity Support
0,18168,18169,review,great quality authentic apple watch band.\n\nt...,5,[ 'great quality authentic apple watch band.\\...,0,0
1,18377,18378,review,love the apple watch...not as user-friendly as...,5,[ 'love the apple watch ... not a user-friendl...,0,0
2,18542,18543,review,up graded my wife apple watch from a 2 to a 5!...,5,[ 'up graded my wife apple watch from a 2 to a...,0,0
3,18610,18611,review,i bought this watch about a year ago and reall...,5,"[ ' i bought this watch about a year ago ' , '...",0,0
4,18859,18860,review,i can say i'm very satisfied with my fitbit ch...,5,"[ ' i can say ' , `` i 'm very satisfied with ...",1,0


In [25]:
df["Customizability"] = ""

terms = ['customize', 'custom', 'personalize', 'personalizes', 'personalization']

for i in range(len(df['Segments'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['Segments'][i].split(","), terms) == 1):
        df.at[i,'Customizability'] = 1
    else:
        df.at[i,'Customizability'] = 0
        
df.head()

0
250
500
750
1000
1250
1500
1750


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,Segments,Hedonic Motivation,Connectivity Support,Customizability
0,18168,18169,review,great quality authentic apple watch band.\n\nt...,5,[ 'great quality authentic apple watch band.\\...,0,0,0
1,18377,18378,review,love the apple watch...not as user-friendly as...,5,[ 'love the apple watch ... not a user-friendl...,0,0,0
2,18542,18543,review,up graded my wife apple watch from a 2 to a 5!...,5,[ 'up graded my wife apple watch from a 2 to a...,0,0,0
3,18610,18611,review,i bought this watch about a year ago and reall...,5,"[ ' i bought this watch about a year ago ' , '...",0,0,0
4,18859,18860,review,i can say i'm very satisfied with my fitbit ch...,5,"[ ' i can say ' , `` i 'm very satisfied with ...",1,0,0


In [26]:
df["Perceived Ease of Use"] = ""

terms = ['easy', 'clear', 'understandable', 'easy to use', 'mental effort', 'trouble free', 'simple', 
         'controllable', 'easy to learn', 'skillful', 'user friendly']

for i in range(len(df['Segments'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['Segments'][i].split(","), terms) == 1):
        df.at[i,'Perceived Ease of Use'] = 1
    else:
        df.at[i,'Perceived Ease of Use'] = 0
        
df.head()

0
250
500
750
1000
1250
1500
1750


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,Segments,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use
0,18168,18169,review,great quality authentic apple watch band.\n\nt...,5,[ 'great quality authentic apple watch band.\\...,0,0,0,0
1,18377,18378,review,love the apple watch...not as user-friendly as...,5,[ 'love the apple watch ... not a user-friendl...,0,0,0,0
2,18542,18543,review,up graded my wife apple watch from a 2 to a 5!...,5,[ 'up graded my wife apple watch from a 2 to a...,0,0,0,0
3,18610,18611,review,i bought this watch about a year ago and reall...,5,"[ ' i bought this watch about a year ago ' , '...",0,0,0,0
4,18859,18860,review,i can say i'm very satisfied with my fitbit ch...,5,"[ ' i can say ' , `` i 'm very satisfied with ...",1,0,0,0


In [27]:
df["Appeal"] = ""

terms = ['cool', 'appearance', 'aesthetic', 'aesthetics', 'style', 'stylish', 'fashion', 'accessory', 'accessories', 'pleasing',
         'color', 'design', 'texture', 'uniqueness', 'unique','size', 'look', 'attractive', 'appeal', 'appealing']

for i in range(len(df['Segments'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['Segments'][i].split(","), terms) == 1):
        df.at[i,'Appeal'] = 1
    else:
        df.at[i,'Appeal'] = 0
        
df.head()

0
250
500
750
1000
1250
1500
1750


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,Segments,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal
0,18168,18169,review,great quality authentic apple watch band.\n\nt...,5,[ 'great quality authentic apple watch band.\\...,0,0,0,0,0
1,18377,18378,review,love the apple watch...not as user-friendly as...,5,[ 'love the apple watch ... not a user-friendl...,0,0,0,0,0
2,18542,18543,review,up graded my wife apple watch from a 2 to a 5!...,5,[ 'up graded my wife apple watch from a 2 to a...,0,0,0,0,0
3,18610,18611,review,i bought this watch about a year ago and reall...,5,"[ ' i bought this watch about a year ago ' , '...",0,0,0,0,0
4,18859,18860,review,i can say i'm very satisfied with my fitbit ch...,5,"[ ' i can say ' , `` i 'm very satisfied with ...",1,0,0,0,0


In [28]:
df["Perceived Value"] = ""

terms = ['price', 'value', 'cost', 'monetary', 'money']

for i in range(len(df['Segments'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['Segments'][i].split(","), terms) == 1):
        df.at[i,'Perceived Value'] = 1
    else:
        df.at[i,'Perceived Value'] = 0

df.head()

0
250
500
750
1000
1250
1500
1750


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,Segments,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal,Perceived Value
0,18168,18169,review,great quality authentic apple watch band.\n\nt...,5,[ 'great quality authentic apple watch band.\\...,0,0,0,0,0,0
1,18377,18378,review,love the apple watch...not as user-friendly as...,5,[ 'love the apple watch ... not a user-friendl...,0,0,0,0,0,0
2,18542,18543,review,up graded my wife apple watch from a 2 to a 5!...,5,[ 'up graded my wife apple watch from a 2 to a...,0,0,0,0,0,0
3,18610,18611,review,i bought this watch about a year ago and reall...,5,"[ ' i bought this watch about a year ago ' , '...",0,0,0,0,0,0
4,18859,18860,review,i can say i'm very satisfied with my fitbit ch...,5,"[ ' i can say ' , `` i 'm very satisfied with ...",1,0,0,0,0,0


In [29]:
# sleep tracking	648       # track of	488    # does everything	439   # rate monitor	382
# ability to	370    # working out	359     # blood pressure	326     # keep track	284
# fitness tracking	269    It tracks	155    everything I need	202    #keep track of	267
# my heart rate	405      # to keep track	135    # to track my	411     # use it for	181
# use it to	134  # was able to	317 # and heart rate	69 # everything I need it	79 #has all the features	56
# heart rate	57   #it to track my	64  #keep track of my	134  # steps	71   #helps me keep track of	25   
#I use it every day	25    #I use it to track	24   track my heart rate	24    track my steps and	33

df["Perceived Usefulness"] = ""

terms = ['simplify', 'control over', 'reduce', 'accomplish more', 'save time', 'work more quickly', 'accomplish',
         'accomplished', 'accomplishes', 'accomplishing', 'achieve', 'achieved', 'achieves', 'achieving',
         'advantageous', 'beneficial', 'check', 'checked', 'checking', 'check', 'complete', 'completes', 'completing',
         'completing', 'convenient', 'conveniently', 'develop', 'developed', 'developing', 'develops', 'effective',
         'effectively', 'effectiveness', 'enable', 'enables', 'enhance', 'enhanced', 'enhances', 'enhancing', 'handle',
         'handled', 'handle', 'help', 'helpful', 'help', 'improve', 'improved', 'improves', 'improving', 'increase',
         'increased', 'increase', 'increasing rejuvenated', 'maintain', 'maintained', 'maintaining', 'manage', 'managed',
         'manages', 'managing', 'meet', 'meeting', 'monitor', 'monitored', 'monitoring', 'monitor', 'obtain', 'obtained',
         'obtaining', 'obtains', 'perform', 'performed', 'performing', 'performs', 'provide', 'provided', 'provides',
         'providing', 'reduce', 'reduced', 'reduces', 'reducing', 'save', 'saved', 'save', 'saving',
         'track', 'tracks', 'tracking']


context = ['achievement', 'active minutes', 'activities', 'activities', 'activity', 'activity', 'calories', 'concern',
           'concerns', 'condition', 'conditions', 'convenience', 'distance', 'efficiency', 'effort', 'everything',
           'exercise', 'exercises', 'feature', 'features', 'fitness', 'floors', 'goal', 'goals', 'habit', 'habits', 'health',
           'heart rate', 'information', 'information', 'job', 'jobs', 'life', 'need', 'needs', 'performance', 'productivity',
           'quality', 'safety', 'service', 'services', 'sleep', 'sleep quality', 'sleep time', 'status', 'steps', 'stress',
           'task', 'tasks', 'things', 'time', 'work']



for i in range(len(df['Segments'])):
    if(i%250 == 0):
        print(i)
    if(encodereviewswc(df['Segments'][i].split(","), terms, context) == 1):
        df.at[i,'Perceived Usefulness'] = 1
    else:
        df.at[i,'Perceived Usefulness'] = 0
        
df.head()

0
250
500
750
1000
1250
1500
1750


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,Segments,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal,Perceived Value,Perceived Usefulness
0,18168,18169,review,great quality authentic apple watch band.\n\nt...,5,[ 'great quality authentic apple watch band.\\...,0,0,0,0,0,0,0
1,18377,18378,review,love the apple watch...not as user-friendly as...,5,[ 'love the apple watch ... not a user-friendl...,0,0,0,0,0,0,0
2,18542,18543,review,up graded my wife apple watch from a 2 to a 5!...,5,[ 'up graded my wife apple watch from a 2 to a...,0,0,0,0,0,0,0
3,18610,18611,review,i bought this watch about a year ago and reall...,5,"[ ' i bought this watch about a year ago ' , '...",0,0,0,0,0,0,0
4,18859,18860,review,i can say i'm very satisfied with my fitbit ch...,5,"[ ' i can say ' , `` i 'm very satisfied with ...",1,0,0,0,0,0,0


In [30]:
df["Device Quality"] = ""

terms = ['quality', 'system quality', 'service quality', 'battery life', 'charge', 'long time', 'battery last']

for i in range(len(df['Segments'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['Segments'][i].split(","), terms) == 1):
        df.at[i,'Device Quality'] = 1
    else:
        df.at[i,'Device Quality'] = 0
        
df.head()

0
250
500
750
1000
1250
1500
1750


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,Segments,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal,Perceived Value,Perceived Usefulness,Device Quality
0,18168,18169,review,great quality authentic apple watch band.\n\nt...,5,[ 'great quality authentic apple watch band.\\...,0,0,0,0,0,0,0,1
1,18377,18378,review,love the apple watch...not as user-friendly as...,5,[ 'love the apple watch ... not a user-friendl...,0,0,0,0,0,0,0,0
2,18542,18543,review,up graded my wife apple watch from a 2 to a 5!...,5,[ 'up graded my wife apple watch from a 2 to a...,0,0,0,0,0,0,0,1
3,18610,18611,review,i bought this watch about a year ago and reall...,5,"[ ' i bought this watch about a year ago ' , '...",0,0,0,0,0,0,0,0
4,18859,18860,review,i can say i'm very satisfied with my fitbit ch...,5,"[ ' i can say ' , `` i 'm very satisfied with ...",1,0,0,0,0,0,0,0


In [31]:
df["Credibility Support"] = ""

terms = ['accuracy', 'accurate', 'trustworthy', 'consistent', 'reliable', 'reliability', 'error-free', 'error free']

for i in range(len(df['Segments'])):
    if(i%250 == 0):
        print(i)
    if(encodereviews(df['Segments'][i].split(","), terms) == 1):
        df.at[i,'Credibility Support'] = 1
    else:
        df.at[i,'Credibility Support'] = 0
        
df.head()

0
250
500
750
1000
1250
1500
1750


Unnamed: 0.1,Unnamed: 0,ReviewID,Page Type,Full Text,Rating,Segments,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal,Perceived Value,Perceived Usefulness,Device Quality,Credibility Support
0,18168,18169,review,great quality authentic apple watch band.\n\nt...,5,[ 'great quality authentic apple watch band.\\...,0,0,0,0,0,0,0,1,0
1,18377,18378,review,love the apple watch...not as user-friendly as...,5,[ 'love the apple watch ... not a user-friendl...,0,0,0,0,0,0,0,0,0
2,18542,18543,review,up graded my wife apple watch from a 2 to a 5!...,5,[ 'up graded my wife apple watch from a 2 to a...,0,0,0,0,0,0,0,1,0
3,18610,18611,review,i bought this watch about a year ago and reall...,5,"[ ' i bought this watch about a year ago ' , '...",0,0,0,0,0,0,0,0,0
4,18859,18860,review,i can say i'm very satisfied with my fitbit ch...,5,"[ ' i can say ' , `` i 'm very satisfied with ...",1,0,0,0,0,0,0,0,0


# Remove irrelevant reviews

In [32]:
df.shape

(2000, 15)

In [50]:
df.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,ReviewID,Page Type,Full Text,Rating,Segments,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal,Perceived Value,Perceived Usefulness,Device Quality,Credibility Support
0,0,18168,18169,review,great quality authentic apple watch band.\n\nt...,5,[ 'great quality authentic apple watch band.\\...,0,0,0,0,0,0,0,1,0
1,1,18377,18378,review,love the apple watch...not as user-friendly as...,5,[ 'love the apple watch ... not a user-friendl...,0,0,0,0,0,0,0,0,0
2,2,18542,18543,review,up graded my wife apple watch from a 2 to a 5!...,5,[ 'up graded my wife apple watch from a 2 to a...,0,0,0,0,0,0,0,1,0
3,3,18610,18611,review,i bought this watch about a year ago and reall...,5,"[ ' i bought this watch about a year ago ' , '...",0,0,0,0,0,0,0,0,0
4,4,18859,18860,review,i can say i'm very satisfied with my fitbit ch...,5,"[ ' i can say ' , `` i 'm very satisfied with ...",1,0,0,0,0,0,0,0,0


In [51]:
# Import pandas library
import pandas as pd

# Import pandas library
import pandas as pd

# Define list of filter words
filterwords = ["appletv", "accessory", "accessories", "beach", "band", "bands", "bumper", "case", "cases", "cover", 
               "charger", "gear vr", "mountain", "mountains", "protect", "protects", "protective", "protector", 
               "protectors", "shield", "shields", "shoe", "strap", "straps", "sticker", "stickers", "tv",  
               "wall plug", "guard", "guards" ]

# Create list of indices to drop
indices_to_drop = []
for i in range(len(df['Full Text'])):
    match_found = False
    for filterword in filterwords:
        if " " + filterword + " " in " " + df['Full Text'][i] + " ":
            match_found = True
            break
    if match_found:
        indices_to_drop.append(i)

# Drop rows by indices
df.drop(indices_to_drop, axis=0, inplace=True)

In [55]:
df.shape

(1633, 16)

In [56]:
df.to_csv('ProcessedReviewsv8.csv')