# Import necessary modules

In [34]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 

import gensim
import gensim.corpora as corpora
from gensim.models.coherencemodel import CoherenceModel
from sklearn.feature_extraction.text import TfidfVectorizer

import nltk
#nltk.download()
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
from nltk.stem.wordnet import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

nltk.download('wordnet')
dictionary_words = set(nltk.corpus.words.words())

from pprint import pprint

from textblob import TextBlob, Word

import spacy
import spacy_legacy
#loading the english language small model of spacy
en = spacy.load('en_core_web_sm')
sw_spacy = en.Defaults.stop_words

from transformers import pipeline
sentiment_pipeline = pipeline("sentiment-analysis")

import pickle
import re 
import pyLDAvis
import pyLDAvis.gensim_models

import itertools 
#custom list of stop words
stop_words_english = []
with open('stop_words_english.txt', encoding="utf8") as my_file:
    for line in my_file:
        stop_words_english.append(line.replace("\n", ""))

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Wahbeh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Wahbeh\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Some layers from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english were not used when initializing TFDistilBertForSequenceClassification: ['dropout_19']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a Ber

#  Read the dataset

In [22]:
df = pd.read_csv ('AllReviews0120-1122.csv')

In [23]:
df.head()

Unnamed: 0,ReviewID,Page Type,Full Text,Rating
0,1,review,"When they work they are great, however, in my ...",1
1,2,review,I have a garmin Vivosmart 4 and this fitbit ch...,1
2,3,review,Absolute Trash! Seriously felt like I bought t...,1
3,4,review,I got this to pair with my apple watch and cha...,1
4,5,review,I am very disappointed with the FitBit Charge ...,1


In [24]:
df.shape

(35895, 4)

# Remove irrelevant reviews

In [25]:
############## Remove irrelevant words ####################
filterwords = ["band", "bands", "case", "cases", "cover", "protective", "protector", "shield", 
               "appltv", "gear vr", "tv", "shoe", "mountain", "mountains", "beach", "appltv", 
               "accessory", "accessories", "charger"]

# checking if string contains list element
for i in range(len(df['Full Text'])):
    #print(i)
    if((any(ele in df['Full Text'][i] for ele in filterwords) == True)):
        df.drop([i], axis=0, inplace=True)

In [26]:
df.shape

(27354, 4)

# Change reviews into lowercase, and join reviews

In [27]:
df= df.apply(lambda x: x.astype(str).str.lower())

In [28]:
reviews = np.array(df['Full Text'])
reviews = ' '.join(reviews)

# Preprocess reviews

In [35]:
def lemmatizeStr(sentence):
    # Tokenize: Split the sentence into words
    word_list = nltk.word_tokenize(sentence)
    # Lemmatize list of words and join
    lemmatized_output = ' '.join([lemmatizer.lemmatize(w) for w in word_list])
    return lemmatized_output

In [36]:
def clean_text(docs):
    for i in range(len(docs)):
        #remove hashtags and mentions
        docs[i] = " ".join(filter(lambda x:x[0]!='#', docs[i].split())) #remove hashtags
        docs[i] = " ".join(filter(lambda x:x[0]!='@', docs[i].split())) #remove mentiones
        
        #Lemmatize text
        docs[i] = lemmatizeStr(docs[i])
       
    
    
        #Remove brands related keywords    
        brands5kw = ["Samsung Galaxy Watch 4 Classic" , "Samsung Galaxy Watch 5 Pro" , "Samsung Galaxy Watch Active 2"]

        resultwords  = [word for word in docs[i].split() if word.lower() not in brands5kw]
        docs[i] = ' '.join(resultwords)
        
        
        brands4kw = ["Apple watch series 1" , "Apple watch series 2" , "Apple watch series 3" , "Apple watch series 4",
                     "Apple watch series 5" , "Apple watch series 6" , "Apple watch series 7" , "Apple watch series 8" , 
                     "fossil hybrid smartwatch"]

        resultwords  = [word for word in docs[i].split() if word.lower() not in brands4kw]
        docs[i] = ' '.join(resultwords)

        brands3kw = ["Forerunner 245 Music" , "Forerunner 255 Music" , "Forerunner 645 Music" , 
                     "Forerunner 945 LTE" , "Forerunner 955 Solar" , "Apple watch SE" , "Apple watch Ultra" , 
                     "Fitbit Alta HR" , "Fitbit Aria Air" , "Fitbit Charge 2" , "Fitbit Charge 3" , "Fitbit Charge 4" , 
                     "Fitbit Charge 5" , "Fitbit Charge HR" , "Fitbit Flex 2" , "Fitbit Inspire 2" , 
                     "Fitbit Inspire 3" , "Fitbit Inspire HR" , "Fitbit Sense 2" , "Fitbit Versa 2" , 
                     "Fitbit Versa 3" , "Fitbit Versa 4" , "fitbit versa lite" , "fossil sport smartwatch" , 
                     "Galaxy Watch 2" , "Galaxy Watch 3" , "Galaxy Watch 4" , "Galaxy Watch 5" , 
                     "Galaxy Watch Active" , "garmin captain marvel" , "garmin darth vader" , "garmin Forerunner 10" , 
                     "garmin Forerunner 101" , "garmin Forerunner 110" , "garmin Forerunner 15" , 
                     "garmin Forerunner 201" , "garmin Forerunner 205" , "garmin Forerunner 210" , 
                     "garmin Forerunner 220" , "garmin Forerunner 225" , "garmin Forerunner 230" , 
                     "garmin Forerunner 235" , "garmin Forerunner 245" , "garmin Forerunner 25" , 
                     "garmin Forerunner 255" , "garmin Forerunner 30" , "garmin Forerunner 301" , 
                     "garmin Forerunner 305" , "garmin Forerunner 310XT" , "garmin Forerunner 35" , 
                     "garmin Forerunner 405" , "garmin Forerunner 405CX" , "garmin Forerunner 410" , 
                     "garmin Forerunner 45" , "garmin Forerunner 45S" , "garmin Forerunner 50" , 
                     "garmin Forerunner 55" , "garmin Forerunner 60" , "garmin Forerunner 610" , 
                     "garmin Forerunner 620" , "garmin Forerunner 630" , "garmin Forerunner 645" , 
                     "garmin Forerunner 735XT" , "garmin Forerunner 745" , "garmin Forerunner 910XT" , 
                     "garmin Forerunner 920XT" , "garmin Forerunner 935" , "garmin Forerunner 945" , 
                     "garmin Forerunner 955" , "letsfit fitness racker" , "polar grit x" , "samsung galaxy fit" , 
                     "Samsung Galaxy Gear" , "Samsung Galaxy Watch " , "Samsung Gear 2" , "Samsung Gear Live" , 
                     "Samsung Gear S" , "Samsung Gear S2" , "Samsung Gear S3" , "Samsung Gear Sport" , 
                     "Samsung Watch 2" , "Samsung Watch 3" , "Samsung Watch 4" , "Samsung Watch 5" , 
                     "Samsung Watch Active" , "striiv apex hr" , "striiv dash hr" , "withings move ecg" , 
                     "xiaomi mi band"]

        resultwords  = [word for word in docs[i].split() if word.lower() not in brands3kw]
        docs[i] = ' '.join(resultwords)
        
        brands2kw = ["Forerunner 10" , "Forerunner 101" , "Forerunner 110" , "Forerunner 15" , "Forerunner 201" , 
                     "Forerunner 205" , "Forerunner 210" , "Forerunner 220" , "Forerunner 225" , 
                     "Forerunner 230" , "Forerunner 235" , "Forerunner 245" , "Forerunner 25" , 
                     "Forerunner 255" , "Forerunner 30" , "Forerunner 301" , "Forerunner 305" , 
                     "Forerunner 310XT" , "Forerunner 35" , "Forerunner 405" , "Forerunner 405CX" , 
                     "Forerunner 410" , "Forerunner 45" , "Forerunner 45S" , "Forerunner 50" , 
                     "Forerunner 55" , "Forerunner 60" , "Forerunner 610" , "Forerunner 620" , 
                     "Forerunner 630" , "Forerunner 645" , "Forerunner 735XT" , "Forerunner 745" , 
                     "Forerunner 910XT" , "Forerunner 920XT" , " Forerunner 935" , "Forerunner 945" , 
                     "Forerunner 955" , "Inspire 2" , "Sense 2" , "amazon halo" , "Aria Air" , "Charge 2" , 
                     "Charge 3" , "Charge 4" , "Charge 5" , "coros apex" , "coros pace" , "Fitbit Ace" , 
                     "Fitbit Alta" , "Fitbit Aria" , "Fitbit Blaze" , "Fitbit Charge" , "Fitbit Flex" , 
                     "Fitbit Flyer" , "Fitbit Force" , "Fitbit Ionic" , "fitbit ionic" , "Fitbit Luxe" , 
                     "Fitbit One" , "Fitbit Sense" , "Fitbit Surge" , "Fitbit Ultra" , "fitbit verca" , 
                     "Fitbit Versa" , "Fitbit Zip" , "Flex 2" , "fossil gen" , "Galaxy Watch" , "garmin approach" , 
                     "garmin fēnix" , "garmin instinct" , "garmin lily" , "garmin luxe" , "garmin swim" , 
                     "garmin venu" , "garmin vívoactive" , "garmin vívofit" , "garmin vívoki" , "garmin vívomove" , 
                     "garmin vívosmart" , "garmin vívosport" , "Gear 2" , "Gear Live" , "Gear S" , "Gear S2" , 
                     "Gear S3" , "Gear Sport" , "huawei band" , "huawei talkband" , "huawei watch" , 
                     "ihealth watch" , "Inspire 3" , "misfit command" , "misfit path" , "misfit ray" , 
                     "misfit vap" , "moov hr" , "moov now" , "mykronoz zefit" , "mykronoz zefit" , "mykronoz zeneo" , 
                     "mykronoz zeround" , "mykronoz zesport" , "mykronoz zetime" , "mykronoz zetrack" , 
                     "mykronoz zewatch" , "polar a360" , "polar a370" , "polar h10" , "polar h9" , "polar ignite" , 
                     "polar m200" , "polar m430" , "polar oh1" , "polar titan" , "polar vantage" , "Samsung Gear" , 
                     "Samsung Watch " , "striiv fusion" , "suunto peak" , "timex ironman" , "Versa 2" , "Versa 3" , 
                     "Versa 4" , "wahoo fitness" , "withings move" , "withings pulds" , "withings scanwatch" , 
                     "withings steel" , "wyze band"]

        resultwords  = [word for word in docs[i].split() if word.lower() not in brands2kw]
        docs[i] = ' '.join(resultwords)        

        brands1kw = ["Forerunner" , "Forerunner" , "fossil" , "Nüvi " , "Quest"]

        resultwords  = [word for word in docs[i].split() if word.lower() not in brands1kw]
        docs[i] = ' '.join(resultwords)
        
        #Remove feature-related words
        features = ["ability", "quality", "feature", "aspect", "abilities", "qualities", "features", "aspects"]
        resultwords  = [word for word in docs[i].split() if word.lower() not in features]
        docs[i] = ' '.join(resultwords)

    return docs

In [37]:
reviews = np.array(df['Full Text'])
processed_reviews = clean_text(reviews)

# Define a function to split each review using punctuations and conjunctions

In [38]:
def replace_punctuations(string):
    punctuations = ['.', ',', '&', ';', '!', ':']
    for punctuation in punctuations:
        string = string.replace(punctuation, '-')
    return string

In [39]:
for i in range(len(processed_reviews)):
    processed_reviews[i] = replace_punctuations(processed_reviews[i]) 

In [40]:
def replace_keywords(string, keywords):
    words = string.split()
    for i, word in enumerate(words):
        if word in keywords:
            words[i] = '-'
    return ' '.join(words)

In [41]:
keywords = ['for', 'and', 'but', 'or', 'so', 'after', 'as', 'because', 'before', 'even', 'if', 'now', 'once', 'since', 'than', 'that', 'thought', 'when', 'where', 'which', 'while', 'who', 'what']
for i in range(len(processed_reviews)):
    processed_reviews[i] = replace_keywords(processed_reviews[i], keywords) 

In [42]:
def partition_string(string):
    return string.split("-")

In [43]:
for i in range(len(processed_reviews)):
    processed_reviews[i] = partition_string(processed_reviews[i]) 

In [44]:
df1 = pd.DataFrame(processed_reviews)
df1.columns = ['Full Text']
df1.head()

Unnamed: 0,Full Text
0,"[, they work they are great , however , in ..."
1,"[i have a garmin vivosmart 4 , this fitbit ch..."
2,"[absolute trash , seriously felt like i bough..."
3,"[i got this to pair with my apple watch , cha..."
4,[i am very disappointed with the fitbit charge...


In [45]:
df1 = df1.join(df['Rating'])

In [46]:
df1

Unnamed: 0,Full Text,Rating
0,"[, they work they are great , however , in ...",1
1,"[i have a garmin vivosmart 4 , this fitbit ch...",1
2,"[absolute trash , seriously felt like i bough...",1
3,"[i got this to pair with my apple watch , cha...",1
4,[i am very disappointed with the fitbit charge...,1
...,...,...
27349,"[work great , far , i wa confused how to use...",5
27350,"[i loved this , better , my apple watch , i...",5
27351,[apple watch series 3 ( gps ) 38mm silver alum...,5
27352,"[i love my apple watch , help me keep track o...",5


# Functions to check sentiment, quality terms, negators, and context terms 

In [48]:
# # # test string matching using regular expressions
# import re
# check = 0
# check = re.search(r"\b"+'dish'+r"\b","this is broken dishwasher")
# if check == None:
#     print('No')
# else:
#     print('Yes')



In [49]:
############## Check Sentiment ####################

def checkSent(string):
    return sentiment_pipeline(string)[0].get('label') #returns POSITIVE or NEGATIVE as a string

data = "I love you"
checkSent(data)

'POSITIVE'

In [50]:
############## Check Terms ####################

terms = ['easy to use', 'simple', 'straightforward', 'user-friendly', 'intuitive']

def checkterms(string, termsvalues):
    checkterm = False
    for i in range(len(termsvalues)): #now can do exact matching
        if ((re.search(r"\b"+termsvalues[i]+r"\b",string) != None)): 
            checkterm = True
    return checkterm
 
#checkterms('This is hello', terms)   # return true if a quality term was found, otherwise, false

In [51]:
############## Check Negators ####################
def checknegators(string):
    negatorvalues = ["ain't", "aren't", "cannot", "cant", "can't", "couldn't", "daren't", "didn't", 
                     "don't", "hasn't","haven't", "isn't", "mayn't","mightn't","mustn't","needn't",
                     "not","oughtn't","shan't","shouldn't","weren't", "won't", "wouldn't"]
    checknegator = False
    for i in range(len(negatorvalues)):
        if ((re.search(r"\b"+negatorvalues[i]+r"\b",string) != None)):
            checknegator = True
    return checknegator
 
#checknegators('This is not hello simple') # return true if a negator term was found, otherwise, false.

In [52]:
############## Check Context ####################

def checkcontext(string, contextvalues):
    checkcontext = False
    for i in range(len(contextvalues)):
        if ((re.search(r"\b"+contextvalues[i]+r"\b",string) != None)):
            checkcontext = True
    return checkcontext
 
#terms = ['health activities','target']   
#checkterms('I use wearables to achieve target goals', terms)
#return true iof a context term was found, otherwise, false.

# Define fucntions to encode reviews


In [53]:
############## Encode Reviews v1 without Context terms  'encodereviews' ####################

In [54]:
#myArray = ['easy to use watch', 'health ', 'attention']

#ease_of_use_terms = ['simple', 'easy to use',  'straightforward', 'user-friendly', 'intuitive']

# ['this replacement band for the charge 3 seemed great at the start', ' but the two loops broke within 6 months of use', ' in this case', " the cheap price wasn't worth it"]
def encodereviews(reviewarray, qualityterms):
    encode = 0
    for i in reviewarray:
        if(((checknegators(i) == False) or (checkSent(i) == 'POSITIVE')) and # no negatives, not negative sentiment
           (checkterms(i, qualityterms) == True)):                                   # has no irrelevant terms
            encode = 1
        else:
            continue
    return encode   

#encodereviews(myArray, ease_of_use_terms)

In [55]:
############## Encode Reviews v2 including Context terms  'encodereviewswc' ####################


In [56]:
def encodereviewswc(reviewarray, qualityterms, contextterms):
    encode = 0
    for i in reviewarray:
        if(((checknegators(i) == False) or (checkSent(i) == 'POSITIVE')) and   # no negatives, not negative sentiment
           (checkterms(i, qualityterms) == True) and                           # has a quality dimension term
           (checkcontext(i, contextterms) == True)):                                     # has no irrelevant terms
            encode = 1
        else:
            continue
    return encode   

# Encode Reviews

In [57]:
df1["Hedonic Motivation"] = ""

terms = ['enjoyable', 'enjoyment', 'enjoy', 'pleasant', 'flow experience', 'pleasure', 'fun', 
         'interesting', 'satisfied', 'satisfaction', 'feel good', 'happiness', 'happy', 'happier', 
         'entertainment', 'entertain', 'exciting', 'playfulness', 'playful',  'feeling of accomplishment']

for i in range(len(df1['Full Text'])):
    if(i%5000 == 0):
        print(i)
    if(encodereviews(df1['Full Text'][i], terms) == 1):
        df1.at[i,'Hedonic Motivation'] = 1
    else:
        df1.at[i,'Hedonic Motivation'] = 0
        

df1.head()

0
5000
10000
15000
20000
25000


Unnamed: 0,Full Text,Rating,Hedonic Motivation
0,"[, they work they are great , however , in ...",1,0
1,"[i have a garmin vivosmart 4 , this fitbit ch...",1,0
2,"[absolute trash , seriously felt like i bough...",1,1
3,"[i got this to pair with my apple watch , cha...",1,0
4,[i am very disappointed with the fitbit charge...,1,0


In [58]:
df1["Connectivity Support"] = ""

terms = ['pair', 'sync', 'connect']

for i in range(len(df1['Full Text'])):
    if(i%5000 == 0):
        print(i)
    if(encodereviews(df1['Full Text'][i], terms) == 1):
        df1.at[i,'Connectivity Support'] = 1
    else:
        df1.at[i,'Connectivity Support'] = 0
    
df1.head()

0
5000
10000
15000
20000
25000


Unnamed: 0,Full Text,Rating,Hedonic Motivation,Connectivity Support
0,"[, they work they are great , however , in ...",1,0,0
1,"[i have a garmin vivosmart 4 , this fitbit ch...",1,0,0
2,"[absolute trash , seriously felt like i bough...",1,1,0
3,"[i got this to pair with my apple watch , cha...",1,0,1
4,[i am very disappointed with the fitbit charge...,1,0,0


In [59]:
df1["Customizability"] = ""

terms = ['customize', 'custom', 'personalize', 'personalization']

for i in range(len(df1['Full Text'])):
    if(i%5000 == 0):
        print(i)
    if(encodereviews(df1['Full Text'][i], terms) == 1):
        df1.at[i,'Customizability'] = 1
    else:
        df1.at[i,'Customizability'] = 0
        
df1.head()

0
5000
10000
15000
20000
25000


Unnamed: 0,Full Text,Rating,Hedonic Motivation,Connectivity Support,Customizability
0,"[, they work they are great , however , in ...",1,0,0,0
1,"[i have a garmin vivosmart 4 , this fitbit ch...",1,0,0,0
2,"[absolute trash , seriously felt like i bough...",1,1,0,0
3,"[i got this to pair with my apple watch , cha...",1,0,1,0
4,[i am very disappointed with the fitbit charge...,1,0,0,0


In [60]:
df1["Perceived Ease of Use"] = ""

terms = ['easy', 'clear', 'understandable', 'easy to use', 'mental effort', 
         'trouble free', 'simple', 'controllable', 'easy to learn']

for i in range(len(df1['Full Text'])):
    if(i%5000 == 0):
        print(i)
    if(encodereviews(df1['Full Text'][i], terms) == 1):
        df1.at[i,'Perceived Ease of Use'] = 1
    else:
        df1.at[i,'Perceived Ease of Use'] = 0
        
df1.head()

0
5000
10000
15000
20000
25000


Unnamed: 0,Full Text,Rating,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use
0,"[, they work they are great , however , in ...",1,0,0,0,0
1,"[i have a garmin vivosmart 4 , this fitbit ch...",1,0,0,0,0
2,"[absolute trash , seriously felt like i bough...",1,1,0,0,0
3,"[i got this to pair with my apple watch , cha...",1,0,1,0,1
4,[i am very disappointed with the fitbit charge...,1,0,0,0,0


In [61]:
df1["Appeal"] = ""

terms = ['cool', 'appearance', 'aesthetic', 'stylish', 'fashion', 'accessory', 'pleasing aesthetics', 'color', 
         'design', 'texture', 'uniqueness', 'unique', 'size', 'look']

for i in range(len(df1['Full Text'])):
    if(i%5000 == 0):
        print(i)
    if(encodereviews(df1['Full Text'][i], terms) == 1):
        df1.at[i,'Appeal'] = 1
    else:
        df1.at[i,'Appeal'] = 0
        
df1.head()

0
5000
10000
15000
20000
25000


Unnamed: 0,Full Text,Rating,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal
0,"[, they work they are great , however , in ...",1,0,0,0,0,0
1,"[i have a garmin vivosmart 4 , this fitbit ch...",1,0,0,0,0,0
2,"[absolute trash , seriously felt like i bough...",1,1,0,0,0,0
3,"[i got this to pair with my apple watch , cha...",1,0,1,0,1,0
4,[i am very disappointed with the fitbit charge...,1,0,0,0,0,0


In [62]:
df1["Perceived Value"] = ""

terms = ['price', 'value', 'cost', 'monetary', 'money']

for i in range(len(df1['Full Text'])):
    if(i%5000 == 0):
        print(i)
    if(encodereviews(df1['Full Text'][i], terms) == 1):
        df1.at[i,'Perceived Value'] = 1
    else:
        df1.at[i,'Perceived Value'] = 0

df1.head()

0
5000
10000
15000
20000
25000


Unnamed: 0,Full Text,Rating,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal,Perceived Value
0,"[, they work they are great , however , in ...",1,0,0,0,0,0,1
1,"[i have a garmin vivosmart 4 , this fitbit ch...",1,0,0,0,0,0,0
2,"[absolute trash , seriously felt like i bough...",1,1,0,0,0,0,0
3,"[i got this to pair with my apple watch , cha...",1,0,1,0,1,0,0
4,[i am very disappointed with the fitbit charge...,1,0,0,0,0,0,0


In [63]:
# sleep tracking	648       # track of	488    # does everything	439   # rate monitor	382
# ability to	370    # working out	359     # blood pressure	326     # keep track	284
# fitness tracking	269    It tracks	155    everything I need	202    #keep track of	267
# my heart rate	405      # to keep track	135    # to track my	411     # use it for	181
# use it to	134  # was able to	317 # and heart rate	69 # everything I need it	79 #has all the features	56
# heart rate	57   #it to track my	64  #keep track of my	134  # steps	71   #helps me keep track of	25   
#I use it every day	25    #I use it to track	24   track my heart rate	24    track my steps and	33

df1["Perceived Usefulness"] = ""

terms = ['rejuvenated', 'simplify', 'control over', 'reduce', 'accomplish more', 'save time', 'work more quickly',
        'accomplish','accomplished','accomplishes','accomplishing','achieve','achieved','achieves','achieving',
        'advantageous ','beneficial','check','checked','checking','checks','complete','completes','completing',
        'completing','convenient','conveniently','develop','developed','developing','develops','effective',
        'effectively','effectiveness','enable','enables','enhance','enhanced','enhances','enhancing','handle',
        'handled','handles','help','helpful','helps','improve','improved','improves ','improving','increase',
        'increased','increases','increasing','maintain','maintained ','maintaining','manage','managed','manages',
        'managing','monitor','monitored','monitoring','monitors','obtain','obtained','obtaining','obtains',
        'perform','performed','performing','performs','provide','provided','provides','providing','reduce',
        'reduced','reduces','reducing','save','saved','saves','saving','useful', 'track', 'does']


context = ['act','activities','concerns','convenience','daily activities','daily health','daily healthcare',
           'daily life','efficiency','effort','exercise','fitness','goals','good features','health condition',
           'health information','health management','health monitoring','healthcare','healthcare activities',
           'healthy habits','healthy status','information','job','learning achievement','life',
           'life more convenient ','life more effective','managing my health','managing my health',
           'meeting my needs','meeting my needs','performance','physical activities','physical health',
           'productivity','quality','quality of my life','safety','service','stress','task','tasks','things',
           'things more quickly','time','work satisfaction','activity','acts','concern','exercises','goal',
           'good feature','health conditions','healthcare activity','healthy habit','jobs','manage health',
           'manage my heakth','managing health','meeting need','meeting needs','physical activity','quality of life',
           'services','task','tasks','manages my health','meet needs','manages health','meet need', 'steps','calories',
           'distance','floors','active minutes','sleep time','sleep quality', 'heart rate', 'everything']



for i in range(len(df1['Full Text'])):
    if(i%5000 == 0):
        print(i)
    if(encodereviewswc(df1['Full Text'][i], terms, context) == 1):
        df1.at[i,'Perceived Usefulness'] = 1
    else:
        df1.at[i,'Perceived Usefulness'] = 0
        
df1.head()

0
5000
10000
15000
20000
25000


Unnamed: 0,Full Text,Rating,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal,Perceived Value,Perceived Usefulness
0,"[, they work they are great , however , in ...",1,0,0,0,0,0,1,0
1,"[i have a garmin vivosmart 4 , this fitbit ch...",1,0,0,0,0,0,0,0
2,"[absolute trash , seriously felt like i bough...",1,1,0,0,0,0,0,0
3,"[i got this to pair with my apple watch , cha...",1,0,1,0,1,0,0,0
4,[i am very disappointed with the fitbit charge...,1,0,0,0,0,0,0,0


In [64]:
df1["Device Quality"] = ""

terms = ['quality', 'system quality', 'service quality', 'battery life', 'charge', 'long time', 'battery last']

for i in range(len(df1['Full Text'])):
    if(i%5000 == 0):
        print(i)
    if(encodereviews(df1['Full Text'][i], terms) == 1):
        df1.at[i,'Device Quality'] = 1
    else:
        df1.at[i,'Device Quality'] = 0
        
df1.head()

0
5000
10000
15000
20000
25000


Unnamed: 0,Full Text,Rating,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal,Perceived Value,Perceived Usefulness,Device Quality
0,"[, they work they are great , however , in ...",1,0,0,0,0,0,1,0,0
1,"[i have a garmin vivosmart 4 , this fitbit ch...",1,0,0,0,0,0,0,0,1
2,"[absolute trash , seriously felt like i bough...",1,1,0,0,0,0,0,0,0
3,"[i got this to pair with my apple watch , cha...",1,0,1,0,1,0,0,0,0
4,[i am very disappointed with the fitbit charge...,1,0,0,0,0,0,0,0,1


In [65]:
df1["Credibility Support"] = ""

terms = ['accuracy', 'accurate']

for i in range(len(df1['Full Text'])):
    if(i%5000 == 0):
        print(i)
    if(encodereviews(df1['Full Text'][i], terms) == 1):
        df1.at[i,'Credibility Support'] = 1
    else:
        df1.at[i,'Credibility Support'] = 0
        
df1.head()

0
5000
10000
15000
20000
25000


Unnamed: 0,Full Text,Rating,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal,Perceived Value,Perceived Usefulness,Device Quality,Credibility Support
0,"[, they work they are great , however , in ...",1,0,0,0,0,0,1,0,0,0
1,"[i have a garmin vivosmart 4 , this fitbit ch...",1,0,0,0,0,0,0,0,1,1
2,"[absolute trash , seriously felt like i bough...",1,1,0,0,0,0,0,0,0,0
3,"[i got this to pair with my apple watch , cha...",1,0,1,0,1,0,0,0,0,1
4,[i am very disappointed with the fitbit charge...,1,0,0,0,0,0,0,0,1,0


In [66]:
df2 = df1
df2["Sum"] = ""

In [67]:
for i in range(len(df2['Full Text'])):
    df2['Sum'][i] = df2['Perceived Ease of Use'][i] + df2['Hedonic Motivation'][i] + \
                df2['Connectivity Support'][i] + df2['Customizability'][i] + df2['Appeal'][i] + \
                df2['Perceived Value'][i] + df2['Perceived Value'][i] + df2['Perceived Usefulness'][i] + \
                df2['Device Quality'][i] + df2['Credibility Support'][i]

In [68]:
df2.head()

Unnamed: 0,Full Text,Rating,Hedonic Motivation,Connectivity Support,Customizability,Perceived Ease of Use,Appeal,Perceived Value,Perceived Usefulness,Device Quality,Credibility Support,Sum
0,"[, they work they are great , however , in ...",1,0,0,0,0,0,1,0,0,0,2
1,"[i have a garmin vivosmart 4 , this fitbit ch...",1,0,0,0,0,0,0,0,1,1,2
2,"[absolute trash , seriously felt like i bough...",1,1,0,0,0,0,0,0,0,0,1
3,"[i got this to pair with my apple watch , cha...",1,0,1,0,1,0,0,0,0,1,3
4,[i am very disappointed with the fitbit charge...,1,0,0,0,0,0,0,0,1,0,1


In [69]:
df2.shape

(27354, 12)

In [70]:
df2 = df2[df2.Sum != 0]
df2.shape

(14906, 12)

In [71]:
df2.to_csv('ProcessedReviewsv5.csv')