In [357]:
import pandas as pd
import numpy as np
import nltk
import regex
from tqdm import tqdm
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem.wordnet import WordNetLemmatizer 
from sklearn.feature_extraction.text import TfidfVectorizer
import stanza
stanza.download('en') # download English model
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/master/resources_1.2.0.json: 128kB [00:00, 125MB/s]
2021-03-08 18:47:21 INFO: Downloading default packages for language: en (English)...
2021-03-08 18:47:22 INFO: File exists: C:\Users\vibkr\stanza_resources\en\default.zip.
2021-03-08 18:47:25 INFO: Finished downloading models and saved to C:\Users\vibkr\stanza_resources.
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\vibkr\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vibkr\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\vibkr\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [358]:
def feature_extraction(txt, nlp):

    sentList = nltk.sent_tokenize(txt)

    retlist = [];
    
    for line in sentList:
        
        txt_list = nltk.word_tokenize(line)
        taggedList = nltk.pos_tag(txt_list)
        
        newwordList = []
        flag = 0
        for i in range(0,len(taggedList)-1):
            if(taggedList[i][1]=="NN" and taggedList[i+1][1]=="NN"):
                newwordList.append(taggedList[i][0]+taggedList[i+1][0])
                flag=1
            else:
                if(flag==1):
                    flag=0
                    continue
                newwordList.append(taggedList[i][0])
                if(i==len(taggedList)-2):
                    newwordList.append(taggedList[i+1][0])
        finaltxt = ' '.join(word for word in newwordList)
    
    
        stop_words = set(stopwords.words('english'))
        new_txt_list = nltk.word_tokenize(finaltxt)
        wordsList = [w for w in new_txt_list if not w in stop_words]
        taggedList = nltk.pos_tag(wordsList)
        
        doc = nlp(finaltxt)
        dep_node = []
        try:
            for dep_edge in doc.sentences[0].dependencies:
                dep_node.append([dep_edge[2].text, dep_edge[0].id, dep_edge[1]])
            for i in range(0, len(dep_node)):
                if (int(dep_node[i][1]) != 0):
                    dep_node[i][1] = newwordList[(int(dep_node[i][1]) - 1)]
        except:
            pass;
        
        #print(dep_node)
        
        featureList = []
        categories = []
        for i in taggedList:
            if(i[1]=='JJ' or i[1]=='NN' or i[1]=='JJR' or i[1]=='NNS' or i[1]=='RB'):
                featureList.append(list(i))
                categories.append(i[0])
        #print(featureList)
        #print(categories)
        
        
        fcluster = []
        for i in featureList:
            filist = []
            for j in dep_node:
                if((j[0]==i[0] or j[1]==i[0]) and (j[2] in [
                    # Different types of words that are identified as potential features
                    "nsubj",
                    "acl:relcl",
                    "obj",
                    "dobj",
                    "agent",
                    "advmod",
                    "amod",
                    "neg",
                    "prep_of",
                    "acomp",
                    "xcomp",
                    "compound"
                ])):
                    if(j[0]==i[0]):
                        filist.append(j[1])
                    else:
                        filist.append(j[0])
            fcluster.append([i[0], filist])
        print(fcluster) 
        
        # Remove all features with no sentiment word:
        
        retlist.append(fcluster)
    return retlist;
    

In [359]:
def do_extraction(df, nlp, feat_count, feat_sent, content_str = "Content"):
    idx = 0;
    # Replace "" with nan's for removal
    df[content_str].replace('', np.nan, inplace=True)
    df.dropna(subset=[content_str], inplace=True)
    review_list = df[content_str].to_list()
    #feat_count = dict()
    #feat_sent = dict()
    #nlp = stanza.Pipeline('en')
    
    
    
    
    print(" Processing : " , df.shape[0], "rows of data")
    for review in tqdm(review_list):
        print("Review Number : ", idx);
        
        # Some data pre-processing
        
        review = review.lower()
        
        # Merge hyphenated words
        separate = review.split('-')
        review = ''.join(separate)
        
        # Remove non-alphabets
        review = re.sub(r'[^a-z\s\t.!?]', '', review)
        
        idx += 1;
        if idx >= df.shape[0]:
            break;
        try:
            output = feature_extraction(review, nlp);
        except:
            pass;
        for sent in output:
            for pair in sent:
                print(pair)
                if pair[0] in feat_sent:
                    if pair[1] is not None:
                        flist = feat_sent[pair[0]]
                        if isinstance(pair[1], list):
                            for i in pair[1]:
                                flist.append(i)
                        else:
                            flist.append(pair[1])
                        feat_sent[pair[0]] = flist;
                else:
                    if pair[1] is not None:
                        flist = pair[1]
                    else:
                        flist = list()
                    feat_sent[pair[0]] = flist;
                
                if pair[0] in feat_count:
                    feat_count[pair[0]] = feat_count[pair[0]] + 1;
                else:
                    feat_count[pair[0]] = 1
    
    return feat_count, feat_sent;

In [360]:
def get_sentiment(feat_count, feat_sent, nlp):

    sentiment_score = dict()

    # Delete features with no descriptors
    cob = feat_sent.copy()
    for feat in cob.keys():
        #print(cob[feat])
        
        if cob[feat] == []:
            del feat_sent[feat]
        else:
            feat_sent[feat] = ' ,'.join(feat_sent[feat])

    # Run pre-built sentiment score and take avg of all descriptors
    for f in tqdm(feat_sent.keys()):
        print("Calculating Sentiment for: ", f);
        ssum = 0;
        for g in feat_sent[f]:
            try:
                doc = nlp(g);

                for i in doc.sentences:

                        #print(i.sentiment)
                        ssum += i.sentiment;
            except:
                pass;

        sentiment_score[f] = ssum / len(b[f])

    adf = pd.DataFrame.from_dict(feat_count, orient='index', columns=['Freq'])
    adf.sort_values(by="Freq", ascending=False, inplace = True)

    

    avg_sent = pd.DataFrame.from_dict(sentiment_score, orient='index', columns=["Avg_sent"])
    desc_words = pd.DataFrame.from_dict(feat_sent, orient="index", columns=["Descriptors"])
    
    avg_sent = avg_sent.merge(desc_words, left_index=True, right_index=True)
    
    
    final_sent = avg_sent.merge(adf, left_index=True, right_index=True)
    final_sent.sort_values(by="Freq", ascending=False, inplace=True)
    return final_sent;

In [361]:
rdr = pd.read_csv('../ScrapedOutput/cmpb.csv')

nlp = stanza.Pipeline('en')
a = dict()
b = dict()
a, b = do_extraction(rdr, nlp, a, b)

2021-03-08 18:47:25 INFO: Loading these models for language: en (English):
| Processor | Package   |
-------------------------
| tokenize  | combined  |
| pos       | combined  |
| lemma     | combined  |
| depparse  | combined  |
| sentiment | sstplus   |
| ner       | ontonotes |

2021-03-08 18:47:25 INFO: Use device: cpu
2021-03-08 18:47:25 INFO: Loading: tokenize
2021-03-08 18:47:25 INFO: Loading: pos
2021-03-08 18:47:25 INFO: Loading: lemma
2021-03-08 18:47:25 INFO: Loading: depparse
2021-03-08 18:47:25 INFO: Loading: sentiment
2021-03-08 18:47:26 INFO: Loading: ner
2021-03-08 18:47:26 INFO: Done loading processors!
  0%|                                                                                           | 0/52 [00:00<?, ?it/s]

 Processing :  52 rows of data
Review Number :  0
[['professional', ['very', 'people']], ['people', ['professional', 'there']]]


  2%|█▌                                                                                 | 1/52 [00:00<00:19,  2.60it/s]

[['patient', ['very', 'recommended']], ['kind', ['respectful']], ['respectful', ['kind']], ['..', []], ['smooth', ['very']], ['medical', ['checkup']], ['checkup', ['medical']]]
['professional', ['very', 'people']]
['people', ['professional', 'there']]
['patient', ['very', 'recommended']]
['kind', ['respectful']]
['respectful', ['kind']]
['..', []]
['smooth', ['very']]
['medical', ['checkup']]
['checkup', ['medical']]
Review Number :  1


  4%|███▏                                                                               | 2/52 [00:00<00:20,  2.43it/s]

[['reviews', ['other', 'suggest']], ['people', ['suggest', 'here']], ['adequately', ['friendlylike']], ['anywhere', ['friendlylike', 'else']], ['else', ['anywhere']], ['sg', []]]
[['place', ['clean']], ['really', ['clean']], ['clean', ['place', 'really']], ['efficient', []]]
['reviews', ['other', 'suggest']]
['people', ['suggest', 'here']]
['adequately', ['friendlylike']]
['anywhere', ['friendlylike', 'else']]
['else', ['anywhere']]
['sg', []]
['place', ['clean']]
['really', ['clean']]
['clean', ['place', 'really']]
['efficient', []]
Review Number :  2
[['place', ['fine']], ['overall', ['fine']], ['fine', ['place', 'overall']]]
[['good', ['experience']], ['experience', ['good']]]
[['tip', ['just']], ['u', []], ['medical', ['checkup']], ['checkup', ['medical']]]
[['dont', []], ['late', ['go']], ['else', []], ['u', ['have']], ['back', ['come']], ['day', ['other']], ['complete', ['rest']], ['rest', ['complete']]]
['place', ['fine']]
['overall', ['fine']]
['fine', ['place', 'overall']]
['g

  6%|████▊                                                                              | 3/52 [00:01<00:24,  2.02it/s]

['rest', ['complete']]
Review Number :  3


  8%|██████▍                                                                            | 4/52 [00:01<00:25,  1.91it/s]

[['nscheckup', []], ['checkuptoday', []], ['august', ['am']], ['gateentrance', ['checkcounter']], ['entrancesecurity', ['checkcounter']], ['checkcounter', ['gateentrance', 'entrancesecurity', 'securitycheck']], ['stickerpass', ['take']], ['walk', ['just']], ['gate', []], ['dont', []], ['thinking', []], ['scan', ['i', 'the']]]
['nscheckup', []]
['checkuptoday', []]
['august', ['am']]
['gateentrance', ['checkcounter']]
['entrancesecurity', ['checkcounter']]
['checkcounter', ['gateentrance', 'entrancesecurity', 'securitycheck']]
['stickerpass', ['take']]
['walk', ['just']]
['gate', []]
['dont', []]
['thinking', []]
['scan', ['i', 'the']]
Review Number :  4
[['dontbother', ['end']], ['youll', []], ['end', ['dontbother', 'waiting']], ['hours', ['more']], ['realise', ['only', 'that', 'youre', 'person']], ['youre', ['realise']], ['last', ['person']], ['person', ['last', 'realise']], ['line', []]]


 10%|███████▉                                                                           | 5/52 [00:02<00:28,  1.63it/s]

[['doctors', ['care']], ['really', ['care', 'have']], ['fair', ['which']], ['dont', []], ['really', ['care', 'have']], ['choice', ['have']]]
[['place', ['waste']], ['complete', ['waste']], ['waste', ['place', 'complete']], ['space', []], ['time', []]]
['dontbother', ['end']]
['youll', []]
['end', ['dontbother', 'waiting']]
['hours', ['more']]
['realise', ['only', 'that', 'youre', 'person']]
['youre', ['realise']]
['last', ['person']]
['person', ['last', 'realise']]
['line', []]
['doctors', ['care']]
['really', ['care', 'have']]
['fair', ['which']]
['dont', []]
['really', ['care', 'have']]
['choice', ['have']]
['place', ['waste']]
['complete', ['waste']]
['waste', ['place', 'complete']]
['space', []]
['time', []]
Review Number :  5


 12%|█████████▌                                                                         | 6/52 [00:02<00:20,  2.20it/s]

[['nsf', ['reviews']], ['reviews', ['nsf']], ['lol', []]]
['nsf', ['reviews']]
['reviews', ['nsf']]
['lol', []]
Review Number :  6
[['extremely', ['long']], ['long', ['extremely', 'time']], ['time', ['long', 'takes']], ['due', []], ['waiting', []]]


 13%|███████████▏                                                                       | 7/52 [00:03<00:17,  2.53it/s]

[['overall', ['waste']], ['complete', ['waste']], ['waste', ['overall', 'complete']], ['time', []]]
['extremely', ['long']]
['long', ['extremely', 'time']]
['time', ['long', 'takes']]
['due', []]
['waiting', []]
['overall', ['waste']]
['complete', ['waste']]
['waste', ['overall', 'complete']]
['time', []]
Review Number :  7
[['staff', ['professional']], ['professional', ['staff']], ['knows', []]]


 15%|████████████▊                                                                      | 8/52 [00:03<00:19,  2.31it/s]

[['idk', []], ['bad', ['reviews']], ['personal', ['experienceeveryone']], ['experienceeveryone', ['personal']], ['helpful', ['very', 'is']], ['initiative', ['take']], ['help', ['me']]]
['staff', ['professional']]
['professional', ['staff']]
['knows', []]
['idk', []]
['bad', ['reviews']]
['personal', ['experienceeveryone']]
['experienceeveryone', ['personal']]
['helpful', ['very', 'is']]
['initiative', ['take']]
['help', ['me']]
Review Number :  8
[['unfriendly', ['staff']], ['staff', ['unfriendly']]]
[['guards', ['doing']], ['job', ['doing']], ['staff', ['keep']], ['stuff', ['more', 'do', 'suppose']], ['suppose', ['they', 'not', 'stuff']]]
[['absolutely', ['atrocious']], ['atrocious', ['absolutely']]]


 17%|██████████████▎                                                                    | 9/52 [00:04<00:26,  1.61it/s]

[['woman', ['is']], ['keeps', ['who', 'vac', 'changing']], ['tone', ['changing']], ['talks', ['she']], ['people', ['threatens']], ['people', ['threatens']]]
[['always', ['removes']], ['mask', ['removes']], ['talk', []], ['people', []], ['expressions', ['show']]]
['unfriendly', ['staff']]
['staff', ['unfriendly']]
['guards', ['doing']]
['job', ['doing']]
['staff', ['keep']]
['stuff', ['more', 'do', 'suppose']]
['suppose', ['they', 'not', 'stuff']]
['absolutely', ['atrocious']]
['atrocious', ['absolutely']]
['woman', ['is']]
['keeps', ['who', 'vac', 'changing']]
['tone', ['changing']]
['talks', ['she']]
['people', ['threatens']]
['people', ['threatens']]
['always', ['removes']]
['mask', ['removes']]
['talk', []]
['people', []]
['expressions', ['show']]
Review Number :  9


 19%|███████████████▊                                                                  | 10/52 [00:04<00:20,  2.01it/s]

[['staff', ['rude']], ['medical', ['screening']], ['screening', ['medical', 'station']], ['station', ['screening']], ['weight', []], ['extremely', ['rude']], ['rude', ['staff', 'extremely']], ['unfriendly', []]]
['staff', ['rude']]
['medical', ['screening']]
['screening', ['medical', 'station']]
['station', ['screening']]
['weight', []]
['extremely', ['rude']]
['rude', ['staff', 'extremely']]
['unfriendly', []]
Review Number :  10
[['staff', ['impatient']], ['serious', ['impatient']], ['impatient', ['staff', 'not', 'serious']]]


 21%|█████████████████▎                                                                | 11/52 [00:05<00:19,  2.15it/s]

[['undesirably', ['long']], ['long', ['undesirably', 'times']], ['times', ['long', 'waiting']]]
[['cmpb', ['recommend']], ['friend', []]]
['staff', ['impatient']]
['serious', ['impatient']]
['impatient', ['staff', 'not', 'serious']]
['undesirably', ['long']]
['long', ['undesirably', 'times']]
['times', ['long', 'waiting']]
['cmpb', ['recommend']]
['friend', []]
Review Number :  11
[['sent', ['just', 'son']], ['son', ['sent']], ['preenlistment', ['enlistmentcheckup']], ['enlistmentcheckup', ['preenlistment']], ['morning', []]]
[['guard', ['give']], ['give', ['guard', 'instructions']], ['clear', ['instructions']], ['instructions', ['clear', 'give']]]


 23%|██████████████████▉                                                               | 12/52 [00:06<00:21,  1.83it/s]

[['son', ['got']], ['alight', ['got']], ['couldnt', []], ['drive', ['we', 'in']]]
[['helloplease', []], ['train', ['army']], ['army', ['train']]]
['sent', ['just', 'son']]
['son', ['sent']]
['preenlistment', ['enlistmentcheckup']]
['enlistmentcheckup', ['preenlistment']]
['morning', []]
['guard', ['give']]
['give', ['guard', 'instructions']]
['clear', ['instructions']]
['instructions', ['clear', 'give']]
['son', ['got']]
['alight', ['got']]
['couldnt', []]
['drive', ['we', 'in']]
['helloplease', []]
['train', ['army']]
['army', ['train']]
Review Number :  12
[['inconvenient', ['most', 'locations']], ['locations', ['inconvenient', 'seen']], ['ever', ['seen']]]
[['terrible', ['directions']], ['directions', ['terrible']]]
[['rude', ['staff']], ['staff', ['rude']]]


 25%|████████████████████▌                                                             | 13/52 [00:06<00:21,  1.79it/s]

[['expect', ['process']], ['whole', ['process']], ['process', ['whole', 'expect']], ['take', ['hours']], ['hours', ['take']]]
[]
['inconvenient', ['most', 'locations']]
['locations', ['inconvenient', 'seen']]
['ever', ['seen']]
['terrible', ['directions']]
['directions', ['terrible']]
['rude', ['staff']]
['staff', ['rude']]
['expect', ['process']]
['whole', ['process']]
['process', ['whole', 'expect']]
['take', ['hours']]
['hours', ['take']]
Review Number :  13
[['ok', []], ['lah', []], ['review', []], ['visitjanuary', []], ['maybe', ['sikit']], ['sikit', ['so', 'maybe']], ['date', []]]
[['nsf', ['staff']], ['staff', ['nsf', 'ok']], ['ok', ['staff', 'typical']], ['typical', ['ok']], ['bochap', []], ['happy', []], ['bird', []], ['tio', ['switch', 'vocation']], ['switch', ['tio']], ['vocation', ['tio']]]
[['mo', ['seemed']], ['hand', ['other']], ['si', []], ['pehbuay', ['buaysong']], ['buaysong', ['pehbuay', 'seemed']]]


 29%|███████████████████████▋                                                          | 15/52 [00:07<00:17,  2.06it/s]

[['probably', ['this']]]
['ok', []]
['lah', []]
['review', []]
['visitjanuary', []]
['maybe', ['sikit']]
['sikit', ['so', 'maybe']]
['date', []]
['nsf', ['staff']]
['staff', ['nsf', 'ok']]
['ok', ['staff', 'typical']]
['typical', ['ok']]
['bochap', []]
['happy', []]
['bird', []]
['tio', ['switch', 'vocation']]
['switch', ['tio']]
['vocation', ['tio']]
['mo', ['seemed']]
['hand', ['other']]
['si', []]
['pehbuay', ['buaysong']]
['buaysong', ['pehbuay', 'seemed']]
['probably', ['this']]
Review Number :  14
[['tbh', []], ['bad', ['not', 'that']], ['place', []], ['visit', ['contrary']], ['contrary', ['visit']], ['others', ['saying']]]
['tbh', []]
['bad', ['not', 'that']]
['place', []]
['visit', ['contrary']]
['contrary', ['visit']]
['others', ['saying']]
Review Number :  15
[['please', []], ['sure', ['make']], ['medical', ['conditions']], ['conditions', ['medical', 'declare']]]
[['severe', []], ['minor', []], ['medical', ['officer']], ['officer', ['medical']], ['checkup', []]]


 31%|█████████████████████████▏                                                        | 16/52 [00:08<00:19,  1.87it/s]

[['believe', ['you', 'not']], ['well', ['fare']], ['combatpesfitbmt', []], ['medical', ['specialistletter']], ['specialistletter', ['medical', 'get']]]
['please', []]
['sure', ['make']]
['medical', ['conditions']]
['conditions', ['medical', 'declare']]
['severe', []]
['minor', []]
['medical', ['officer']]
['officer', ['medical']]
['checkup', []]
['believe', ['you', 'not']]
['well', ['fare']]
['combatpesfitbmt', []]
['medical', ['specialistletter']]
['specialistletter', ['medical', 'get']]
Review Number :  16
[['guards', ['rude']], ['rude', ['guards', 'very']]]
[['ask', ['question']], ['question', ['ask', 'ignore']], ['ignore', ['they', 'question', 'you']]]


 35%|████████████████████████████▍                                                     | 18/52 [00:08<00:13,  2.54it/s]

[['rest', ['nice']], ['staff', []], ['nice', ['rest', 'though']], ['friendly', []]]
['guards', ['rude']]
['rude', ['guards', 'very']]
['ask', ['question']]
['question', ['ask', 'ignore']]
['ignore', ['they', 'question', 'you']]
['rest', ['nice']]
['staff', []]
['nice', ['rest', 'though']]
['friendly', []]
Review Number :  17
[['overall', ['experience']], ['great', ['experience']], ['experience', ['overall', 'great']], ['medic', ['professional']], ['professional', ['medic']], ['blooddraw', []]]
['overall', ['experience']]
['great', ['experience']]
['experience', ['overall', 'great']]
['medic', ['professional']]
['professional', ['medic']]
['blooddraw', []]
Review Number :  18
[['kind', ['very', 'people']], ['people', ['kind']], ['cmpd', []], ['medical', ['check']], ['check', ['medical', 'up']]]


 37%|█████████████████████████████▉                                                    | 19/52 [00:09<00:12,  2.74it/s]

[['constantly', ['greeted']], ['smile', []], ['patience', []]]
['kind', ['very', 'people']]
['people', ['kind']]
['cmpd', []]
['medical', ['check']]
['check', ['medical', 'up']]
['constantly', ['greeted']]
['smile', []]
['patience', []]
Review Number :  19
[['dont', []], ['bully', ['me']]]
['dont', []]
['bully', ['me']]
Review Number :  20
[['meh', []]]


 40%|█████████████████████████████████                                                 | 21/52 [00:09<00:07,  3.95it/s]

[['staff', ['nice']], ['pretty', ['nice']], ['nice', ['staff', 'pretty']]]
['meh', []]
['staff', ['nice']]
['pretty', ['nice']]
['nice', ['staff', 'pretty']]
Review Number :  21
[['lousy', ['dk']], ['service', ['dk']], ['dk', ['lousy', 'service', 'help']], ['help', ['dk', 'people']], ['people', ['help']]]


 42%|██████████████████████████████████▋                                               | 22/52 [00:09<00:07,  3.81it/s]

[['ask', ['question', 'taiji']], ['question', ['ask']], ['also', ['say']], ['dont', []], ['ask', ['question', 'taiji']], ['taiji', ['not', 'ask']]]
['lousy', ['dk']]
['service', ['dk']]
['dk', ['lousy', 'service', 'help']]
['help', ['dk', 'people']]
['people', ['help']]
['ask', ['question', 'taiji']]
['question', ['ask']]
['also', ['say']]
['dont', []]
['ask', ['question', 'taiji']]
['taiji', ['not', 'ask']]
Review Number :  22
[['tuesday', []], ['negative', ['reviews']], ['reviews', ['negative']], ['share', ['i', 'opinion']], ['quick', ['opinion']], ['personal', ['opinion']], ['opinion', ['quick', 'personal', 'share']]]


 44%|████████████████████████████████████▎                                             | 23/52 [00:10<00:10,  2.85it/s]

[['perhaps', ['varies']], ['varies', ['perhaps', 'it']], ['person', []], ['person', []], ['trip', ['great']], ['cmpb', []], ['ultimately', ['great']], ['great', ['trip', 'ultimately']], ['definitely', ['experience']], ['memorable', ['experience']], ['experience', ['definitely', 'memorable']]]
[['medical', []]]
['tuesday', []]
['negative', ['reviews']]
['reviews', ['negative']]
['share', ['i', 'opinion']]
['quick', ['opinion']]
['personal', ['opinion']]
['opinion', ['quick', 'personal', 'share']]
['perhaps', ['varies']]
['varies', ['perhaps', 'it']]
['person', []]
['person', []]
['trip', ['great']]
['cmpb', []]
['ultimately', ['great']]
['great', ['trip', 'ultimately']]
['definitely', ['experience']]
['memorable', ['experience']]
['experience', ['definitely', 'memorable']]
['medical', []]
Review Number :  23
[['medical', ['check']], ['check', ['medical', 'place']], ['place', ['check']], ['saf', []]]
['medical', ['check']]


 48%|███████████████████████████████████████▍                                          | 25/52 [00:10<00:06,  4.04it/s]

['check', ['medical', 'place']]
['place', ['check']]
['saf', []]
Review Number :  24
[['others', []], ['dirt', []]]
['others', []]
['dirt', []]
Review Number :  25


 50%|█████████████████████████████████████████                                         | 26/52 [00:10<00:07,  3.61it/s]

[['preenlistment', ['screening']], ['sessions', ['few', 'counselling', 'screening']], ['charge', ['answer']]]
[['inconvenient', ['location']], ['location', ['inconvenient']]]
['preenlistment', ['screening']]
['sessions', ['few', 'counselling', 'screening']]
['charge', ['answer']]
['inconvenient', ['location']]
['location', ['inconvenient']]
Review Number :  26
[['cookhouse', []]]


 52%|██████████████████████████████████████████▌                                       | 27/52 [00:11<00:06,  3.69it/s]

[['nsf', ['need']], ['meagre', ['pay']], ['pay', ['meagre']]]
['cookhouse', []]
['nsf', ['need']]
['meagre', ['pay']]
['pay', ['meagre']]
Review Number :  27
[['officerattitude', ['good']], ['good', ['officerattitude', 'not']], ['patience', []], ['service', []]]


 54%|████████████████████████████████████████████▏                                     | 28/52 [00:11<00:07,  3.01it/s]

[['dont', []], ['understand', ['i', 'just', 'what']], ['use', ['he', 'tone']], ['unfriendly', ['tone']], ['tone', ['unfriendly', 'use']], ['repeat', []], ['language', []]]
['officerattitude', ['good']]
['good', ['officerattitude', 'not']]
['patience', []]
['service', []]
['dont', []]
['understand', ['i', 'just', 'what']]
['use', ['he', 'tone']]
['unfriendly', ['tone']]
['tone', ['unfriendly', 'use']]
['repeat', []]
['language', []]
Review Number :  28


 58%|███████████████████████████████████████████████▎                                  | 30/52 [00:12<00:06,  3.50it/s]

[['idk', []], ['many', ['so', 'people']], ['people', ['many', 'give']], ['negative', ['reviews']], ['reviews', ['negative', 'give']], ['medical', ['check']], ['check', ['medical', 'up']], ['staff', ['friendly']], ['friendly', ['staff']], ['nsf', ['cool']], ['cool', ['nsf']], ['overall', ['had']], ['good', ['experience']], ['experience', ['good', 'had', 'there']]]
['idk', []]
['many', ['so', 'people']]
['people', ['many', 'give']]
['negative', ['reviews']]
['reviews', ['negative', 'give']]
['medical', ['check']]
['check', ['medical', 'up']]
['staff', ['friendly']]
['friendly', ['staff']]
['nsf', ['cool']]
['cool', ['nsf']]
['overall', ['had']]
['good', ['experience']]
['experience', ['good', 'had', 'there']]
Review Number :  29
[['gold', ['star']], ['star', ['gold']], ['public', ['service']], ['service', ['public']]]
['gold', ['star']]
['star', ['gold']]
['public', ['service']]
['service', ['public']]
Review Number :  30
[['hrs', ['here']], ['form', []], ['meeting', []]]
[['even', ['bor

 62%|██████████████████████████████████████████████████▍                               | 32/52 [00:12<00:05,  3.80it/s]

['cold', ['conditioning']]
['air', ['conditioning']]
['wifi', []]
Review Number :  31
[['accessible', ['not']]]
['accessible', ['not']]
Review Number :  32


 63%|████████████████████████████████████████████████████                              | 33/52 [00:12<00:04,  4.38it/s]

[['extremely', ['poor']], ['poor', ['extremely', 'customerservice']], ['rude', []], ['customerservice', ['poor']]]
['extremely', ['poor']]
['poor', ['extremely', 'customerservice']]
['rude', []]
['customerservice', ['poor']]
Review Number :  33
[['worst', ['day']], ['day', ['worst']], ['life', []]]
['worst', ['day']]
['day', ['worst']]
['life', []]
Review Number :  34


 67%|███████████████████████████████████████████████████████▏                          | 35/52 [00:13<00:03,  5.54it/s]

[['wooo', []], ['real', ['edgy']], ['edgy', ['real', 'ziyuan']], ['ziyuan', ['edgy', 'writer']], ['novel', ['writer']], ['writer', ['ziyuan', 'you', 'novel']]]
['wooo', []]
['real', ['edgy']]
['edgy', ['real', 'ziyuan']]
['ziyuan', ['edgy', 'writer']]
['novel', ['writer']]
['writer', ['ziyuan', 'you', 'novel']]
Review Number :  35
[['highly', ['inaccessible']], ['inaccessible', ['highly']]]


 71%|██████████████████████████████████████████████████████████▎                       | 37/52 [00:13<00:02,  5.67it/s]

[['hard', ['so']], ['get', ['there']], ['mrt', ['stations']], ['stations', ['not', 'mrt']]]
['highly', ['inaccessible']]
['inaccessible', ['highly']]
['hard', ['so']]
['get', ['there']]
['mrt', ['stations']]
['stations', ['not', 'mrt']]
Review Number :  36
[['bane', []], ['existence', []]]
['bane', []]
['existence', []]
Review Number :  37
[['cookhouse', []], ['book', ['get', 'everyday']], ['everyday', ['book']]]
[['troublesome', ['most', 'thing']], ['thing', ['troublesome', 'discussing']], ['whats', ['discussing']], ['lunch', []]]


 75%|█████████████████████████████████████████████████████████████▌                    | 39/52 [00:14<00:03,  4.29it/s]

[['sidenotecanteen', ['canteenb']], ['canteenb', ['sidenotecanteen', 'bad']], ['bad', ['canteenb', 'real']], ['real', ['bad']], ['bad', ['canteenb', 'real']]]
['cookhouse', []]
['book', ['get', 'everyday']]
['everyday', ['book']]
['troublesome', ['most', 'thing']]
['thing', ['troublesome', 'discussing']]
['whats', ['discussing']]
['lunch', []]
['sidenotecanteen', ['canteenb']]
['canteenb', ['sidenotecanteen', 'bad']]
['bad', ['canteenb', 'real']]
['real', ['bad']]
['bad', ['canteenb', 'real']]
Review Number :  38
[['people', ['go']], ['even', ['go']], ['place', []], ['middle', []], ['nowhere', []]]
['people', ['go']]
['even', ['go']]
['place', []]
['middle', []]
['nowhere', []]
Review Number :  39


 77%|███████████████████████████████████████████████████████████████                   | 40/52 [00:14<00:03,  3.86it/s]

[['officertalk', ['prepared']], ['money', ['own']], ['hard', ['very']], ['middle', []], ['village', []], ['something', []]]
['officertalk', ['prepared']]
['money', ['own']]
['hard', ['very']]
['middle', []]
['village', []]
['something', []]
Review Number :  40
[['rude', ['staff']], ['staff', ['rude']]]
['rude', ['staff']]
['staff', ['rude']]
Review Number :  41


 83%|███████████████████████████████████████████████████████████████████▊              | 43/52 [00:14<00:01,  5.28it/s]

[['inaccessible', ['need']], ['need', ['inaccessible']], ['h', []], ['time', ['travel']], ['waste', ['more']], ['time', ['travel']]]
['inaccessible', ['need']]
['need', ['inaccessible']]
['h', []]
['time', ['travel']]
['waste', ['more']]
['time', ['travel']]
Review Number :  42
[['far', ['so', 'away']], ['away', ['far']], ['middle', []], ['nowhere', []]]
['far', ['so', 'away']]
['away', ['far']]
['middle', []]
['nowhere', []]
Review Number :  43


 87%|██████████████████████████████████████████████████████████████████████▉           | 45/52 [00:15<00:01,  5.77it/s]

[['bad', ['very', 'troopers', 'very', 'attitude']], ['security', ['troopers']], ['troopers', ['bad', 'security', 'have']], ['bad', ['very', 'troopers', 'very', 'attitude']], ['attitude', ['bad', 'have']], ['towards', []], ['public', []]]
['bad', ['very', 'troopers', 'very', 'attitude']]
['security', ['troopers']]
['troopers', ['bad', 'security', 'have']]
['bad', ['very', 'troopers', 'very', 'attitude']]
['attitude', ['bad', 'have']]
['towards', []]
['public', []]
Review Number :  44
[['middle', []], ['nowhere', []]]
['middle', []]
['nowhere', []]
Review Number :  45


 88%|████████████████████████████████████████████████████████████████████████▌         | 46/52 [00:15<00:01,  5.40it/s]

[['place', []], ['well', ['kept']], ['people', ['kept']], ['unbelievably', ['rude']], ['rude', ['unbelievably']]]
['place', []]
['well', ['kept']]
['people', ['kept']]
['unbelievably', ['rude']]
['rude', ['unbelievably']]
Review Number :  46
[['bad', ['service']], ['service', ['bad']]]
['bad', ['service']]
['service', ['bad']]
Review Number :  47


 92%|███████████████████████████████████████████████████████████████████████████▋      | 48/52 [00:15<00:00,  6.17it/s]

[['interestingly', ['enough']], ['enough', ['interestingly', 'removed']], ['negative', ['reviews']], ['reviews', ['negative']]]
['interestingly', ['enough']]
['enough', ['interestingly', 'removed']]
['negative', ['reviews']]
['reviews', ['negative']]
Review Number :  48
[['bad', ['service']], ['service', ['bad']]]
['bad', ['service']]
['service', ['bad']]
Review Number :  49
[['sheat', []]]


 98%|████████████████████████████████████████████████████████████████████████████████▍ | 51/52 [00:15<00:00,  3.19it/s]

[['dirty', ['pigs']], ['pigs', ['dirty', 'training']], ['step', []], ['minefields', []]]
['sheat', []]
['dirty', ['pigs']]
['pigs', ['dirty', 'training']]
['step', []]
['minefields', []]
Review Number :  50
[['gncpresent', []]]
['gncpresent', []]
Review Number :  51





In [362]:
fin = get_sentiment(a, b, nlp)

  0%|                                                                                          | 0/197 [00:00<?, ?it/s]

Calculating Sentiment for:  professional


  1%|▍                                                                                 | 1/197 [00:01<03:36,  1.10s/it]

Calculating Sentiment for:  people


  1%|▊                                                                                 | 2/197 [00:04<08:30,  2.62s/it]

Calculating Sentiment for:  patient


  2%|█▏                                                                                | 3/197 [00:05<05:42,  1.77s/it]

Calculating Sentiment for:  kind


  3%|██                                                                                | 5/197 [00:06<03:14,  1.01s/it]

Calculating Sentiment for:  respectful


  3%|██▍                                                                               | 6/197 [00:06<02:19,  1.37it/s]

Calculating Sentiment for:  smooth
Calculating Sentiment for:  medical


  4%|██▉                                                                               | 7/197 [00:10<05:23,  1.70s/it]

Calculating Sentiment for:  checkup


  4%|███▎                                                                              | 8/197 [00:11<04:25,  1.40s/it]

Calculating Sentiment for:  reviews


  5%|███▋                                                                              | 9/197 [00:13<05:14,  1.67s/it]

Calculating Sentiment for:  adequately


  5%|████                                                                             | 10/197 [00:14<04:07,  1.33s/it]

Calculating Sentiment for:  anywhere


  6%|████▌                                                                            | 11/197 [00:14<03:35,  1.16s/it]

Calculating Sentiment for:  else


  6%|████▉                                                                            | 12/197 [00:15<02:48,  1.09it/s]

Calculating Sentiment for:  place


  7%|█████▎                                                                           | 13/197 [00:16<02:52,  1.07it/s]

Calculating Sentiment for:  really


  7%|█████▊                                                                           | 14/197 [00:17<03:22,  1.11s/it]

Calculating Sentiment for:  clean


  8%|██████▏                                                                          | 15/197 [00:18<02:52,  1.06it/s]

Calculating Sentiment for:  overall


  8%|██████▌                                                                          | 16/197 [00:19<03:02,  1.01s/it]

Calculating Sentiment for:  fine


  9%|██████▉                                                                          | 17/197 [00:20<02:42,  1.11it/s]

Calculating Sentiment for:  good


  9%|███████▍                                                                         | 18/197 [00:22<03:37,  1.22s/it]

Calculating Sentiment for:  experience


 10%|███████▊                                                                         | 19/197 [00:24<04:47,  1.62s/it]

Calculating Sentiment for:  tip


 11%|████████▋                                                                        | 21/197 [00:25<02:36,  1.13it/s]

Calculating Sentiment for:  u


 12%|█████████▍                                                                       | 23/197 [00:25<01:34,  1.83it/s]

Calculating Sentiment for:  late
Calculating Sentiment for:  back
Calculating Sentiment for:  day


 12%|█████████▊                                                                       | 24/197 [00:25<01:32,  1.87it/s]

Calculating Sentiment for:  complete


 13%|██████████▎                                                                      | 25/197 [00:26<01:41,  1.70it/s]

Calculating Sentiment for:  rest


 14%|███████████                                                                      | 27/197 [00:27<01:17,  2.19it/s]

Calculating Sentiment for:  august
Calculating Sentiment for:  gateentrance


 14%|███████████▌                                                                     | 28/197 [00:27<01:22,  2.05it/s]

Calculating Sentiment for:  entrancesecurity


 15%|███████████▉                                                                     | 29/197 [00:28<01:24,  1.98it/s]

Calculating Sentiment for:  checkcounter


 16%|████████████▋                                                                    | 31/197 [00:30<02:01,  1.37it/s]

Calculating Sentiment for:  stickerpass
Calculating Sentiment for:  walk


 16%|█████████████▏                                                                   | 32/197 [00:30<01:34,  1.75it/s]

Calculating Sentiment for:  scan


 17%|█████████████▉                                                                   | 34/197 [00:31<01:01,  2.67it/s]

Calculating Sentiment for:  dontbother
Calculating Sentiment for:  end


 18%|██████████████▍                                                                  | 35/197 [00:32<01:23,  1.94it/s]

Calculating Sentiment for:  hours


 18%|██████████████▊                                                                  | 36/197 [00:32<01:18,  2.05it/s]

Calculating Sentiment for:  realise


 19%|███████████████▏                                                                 | 37/197 [00:33<01:46,  1.50it/s]

Calculating Sentiment for:  youre


 19%|███████████████▌                                                                 | 38/197 [00:33<01:31,  1.74it/s]

Calculating Sentiment for:  last


 20%|████████████████                                                                 | 39/197 [00:34<01:19,  1.99it/s]

Calculating Sentiment for:  person


 21%|████████████████▊                                                                | 41/197 [00:35<01:09,  2.23it/s]

Calculating Sentiment for:  doctors
Calculating Sentiment for:  fair


 22%|█████████████████▋                                                               | 43/197 [00:35<00:50,  3.03it/s]

Calculating Sentiment for:  choice


 22%|██████████████████                                                               | 44/197 [00:37<01:53,  1.35it/s]

Calculating Sentiment for:  waste
Calculating Sentiment for:  time


 23%|██████████████████▌                                                              | 45/197 [00:38<02:11,  1.15it/s]

Calculating Sentiment for:  nsf


 23%|██████████████████▉                                                              | 46/197 [00:39<02:23,  1.05it/s]

Calculating Sentiment for:  extremely


 24%|███████████████████▎                                                             | 47/197 [00:40<02:10,  1.15it/s]

Calculating Sentiment for:  long


 24%|███████████████████▋                                                             | 48/197 [00:41<02:43,  1.10s/it]

Calculating Sentiment for:  staff


 25%|████████████████████▏                                                            | 49/197 [00:45<04:32,  1.84s/it]

Calculating Sentiment for:  bad


 25%|████████████████████▌                                                            | 50/197 [00:51<07:13,  2.95s/it]

Calculating Sentiment for:  personal


 26%|████████████████████▉                                                            | 51/197 [00:52<05:58,  2.46s/it]

Calculating Sentiment for:  experienceeveryone


 26%|█████████████████████▍                                                           | 52/197 [00:52<04:26,  1.84s/it]

Calculating Sentiment for:  helpful


 27%|██████████████████████▏                                                          | 54/197 [00:53<02:26,  1.02s/it]

Calculating Sentiment for:  initiative


 28%|██████████████████████▌                                                          | 55/197 [00:53<02:07,  1.12it/s]

Calculating Sentiment for:  help
Calculating Sentiment for:  unfriendly


 28%|███████████████████████                                                          | 56/197 [00:54<01:48,  1.30it/s]

Calculating Sentiment for:  guards


 29%|███████████████████████▍                                                         | 57/197 [00:54<01:34,  1.49it/s]

Calculating Sentiment for:  job


 29%|███████████████████████▊                                                         | 58/197 [00:55<01:15,  1.84it/s]

Calculating Sentiment for:  stuff


 30%|████████████████████████▎                                                        | 59/197 [00:55<01:22,  1.66it/s]

Calculating Sentiment for:  suppose


 30%|████████████████████████▋                                                        | 60/197 [00:56<01:25,  1.60it/s]

Calculating Sentiment for:  absolutely


 31%|█████████████████████████                                                        | 61/197 [00:56<01:15,  1.79it/s]

Calculating Sentiment for:  atrocious


 32%|█████████████████████████▉                                                       | 63/197 [00:57<00:54,  2.47it/s]

Calculating Sentiment for:  woman
Calculating Sentiment for:  keeps


 32%|██████████████████████████▎                                                      | 64/197 [00:58<01:08,  1.94it/s]

Calculating Sentiment for:  tone


 34%|███████████████████████████▏                                                     | 66/197 [00:59<01:10,  1.85it/s]

Calculating Sentiment for:  talks
Calculating Sentiment for:  always


 34%|███████████████████████████▌                                                     | 67/197 [00:59<01:02,  2.09it/s]

Calculating Sentiment for:  mask


 35%|███████████████████████████▉                                                     | 68/197 [01:00<00:56,  2.27it/s]

Calculating Sentiment for:  expressions


 35%|████████████████████████████▎                                                    | 69/197 [01:00<00:47,  2.69it/s]

Calculating Sentiment for:  screening


 36%|████████████████████████████▊                                                    | 70/197 [01:01<01:01,  2.08it/s]

Calculating Sentiment for:  station


 36%|█████████████████████████████▏                                                   | 71/197 [01:01<00:58,  2.14it/s]

Calculating Sentiment for:  rude


 37%|█████████████████████████████▌                                                   | 72/197 [01:04<02:18,  1.11s/it]

Calculating Sentiment for:  serious


 37%|██████████████████████████████                                                   | 73/197 [01:04<01:53,  1.09it/s]

Calculating Sentiment for:  impatient


 38%|██████████████████████████████▍                                                  | 74/197 [01:05<01:47,  1.14it/s]

Calculating Sentiment for:  undesirably


 38%|██████████████████████████████▊                                                  | 75/197 [01:05<01:21,  1.49it/s]

Calculating Sentiment for:  times


 39%|███████████████████████████████▏                                                 | 76/197 [01:06<01:19,  1.53it/s]

Calculating Sentiment for:  cmpb


 39%|███████████████████████████████▋                                                 | 77/197 [01:06<01:11,  1.69it/s]

Calculating Sentiment for:  sent


 40%|████████████████████████████████                                                 | 78/197 [01:07<01:03,  1.87it/s]

Calculating Sentiment for:  son


 40%|████████████████████████████████▍                                                | 79/197 [01:07<00:57,  2.06it/s]

Calculating Sentiment for:  preenlistment


 41%|████████████████████████████████▉                                                | 80/197 [01:08<01:24,  1.38it/s]

Calculating Sentiment for:  enlistmentcheckup


 41%|█████████████████████████████████▎                                               | 81/197 [01:09<01:20,  1.45it/s]

Calculating Sentiment for:  guard


 42%|█████████████████████████████████▋                                               | 82/197 [01:09<01:02,  1.83it/s]

Calculating Sentiment for:  give


 42%|██████████████████████████████████▏                                              | 83/197 [01:10<01:12,  1.57it/s]

Calculating Sentiment for:  clear


 43%|██████████████████████████████████▌                                              | 84/197 [01:10<01:09,  1.62it/s]

Calculating Sentiment for:  instructions


 44%|███████████████████████████████████▎                                             | 86/197 [01:11<00:49,  2.25it/s]

Calculating Sentiment for:  alight
Calculating Sentiment for:  drive


 45%|████████████████████████████████████▏                                            | 88/197 [01:12<00:35,  3.06it/s]

Calculating Sentiment for:  train
Calculating Sentiment for:  army


 45%|████████████████████████████████████▌                                            | 89/197 [01:12<00:32,  3.35it/s]

Calculating Sentiment for:  inconvenient


 46%|█████████████████████████████████████                                            | 90/197 [01:13<00:55,  1.94it/s]

Calculating Sentiment for:  locations


 47%|█████████████████████████████████████▊                                           | 92/197 [01:14<00:49,  2.14it/s]

Calculating Sentiment for:  ever


 47%|██████████████████████████████████████▏                                          | 93/197 [01:14<00:49,  2.10it/s]

Calculating Sentiment for:  terrible
Calculating Sentiment for:  directions


 48%|██████████████████████████████████████▋                                          | 94/197 [01:15<00:47,  2.18it/s]

Calculating Sentiment for:  expect


 48%|███████████████████████████████████████                                          | 95/197 [01:15<00:43,  2.37it/s]

Calculating Sentiment for:  whole


 49%|███████████████████████████████████████▍                                         | 96/197 [01:15<00:40,  2.51it/s]

Calculating Sentiment for:  process


 49%|███████████████████████████████████████▉                                         | 97/197 [01:16<00:44,  2.26it/s]

Calculating Sentiment for:  take


 50%|████████████████████████████████████████▎                                        | 98/197 [01:16<00:38,  2.57it/s]

Calculating Sentiment for:  ok


 50%|████████████████████████████████████████▋                                        | 99/197 [01:17<00:45,  2.14it/s]

Calculating Sentiment for:  maybe


 51%|████████████████████████████████████████▌                                       | 100/197 [01:17<00:38,  2.51it/s]

Calculating Sentiment for:  sikit


 51%|█████████████████████████████████████████                                       | 101/197 [01:17<00:36,  2.60it/s]

Calculating Sentiment for:  typical
Calculating Sentiment for:  tio


 53%|██████████████████████████████████████████▏                                     | 104/197 [01:18<00:30,  3.02it/s]

Calculating Sentiment for:  switch
Calculating Sentiment for:  vocation


 53%|██████████████████████████████████████████▋                                     | 105/197 [01:18<00:25,  3.55it/s]

Calculating Sentiment for:  mo


 54%|███████████████████████████████████████████                                     | 106/197 [01:19<00:26,  3.46it/s]

Calculating Sentiment for:  hand


 54%|███████████████████████████████████████████▍                                    | 107/197 [01:19<00:24,  3.62it/s]

Calculating Sentiment for:  pehbuay


 55%|███████████████████████████████████████████▊                                    | 108/197 [01:19<00:26,  3.30it/s]

Calculating Sentiment for:  buaysong


 56%|████████████████████████████████████████████▋                                   | 110/197 [01:20<00:29,  2.98it/s]

Calculating Sentiment for:  probably
Calculating Sentiment for:  visit


 56%|█████████████████████████████████████████████                                   | 111/197 [01:21<00:30,  2.84it/s]

Calculating Sentiment for:  contrary


 57%|█████████████████████████████████████████████▍                                  | 112/197 [01:21<00:27,  3.09it/s]

Calculating Sentiment for:  others


 58%|██████████████████████████████████████████████▎                                 | 114/197 [01:21<00:22,  3.67it/s]

Calculating Sentiment for:  sure
Calculating Sentiment for:  conditions


 58%|██████████████████████████████████████████████▋                                 | 115/197 [01:22<00:33,  2.47it/s]

Calculating Sentiment for:  officer


 59%|███████████████████████████████████████████████                                 | 116/197 [01:22<00:31,  2.59it/s]

Calculating Sentiment for:  believe


 59%|███████████████████████████████████████████████▌                                | 117/197 [01:23<00:29,  2.74it/s]

Calculating Sentiment for:  well


 60%|███████████████████████████████████████████████▉                                | 118/197 [01:23<00:30,  2.57it/s]

Calculating Sentiment for:  specialistletter


 60%|████████████████████████████████████████████████▎                               | 119/197 [01:24<00:35,  2.21it/s]

Calculating Sentiment for:  ask


 61%|████████████████████████████████████████████████▋                               | 120/197 [01:26<01:07,  1.14it/s]

Calculating Sentiment for:  question


 61%|█████████████████████████████████████████████████▏                              | 121/197 [01:26<01:01,  1.23it/s]

Calculating Sentiment for:  ignore


 62%|█████████████████████████████████████████████████▌                              | 122/197 [01:27<01:01,  1.21it/s]

Calculating Sentiment for:  nice


 62%|█████████████████████████████████████████████████▉                              | 123/197 [01:28<01:09,  1.07it/s]

Calculating Sentiment for:  friendly


 63%|██████████████████████████████████████████████████▎                             | 124/197 [01:29<00:53,  1.37it/s]

Calculating Sentiment for:  great


 63%|██████████████████████████████████████████████████▊                             | 125/197 [01:30<01:04,  1.12it/s]

Calculating Sentiment for:  medic


 64%|███████████████████████████████████████████████████▏                            | 126/197 [01:30<00:57,  1.23it/s]

Calculating Sentiment for:  check


 64%|███████████████████████████████████████████████████▌                            | 127/197 [01:32<01:16,  1.09s/it]

Calculating Sentiment for:  constantly


 65%|███████████████████████████████████████████████████▉                            | 128/197 [01:33<00:59,  1.15it/s]

Calculating Sentiment for:  bully
Calculating Sentiment for:  pretty


 66%|████████████████████████████████████████████████████▊                           | 130/197 [01:33<00:36,  1.86it/s]

Calculating Sentiment for:  lousy
Calculating Sentiment for:  service


 67%|█████████████████████████████████████████████████████▌                          | 132/197 [01:34<00:33,  1.94it/s]

Calculating Sentiment for:  dk


 68%|██████████████████████████████████████████████████████▍                         | 134/197 [01:35<00:30,  2.05it/s]

Calculating Sentiment for:  also
Calculating Sentiment for:  taiji


 69%|██████████████████████████████████████████████████████▊                         | 135/197 [01:35<00:28,  2.21it/s]

Calculating Sentiment for:  negative


 69%|███████████████████████████████████████████████████████▏                        | 136/197 [01:36<00:38,  1.59it/s]

Calculating Sentiment for:  share


 70%|███████████████████████████████████████████████████████▋                        | 137/197 [01:37<00:34,  1.73it/s]

Calculating Sentiment for:  quick


 70%|████████████████████████████████████████████████████████                        | 138/197 [01:37<00:29,  1.97it/s]

Calculating Sentiment for:  opinion


 71%|████████████████████████████████████████████████████████▍                       | 139/197 [01:38<00:36,  1.59it/s]

Calculating Sentiment for:  perhaps


 71%|████████████████████████████████████████████████████████▊                       | 140/197 [01:38<00:29,  1.90it/s]

Calculating Sentiment for:  varies


 72%|█████████████████████████████████████████████████████████▎                      | 141/197 [01:39<00:28,  1.97it/s]

Calculating Sentiment for:  trip


 72%|█████████████████████████████████████████████████████████▋                      | 142/197 [01:39<00:23,  2.30it/s]

Calculating Sentiment for:  ultimately


 73%|██████████████████████████████████████████████████████████                      | 143/197 [01:39<00:20,  2.67it/s]

Calculating Sentiment for:  definitely


 73%|██████████████████████████████████████████████████████████▍                     | 144/197 [01:40<00:21,  2.46it/s]

Calculating Sentiment for:  memorable


 74%|██████████████████████████████████████████████████████████▉                     | 145/197 [01:40<00:21,  2.40it/s]

Calculating Sentiment for:  sessions


 74%|███████████████████████████████████████████████████████████▎                    | 146/197 [01:41<00:32,  1.57it/s]

Calculating Sentiment for:  charge


 75%|███████████████████████████████████████████████████████████▋                    | 147/197 [01:42<00:26,  1.87it/s]

Calculating Sentiment for:  location


 76%|████████████████████████████████████████████████████████████▌                   | 149/197 [01:42<00:20,  2.38it/s]

Calculating Sentiment for:  meagre
Calculating Sentiment for:  pay


 77%|█████████████████████████████████████████████████████████████▎                  | 151/197 [01:43<00:15,  3.06it/s]

Calculating Sentiment for:  officerattitude
Calculating Sentiment for:  understand


 77%|█████████████████████████████████████████████████████████████▋                  | 152/197 [01:43<00:17,  2.58it/s]

Calculating Sentiment for:  use


 78%|██████████████████████████████████████████████████████████████▏                 | 153/197 [01:44<00:16,  2.69it/s]

Calculating Sentiment for:  many


 79%|██████████████████████████████████████████████████████████████▉                 | 155/197 [01:44<00:13,  3.15it/s]

Calculating Sentiment for:  cool
Calculating Sentiment for:  gold


 79%|███████████████████████████████████████████████████████████████▎                | 156/197 [01:44<00:11,  3.53it/s]

Calculating Sentiment for:  star


 80%|███████████████████████████████████████████████████████████████▊                | 157/197 [01:45<00:10,  3.70it/s]

Calculating Sentiment for:  public


 81%|████████████████████████████████████████████████████████████████▌               | 159/197 [01:45<00:10,  3.79it/s]

Calculating Sentiment for:  hrs


 81%|████████████████████████████████████████████████████████████████▉               | 160/197 [01:46<00:11,  3.30it/s]

Calculating Sentiment for:  even
Calculating Sentiment for:  cold


 82%|█████████████████████████████████████████████████████████████████▍              | 161/197 [01:46<00:13,  2.61it/s]

Calculating Sentiment for:  air


 83%|██████████████████████████████████████████████████████████████████▏             | 163/197 [01:47<00:11,  2.85it/s]

Calculating Sentiment for:  accessible
Calculating Sentiment for:  poor


 84%|███████████████████████████████████████████████████████████████████             | 165/197 [01:48<00:15,  2.10it/s]

Calculating Sentiment for:  customerservice


 84%|███████████████████████████████████████████████████████████████████▍            | 166/197 [01:48<00:11,  2.66it/s]

Calculating Sentiment for:  worst
Calculating Sentiment for:  real


 85%|███████████████████████████████████████████████████████████████████▊            | 167/197 [01:49<00:11,  2.67it/s]

Calculating Sentiment for:  edgy


 85%|████████████████████████████████████████████████████████████████████▏           | 168/197 [01:49<00:12,  2.37it/s]

Calculating Sentiment for:  ziyuan


 86%|████████████████████████████████████████████████████████████████████▋           | 169/197 [01:50<00:12,  2.20it/s]

Calculating Sentiment for:  novel


 86%|█████████████████████████████████████████████████████████████████████           | 170/197 [01:50<00:10,  2.49it/s]

Calculating Sentiment for:  writer


 87%|█████████████████████████████████████████████████████████████████████▍          | 171/197 [01:51<00:13,  1.96it/s]

Calculating Sentiment for:  highly


 87%|█████████████████████████████████████████████████████████████████████▊          | 172/197 [01:51<00:13,  1.91it/s]

Calculating Sentiment for:  inaccessible


 88%|██████████████████████████████████████████████████████████████████████▎         | 173/197 [01:52<00:12,  1.92it/s]

Calculating Sentiment for:  hard


 88%|██████████████████████████████████████████████████████████████████████▋         | 174/197 [01:52<00:10,  2.21it/s]

Calculating Sentiment for:  get


 89%|███████████████████████████████████████████████████████████████████████         | 175/197 [01:52<00:08,  2.61it/s]

Calculating Sentiment for:  mrt


 89%|███████████████████████████████████████████████████████████████████████▍        | 176/197 [01:53<00:07,  2.63it/s]

Calculating Sentiment for:  stations


 90%|███████████████████████████████████████████████████████████████████████▉        | 177/197 [01:53<00:07,  2.70it/s]

Calculating Sentiment for:  book


 91%|████████████████████████████████████████████████████████████████████████▋       | 179/197 [01:54<00:06,  2.78it/s]

Calculating Sentiment for:  everyday


 91%|█████████████████████████████████████████████████████████████████████████       | 180/197 [01:54<00:06,  2.52it/s]

Calculating Sentiment for:  troublesome
Calculating Sentiment for:  thing


 92%|█████████████████████████████████████████████████████████████████████████▌      | 181/197 [01:56<00:09,  1.68it/s]

Calculating Sentiment for:  whats


 92%|█████████████████████████████████████████████████████████████████████████▉      | 182/197 [01:56<00:08,  1.80it/s]

Calculating Sentiment for:  sidenotecanteen


 93%|██████████████████████████████████████████████████████████████████████████▎     | 183/197 [01:56<00:07,  2.00it/s]

Calculating Sentiment for:  canteenb


 93%|██████████████████████████████████████████████████████████████████████████▋     | 184/197 [01:57<00:08,  1.57it/s]

Calculating Sentiment for:  officertalk


 94%|███████████████████████████████████████████████████████████████████████████▌    | 186/197 [01:58<00:04,  2.31it/s]

Calculating Sentiment for:  money
Calculating Sentiment for:  need


 95%|███████████████████████████████████████████████████████████████████████████▉    | 187/197 [01:58<00:04,  2.12it/s]

Calculating Sentiment for:  far


 96%|████████████████████████████████████████████████████████████████████████████▊   | 189/197 [01:59<00:02,  2.90it/s]

Calculating Sentiment for:  away
Calculating Sentiment for:  security


 96%|█████████████████████████████████████████████████████████████████████████████▏  | 190/197 [01:59<00:02,  2.84it/s]

Calculating Sentiment for:  troopers


 97%|█████████████████████████████████████████████████████████████████████████████▌  | 191/197 [02:00<00:02,  2.01it/s]

Calculating Sentiment for:  attitude


 98%|██████████████████████████████████████████████████████████████████████████████▍ | 193/197 [02:01<00:01,  2.62it/s]

Calculating Sentiment for:  unbelievably


 98%|██████████████████████████████████████████████████████████████████████████████▊ | 194/197 [02:01<00:01,  2.83it/s]

Calculating Sentiment for:  interestingly
Calculating Sentiment for:  enough


 99%|███████████████████████████████████████████████████████████████████████████████▌| 196/197 [02:02<00:00,  2.30it/s]

Calculating Sentiment for:  dirty


100%|████████████████████████████████████████████████████████████████████████████████| 197/197 [02:03<00:00,  1.60it/s]

Calculating Sentiment for:  pigs





In [363]:
fin

Unnamed: 0,Avg_sent,Descriptors,Freq
staff,0.880952,"professional ,unfriendly ,keep ,rude ,impatien...",11
people,0.877778,"professional ,there ,suggest ,here ,threatens ...",10
medical,0.908046,"checkup ,checkup ,screening ,conditions ,offic...",10
bad,0.878788,"reviews ,not ,that ,canteenb ,real ,canteenb ,...",8
place,0.880000,"clean ,fine ,waste ,check",7
...,...,...,...
whole,1.000000,process,1
process,0.923077,"whole ,expect",1
take,1.000000,hours,1
august,1.000000,am,1


In [365]:
def get_tfidf_features(df, content_str = "Content", min_ = 2, max_ = 0.5, ngramrange = (1,2)):
    
    # Replace "" with nan's for removal
    #df[content_str].replace('', np.nan, inplace=True)
    #df.dropna(subset=[content_str], inplace=True)
    #stop_words = set(stopwords.words('english'))
    #df[content_str] = df[content_str].apply(lambda x: ''.join([word for word in x.split() if word not in (stop_words)]))
    
    review_list = df[content_str].to_list()
    #feat_count = dict()
    #feat_sent = dict()
    #nlp = stanza.Pipeline('en')

        
    #print(review_list)
    tfidf = TfidfVectorizer(min_df = min_, max_df = max_, ngram_range = ngramrange);
    features = tfidf.fit_transform(review_list);
    q = pd.DataFrame(features.todense(), columns=tfidf.get_feature_names())
    
    return list(q.columns)

In [366]:
def refine_features(originaldf, sentimentdf):
    tfidf_output = get_tfidf_features(originaldf)
    sentimentdf = sentimentdf.reset_index()
    ft_extract = set(sentimentdf['index']);
    tfidf_extract = set(tfidf_output)
    
    intersecting_features = ft_extract.intersection(tfidf_extract)
    
    return_df = sentimentdf
    return_df = return_df.loc[return_df['index'].isin(list(intersecting_features))]
    print("Number of extracted features:")
    print("Initial = ", len(ft_extract), " TFIDF = ", len(intersecting_features), " Final after intersection = ", return_df.shape[0])
    return return_df
    
    

In [369]:
rt = refine_features(rdr, fin)
rt

Number of extracted features:
Initial =  197  TFIDF =  57  Final after intersection =  57


Unnamed: 0,index,Avg_sent,Descriptors,Freq
0,staff,0.880952,"professional ,unfriendly ,keep ,rude ,impatien...",11
1,people,0.877778,"professional ,there ,suggest ,here ,threatens ...",10
2,medical,0.908046,"checkup ,checkup ,screening ,conditions ,offic...",10
3,bad,0.878788,"reviews ,not ,that ,canteenb ,real ,canteenb ,...",8
4,place,0.88,"clean ,fine ,waste ,check",7
5,rude,0.896552,"staff ,extremely ,staff ,guards ,very ,staff ,...",6
6,time,0.888889,"long ,takes ,travel ,travel",5
7,reviews,0.890909,"other ,suggest ,nsf ,negative ,negative ,give ...",5
8,service,0.85,"dk ,public ,bad ,bad",5
9,nsf,0.884615,"reviews ,staff ,need ,cool",4


In [368]:
#rt.to_csv("finalFeatures.csv")