In [314]:
import pandas as pd
import numpy as np
import nltk
import regex
from tqdm import tqdm
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem.wordnet import WordNetLemmatizer 
from sklearn.feature_extraction.text import TfidfVectorizer
import stanza
stanza.download('en') # download English model
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/master/resources_1.2.0.json: 128kB [00:00, 18.4MB/s]
2021-03-08 17:52:01 INFO: Downloading default packages for language: en (English)...
2021-03-08 17:52:02 INFO: File exists: C:\Users\vibkr\stanza_resources\en\default.zip.
2021-03-08 17:52:05 INFO: Finished downloading models and saved to C:\Users\vibkr\stanza_resources.
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\vibkr\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vibkr\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\vibkr\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [315]:
def feature_extraction(txt, nlp):

    sentList = nltk.sent_tokenize(txt)

    retlist = [];
    
    for line in sentList:
        
        txt_list = nltk.word_tokenize(line)
        taggedList = nltk.pos_tag(txt_list)
        
        newwordList = []
        flag = 0
        for i in range(0,len(taggedList)-1):
            if(taggedList[i][1]=="NN" and taggedList[i+1][1]=="NN"):
                newwordList.append(taggedList[i][0]+taggedList[i+1][0])
                flag=1
            else:
                if(flag==1):
                    flag=0
                    continue
                newwordList.append(taggedList[i][0])
                if(i==len(taggedList)-2):
                    newwordList.append(taggedList[i+1][0])
        finaltxt = ' '.join(word for word in newwordList)
    
    
        stop_words = set(stopwords.words('english'))
        new_txt_list = nltk.word_tokenize(finaltxt)
        wordsList = [w for w in new_txt_list if not w in stop_words]
        taggedList = nltk.pos_tag(wordsList)
        
        doc = nlp(finaltxt)
        dep_node = []
        try:
            for dep_edge in doc.sentences[0].dependencies:
                dep_node.append([dep_edge[2].text, dep_edge[0].id, dep_edge[1]])
            for i in range(0, len(dep_node)):
                if (int(dep_node[i][1]) != 0):
                    dep_node[i][1] = newwordList[(int(dep_node[i][1]) - 1)]
        except:
            pass;
        
        #print(dep_node)
        
        featureList = []
        categories = []
        for i in taggedList:
            if(i[1]=='JJ' or i[1]=='NN' or i[1]=='JJR' or i[1]=='NNS' or i[1]=='RB'):
                featureList.append(list(i))
                categories.append(i[0])
        #print(featureList)
        #print(categories)
        
        
        fcluster = []
        for i in featureList:
            filist = []
            for j in dep_node:
                if((j[0]==i[0] or j[1]==i[0]) and (j[2] in [
                    # Different types of words that are identified as potential features
                    "nsubj",
                    "acl:relcl",
                    "obj",
                    "dobj",
                    "agent",
                    "advmod",
                    "amod",
                    "neg",
                    "prep_of",
                    "acomp",
                    "xcomp",
                    "compound"
                ])):
                    if(j[0]==i[0]):
                        filist.append(j[1])
                    else:
                        filist.append(j[0])
            fcluster.append([i[0], filist])
        print(fcluster) 
        
        # Remove all features with no sentiment word:
        
        retlist.append(fcluster)
    return retlist;
    

In [316]:
def do_extraction(df, nlp, feat_count, feat_sent, content_str = "Content"):
    idx = 0;
    # Replace "" with nan's for removal
    df[content_str].replace('', np.nan, inplace=True)
    df.dropna(subset=[content_str], inplace=True)
    
    review_list = df[content_str].to_list()
    #feat_count = dict()
    #feat_sent = dict()
    #nlp = stanza.Pipeline('en')
    
    
    
    
    print(" Processing : " , df.shape[0], "rows of data")
    for review in tqdm(review_list):
        print("Review Number : ", idx);
        
        # Some data pre-processing
        
        review = review.lower()
        
        # Merge hyphenated words
        separate = review.split('-')
        review = ''.join(separate)
        
        # Remove non-alphabets
        review = re.sub(r'[^a-z\s\t]', '', review)
        
        idx += 1;
        if idx >= df.shape[0]:
            break;
        try:
            output = feature_extraction(review, nlp);
        except:
            pass;
        for sent in output:
            for pair in sent:
                print(pair)
                if pair[0] in feat_sent:
                    if pair[1] is not None:
                        flist = feat_sent[pair[0]]
                        if isinstance(pair[1], list):
                            for i in pair[1]:
                                flist.append(i)
                        else:
                            flist.append(pair[1])
                        feat_sent[pair[0]] = flist;
                else:
                    if pair[1] is not None:
                        flist = pair[1]
                    else:
                        flist = list()
                    feat_sent[pair[0]] = flist;
                
                if pair[0] in feat_count:
                    feat_count[pair[0]] = feat_count[pair[0]] + 1;
                else:
                    feat_count[pair[0]] = 1
    
    return feat_count, feat_sent;

In [317]:
def get_sentiment(feat_count, feat_sent, nlp):

    sentiment_score = dict()

    # Delete features with no descriptors
    cob = feat_sent.copy()
    for feat in cob.keys():
        #print(cob[feat])
        
        if cob[feat] == []:
            del feat_sent[feat]
        else:
            feat_sent[feat] = ' ,'.join(feat_sent[feat])

    # Run pre-built sentiment score and take avg of all descriptors
    for f in tqdm(feat_sent.keys()):
        print("Calculating Sentiment for: ", f);
        ssum = 0;
        for g in feat_sent[f]:
            try:
                doc = nlp(g);

                for i in doc.sentences:

                        #print(i.sentiment)
                        ssum += i.sentiment;
            except:
                pass;

        sentiment_score[f] = ssum / len(b[f])

    adf = pd.DataFrame.from_dict(feat_count, orient='index', columns=['Freq'])
    adf.sort_values(by="Freq", ascending=False, inplace = True)

    

    avg_sent = pd.DataFrame.from_dict(sentiment_score, orient='index', columns=["Avg_sent"])
    desc_words = pd.DataFrame.from_dict(feat_sent, orient="index", columns=["Descriptors"])
    
    avg_sent = avg_sent.merge(desc_words, left_index=True, right_index=True)
    
    
    final_sent = avg_sent.merge(adf, left_index=True, right_index=True)
    final_sent.sort_values(by="Freq", ascending=False, inplace=True)
    return final_sent;

In [318]:
rdr = pd.read_csv('../ScrapedOutput/cmpb.csv')

nlp = stanza.Pipeline('en')
a = dict()
b = dict()
a, b = do_extraction(rdr, nlp, a, b)

2021-03-08 17:52:05 INFO: Loading these models for language: en (English):
| Processor | Package   |
-------------------------
| tokenize  | combined  |
| pos       | combined  |
| lemma     | combined  |
| depparse  | combined  |
| sentiment | sstplus   |
| ner       | ontonotes |

2021-03-08 17:52:05 INFO: Use device: cpu
2021-03-08 17:52:05 INFO: Loading: tokenize
2021-03-08 17:52:05 INFO: Loading: pos
2021-03-08 17:52:06 INFO: Loading: lemma
2021-03-08 17:52:06 INFO: Loading: depparse
2021-03-08 17:52:06 INFO: Loading: sentiment
2021-03-08 17:52:06 INFO: Loading: ner
2021-03-08 17:52:07 INFO: Done loading processors!
  0%|                                                                                           | 0/52 [00:00<?, ?it/s]

 Processing :  52 rows of data
Review Number :  0


  2%|█▌                                                                                 | 1/52 [00:00<00:14,  3.49it/s]

[['professional', ['very', 'people']], ['people', ['professional', 'recommended', 'there']], ['patient', ['very', 'kind']], ['kind', ['patient', 'recommended', 'respectful']], ['respectful', ['kind', 'smooth']], ['smooth', ['very', 'respectful']], ['medical', ['checkup']], ['checkup', ['medical']]]
['professional', ['very', 'people']]
['people', ['professional', 'recommended', 'there']]
['patient', ['very', 'kind']]
['kind', ['patient', 'recommended', 'respectful']]
['respectful', ['kind', 'smooth']]
['smooth', ['very', 'respectful']]
['medical', ['checkup']]
['checkup', ['medical']]
Review Number :  1


  4%|███▏                                                                               | 2/52 [00:00<00:16,  3.01it/s]

[['reviews', ['other', 'suggest']], ['people', ['suggest', 'here']], ['adequately', ['friendlylike']], ['anywhere', ['friendlylike', 'else']], ['else', ['anywhere']], ['sg', []], ['place', ['clean']], ['really', ['clean']], ['clean', ['place', 'really']], ['efficient', []]]
['reviews', ['other', 'suggest']]
['people', ['suggest', 'here']]
['adequately', ['friendlylike']]
['anywhere', ['friendlylike', 'else']]
['else', ['anywhere']]
['sg', []]
['place', ['clean']]
['really', ['clean']]
['clean', ['place', 'really']]
['efficient', []]
Review Number :  2


  6%|████▊                                                                              | 3/52 [00:01<00:17,  2.77it/s]

[['place', ['experience']], ['overall', ['experience']], ['fine', ['experience']], ['good', ['experience']], ['experience', ['place', 'overall', 'fine', 'good']], ['tip', ['just']], ['u', ['have']], ['medical', ['checkupdont']], ['checkupdont', ['medical']], ['late', ['go']], ['else', []], ['u', ['have']], ['back', ['come']], ['day', ['other']], ['complete', ['rest']], ['rest', ['complete']]]
['place', ['experience']]
['overall', ['experience']]
['fine', ['experience']]
['good', ['experience']]
['experience', ['place', 'overall', 'fine', 'good']]
['tip', ['just']]
['u', ['have']]
['medical', ['checkupdont']]
['checkupdont', ['medical']]
['late', ['go']]
['else', []]
['u', ['have']]
['back', ['come']]
['day', ['other']]
['complete', ['rest']]
['rest', ['complete']]
Review Number :  3


  8%|██████▍                                                                            | 4/52 [00:01<00:21,  2.27it/s]

[['nscheckup', []], ['checkuptoday', []], ['gateentrance', ['checkcounter']], ['entrancesecurity', ['checkcounter']], ['checkcounter', ['gateentrance', 'entrancesecurity', 'securitycheck']], ['stickerpass', ['take']], ['walk', ['just']], ['gatedont', ['be']], ['mei', ['stood']], ['thinking', []], ['scan', ['i', 'the']]]
['nscheckup', []]
['checkuptoday', []]
['gateentrance', ['checkcounter']]
['entrancesecurity', ['checkcounter']]
['checkcounter', ['gateentrance', 'entrancesecurity', 'securitycheck']]
['stickerpass', ['take']]
['walk', ['just']]
['gatedont', ['be']]
['mei', ['stood']]
['thinking', []]
['scan', ['i', 'the']]
Review Number :  4


 12%|█████████▌                                                                         | 6/52 [00:02<00:17,  2.61it/s]

[['dontbother', ['end']], ['youll', []], ['end', ['dontbother', 'waiting']], ['hours', ['more']], ['realise', ['only', 'that', 'youre']], ['youre', ['realise']], ['last', ['person']], ['person', ['last', 'care']], ['line', ['doctors']], ['doctors', ['line']], ['really', ['care', 'have']], ['fair', ['which']], ['dont', []], ['really', ['care', 'have']], ['choice', ['have']], ['place', ['waste']], ['complete', ['waste']], ['waste', ['place', 'complete']], ['space', []], ['time', []]]
['dontbother', ['end']]
['youll', []]
['end', ['dontbother', 'waiting']]
['hours', ['more']]
['realise', ['only', 'that', 'youre']]
['youre', ['realise']]
['last', ['person']]
['person', ['last', 'care']]
['line', ['doctors']]
['doctors', ['line']]
['really', ['care', 'have']]
['fair', ['which']]
['dont', []]
['really', ['care', 'have']]
['choice', ['have']]
['place', ['waste']]
['complete', ['waste']]
['waste', ['place', 'complete']]
['space', []]
['time', []]
Review Number :  5
[['nsf', ['reviews']], ['rev

 13%|███████████▏                                                                       | 7/52 [00:02<00:14,  3.07it/s]

[['extremely', ['long']], ['long', ['extremely', 'time']], ['time', ['long', 'takes']], ['due', []], ['overall', ['waste']], ['complete', ['waste']], ['waste', ['overall', 'complete', 'waiting']], ['time', ['long', 'takes']]]
['extremely', ['long']]
['long', ['extremely', 'time']]
['time', ['long', 'takes']]
['due', []]
['overall', ['waste']]
['complete', ['waste']]
['waste', ['overall', 'complete', 'waiting']]
['time', ['long', 'takes']]
Review Number :  7


 15%|████████████▊                                                                      | 8/52 [00:03<00:15,  2.79it/s]

[['staff', ['professional']], ['professional', ['staff']], ['idk', []], ['bad', ['reviews']], ['reviews', ['bad', 'what']], ['personal', ['experienceeveryone']], ['experienceeveryone', ['personal']], ['helpful', ['very', 'is']], ['initiative', ['take']], ['help', ['me']]]
['staff', ['professional']]
['professional', ['staff']]
['idk', []]
['bad', ['reviews']]
['reviews', ['bad', 'what']]
['personal', ['experienceeveryone']]
['experienceeveryone', ['personal']]
['helpful', ['very', 'is']]
['initiative', ['take']]
['help', ['me']]
Review Number :  8


 17%|██████████████▎                                                                    | 9/52 [00:03<00:20,  2.12it/s]

[['unfriendly', ['staff']], ['staff', ['unfriendly', 'doing', 'keep']], ['guards', ['doing']], ['job', ['doing']], ['staff', ['unfriendly', 'doing', 'keep']], ['stuff', ['more', 'do', 'suppose']], ['suppose', ['which', 'they', 'not', 'stuff']], ['absolutely', ['atrocious']], ['atrocious', ['absolutely']], ['woman', ['is']], ['keeps', ['who', 'vac', 'changing']], ['tone', ['changing']], ['talks', ['she']], ['people', ['threatens']], ['people', ['threatens']], ['always', ['removes']], ['mask', ['removes']], ['talk', []], ['people', ['threatens']], ['expressions', ['show']]]
['unfriendly', ['staff']]
['staff', ['unfriendly', 'doing', 'keep']]
['guards', ['doing']]
['job', ['doing']]
['staff', ['unfriendly', 'doing', 'keep']]
['stuff', ['more', 'do', 'suppose']]
['suppose', ['which', 'they', 'not', 'stuff']]
['absolutely', ['atrocious']]
['atrocious', ['absolutely']]
['woman', ['is']]
['keeps', ['who', 'vac', 'changing']]
['tone', ['changing']]
['talks', ['she']]
['people', ['threatens']]


 19%|███████████████▊                                                                  | 10/52 [00:03<00:16,  2.52it/s]

[['staff', ['rude']], ['medical', ['screening']], ['screening', ['medical', 'station']], ['station', ['screening']], ['weight', []], ['extremely', ['rude']], ['rude', ['staff', 'extremely']], ['unfriendly', []]]
['staff', ['rude']]
['medical', ['screening']]
['screening', ['medical', 'station']]
['station', ['screening']]
['weight', []]
['extremely', ['rude']]
['rude', ['staff', 'extremely']]
['unfriendly', []]
Review Number :  10


 21%|█████████████████▎                                                                | 11/52 [00:04<00:14,  2.80it/s]

[['staff', ['impatient']], ['serious', ['impatient']], ['impatient', ['staff', 'not', 'serious']], ['undesirably', ['long']], ['long', ['undesirably', 'times']], ['times', ['long', 'waiting', 'recommend']], ['cmpb', ['recommend']], ['friend', []]]
['staff', ['impatient']]
['serious', ['impatient']]
['impatient', ['staff', 'not', 'serious']]
['undesirably', ['long']]
['long', ['undesirably', 'times']]
['times', ['long', 'waiting', 'recommend']]
['cmpb', ['recommend']]
['friend', []]
Review Number :  11


 23%|██████████████████▉                                                               | 12/52 [00:04<00:16,  2.46it/s]

[['sent', ['just', 'son']], ['son', ['sent', 'got']], ['preenlistment', ['enlistmentcheckup']], ['enlistmentcheckup', ['preenlistment']], ['morning', []], ['guard', ['give']], ['entrancecouldnt', []], ['clear', ['instructions']], ['instructions', ['clear', 'give']], ['son', ['sent', 'got']], ['alight', ['got']], ['couldnt', []], ['drive', ['we', 'further']], ['hello', []], ['please', []], ['train', ['army']], ['army', ['train']]]
['sent', ['just', 'son']]
['son', ['sent', 'got']]
['preenlistment', ['enlistmentcheckup']]
['enlistmentcheckup', ['preenlistment']]
['morning', []]
['guard', ['give']]
['entrancecouldnt', []]
['clear', ['instructions']]
['instructions', ['clear', 'give']]
['son', ['sent', 'got']]
['alight', ['got']]
['couldnt', []]
['drive', ['we', 'further']]
['hello', []]
['please', []]
['train', ['army']]
['army', ['train']]
Review Number :  12


 25%|████████████████████▌                                                             | 13/52 [00:05<00:15,  2.59it/s]

[['inconvenient', ['most', 'locations']], ['locations', ['inconvenient', 'seen']], ['ever', ['seen']], ['terrible', ['directions']], ['directions', ['terrible', 'seen']], ['staff', ['rude', 'expect']], ['whole', ['process']], ['process', ['whole', 'expect']], ['take', ['hours', 'recommended']], ['hours', ['take']]]
['inconvenient', ['most', 'locations']]
['locations', ['inconvenient', 'seen']]
['ever', ['seen']]
['terrible', ['directions']]
['directions', ['terrible', 'seen']]
['staff', ['rude', 'expect']]
['whole', ['process']]
['process', ['whole', 'expect']]
['take', ['hours', 'recommended']]
['hours', ['take']]
Review Number :  13


 27%|██████████████████████                                                            | 14/52 [00:05<00:16,  2.31it/s]

[['ok', ['so', 'staff', 'typical']], ['lah', []], ['review', []], ['visitjanuary', []], ['maybe', ['sikit']], ['sikit', ['maybe', 'staff']], ['date', []], ['nsf', ['staff']], ['staff', ['sikit', 'nsf', 'ok']], ['ok', ['so', 'staff', 'typical']], ['typical', ['ok']], ['bochap', []], ['happy', []], ['bird', []], ['tio', ['vocationmo']], ['switch', ['vocationmo']], ['vocationmo', ['switch', 'tio']], ['hand', ['other']], ['si', []], ['pehbuay', []], ['buaysong', []], ['probably', ['this']]]
['ok', ['so', 'staff', 'typical']]
['lah', []]
['review', []]
['visitjanuary', []]
['maybe', ['sikit']]
['sikit', ['maybe', 'staff']]
['date', []]
['nsf', ['staff']]
['staff', ['sikit', 'nsf', 'ok']]
['ok', ['so', 'staff', 'typical']]
['typical', ['ok']]
['bochap', []]
['happy', []]
['bird', []]
['tio', ['vocationmo']]
['switch', ['vocationmo']]
['vocationmo', ['switch', 'tio']]
['hand', ['other']]
['si', []]
['pehbuay', []]
['buaysong', []]
['probably', ['this']]
Review Number :  14


 29%|███████████████████████▋                                                          | 15/52 [00:05<00:13,  2.70it/s]

[['tbh', []], ['bad', ['its', 'not', 'that']], ['place', []], ['visit', ['contrary']], ['contrary', ['visit']], ['others', ['saying']]]
['tbh', []]
['bad', ['its', 'not', 'that']]
['place', []]
['visit', ['contrary']]
['contrary', ['visit']]
['others', ['saying']]
Review Number :  15


 31%|█████████████████████████▏                                                        | 16/52 [00:06<00:15,  2.36it/s]

[['please', []], ['sure', ['make']], ['medical', ['conditions', 'officer', 'specialistletter']], ['conditions', ['medical', 'declare', 'severe']], ['severe', ['conditions']], ['minor', []], ['medical', ['conditions', 'officer', 'specialistletter']], ['officer', ['medical']], ['checkup', []], ['fare', ['you', 'well']], ['well', ['fare']], ['medical', ['conditions', 'officer', 'specialistletter']], ['specialistletter', ['medical', 'get']]]
['please', []]
['sure', ['make']]
['medical', ['conditions', 'officer', 'specialistletter']]
['conditions', ['medical', 'declare', 'severe']]
['severe', ['conditions']]
['minor', []]
['medical', ['conditions', 'officer', 'specialistletter']]
['officer', ['medical']]
['checkup', []]
['fare', ['you', 'well']]
['well', ['fare']]
['medical', ['conditions', 'officer', 'specialistletter']]
['specialistletter', ['medical', 'get']]
Review Number :  16


 35%|████████████████████████████▍                                                     | 18/52 [00:06<00:10,  3.11it/s]

[['guards', ['ask', 'rude']], ['rude', ['very', 'guards']], ['ask', ['guards', 'question']], ['question', ['ask', 'ignore']], ['ignore', ['they', 'question']], ['rest', ['nice']], ['staff', []], ['nice', ['rest', 'though']], ['friendly', []]]
['guards', ['ask', 'rude']]
['rude', ['very', 'guards']]
['ask', ['guards', 'question']]
['question', ['ask', 'ignore']]
['ignore', ['they', 'question']]
['rest', ['nice']]
['staff', []]
['nice', ['rest', 'though']]
['friendly', []]
Review Number :  17
[['overall', ['experience']], ['great', ['experience']], ['experience', ['overall', 'great']], ['medic', ['professional']], ['professional', ['medic']], ['blooddraw', []]]
['overall', ['experience']]
['great', ['experience']]
['experience', ['overall', 'great']]
['medic', ['professional']]
['professional', ['medic']]
['blooddraw', []]
Review Number :  18


 40%|█████████████████████████████████                                                 | 21/52 [00:07<00:06,  4.88it/s]

[['kind', ['very', 'people']], ['people', ['kind']], ['cmpd', []], ['medical', ['check']], ['check', ['medical', 'up']], ['constantly', ['greeted']], ['smile', []], ['patience', []]]
['kind', ['very', 'people']]
['people', ['kind']]
['cmpd', []]
['medical', ['check']]
['check', ['medical', 'up']]
['constantly', ['greeted']]
['smile', []]
['patience', []]
Review Number :  19
[['dont', []], ['bully', ['me']]]
['dont', []]
['bully', ['me']]
Review Number :  20
[['meh', ['staff']], ['staff', ['meh', 'nice']], ['pretty', ['nice']], ['nice', ['staff', 'pretty']]]
['meh', ['staff']]
['staff', ['meh', 'nice']]
['pretty', ['nice']]
['nice', ['staff', 'pretty']]
Review Number :  21


 42%|██████████████████████████████████▋                                               | 22/52 [00:07<00:06,  4.70it/s]

[['lousy', ['service']], ['service', ['lousy', 'dk', 'ask']], ['dk', ['service']], ['help', ['ask']], ['people', ['ask']], ['question', ['ask']], ['also', ['say']], ['dont', []], ['ask', ['help', 'people', 'service', 'question', 'say', 'taiji']], ['taiji', ['not', 'ask']]]
['lousy', ['service']]
['service', ['lousy', 'dk', 'ask']]
['dk', ['service']]
['help', ['ask']]
['people', ['ask']]
['question', ['ask']]
['also', ['say']]
['dont', []]
['ask', ['help', 'people', 'service', 'question', 'say', 'taiji']]
['taiji', ['not', 'ask']]
Review Number :  22


 46%|█████████████████████████████████████▊                                            | 24/52 [00:08<00:06,  4.11it/s]

[['tuesday', []], ['negative', ['reviews']], ['reviews', ['negative']], ['share', ['i', 'opinion']], ['quick', ['opinion']], ['personal', ['opinion']], ['opinion', ['quick', 'personal', 'share']], ['perhaps', ['varies']], ['varies', ['perhaps', 'it']], ['person', []], ['person', []], ['trip', ['great']], ['cmpb', []], ['ultimately', ['great']], ['great', ['trip', 'ultimately']], ['definitely', ['experience']], ['memorable', ['experience']], ['experience', ['definitely', 'memorable']], ['medical', []]]
['tuesday', []]
['negative', ['reviews']]
['reviews', ['negative']]
['share', ['i', 'opinion']]
['quick', ['opinion']]
['personal', ['opinion']]
['opinion', ['quick', 'personal', 'share']]
['perhaps', ['varies']]
['varies', ['perhaps', 'it']]
['person', []]
['person', []]
['trip', ['great']]
['cmpb', []]
['ultimately', ['great']]
['great', ['trip', 'ultimately']]
['definitely', ['experience']]
['memorable', ['experience']]
['experience', ['definitely', 'memorable']]
['medical', []]
Review

 48%|███████████████████████████████████████▍                                          | 25/52 [00:08<00:05,  4.66it/s]

[['others', []], ['dirt', []]]
['others', []]
['dirt', []]
Review Number :  25


 52%|██████████████████████████████████████████▌                                       | 27/52 [00:08<00:05,  4.57it/s]

[['preenlistment', []], ['sessions', ['few', 'counselling', 'screening']], ['chargeinconvenient', ['inconvenientlocation']], ['inconvenientlocation', ['chargeinconvenient', 'answer']]]
['preenlistment', []]
['sessions', ['few', 'counselling', 'screening']]
['chargeinconvenient', ['inconvenientlocation']]
['inconvenientlocation', ['chargeinconvenient', 'answer']]
Review Number :  26
[['cookhouse', ['nsf']], ['nsf', ['cookhouse', 'need']], ['meagre', ['pay']], ['pay', ['meagre']]]
['cookhouse', ['nsf']]
['nsf', ['cookhouse', 'need']]
['meagre', ['pay']]
['pay', ['meagre']]
Review Number :  27


 54%|████████████████████████████████████████████▏                                     | 28/52 [00:09<00:06,  3.65it/s]

[['officerattitude', ['good']], ['good', ['officerattitude', 'not']], ['patience', ['understand']], ['servicei', []], ['dont', []], ['use', ['he', 'tone']], ['unfriendly', ['tone']], ['tone', ['unfriendly', 'use']], ['repeat', []], ['language', []]]
['officerattitude', ['good']]
['good', ['officerattitude', 'not']]
['patience', ['understand']]
['servicei', []]
['dont', []]
['use', ['he', 'tone']]
['unfriendly', ['tone']]
['tone', ['unfriendly', 'use']]
['repeat', []]
['language', []]
Review Number :  28


 58%|███████████████████████████████████████████████▎                                  | 30/52 [00:09<00:05,  3.83it/s]

[['idk', []], ['many', ['so', 'people']], ['people', ['many', 'give']], ['negative', ['reviews']], ['reviews', ['negative', 'give']], ['medical', ['check']], ['check', ['medical', 'up']], ['staff', ['friendly']], ['friendly', ['staff']], ['nsf', ['cool']], ['cool', ['nsf']], ['overall', ['had']], ['good', ['experience']], ['experience', ['good', 'had']]]
['idk', []]
['many', ['so', 'people']]
['people', ['many', 'give']]
['negative', ['reviews']]
['reviews', ['negative', 'give']]
['medical', ['check']]
['check', ['medical', 'up']]
['staff', ['friendly']]
['friendly', ['staff']]
['nsf', ['cool']]
['cool', ['nsf']]
['overall', ['had']]
['good', ['experience']]
['experience', ['good', 'had']]
Review Number :  29
[['gold', ['star']], ['star', ['gold']], ['public', ['service']], ['service', ['public']]]
['gold', ['star']]
['star', ['gold']]
['public', ['service']]
['service', ['public']]
Review Number :  30


 60%|████████████████████████████████████████████████▉                                 | 31/52 [00:10<00:05,  3.53it/s]

[['hrs', []], ['form', []], ['meeting', []], ['even', ['conditioning']], ['really', ['bored']], ['bored', ['really', 'conditioning']], ['cold', ['conditioning']], ['air', ['conditioning']], ['wifi', []]]
['hrs', []]
['form', []]
['meeting', []]
['even', ['conditioning']]
['really', ['bored']]
['bored', ['really', 'conditioning']]
['cold', ['conditioning']]
['air', ['conditioning']]
['wifi', []]
Review Number :  31
[['accessible', ['not']]]
['accessible', ['not']]
Review Number :  32


 63%|████████████████████████████████████████████████████                              | 33/52 [00:10<00:04,  4.63it/s]

[['extremely', ['poor']], ['poor', ['extremely', 'customerservice']], ['rude', []], ['customerservice', ['poor']]]
['extremely', ['poor']]
['poor', ['extremely', 'customerservice']]
['rude', []]
['customerservice', ['poor']]
Review Number :  33
[['worst', ['day']], ['day', ['worst']], ['life', []]]
['worst', ['day']]
['day', ['worst']]
['life', []]
Review Number :  34


 67%|███████████████████████████████████████████████████████▏                          | 35/52 [00:10<00:03,  5.33it/s]

[['wooo', []], ['real', ['edgy']], ['edgy', ['real', 'ziyuan']], ['ziyuan', ['edgy', 'writer']], ['novel', ['writer']], ['writer', ['ziyuan', 'you', 'novel']]]
['wooo', []]
['real', ['edgy']]
['edgy', ['real', 'ziyuan']]
['ziyuan', ['edgy', 'writer']]
['novel', ['writer']]
['writer', ['ziyuan', 'you', 'novel']]
Review Number :  35


 71%|██████████████████████████████████████████████████████████▎                       | 37/52 [00:10<00:02,  5.54it/s]

[['highly', ['inaccessible']], ['inaccessible', ['highly']], ['hard', ['so', 'get']], ['get', ['hard', 'there']], ['mrt', ['stations']], ['stations', ['not', 'mrt']]]
['highly', ['inaccessible']]
['inaccessible', ['highly']]
['hard', ['so', 'get']]
['get', ['hard', 'there']]
['mrt', ['stations']]
['stations', ['not', 'mrt']]
Review Number :  36
[['bane', []], ['existence', []]]
['bane', []]
['existence', []]
Review Number :  37


 75%|█████████████████████████████████████████████████████████████▌                    | 39/52 [00:11<00:02,  4.58it/s]

[['cookhouse', []], ['book', ['get']], ['everyday', ['thing']], ['troublesome', ['most', 'thing']], ['thing', ['everyday', 'troublesome', 'discussing']], ['whats', ['discussing']], ['sidenotecanteen', ['lunchsidenote', 'canteenb']], ['canteenb', ['sidenotecanteen', 'bad']], ['bad', ['canteenb', 'real']], ['real', ['bad']], ['bad', ['canteenb', 'real']]]
['cookhouse', []]
['book', ['get']]
['everyday', ['thing']]
['troublesome', ['most', 'thing']]
['thing', ['everyday', 'troublesome', 'discussing']]
['whats', ['discussing']]
['sidenotecanteen', ['lunchsidenote', 'canteenb']]
['canteenb', ['sidenotecanteen', 'bad']]
['bad', ['canteenb', 'real']]
['real', ['bad']]
['bad', ['canteenb', 'real']]
Review Number :  38
[['people', ['go']], ['even', ['go']], ['placemiddle', []], ['nowhere', []]]
['people', ['go']]
['even', ['go']]
['placemiddle', []]
['nowhere', []]
Review Number :  39


 77%|███████████████████████████████████████████████████████████████                   | 40/52 [00:11<00:02,  4.04it/s]

[['officertalk', ['prepared']], ['money', ['own']], ['hard', ['very']], ['middle', []], ['village', []], ['something', []]]
['officertalk', ['prepared']]
['money', ['own']]
['hard', ['very']]
['middle', []]
['village', []]
['something', []]
Review Number :  40
[['rude', ['staff']], ['staff', ['rude']]]
['rude', ['staff']]
['staff', ['rude']]
Review Number :  41


 83%|███████████████████████████████████████████████████████████████████▊              | 43/52 [00:12<00:01,  5.36it/s]

[['inaccessible', ['need']], ['need', ['inaccessible']], ['h', []], ['time', ['travel']], ['waste', ['more']], ['time', ['travel']]]
['inaccessible', ['need']]
['need', ['inaccessible']]
['h', []]
['time', ['travel']]
['waste', ['more']]
['time', ['travel']]
Review Number :  42
[['far', ['so', 'away']], ['away', ['far']], ['middle', []], ['nowhere', []]]
['far', ['so', 'away']]
['away', ['far']]
['middle', []]
['nowhere', []]
Review Number :  43


 87%|██████████████████████████████████████████████████████████████████████▉           | 45/52 [00:12<00:01,  5.64it/s]

[['bad', ['very', 'troopers', 'very', 'attitude']], ['security', ['troopers']], ['troopers', ['bad', 'security', 'have']], ['bad', ['very', 'troopers', 'very', 'attitude']], ['attitude', ['bad', 'have']], ['towards', []], ['public', []]]
['bad', ['very', 'troopers', 'very', 'attitude']]
['security', ['troopers']]
['troopers', ['bad', 'security', 'have']]
['bad', ['very', 'troopers', 'very', 'attitude']]
['attitude', ['bad', 'have']]
['towards', []]
['public', []]
Review Number :  44
[['middle', []], ['nowhere', []]]
['middle', []]
['nowhere', []]
Review Number :  45


 88%|████████████████████████████████████████████████████████████████████████▌         | 46/52 [00:12<00:01,  5.33it/s]

[['place', []], ['well', ['kept']], ['people', ['kept']], ['unbelievably', ['rude']]]
['place', []]
['well', ['kept']]
['people', ['kept']]
['unbelievably', ['rude']]
Review Number :  46
[['bad', ['service']], ['service', ['bad']]]
['bad', ['service']]
['service', ['bad']]
Review Number :  47


 92%|███████████████████████████████████████████████████████████████████████████▋      | 48/52 [00:13<00:00,  6.12it/s]

[['interestingly', ['enough']], ['enough', ['interestingly', 'removed']], ['negative', ['reviews']], ['reviews', ['negative']]]
['interestingly', ['enough']]
['enough', ['interestingly', 'removed']]
['negative', ['reviews']]
['reviews', ['negative']]
Review Number :  48
[['bad', ['service']], ['service', ['bad']]]
['bad', ['service']]
['service', ['bad']]
Review Number :  49


 98%|████████████████████████████████████████████████████████████████████████████████▍ | 51/52 [00:13<00:00,  3.78it/s]

[['sheat', ['treated']], ['dirty', ['pigs']], ['pigs', ['dirty', 'training']], ['step', []], ['minefields', []]]
['sheat', ['treated']]
['dirty', ['pigs']]
['pigs', ['dirty', 'training']]
['step', []]
['minefields', []]
Review Number :  50
[['gncpresent', []]]
['gncpresent', []]
Review Number :  51





In [319]:
fin = get_sentiment(a, b, nlp)

  0%|                                                                                          | 0/199 [00:00<?, ?it/s]

Calculating Sentiment for:  professional


  1%|▍                                                                                 | 1/199 [00:01<03:34,  1.08s/it]

Calculating Sentiment for:  people


  1%|▊                                                                                 | 2/199 [00:05<10:14,  3.12s/it]

Calculating Sentiment for:  patient


  2%|█▏                                                                                | 3/199 [00:06<06:08,  1.88s/it]

Calculating Sentiment for:  kind


  2%|█▋                                                                                | 4/199 [00:07<06:03,  1.86s/it]

Calculating Sentiment for:  respectful


  3%|██                                                                                | 5/199 [00:08<04:27,  1.38s/it]

Calculating Sentiment for:  smooth


  3%|██▍                                                                               | 6/199 [00:09<03:40,  1.14s/it]

Calculating Sentiment for:  medical


  4%|██▉                                                                               | 7/199 [00:16<09:45,  3.05s/it]

Calculating Sentiment for:  checkup


  4%|███▎                                                                              | 8/199 [00:16<06:56,  2.18s/it]

Calculating Sentiment for:  reviews


  5%|███▋                                                                              | 9/199 [00:18<07:16,  2.30s/it]

Calculating Sentiment for:  adequately


  5%|████                                                                             | 10/199 [00:19<05:34,  1.77s/it]

Calculating Sentiment for:  anywhere


  6%|████▍                                                                            | 11/199 [00:20<04:36,  1.47s/it]

Calculating Sentiment for:  else


  6%|████▉                                                                            | 12/199 [00:20<03:32,  1.14s/it]

Calculating Sentiment for:  place


  7%|█████▎                                                                           | 13/199 [00:21<03:39,  1.18s/it]

Calculating Sentiment for:  really


  7%|█████▋                                                                           | 14/199 [00:23<03:51,  1.25s/it]

Calculating Sentiment for:  clean


  8%|██████                                                                           | 15/199 [00:23<03:12,  1.04s/it]

Calculating Sentiment for:  overall


  8%|██████▌                                                                          | 16/199 [00:25<03:31,  1.16s/it]

Calculating Sentiment for:  fine


  9%|██████▉                                                                          | 17/199 [00:25<02:52,  1.05it/s]

Calculating Sentiment for:  good


  9%|███████▎                                                                         | 18/199 [00:27<03:44,  1.24s/it]

Calculating Sentiment for:  experience


 10%|████████▏                                                                        | 20/199 [00:31<03:55,  1.32s/it]

Calculating Sentiment for:  tip


 11%|████████▌                                                                        | 21/199 [00:31<03:05,  1.04s/it]

Calculating Sentiment for:  u
Calculating Sentiment for:  checkupdont


 11%|████████▉                                                                        | 22/199 [00:31<02:26,  1.21it/s]

Calculating Sentiment for:  late
Calculating Sentiment for:  back


 12%|█████████▊                                                                       | 24/199 [00:32<01:29,  1.96it/s]

Calculating Sentiment for:  day


 13%|██████████▏                                                                      | 25/199 [00:32<01:28,  1.96it/s]

Calculating Sentiment for:  complete


 13%|██████████▌                                                                      | 26/199 [00:33<01:38,  1.75it/s]

Calculating Sentiment for:  rest


 14%|██████████▉                                                                      | 27/199 [00:33<01:38,  1.75it/s]

Calculating Sentiment for:  gateentrance


 14%|███████████▍                                                                     | 28/199 [00:34<01:36,  1.78it/s]

Calculating Sentiment for:  entrancesecurity


 15%|███████████▊                                                                     | 29/199 [00:34<01:35,  1.79it/s]

Calculating Sentiment for:  checkcounter


 16%|████████████▌                                                                    | 31/199 [00:37<02:01,  1.39it/s]

Calculating Sentiment for:  stickerpass
Calculating Sentiment for:  walk


 16%|█████████████                                                                    | 32/199 [00:37<01:34,  1.78it/s]

Calculating Sentiment for:  gatedont
Calculating Sentiment for:  mei


 17%|█████████████▊                                                                   | 34/199 [00:37<01:02,  2.66it/s]

Calculating Sentiment for:  scan


 18%|██████████████▋                                                                  | 36/199 [00:37<00:47,  3.46it/s]

Calculating Sentiment for:  dontbother
Calculating Sentiment for:  end


 19%|███████████████                                                                  | 37/199 [00:38<01:10,  2.31it/s]

Calculating Sentiment for:  hours


 19%|███████████████▍                                                                 | 38/199 [00:39<01:08,  2.35it/s]

Calculating Sentiment for:  realise


 20%|███████████████▊                                                                 | 39/199 [00:39<01:19,  2.01it/s]

Calculating Sentiment for:  youre


 20%|████████████████▎                                                                | 40/199 [00:40<01:10,  2.25it/s]

Calculating Sentiment for:  last


 21%|████████████████▋                                                                | 41/199 [00:40<01:02,  2.54it/s]

Calculating Sentiment for:  person


 21%|█████████████████                                                                | 42/199 [00:40<01:03,  2.46it/s]

Calculating Sentiment for:  line


 22%|█████████████████▉                                                               | 44/199 [00:41<00:50,  3.07it/s]

Calculating Sentiment for:  doctors
Calculating Sentiment for:  fair


 23%|██████████████████▋                                                              | 46/199 [00:41<00:40,  3.77it/s]

Calculating Sentiment for:  choice


 24%|███████████████████▏                                                             | 47/199 [00:43<01:57,  1.29it/s]

Calculating Sentiment for:  waste
Calculating Sentiment for:  time


 24%|███████████████████▌                                                             | 48/199 [00:45<02:34,  1.02s/it]

Calculating Sentiment for:  nsf


 25%|███████████████████▉                                                             | 49/199 [00:46<02:53,  1.16s/it]

Calculating Sentiment for:  extremely


 25%|████████████████████▎                                                            | 50/199 [00:47<02:28,  1.01it/s]

Calculating Sentiment for:  long


 26%|████████████████████▊                                                            | 51/199 [00:48<02:46,  1.13s/it]

Calculating Sentiment for:  staff


 26%|█████████████████████▏                                                           | 52/199 [00:54<05:51,  2.39s/it]

Calculating Sentiment for:  bad


 27%|█████████████████████▌                                                           | 53/199 [00:59<07:58,  3.28s/it]

Calculating Sentiment for:  personal


 27%|█████████████████████▉                                                           | 54/199 [01:00<06:30,  2.70s/it]

Calculating Sentiment for:  experienceeveryone


 28%|██████████████████████▍                                                          | 55/199 [01:01<04:46,  1.99s/it]

Calculating Sentiment for:  helpful


 29%|███████████████████████▏                                                         | 57/199 [01:01<02:36,  1.10s/it]

Calculating Sentiment for:  initiative


 29%|███████████████████████▌                                                         | 58/199 [01:02<01:59,  1.18it/s]

Calculating Sentiment for:  help
Calculating Sentiment for:  unfriendly


 30%|████████████████████████                                                         | 59/199 [01:02<01:43,  1.36it/s]

Calculating Sentiment for:  guards


 30%|████████████████████████▍                                                        | 60/199 [01:03<01:37,  1.43it/s]

Calculating Sentiment for:  job


 31%|████████████████████████▊                                                        | 61/199 [01:03<01:18,  1.76it/s]

Calculating Sentiment for:  stuff


 31%|█████████████████████████▏                                                       | 62/199 [01:04<01:23,  1.64it/s]

Calculating Sentiment for:  suppose


 32%|█████████████████████████▋                                                       | 63/199 [01:04<01:34,  1.44it/s]

Calculating Sentiment for:  absolutely


 32%|██████████████████████████                                                       | 64/199 [01:05<01:22,  1.64it/s]

Calculating Sentiment for:  atrocious


 33%|██████████████████████████▍                                                      | 65/199 [01:05<01:15,  1.77it/s]

Calculating Sentiment for:  woman
Calculating Sentiment for:  keeps


 34%|███████████████████████████▎                                                     | 67/199 [01:06<01:06,  1.99it/s]

Calculating Sentiment for:  tone


 35%|████████████████████████████                                                     | 69/199 [01:07<01:06,  1.94it/s]

Calculating Sentiment for:  talks
Calculating Sentiment for:  always


 35%|████████████████████████████▍                                                    | 70/199 [01:08<00:59,  2.16it/s]

Calculating Sentiment for:  mask


 36%|█████████████████████████████▎                                                   | 72/199 [01:08<00:44,  2.87it/s]

Calculating Sentiment for:  expressions
Calculating Sentiment for:  screening


 37%|█████████████████████████████▋                                                   | 73/199 [01:09<00:55,  2.25it/s]

Calculating Sentiment for:  station


 37%|██████████████████████████████                                                   | 74/199 [01:09<00:53,  2.31it/s]

Calculating Sentiment for:  rude


 38%|██████████████████████████████▌                                                  | 75/199 [01:11<01:32,  1.34it/s]

Calculating Sentiment for:  serious


 38%|██████████████████████████████▉                                                  | 76/199 [01:11<01:19,  1.55it/s]

Calculating Sentiment for:  impatient


 39%|███████████████████████████████▋                                                 | 78/199 [01:12<01:03,  1.89it/s]

Calculating Sentiment for:  undesirably


 40%|████████████████████████████████▏                                                | 79/199 [01:13<01:21,  1.48it/s]

Calculating Sentiment for:  times
Calculating Sentiment for:  cmpb


 40%|████████████████████████████████▌                                                | 80/199 [01:14<01:11,  1.67it/s]

Calculating Sentiment for:  sent


 41%|████████████████████████████████▉                                                | 81/199 [01:14<01:02,  1.87it/s]

Calculating Sentiment for:  son


 41%|█████████████████████████████████▍                                               | 82/199 [01:15<01:11,  1.64it/s]

Calculating Sentiment for:  preenlistment


 42%|█████████████████████████████████▊                                               | 83/199 [01:15<01:15,  1.53it/s]

Calculating Sentiment for:  enlistmentcheckup


 43%|██████████████████████████████████▌                                              | 85/199 [01:16<00:55,  2.04it/s]

Calculating Sentiment for:  guard
Calculating Sentiment for:  clear


 43%|███████████████████████████████████                                              | 86/199 [01:17<00:57,  1.98it/s]

Calculating Sentiment for:  instructions


 44%|███████████████████████████████████▊                                             | 88/199 [01:17<00:42,  2.58it/s]

Calculating Sentiment for:  alight
Calculating Sentiment for:  drive


 45%|████████████████████████████████████▋                                            | 90/199 [01:18<00:36,  2.98it/s]

Calculating Sentiment for:  train
Calculating Sentiment for:  army


 46%|█████████████████████████████████████                                            | 91/199 [01:18<00:32,  3.30it/s]

Calculating Sentiment for:  inconvenient


 46%|█████████████████████████████████████▍                                           | 92/199 [01:19<00:43,  2.43it/s]

Calculating Sentiment for:  locations


 47%|██████████████████████████████████████▎                                          | 94/199 [01:20<00:44,  2.35it/s]

Calculating Sentiment for:  ever


 48%|██████████████████████████████████████▋                                          | 95/199 [01:20<00:45,  2.26it/s]

Calculating Sentiment for:  terrible
Calculating Sentiment for:  directions


 48%|███████████████████████████████████████                                          | 96/199 [01:21<00:50,  2.02it/s]

Calculating Sentiment for:  whole


 49%|███████████████████████████████████████▍                                         | 97/199 [01:21<00:45,  2.24it/s]

Calculating Sentiment for:  process


 49%|███████████████████████████████████████▉                                         | 98/199 [01:22<00:48,  2.07it/s]

Calculating Sentiment for:  take


 50%|████████████████████████████████████████▎                                        | 99/199 [01:23<00:56,  1.76it/s]

Calculating Sentiment for:  ok


 50%|████████████████████████████████████████▏                                       | 100/199 [01:24<01:26,  1.15it/s]

Calculating Sentiment for:  maybe


 51%|████████████████████████████████████████▌                                       | 101/199 [01:24<01:07,  1.46it/s]

Calculating Sentiment for:  sikit


 52%|█████████████████████████████████████████▍                                      | 103/199 [01:25<00:45,  2.11it/s]

Calculating Sentiment for:  typical
Calculating Sentiment for:  tio


 52%|█████████████████████████████████████████▊                                      | 104/199 [01:26<00:44,  2.11it/s]

Calculating Sentiment for:  switch


 53%|██████████████████████████████████████████▏                                     | 105/199 [01:26<00:44,  2.10it/s]

Calculating Sentiment for:  vocationmo


 53%|██████████████████████████████████████████▌                                     | 106/199 [01:26<00:44,  2.08it/s]

Calculating Sentiment for:  hand


 54%|███████████████████████████████████████████▍                                    | 108/199 [01:27<00:31,  2.91it/s]

Calculating Sentiment for:  probably


 55%|███████████████████████████████████████████▊                                    | 109/199 [01:27<00:31,  2.83it/s]

Calculating Sentiment for:  visit
Calculating Sentiment for:  contrary


 55%|████████████████████████████████████████████▏                                   | 110/199 [01:28<00:28,  3.09it/s]

Calculating Sentiment for:  others


 56%|█████████████████████████████████████████████                                   | 112/199 [01:28<00:24,  3.62it/s]

Calculating Sentiment for:  sure


 57%|█████████████████████████████████████████████▍                                  | 113/199 [01:29<00:42,  2.01it/s]

Calculating Sentiment for:  conditions
Calculating Sentiment for:  severe


 57%|█████████████████████████████████████████████▊                                  | 114/199 [01:30<00:40,  2.07it/s]

Calculating Sentiment for:  officer


 58%|██████████████████████████████████████████████▏                                 | 115/199 [01:30<00:36,  2.29it/s]

Calculating Sentiment for:  fare


 58%|██████████████████████████████████████████████▋                                 | 116/199 [01:30<00:35,  2.37it/s]

Calculating Sentiment for:  well


 59%|███████████████████████████████████████████████                                 | 117/199 [01:31<00:34,  2.37it/s]

Calculating Sentiment for:  specialistletter


 59%|███████████████████████████████████████████████▍                                | 118/199 [01:31<00:36,  2.22it/s]

Calculating Sentiment for:  ask


 60%|███████████████████████████████████████████████▊                                | 119/199 [01:34<01:27,  1.09s/it]

Calculating Sentiment for:  question


 60%|████████████████████████████████████████████████▏                               | 120/199 [01:34<01:17,  1.02it/s]

Calculating Sentiment for:  ignore


 61%|████████████████████████████████████████████████▋                               | 121/199 [01:35<01:08,  1.14it/s]

Calculating Sentiment for:  nice


 61%|█████████████████████████████████████████████████                               | 122/199 [01:36<01:13,  1.05it/s]

Calculating Sentiment for:  friendly


 62%|█████████████████████████████████████████████████▍                              | 123/199 [01:36<00:56,  1.35it/s]

Calculating Sentiment for:  great


 62%|█████████████████████████████████████████████████▊                              | 124/199 [01:38<01:05,  1.15it/s]

Calculating Sentiment for:  medic


 63%|██████████████████████████████████████████████████▎                             | 125/199 [01:38<00:57,  1.28it/s]

Calculating Sentiment for:  check


 63%|██████████████████████████████████████████████████▋                             | 126/199 [01:40<01:15,  1.03s/it]

Calculating Sentiment for:  constantly


 64%|███████████████████████████████████████████████████                             | 127/199 [01:40<00:59,  1.21it/s]

Calculating Sentiment for:  patience


 64%|███████████████████████████████████████████████████▍                            | 128/199 [01:41<00:51,  1.38it/s]

Calculating Sentiment for:  bully
Calculating Sentiment for:  meh


 66%|████████████████████████████████████████████████████▋                           | 131/199 [01:41<00:27,  2.49it/s]

Calculating Sentiment for:  pretty


 66%|█████████████████████████████████████████████████████                           | 132/199 [01:42<00:25,  2.58it/s]

Calculating Sentiment for:  lousy
Calculating Sentiment for:  service


 67%|█████████████████████████████████████████████████████▍                          | 133/199 [01:43<00:41,  1.60it/s]

Calculating Sentiment for:  dk


 68%|██████████████████████████████████████████████████████▎                         | 135/199 [01:43<00:27,  2.34it/s]

Calculating Sentiment for:  also
Calculating Sentiment for:  taiji


 68%|██████████████████████████████████████████████████████▋                         | 136/199 [01:44<00:25,  2.50it/s]

Calculating Sentiment for:  negative


 69%|███████████████████████████████████████████████████████                         | 137/199 [01:45<00:36,  1.71it/s]

Calculating Sentiment for:  share


 69%|███████████████████████████████████████████████████████▍                        | 138/199 [01:45<00:32,  1.90it/s]

Calculating Sentiment for:  quick


 70%|███████████████████████████████████████████████████████▉                        | 139/199 [01:45<00:27,  2.16it/s]

Calculating Sentiment for:  opinion


 70%|████████████████████████████████████████████████████████▎                       | 140/199 [01:46<00:35,  1.67it/s]

Calculating Sentiment for:  perhaps


 71%|████████████████████████████████████████████████████████▋                       | 141/199 [01:47<00:29,  2.00it/s]

Calculating Sentiment for:  varies


 71%|█████████████████████████████████████████████████████████                       | 142/199 [01:47<00:27,  2.08it/s]

Calculating Sentiment for:  trip


 72%|█████████████████████████████████████████████████████████▍                      | 143/199 [01:47<00:22,  2.45it/s]

Calculating Sentiment for:  ultimately


 72%|█████████████████████████████████████████████████████████▉                      | 144/199 [01:47<00:19,  2.79it/s]

Calculating Sentiment for:  definitely


 73%|██████████████████████████████████████████████████████████▎                     | 145/199 [01:48<00:20,  2.60it/s]

Calculating Sentiment for:  memorable


 73%|██████████████████████████████████████████████████████████▋                     | 146/199 [01:48<00:21,  2.48it/s]

Calculating Sentiment for:  sessions


 74%|███████████████████████████████████████████████████████████                     | 147/199 [01:49<00:32,  1.61it/s]

Calculating Sentiment for:  chargeinconvenient


 74%|███████████████████████████████████████████████████████████▍                    | 148/199 [01:50<00:35,  1.42it/s]

Calculating Sentiment for:  inconvenientlocation


 75%|████████████████████████████████████████████████████████████▎                   | 150/199 [01:52<00:30,  1.62it/s]

Calculating Sentiment for:  cookhouse
Calculating Sentiment for:  meagre


 76%|████████████████████████████████████████████████████████████▋                   | 151/199 [01:52<00:22,  2.10it/s]

Calculating Sentiment for:  pay


 77%|█████████████████████████████████████████████████████████████▌                  | 153/199 [01:52<00:16,  2.85it/s]

Calculating Sentiment for:  officerattitude


 77%|█████████████████████████████████████████████████████████████▉                  | 154/199 [01:53<00:15,  2.94it/s]

Calculating Sentiment for:  use
Calculating Sentiment for:  many


 78%|██████████████████████████████████████████████████████████████▋                 | 156/199 [01:53<00:12,  3.36it/s]

Calculating Sentiment for:  cool
Calculating Sentiment for:  gold


 79%|███████████████████████████████████████████████████████████████▌                | 158/199 [01:53<00:09,  4.16it/s]

Calculating Sentiment for:  star
Calculating Sentiment for:  public


 80%|███████████████████████████████████████████████████████████████▉                | 159/199 [01:54<00:10,  3.72it/s]

Calculating Sentiment for:  even


 80%|████████████████████████████████████████████████████████████████▎               | 160/199 [01:55<00:15,  2.52it/s]

Calculating Sentiment for:  bored


 81%|████████████████████████████████████████████████████████████████▋               | 161/199 [01:55<00:20,  1.89it/s]

Calculating Sentiment for:  cold


 81%|█████████████████████████████████████████████████████████████████▏              | 162/199 [01:56<00:19,  1.89it/s]

Calculating Sentiment for:  air


 82%|█████████████████████████████████████████████████████████████████▉              | 164/199 [01:57<00:14,  2.44it/s]

Calculating Sentiment for:  accessible
Calculating Sentiment for:  poor


 83%|██████████████████████████████████████████████████████████████████▋             | 166/199 [01:58<00:16,  2.04it/s]

Calculating Sentiment for:  customerservice
Calculating Sentiment for:  worst


 84%|███████████████████████████████████████████████████████████████████▏            | 167/199 [01:58<00:12,  2.58it/s]

Calculating Sentiment for:  real


 84%|███████████████████████████████████████████████████████████████████▌            | 168/199 [01:58<00:11,  2.60it/s]

Calculating Sentiment for:  edgy


 85%|███████████████████████████████████████████████████████████████████▉            | 169/199 [01:59<00:12,  2.39it/s]

Calculating Sentiment for:  ziyuan


 85%|████████████████████████████████████████████████████████████████████▎           | 170/199 [01:59<00:12,  2.25it/s]

Calculating Sentiment for:  novel


 86%|████████████████████████████████████████████████████████████████████▋           | 171/199 [02:00<00:11,  2.53it/s]

Calculating Sentiment for:  writer


 86%|█████████████████████████████████████████████████████████████████████▏          | 172/199 [02:00<00:13,  2.00it/s]

Calculating Sentiment for:  highly


 87%|█████████████████████████████████████████████████████████████████████▌          | 173/199 [02:01<00:13,  1.94it/s]

Calculating Sentiment for:  inaccessible


 87%|█████████████████████████████████████████████████████████████████████▉          | 174/199 [02:02<00:13,  1.90it/s]

Calculating Sentiment for:  hard


 88%|██████████████████████████████████████████████████████████████████████▎         | 175/199 [02:02<00:12,  1.88it/s]

Calculating Sentiment for:  get


 88%|██████████████████████████████████████████████████████████████████████▊         | 176/199 [02:03<00:11,  1.96it/s]

Calculating Sentiment for:  mrt


 89%|███████████████████████████████████████████████████████████████████████▏        | 177/199 [02:03<00:10,  2.15it/s]

Calculating Sentiment for:  stations


 90%|███████████████████████████████████████████████████████████████████████▉        | 179/199 [02:03<00:06,  2.98it/s]

Calculating Sentiment for:  book
Calculating Sentiment for:  everyday


 90%|████████████████████████████████████████████████████████████████████████▎       | 180/199 [02:04<00:05,  3.23it/s]

Calculating Sentiment for:  troublesome


 91%|████████████████████████████████████████████████████████████████████████▊       | 181/199 [02:04<00:06,  2.84it/s]

Calculating Sentiment for:  thing


 91%|█████████████████████████████████████████████████████████████████████████▏      | 182/199 [02:05<00:11,  1.54it/s]

Calculating Sentiment for:  whats


 92%|█████████████████████████████████████████████████████████████████████████▌      | 183/199 [02:06<00:09,  1.69it/s]

Calculating Sentiment for:  sidenotecanteen


 92%|█████████████████████████████████████████████████████████████████████████▉      | 184/199 [02:07<00:10,  1.43it/s]

Calculating Sentiment for:  canteenb


 93%|██████████████████████████████████████████████████████████████████████████▎     | 185/199 [02:08<00:10,  1.33it/s]

Calculating Sentiment for:  officertalk


 94%|███████████████████████████████████████████████████████████████████████████▏    | 187/199 [02:08<00:05,  2.04it/s]

Calculating Sentiment for:  money
Calculating Sentiment for:  need


 94%|███████████████████████████████████████████████████████████████████████████▌    | 188/199 [02:09<00:05,  1.96it/s]

Calculating Sentiment for:  far


 95%|████████████████████████████████████████████████████████████████████████████▍   | 190/199 [02:09<00:03,  2.79it/s]

Calculating Sentiment for:  away
Calculating Sentiment for:  security


 96%|████████████████████████████████████████████████████████████████████████████▊   | 191/199 [02:10<00:02,  2.78it/s]

Calculating Sentiment for:  troopers


 96%|█████████████████████████████████████████████████████████████████████████████▏  | 192/199 [02:10<00:03,  2.04it/s]

Calculating Sentiment for:  attitude


 97%|█████████████████████████████████████████████████████████████████████████████▉  | 194/199 [02:11<00:01,  2.65it/s]

Calculating Sentiment for:  unbelievably


 98%|██████████████████████████████████████████████████████████████████████████████▍ | 195/199 [02:11<00:01,  2.86it/s]

Calculating Sentiment for:  interestingly
Calculating Sentiment for:  enough


 98%|██████████████████████████████████████████████████████████████████████████████▊ | 196/199 [02:12<00:01,  1.85it/s]

Calculating Sentiment for:  sheat


 99%|███████████████████████████████████████████████████████████████████████████████▌| 198/199 [02:13<00:00,  2.62it/s]

Calculating Sentiment for:  dirty
Calculating Sentiment for:  pigs


100%|████████████████████████████████████████████████████████████████████████████████| 199/199 [02:13<00:00,  1.49it/s]


In [328]:
fin

Unnamed: 0,Avg_sent,Descriptors,Freq
staff,0.875000,"professional ,unfriendly ,doing ,keep ,unfrien...",11
people,0.884956,"professional ,recommended ,there ,suggest ,her...",10
medical,0.917160,"checkup ,checkupdont ,screening ,conditions ,o...",10
bad,0.875912,"reviews ,its ,not ,that ,canteenb ,real ,cante...",8
reviews,0.878788,"other ,suggest ,nsf ,bad ,what ,negative ,nega...",6
...,...,...,...
process,0.923077,"whole ,expect",1
take,0.944444,"hours ,recommended",1
gateentrance,1.000000,checkcounter,1
maybe,1.000000,sikit,1


In [341]:
def get_tfidf_features(df, content_str = "Content", min_ = 2, max_ = 0.5, ngramrange = (1,2)):
    
    # Replace "" with nan's for removal
    #df[content_str].replace('', np.nan, inplace=True)
    #df.dropna(subset=[content_str], inplace=True)
    #stop_words = set(stopwords.words('english'))
    #df[content_str] = df[content_str].apply(lambda x: ''.join([word for word in x.split() if word not in (stop_words)]))
    
    review_list = df[content_str].to_list()
    #feat_count = dict()
    #feat_sent = dict()
    #nlp = stanza.Pipeline('en')

        
    #print(review_list)
    tfidf = TfidfVectorizer(min_df = min_, max_df = max_, ngramrange);
    features = tfidf.fit_transform(review_list);
    q = pd.DataFrame(features.todense(), columns=tfidf.get_feature_names())
    
    return list(q.columns)

In [342]:
def refine_features(originaldf, sentimentdf):
    tfidf_output = get_tfidf_features(originaldf)
    sentimentdf = sentimentdf.reset_index()
    ft_extract = set(sentimentdf['index']);
    tfidf_extract = set(tfidf_output)
    
    intersecting_features = ft_extract.intersection(tfidf_extract)
    
    return_df = sentimentdf
    return_df = return_df.loc[return_df['index'].isin(list(intersecting_features))]
    print("Number of extracted features:")
    print("Initial = ", len(ft_extract), " TFIDF = ", len(intersecting_features), " Final after intersection = ", return_df.shape[0])
    return return_df
    
    

In [343]:
refine_features(rdr, fin)

Number of extracted features:
Initial =  199  TFIDF =  176  Final after intersection =  176




Unnamed: 0,index,Avg_sent,Descriptors,Freq
0,staff,0.875000,"professional ,unfriendly ,doing ,keep ,unfrien...",11
1,people,0.884956,"professional ,recommended ,there ,suggest ,her...",10
2,medical,0.917160,"checkup ,checkupdont ,screening ,conditions ,o...",10
3,bad,0.875912,"reviews ,its ,not ,that ,canteenb ,real ,cante...",8
4,reviews,0.878788,"other ,suggest ,nsf ,bad ,what ,negative ,nega...",6
...,...,...,...,...
193,whole,1.000000,process,1
194,process,0.923077,"whole ,expect",1
195,take,0.944444,"hours ,recommended",1
197,maybe,1.000000,sikit,1
