# Sentiment Analysis

## 1. Importing Required Libraries

In [None]:
import pandas as pd # For data operations
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as sia # For sentiment analysis
import re # For regular expressions
from nltk.corpus import stopwords # For stopwords removal
from nltk.tokenize import word_tokenize # For tokenization
from wordcloud import WordCloud # For creating a wordcloud
from nltk.probability import FreqDist # For token frequency distribution
import matplotlib.pyplot as plt # For token frequency visualization
import seaborn as sns # For exploratory analysis
from nltk.stem.wordnet import WordNetLemmatizer # For noise removal
from nltk import pos_tag # For identifying word group
import plotly.express as pl # For sentiment visualization
import numpy as np # For array operations
from sklearn.naive_bayes import MultinomialNB # For creating a multinomial naive bayes model
from sklearn.naive_bayes import ComplementNB # For creating a complement naive bayes model
from sklearn.naive_bayes import GaussianNB # For creating a gaussian naive bayes model
from sklearn.naive_bayes import BernoulliNB # For creating a bernoulli naive bayes model
from sklearn.linear_model import LogisticRegression, SGDClassifier # For creating a logistic regression and a stochastic gradient descent model
from sklearn.svm import LinearSVC # For creating a svc model
from sklearn.tree import DecisionTreeClassifier # For creating a decision tree model
from sklearn.ensemble import RandomForestClassifier # For creating a random forest tree model
from sklearn.model_selection import train_test_split # For data split
from sklearn.feature_extraction.text import CountVectorizer # For converting data into array
from sklearn.metrics import accuracy_score # For computing model accuracy
from sklearn.pipeline import Pipeline # For creating pipeline
from sklearn.preprocessing import MinMaxScaler # For scaling
from sklearn.feature_extraction.text import TfidfVectorizer # For counting term frequency in topic modelling
from sklearn.decomposition import NMF, LatentDirichletAllocation # For topic modelling using NMF and LDA
from dateutil.parser import parse # For parsing dates
import nltk # For language processing
import json # For creating a json file
from nltk.corpus import wordnet # For stemming
import string # For string operations
import spacy # For dominant topic modelling
import gensim # For gensim model
from gensim import corpora # For dictionary
import pyLDAvis # For plotting topics
import pyLDAvis.gensim_models as gensimv # For creating and plotting gensim topics
from gensim.models.coherencemodel import CoherenceModel # For creating coherence model
%matplotlib inline
import warnings # To disable warnings
warnings.filterwarnings('ignore')

## 2. Text Pre-processing

### 2.1 Reading The Dataset

In [None]:
data = pd.read_csv('sentiment_data.csv') # Data is read and stored into a variable named data

In [None]:
data.head(10) # Displaying first 10 rows

### 2.2 Extracting Column Names

In [None]:
columns = list(data.columns.values)
columns

### 2.3 Extracting Reviews For NPS Score

In [None]:
# Keeping only the columns needed for the NPS-reason analysis and dropping surveys whose reason is 'No Response'.
# .copy() makes the result an independent frame, so the edits below never act on a slice of `data`.
nps_columns = ['ID','MEMBER_ID','REASONSCORE','NPS','SURVEYDATE','SURVEYCLINIC','CLINICCITY','CLINICSTATE']
data_reason_for_nps_score = data[nps_columns]
data_reason_for_nps_score = data_reason_for_nps_score[data_reason_for_nps_score['REASONSCORE'] != 'No Response'].copy()

# Renaming all survey columns in one call instead of one in-place rename per column
data_reason_for_nps_score = data_reason_for_nps_score.rename(columns={
    'REASONSCORE': 'reviews',
    'SURVEYDATE': 'date',
    'SURVEYCLINIC': 'clinic',
    'CLINICCITY': 'city',
    'CLINICSTATE': 'state',
})

# '!st time' is a literal typo fix, so regex matching is switched off explicitly
# (the default of str.replace's regex flag differs between pandas versions)
data_reason_for_nps_score['reviews'] = data_reason_for_nps_score['reviews'].str.replace('!st time', 'first time', regex=False)

# Replacing every character that is not an ASCII letter (punctuation, digits, ...) with a space
remove_punctuations_reason_for_nps_score = data_reason_for_nps_score[['reviews']].replace("[^a-zA-Z]", " ", regex=True)
data_reason_for_nps_score['reviews'] = remove_punctuations_reason_for_nps_score['reviews']

# Renumbering the rows 0..n-1 after the filtering above
data_reason_for_nps_score = data_reason_for_nps_score.reset_index(drop=True)
data_reason_for_nps_score

In [None]:
# Bar chart of the NPS value attached to each review, one bar per row in row order
nps_values = list(data_reason_for_nps_score.NPS)
bar_positions = range(len(nps_values))
plt.bar(bar_positions, nps_values, width=0.6)
plt.xlabel("Reviews Count")
plt.ylabel("NPS Score")
plt.title("Reason Score Plot - NPS Distribution")

# Enlarging the current figure, saving it to disk and then displaying it
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
fig.savefig('test2png.png', dpi=100)
plt.show()

In [None]:
reason_for_nps_score_reviews = data_reason_for_nps_score

In [None]:
reason_for_nps_score_reviews.isna().sum() # Counting the number of missing values

In [None]:
# Removing missing values
reason_for_nps_score_reviews = reason_for_nps_score_reviews.dropna()
reason_for_nps_score_reviews.isna().sum()

In [None]:
# Counting the reviews that contain a common English contraction.
# The 'reviews' column has already had every non-letter replaced by a space, so "can't" is
# stored as "can t"; the ['\s] separator accepts both the apostrophe and the space form.
# The match stays case-sensitive because the reviews are not lower-cased yet at this point.
contraction_pattern = (
    r"\b(?:can|won|isn|doesn|didn|don|haven|couldn|hadn|hasn|shouldn)['\s]t\b"
    r"|\b(?:you|I|it|they)['\s]ll\b"
    r"|\b(?:I|could|would|should|must|they)['\s]ve\b"
    r"|\bI['\s]m\b"
    r"|\b(?:it|who|that|there)['\s]s\b"
    r"|\bthey['\s]re\b"
)
# Summing the boolean mask gives the number of matching reviews
reason_for_nps_score_contractions = reason_for_nps_score_reviews['reviews'].str.contains(contraction_pattern).sum()
reason_for_nps_score_contractions

In [None]:
# Converting reviews to lower case
# reason_for_nps_score_lower keeps a one-column copy of the reviews as they were before lower-casing
reason_for_nps_score_lower = reason_for_nps_score_reviews[['reviews']]
# Series.str.lower() replaces DataFrame.applymap(str.lower); applymap is deprecated in recent pandas releases
reason_for_nps_score_reviews['reviews'] = reason_for_nps_score_reviews['reviews'].str.lower()
reason_for_nps_score_reviews

In [None]:
# Tokenizing every review; the result holds one token list per review, in row order
reason_for_nps_score_token_list = [
    word_tokenize(review_text)
    for review_text in reason_for_nps_score_reviews['reviews']
]
reason_for_nps_score_token_list

In [None]:
# Adding the token lists as a 'tokens' column so reviews and tokens can be viewed side by side
reason_for_nps_score_reviews['tokens'] = reason_for_nps_score_token_list
reason_for_nps_score_reviews

In [None]:
# Building one token-frequency distribution per review and collecting them in a list.
# `fdist` is left bound to the distribution of the last review after the loop.
reason_for_nps_score_frequency_list = []

for each in reason_for_nps_score_token_list:
    fdist = FreqDist(each)
    reason_for_nps_score_frequency_list.append(fdist)

# Plotting the 30 most frequent tokens for each of the first 15 reviews.
# Slicing (instead of indexing 0..14) keeps this working when fewer than 15 reviews exist.
for review_fdist in reason_for_nps_score_frequency_list[:15]:
    review_fdist.plot(30, cumulative=False)
    plt.show()

In [None]:
# Displaying the 4 most common tokens of `fdist`
# NOTE(review): `fdist` is whatever the per-review frequency loop assigned last, i.e. the
# distribution of the final review only, not of the whole corpus — confirm this is intended
fdist.most_common(4)

In [None]:
# Dropping English stopwords from every token list; tokens are lower-cased for the lookup and in the output
stopwords_set = set(stopwords.words('english'))
reason_for_nps_score_tokens_without_stopwords = [
    [lowered for lowered in map(str.lower, token_list) if lowered not in stopwords_set]
    for token_list in reason_for_nps_score_reviews.tokens
]

# Adding the filtered token lists as a column next to the reviews and the raw tokens
reason_for_nps_score_reviews['tokens_without_stopwords'] = reason_for_nps_score_tokens_without_stopwords
reason_for_nps_score_reviews

In [None]:
# Visualizing the review text as a word cloud (English stopwords excluded, no collocations)
# NOTE(review): no max_words argument is passed, so WordCloud's own default word limit applies
feedbacks = " ".join(reason_for_nps_score_reviews.reviews)
wordcloud_options = dict(
    stopwords=stopwords_set,
    collocations=False,
    background_color='black',
    width=4000,
    height=3000,
)
reason_for_nps_score_wordcloud = WordCloud(**wordcloud_options).generate(feedbacks)

# Rendering the cloud without axes
fig = plt.figure(figsize=(15, 10))
plt.imshow(reason_for_nps_score_wordcloud, interpolation='bilinear')
plt.axis("off")
plt.title('Reason Score Wordcloud - Tokens')
plt.show()

In [None]:
# Lemmatizing every stopword-free token with WordNet (default part of speech), one list per review
lemmatizer = WordNetLemmatizer()

reason_for_nps_score_tokens_after_lemmatizing = [
    list(map(lemmatizer.lemmatize, kept_tokens))
    for kept_tokens in reason_for_nps_score_tokens_without_stopwords
]

# Adding the lemmatized tokens as a 'fresh_tokens' column next to the earlier token columns
reason_for_nps_score_reviews['fresh_tokens'] = reason_for_nps_score_tokens_after_lemmatizing
reason_for_nps_score_reviews

In [None]:
# Building the gensim dictionary from the lemmatized tokens, then converting every review with doc2bow
reason_for_nps_score_dictionary = corpora.Dictionary(reason_for_nps_score_tokens_after_lemmatizing)
reason_for_nps_score_doc_term_matrix = list(
    map(reason_for_nps_score_dictionary.doc2bow, reason_for_nps_score_tokens_after_lemmatizing)
)

In [None]:
# Alias for gensim's LDA model class
LDA = gensim.models.ldamodel.LdaModel

# Training a 10-topic LDA model on the bag-of-words corpus built from the lemmatized reviews.
# random_state is fixed at 100; chunksize, passes and iterations are set explicitly.
lda_model = LDA(corpus=reason_for_nps_score_doc_term_matrix, id2word=reason_for_nps_score_dictionary, num_topics=10, random_state=100, chunksize=1000, passes=50,iterations=100)

In [None]:
lda_model.print_topics()

In [None]:
# Collecting the two top words of every topic returned by show_topics, one list of words per topic
reason_for_nps_score_topics_list = [
    [entry[0] for entry in topic_terms]
    for _topic_id, topic_terms in lda_model.show_topics(formatted=False, num_words= 2)
]

# One row per topic, with the word list in the 'topics' column
reason_for_nps_score_topics = pd.DataFrame({'topics':reason_for_nps_score_topics_list})
reason_for_nps_score_topics

In [None]:
# Flattening the top 3 entries of every topic into (topic number, word, value) rows
# NOTE(review): the third column is named 'coherence_score' but holds the value show_topic
# returns with each word — presumably the word's weight in the topic; confirm the naming
top_words_per_topic = [
    (topic_id,) + word_entry
    for topic_id in range(lda_model.num_topics)
    for word_entry in lda_model.show_topic(topic_id, topn = 3)
]

reason_for_nps_score_topics_words_based = pd.DataFrame(top_words_per_topic, columns=['topic_number', 'word', 'coherence_score'])
reason_for_nps_score_topics_words_based

In [None]:
# For every topic, keeping the row(s) whose score equals the topic's highest score.
# The aggregation is named as the string 'max' because passing the builtin max to transform is deprecated in pandas.
best_score_per_topic = reason_for_nps_score_topics_words_based.groupby(['topic_number'])['coherence_score'].transform('max')
topic_group_index = best_score_per_topic == reason_for_nps_score_topics_words_based['coherence_score']
topic_category = reason_for_nps_score_topics_words_based[topic_group_index]
topic_category = topic_category.rename({'word': 'topic'}, axis=1)
# reset_index returns a new frame, so the result has to be assigned back to take effect
topic_category = topic_category.reset_index(drop=True)
topic_category

In [None]:
# Enabling pyLDAvis output inside the notebook and preparing the topic view for the LDA model.
# The prepared object is stored in `vis`; nothing in this cell displays it.
pyLDAvis.enable_notebook()
vis = pyLDAvis.gensim_models.prepare(lda_model, reason_for_nps_score_doc_term_matrix, reason_for_nps_score_dictionary)

 

In [None]:
# Computing the c_v coherence score of the LDA model and printing it together with the value returned by log_perplexity
coherence_model_lda = CoherenceModel(model=lda_model, texts=reason_for_nps_score_tokens_after_lemmatizing, dictionary=reason_for_nps_score_dictionary , coherence='c_v')
coherence_lda = coherence_model_lda.get_coherence()
# NOTE(review): gensim documents log_perplexity as returning a per-word likelihood bound rather than the
# perplexity itself — confirm the printed label and how the value should be read
# NOTE(review): total_docs is hard-coded to 10000 — confirm it matches the size of the corpus
print('\nPerplexity: ', lda_model.log_perplexity(reason_for_nps_score_doc_term_matrix,total_docs=10000))  # printed under the label "Perplexity"
print('\nCoherence Score: ', coherence_lda)

In [None]:
# Tagging every lemmatized token list with parts of speech (verb, noun, ...), one tagged list per review
reason_for_nps_score_pos_tagged = list(map(pos_tag, reason_for_nps_score_tokens_after_lemmatizing))
reason_for_nps_score_pos_tagged

In [None]:
# Translating a part-of-speech tag into the matching WordNet constant so the lemmatizer can use it
def get_wordnet_pos(treebank_tag):
    """Return the WordNet part-of-speech constant for a tag, or None.

    Tags starting with 'J', 'V', 'N' and 'R' map to wordnet.ADJ, wordnet.VERB,
    wordnet.NOUN and wordnet.ADV respectively. Any other tag, including an
    empty string, yields None.
    """
    prefix_to_constant_name = (
        ('J', 'ADJ'),
        ('V', 'VERB'),
        ('N', 'NOUN'),
        ('R', 'ADV'),
    )
    for prefix, constant_name in prefix_to_constant_name:
        if treebank_tag.startswith(prefix):
            # The constant is looked up only for a matching prefix
            return getattr(wordnet, constant_name)
    return None

In [None]:
def _tag_review(review_text):
    """Tokenize one review and return its (token, POS tag) pairs."""
    return nltk.pos_tag(nltk.word_tokenize(review_text))


def _lemmatize_tagged(tagged_tokens):
    """Join the lemmas of (token, tag) pairs; tokens without a WordNet POS are kept unchanged."""
    lemmas = []
    for word, tag in tagged_tokens:
        wordnet_pos = get_wordnet_pos(tag)
        lemmas.append(lemmatizer.lemmatize(word, pos=wordnet_pos) if wordnet_pos else word)
    return ' '.join(lemmas)


# 'feedback' holds the POS-tagged tokens of each review; 'lemma' holds the review rebuilt from POS-aware lemmas
reason_for_nps_score_reviews['feedback'] = reason_for_nps_score_reviews['reviews']
reason_for_nps_score_reviews['feedback'] = reason_for_nps_score_reviews['feedback'].apply(_tag_review)

reason_for_nps_score_reviews['lemma'] = reason_for_nps_score_reviews['feedback'].transform(_lemmatize_tagged)
reason_for_nps_score_reviews

In [None]:
# Finding the polarity scores for each feedback.
# Every entry is the dict returned by polarity_scores for one review, with the review text
# added under 'label'. The dicts are printed and collected in a list that is used later to
# derive the sentiment columns.
reason_for_nps_score_polarity_score_list = []

# Building the analyzer once; creating a new one inside the loop would repeat its set-up for every review
polarity_analyzer = sia()

for each in reason_for_nps_score_reviews.reviews:
    polarity_score = polarity_analyzer.polarity_scores(each)
    polarity_score['label'] = each
    reason_for_nps_score_polarity_score_list.append(polarity_score)
    print(polarity_score)

In [None]:
# Adding the compound (aggregated), positive and negative scores as columns next to the reviews.
# The score frame is given the review frame's own index: the polarity list holds one entry per
# review in row order, and column assignment aligns on index labels, so a fresh 0..n-1 index
# would attach scores to the wrong rows wherever the review index has gaps (e.g. after dropna).
polarity_frame = pd.DataFrame(reason_for_nps_score_polarity_score_list, index=reason_for_nps_score_reviews.index)
reason_for_nps_score_reviews['polarity_score'] = polarity_frame['compound']
reason_for_nps_score_reviews['positive_score'] = polarity_frame['pos']
# The negative score is stored under 'neg'; 'neu' is the neutral share
reason_for_nps_score_reviews['negative_score'] = polarity_frame['neg']

# Any score that is still missing is treated as 0
score_columns = ['polarity_score', 'positive_score', 'negative_score']
reason_for_nps_score_reviews[score_columns] = reason_for_nps_score_reviews[score_columns].fillna(0)
reason_for_nps_score_reviews

In [None]:
# Creating a function to classify sentiment labels based on the polarity score
def sentiment_label(row):
    """Return the sentiment label for a row based on its 'polarity_score'.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a 'polarity_score' entry.

    Returns:
        "Positive" for a score above 0, "Negative" for a score below 0 and
        "Neutral" otherwise.
    """
    polarity = row['polarity_score']
    if polarity > 0:
        return "Positive" # Positive sentiment
    if polarity < 0:
        return "Negative" # Negative sentiment
    # Zero, and any value that is neither above nor below zero (such as NaN), counts as
    # neutral instead of leaving the result undefined
    return "Neutral" # Neutral sentiment

def sentiment(row):
    """Return the numeric sentiment for a row based on its 'polarity_score'.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a 'polarity_score' entry.

    Returns:
        1 for a score above 0, -1 for a score below 0 and 0 otherwise.
    """
    polarity = row['polarity_score']
    if polarity > 0:
        return 1 # Positive sentiment
    if polarity < 0:
        return -1 # Negative sentiment
    # Zero, and any value that is neither above nor below zero (such as NaN), counts as
    # neutral instead of leaving the result undefined
    return 0 # Neutral sentiment

In [None]:
# Creating two new columns from the polarity score of each feedback:
# 'sentiment' holds the numeric value returned by sentiment() and
# 'sentiment_label' holds the text label returned by sentiment_label()
reason_for_nps_score_reviews['sentiment'] = reason_for_nps_score_reviews.apply(sentiment, axis=1)
reason_for_nps_score_reviews['sentiment_label'] = reason_for_nps_score_reviews.apply(sentiment_label, axis=1)
reason_for_nps_score_reviews

In [None]:
# Fixing polarity scores and related labels with keyword and NPS based overrides.
# Each keyword pattern is written once and each row mask is computed once, so the two
# assignments that share a mask ('sentiment' and 'sentiment_label') cannot drift apart.
# The three steps run in this order because later steps read the labels set by earlier ones.

extreme_pattern = '^(?=.*rude)|(?=.*worst)|(?=.*frustrated)|(?=.*angry)|(?=.*terrible)|(?=.*disrespectful)|(?=.*irresponsibly)|(?=.*irresponsible)|(?=.*improper)|(?=.*wasted)|(?=.*violate)|(?=.*violations)|(?=.*aren t worth)|(?=.*undress)|(?=.*didnt treat me)|(?=.*horrible)|(?=kill)|(?=.*danger)|(?=.*dangerous)|(?=rough)|(?=.*dismissed)|(?=.*upsetting)|(?=.*out of line)|(?=.*uncomfortable)|(?=.*can t breathe)|(?=.*breast)|(?=.*was misdiagnosed)|(?=.*did not examine me)|(?=.*forced to participate)|(?=.*forced into it)'
positive_pattern = '^(?=.*good)|(?=.*great)|(?=.*awesome)|(?=.*very much)|(?=.*friendly)|(?=.*appreciate)|(?=.*compassionate)|(?=.*went well)|(?=.*very thorough)|(?=.*assisted)|(?=.*easy)|(?=.*enjoyed)|(?=.*i like)|(?=.*i liked)|(?=.*nice)|(?=.*love)|(?=.*caring)|(?=.*impressed)|(?=.*knowledgeable)|(?=.*very comfortable)|(?=.*very professional)|(?=.*achieved)|(?=.*courteous)|(?=.*listen to what i had to say)|(?=.*right amount of time)|(?=.*on time)|(?=.*personable)|(?=.*efficient)|(?=.*fantastic)|(?=.*punctual)|(?=.*go over issues)|(?=.*got me in same day)|(?=.*all my concerns)|(?=.*everything)|(?=.*sat and talked)'

# Step 1: reviews with an "extreme" keyword get sentiment 2 / label 'Extreme', and a positive polarity is flipped.
# str.match tests the pattern at the start of the review only.
# NOTE(review): (?=kill) and (?=rough) have no leading .*, so with str.match they only fire when the
# review begins with that word — confirm this is intended
is_extreme = reason_for_nps_score_reviews['reviews'].str.match(extreme_pattern)
reason_for_nps_score_reviews.loc[is_extreme, 'sentiment'] = 2
reason_for_nps_score_reviews.loc[is_extreme, 'sentiment_label'] = 'Extreme'
reason_for_nps_score_reviews.loc[(reason_for_nps_score_reviews['polarity_score']>0) & (reason_for_nps_score_reviews['sentiment_label']=='Extreme'), 'polarity_score']*=-1

# Step 2: an NPS of 7 or more overrides a 'Negative' label, and a negative polarity is flipped
high_nps_but_negative = (reason_for_nps_score_reviews['NPS']>=7) & (reason_for_nps_score_reviews['sentiment_label']=='Negative')
reason_for_nps_score_reviews.loc[high_nps_but_negative, 'sentiment'] = 1
reason_for_nps_score_reviews.loc[high_nps_but_negative, 'sentiment_label'] = 'Positive'
reason_for_nps_score_reviews.loc[(reason_for_nps_score_reviews['NPS']>=7) & (reason_for_nps_score_reviews['polarity_score']<0), 'polarity_score']*=-1

# Step 3: reviews with a positive keyword and an NPS of 5 or more become 'Positive',
# and a negative polarity on any 'Positive' row is flipped
has_positive_keyword = reason_for_nps_score_reviews['reviews'].str.contains(positive_pattern) & (reason_for_nps_score_reviews['NPS']>=5)
reason_for_nps_score_reviews.loc[has_positive_keyword, 'sentiment'] = 1
reason_for_nps_score_reviews.loc[has_positive_keyword, 'sentiment_label'] = 'Positive'
reason_for_nps_score_reviews.loc[(reason_for_nps_score_reviews['polarity_score']<0) & (reason_for_nps_score_reviews['sentiment_label']=='Positive'), 'polarity_score']*=-1
reason_for_nps_score_reviews

In [None]:
# Marking reviews that contain a negative phrase as 'Negative' (sentiment -1).
# The phrases are listed once and joined into the same '^(?=.*a)|(?=.*b)|...' pattern shape
# used before; with str.contains this matches any review that contains one of the phrases.
# A separate pass restricted to NPS <= 4 is not needed: it used a subset of these phrases and
# this unconditional pass already covers every row it would have touched.
# Phrases are kept exactly as written (including spellings such as 'grteeted') because they
# are matched against the review text.
# NOTE(review): 'doesn t care' was broken across two lines inside the string literal; it is
# written here on one line with a single space — confirm
negative_terms = [
    'wasn t treated', 'waited', 'not as caring', 'did not think', 'didn t think',
    'did not get', 'didn t get', 'felt rushed', 'was rushed', 'seemed rushed',
    'did not fix', 'didn t fix', 'didn t listen', 'did not listen', 'ignored',
    'unprofessional', 'don t think', 'do not think', 'waiting', 'talked too much',
    'please', 'took longer', 'didn t show', 'did not show', 'never',
    'difficult', 'unable', 'didn t look', 'did not look', 'not being treated',
    'was not', 'confused', 'did not recognize', 'didn t recognize', 'do not like',
    'don t like', 'wasn t greeted', 'was not greeted', 'need to improve', 'no one',
    'lacking', 'no solution', 'didn t understand', 'did not understand', 'unorganized',
    'no follow up', 'doesn t seem', 'does not seem', 'would not call', 'wouldn t call',
    'were not', 'not very', 'not answered', 'no answer', 'conflict',
    'didnt treat', 'didn t treat', 'did not treat', 'lack', 'lacked',
    'need to', 'not near as caring', 'already expressed', 'far away', 'dirty',
    'don t have resources', 'dont lie', 'too long', 'forgot', 'give proper',
    'not treating', 'worthless', 'doesn t look well', 'don t get', 'way to long',
    'not willing', 'no examination', 'failed', 'didn t follow up', 'doesn t follow up',
    'was cold', 'little compassion', 'inexperienced', 'didn t even', 'needs to be cleaned',
    'forgot to call', 'did not hear', 'superficial', 'afraid to touch', 'did not trust',
    'far too long', 'don t have any resources', 'not happy', 'made me wait', 'awful',
    'nobody grteeted', 'won t call', 'did not work', 'entire problem was', 'didn t even acknowledge',
    'doesn t care', 'incompetent', 'not knowledgeable', 'didn t make appointment right', 'should not have',
    'had to wait awhile', 'way too long', 'frustrating', 'they don t do', 'than the person you have hired',
    'wasn t very clean', 'made me very sick', 'we were told', 'what is going on', 'don t recommend',
    'only thing they cared about', 'did not seem to care', 'pretty disappointed', 'mess up my medicine', 'very demeaning',
    'unfriendly', 'very disappointed', 'didn t have any good', 'wasn t much true interest', 'not acceptable',
    'does not listen', 'would not refill', 'not happening', 'not great', 'new very little',
    'to long of a time', 'poor instructions', 'poor service', 'was charged an excessive amount', 'thrilled i was able',
    'little slow',
]
negative_pattern = '^' + '|'.join('(?=.*{})'.format(term) for term in negative_terms)

# One mask drives both columns so 'sentiment' and 'sentiment_label' always agree
has_negative_keyword = reason_for_nps_score_reviews['reviews'].str.contains(negative_pattern)
reason_for_nps_score_reviews.loc[has_negative_keyword, 'sentiment'] = -1
reason_for_nps_score_reviews.loc[has_negative_keyword, 'sentiment_label'] = 'Negative'

# A row labelled 'Negative' must not keep a positive polarity score
reason_for_nps_score_reviews.loc[(reason_for_nps_score_reviews['polarity_score']>0) & (reason_for_nps_score_reviews['sentiment_label']=='Negative'), 'polarity_score']*=-1

In [None]:
reason_for_nps_score_reviews.loc[reason_for_nps_score_reviews['reviews'].str.contains('^(?=.*amazing)|(?=.*can t think of anything)|(?=.*completely satisfied)|(?=.*no complaints)|(?=.*perfect)|(?=.*was fine)|(?=.*was nice)|(?=.*very happy)|(?=.*well communication)|(?=.*enough time with me)|(?=.*not a single thing)|(?=.*couldn t ask for anything more)|(?=.*very satisfied)|(?=.*nothing to say)|(?=.*nothing to report)|(?=.*very pleasant)|(?=.*was well)|(?=.*completely happy)|(?=.*was pleased)|(?=.*all employees use)|(?=.*super nice)|(?=.*never had a doctor spend)|(?=.*very pleased)|(?=.*it was okay)|(?=.*was quick)|(?=.*convenient high quality)|(?=.*very welcoming)|(?=.*provide great care)|(?=.*very nice)|(?=.*treat you like family)|(?=.*always on time)|(?=.*very helpful)|(?=.*i am fortunate)|(?=.*best coarse of action)|(?=.*very convenient)|(?=.*i iike the doctor)|(?=.*great service)|(?=.*wasn t too long)|(?=.*extremely convenient)|(?=.*service is good)|(?=.*i like being able to)|(?=.*overall i liked)|(?=.*paladinas the city s health)|(?=.*i enjoy the convenience)|(?=.*respect for my time)|(?=.*was easy to schedule)|(?=.*short waiting times)|(?=.*no waiting time)|(?=.*no hassle)|(?=.*very friendly)|(?=.*answered all my questions)|(?=.*very professional)|(?=.*so far so good)|(?=.*always seen on time)|(?=.*never feel rushed)|(?=.*friendly  timely)|(?=.*fast service)|(?=.*very accommodating)|(?=.*pleasent experience   nice staff)|(?=.*helpful and financially beneficial)|(?=.*ease of appointments)|(?=.*has a genuine concern)|(?=.*you feel that you are being heard)|(?=.*quick  affordable)|(?=.*convenient  fast  professional)|(?=.*excellent  thorough providers)|(?=.*very detailed check up)|(?=.*were kind)|(?=.*high quality of care)|(?=.*doctor listened to patient)|(?=.*doing a better job)|(?=.*ease of making an appointment)|(?=.*are just wonderful)|(?=.*very attentive)|(?=.*no wait times)|(?=.*felt comfortable)|(?=.*treated like a human a)|(?=.*felt welcoming)|(?=.*i am 
satisfied)|(?=.*quality of care and time)|(?=.*i enjoy)|(?=.*quick scheduling)|(?=.*was vey kind)|(?=.*was very kind)|(?=.*it was helpful)|(?=.*pretty thorough)|(?=.*was really thorough)|(?=.*listened to my concerns)|(?=.*listened to what i had to say)|(?=.*listened to all concerns)|(?=.*very responsive)|(?=.*very informative)|(?=.*cared about my well being)|(?=.*i feel welcomed)|(?=.*made me feel important)|(?=.*made me feel better)|(?=.*was very productive)|(?=.*gave me a lot of information)|(?=.*timely and kind)|(?=.*positive experience)|(?=.*personalized care)|(?=.*thank you)|(?=.*with professional courtesy)|(?=.*very positive environment)|(?=.*were wonderful)|(?=.*was thorough)|(?=.*non judgemental)|(?=.*very receptive)|(?=.*did not seem rushed)|(?=.*asked thorough questions)|(?=.* i felt confident)|(?=.*very little wait time)|(?=.*felt like an actual patient)|(?=.*she really cares)|(?=.*he really cares)|(?=.*treated me like a person)|(?=.*listened carefully and thoughtfully)|(?=.*was responsive)|(?=.*explained things really well)|(?=.*met me with respect)|(?=.*listened to my feedback)|(?=.*i felt heard)|(?=.*went very well)|(?=.*quick and thorough)|(?=.*made me comfortable)|(?=.*was explained in detail)|(?=.*kind and listened)|(?=.*got me in earlier)|(?=.*outstanding service)|(?=.*listened to my needs)|(?=.*very well prepared)|(?=.*i was welcomed)|(?=.*listened  welcoming)|(?=.*most important patient)|(?=.*spent a tremendous amount of time)|(?=.*very kind and polite)|(?=.*pleasant and thorough)|(?=.*pleasant and helpful)|(?=.*very prompt)|(?=.*very timely)|(?=.*asked good questions)|(?=.*is the best)|(?=.*pleasant and professional)|(?=.*was understanding)|(?=.*felt welcome and listened)|(?=.*everything went well)|(?=.*was pleasant)|(?=.*staff greeted me)|(?=.*everyone is sincere)|(?=.*polite and helpful)|(?=.*interactive and listened)|(?=.*found answers to my questions)|(?=.*kind and considerate)|(?=.*listened and responded)|(?=.*easy to schedule)|(?=.*always 
listen to my concerns)|(?=.*were professional)|(?=.*satisfied my needs)|(?=.*i was important)|(?=.*did not rush me)|(?=.*were addressed)|(?=.*was very respectful)|(?=.*didn t feel rushed)|(?=.*ease and timeliness)|(?=.*very patient and attentive)|(?=.*made sure i understood)|(?=.*took time to explain)|(?=.*very personal and understanding)|(?=.*very honest)|(?=.*very cared)|(?=.*always prepared)|(?=.*well cared)|(?=.*best dr visit)|(?=.*did not feel rushed)|(?=.*very quick)|(?=.*is a pleasure)|(?=.*patient centered)|(?=.*doctor really cares)|(?=.*spent as much time)|(?=.*expressed interested in me)|(?=.*took time to really talk)|(?=.*helped me with my issues)|(?=.*was incredibly calm)|(?=.*very warmly)|(?=.*listened to me first)|(?=.*without any wait time)|(?=.*always quick)|(?=.*was organized)|(?=.*very kind)|(?=.*i was able to make an appointment)|(?=.*helped clear up mysteries)|(?=.*was exceptional)|(?=.*we did not have to wait)|(?=.*i really like)|(?=.*made a personal connection)|(?=.*spent time in reviewing)|(?=.*spent quality time)|(?=.*gave me the information i needed)|(?=.*very personal)|(?=.*did not rush)|(?=.*have time to deal)|(?=.*was extremely thorough)|(?=.*has really helped)|(?=.*extra care was taken)|(?=.*took extra care)|(?=.*showed an interest)|(?=.*care and compassion)|(?=.*took his time)|(?=.*is nice  and caring)|(?=.*took the time to explain)|(?=.*talked to me and listened)|(?=.*made the patient feel comfortable)|(?=.*addressed the issue)|(?=.*listen and spent time)|(?=.*took her time with me)|(?=.*addressed my issue)|(?=.*i am so pleased)|(?=.*took the time to listen)|(?=.*i felt important  like a person)|(?=.*concise and responsive)|(?=.*didn t wait long)|(?=.*helped me understand)|(?=.*very well)|(?=.*very personal service)|(?=.*they followed the guidelines)|(?=.*started in a timely)|(?=.*was wonderful)|(?=.*very thoughtful)|(?=.*was genuinely interested)|(?=.*ran smoothly)|(?=.*i felt my concerns were important)|(?=.*assist was 
great)|(?=.*welcoming staff)|(?=.*they were very interested)|(?=.*i am treated)|(?=.*was treated with respect)|(?=.*can recommend to co workers)'), 'sentiment'] = 1
reason_for_nps_score_reviews.loc[reason_for_nps_score_reviews['reviews'].str.contains('^(?=.*amazing)|(?=.*can t think of anything)|(?=.*completely satisfied)|(?=.*no complaints)|(?=.*perfect)|(?=.*was fine)|(?=.*was nice)|(?=.*very happy)|(?=.*well communication)|(?=.*enough time with me)|(?=.*not a single thing)|(?=.*couldn t ask for anything more)|(?=.*very satisfied)|(?=.*nothing to say)|(?=.*nothing to report)|(?=.*very pleasant)|(?=.*was well)|(?=.*completely happy)|(?=.*was pleased)|(?=.*all employees use)|(?=.*super nice)|(?=.*never had a doctor spend)|(?=.*very pleased)|(?=.*it was okay)|(?=.*was quick)|(?=.*convenient high quality)|(?=.*very welcoming)|(?=.*provide great care)|(?=.*very nice)|(?=.*treat you like family)|(?=.*always on time)|(?=.*very helpful)|(?=.*i am fortunate)|(?=.*best coarse of action)|(?=.*very convenient)|(?=.*i iike the doctor)|(?=.*great service)|(?=.*wasn t too long)|(?=.*extremely convenient)|(?=.*service is good)|(?=.*i like being able to)|(?=.*overall i liked)|(?=.*paladinas the city s health)|(?=.*i enjoy the convenience)|(?=.*respect for my time)|(?=.*was easy to schedule)|(?=.*short waiting times)|(?=.*no waiting time)|(?=.*no hassle)|(?=.*very friendly)|(?=.*answered all my questions)|(?=.*very professional)|(?=.*so far so good)|(?=.*always seen on time)|(?=.*never feel rushed)|(?=.*friendly  timely)|(?=.*fast service)|(?=.*very accommodating)|(?=.*pleasent experience   nice staff)|(?=.*helpful and financially beneficial)|(?=.*ease of appointments)|(?=.*has a genuine concern)|(?=.*you feel that you are being heard)|(?=.*quick  affordable)|(?=.*convenient  fast  professional)|(?=.*excellent  thorough providers)|(?=.*very detailed check up)|(?=.*were kind)|(?=.*high quality of care)|(?=.*doctor listened to patient)|(?=.*doing a better job)|(?=.*ease of making an appointment)|(?=.*are just wonderful)|(?=.*very attentive)|(?=.*no wait times)|(?=.*felt comfortable)|(?=.*treated like a human a)|(?=.*felt welcoming)|(?=.*i am 
satisfied)|(?=.*quality of care and time)|(?=.*i enjoy)|(?=.*quick scheduling)|(?=.*was vey kind)|(?=.*was very kind)|(?=.*it was helpful)|(?=.*pretty thorough)|(?=.*was really thorough)|(?=.*listened to my concerns)|(?=.*listened to what i had to say)|(?=.*listened to all concerns)|(?=.*very responsive)|(?=.*very informative)|(?=.*cared about my well being)|(?=.*i feel welcomed)|(?=.*made me feel important)|(?=.*made me feel better)|(?=.*was very productive)|(?=.*gave me a lot of information)|(?=.*timely and kind)|(?=.*positive experience)|(?=.*personalized care)|(?=.*thank you)|(?=.*with professional courtesy)|(?=.*very positive environment)|(?=.*were wonderful)|(?=.*was thorough)|(?=.*non judgemental)|(?=.*very receptive)|(?=.*did not seem rushed)|(?=.*asked thorough questions)|(?=.* i felt confident)|(?=.*very little wait time)|(?=.*felt like an actual patient)|(?=.*she really cares)|(?=.*he really cares)|(?=.*treated me like a person)|(?=.*listened carefully and thoughtfully)|(?=.*was responsive)|(?=.*explained things really well)|(?=.*met me with respect)|(?=.*listened to my feedback)|(?=.*i felt heard)|(?=.*went very well)|(?=.*quick and thorough)|(?=.*made me comfortable)|(?=.*was explained in detail)|(?=.*kind and listened)|(?=.*got me in earlier)|(?=.*outstanding service)|(?=.*listened to my needs)|(?=.*very well prepared)|(?=.*i was welcomed)|(?=.*listened  welcoming)|(?=.*most important patient)|(?=.*spent a tremendous amount of time)|(?=.*very kind and polite)|(?=.*pleasant and thorough)|(?=.*pleasant and helpful)|(?=.*very prompt)|(?=.*very timely)|(?=.*asked good questions)|(?=.*is the best)|(?=.*pleasant and professional)|(?=.*was understanding)|(?=.*felt welcome and listened)|(?=.*everything went well)|(?=.*was pleasant)|(?=.*staff greeted me)|(?=.*everyone is sincere)|(?=.*polite and helpful)|(?=.*interactive and listened)|(?=.*found answers to my questions)|(?=.*kind and considerate)|(?=.*listened and responded)|(?=.*easy to schedule)|(?=.*always 
listen to my concerns)|(?=.*were professional)|(?=.*satisfied my needs)|(?=.*i was important)|(?=.*did not rush me)|(?=.*were addressed)|(?=.*was very respectful)|(?=.*didn t feel rushed)|(?=.*ease and timeliness)|(?=.*very patient and attentive)|(?=.*made sure i understood)|(?=.*took time to explain)|(?=.*very personal and understanding)|(?=.*very honest)|(?=.*very cared)|(?=.*always prepared)|(?=.*well cared)|(?=.*best dr visit)|(?=.*did not feel rushed)|(?=.*very quick)|(?=.*is a pleasure)|(?=.*patient centered)|(?=.*doctor really cares)|(?=.*spent as much time)|(?=.*expressed interested in me)|(?=.*took time to really talk)|(?=.*helped me with my issues)|(?=.*was incredibly calm)|(?=.*very warmly)|(?=.*listened to me first)|(?=.*without any wait time)|(?=.*always quick)|(?=.*was organized)|(?=.*very kind)|(?=.*i was able to make an appointment)|(?=.*helped clear up mysteries)|(?=.*was exceptional)|(?=.*we did not have to wait)|(?=.*i really like)|(?=.*made a personal connection)|(?=.*spent time in reviewing)|(?=.*spent quality time)|(?=.*gave me the information i needed)|(?=.*very personal)|(?=.*did not rush)|(?=.*have time to deal)|(?=.*was extremely thorough)|(?=.*has really helped)|(?=.*extra care was taken)|(?=.*took extra care)|(?=.*showed an interest)|(?=.*care and compassion)|(?=.*took his time)|(?=.*is nice  and caring)|(?=.*took the time to explain)|(?=.*talked to me and listened)|(?=.*made the patient feel comfortable)|(?=.*addressed the issue)|(?=.*listen and spent time)|(?=.*took her time with me)|(?=.*addressed my issue)|(?=.*i am so pleased)|(?=.*took the time to listen)|(?=.*i felt important  like a person)|(?=.*concise and responsive)|(?=.*didn t wait long)|(?=.*helped me understand)|(?=.*very well)|(?=.*very personal service)|(?=.*they followed the guidelines)|(?=.*started in a timely)|(?=.*was wonderful)|(?=.*very thoughtful)|(?=.*was genuinely interested)|(?=.*ran smoothly)|(?=.*i felt my concerns were important)|(?=.*assist was 
great)|(?=.*welcoming staff)|(?=.*they were very interested)|(?=.*i am treated)|(?=.*was treated with respect)|(?=.*can recommend to co workers)'), 'sentiment_label'] = 'Positive'
reason_for_nps_score_reviews.loc[reason_for_nps_score_reviews['reviews'].str.contains('^(?=convenient)') & (reason_for_nps_score_reviews['NPS']==6), 'sentiment_label'] = 'Positive'
reason_for_nps_score_reviews.loc[(reason_for_nps_score_reviews['polarity_score']<0) & (reason_for_nps_score_reviews['sentiment_label']=='Positive'), 'polarity_score']*=-1

In [None]:
#reason_for_nps_score_reviews.loc[reason_for_nps_score_reviews['reviews'].str.contains('courteous'), 'sentiment'] = 1
#reason_for_nps_score_reviews.loc[reason_for_nps_score_reviews['reviews'].str.contains('') , 'sentiment_label'] = 'Positive'
#reason_for_nps_score_reviews.loc[(reason_for_nps_score_reviews['polarity_score']>0) & (reason_for_nps_score_reviews['sentiment_label']=='Positive'), 'polarity_score']*=-1

In [None]:
# Bucket every row by its NPS value: 9-10 -> Promoter, 7-8 -> Passive, 6 or lower -> Detractor
nps_values = reason_for_nps_score_reviews['NPS']
reason_for_nps_score_reviews.loc[nps_values.between(9, 10), 'nps_label'] = 'Promoter'
reason_for_nps_score_reviews.loc[nps_values.between(7, 8), 'nps_label'] = 'Passive'
reason_for_nps_score_reviews.loc[nps_values.le(6), 'nps_label'] = 'Detractor'

In [None]:
# Number of rows carrying each nps_label value
reason_for_nps_score_reviews['nps_label'].value_counts()

In [None]:
# Number of rows carrying each sentiment_label value
reason_for_nps_score_reviews['sentiment_label'].value_counts()

In [None]:
# Display the labelled review dataframe
reason_for_nps_score_reviews

In [None]:
# Build the export table: pick the needed columns (in export order) and rename
# 'NPS' -> 'nps_score' and 'sentiment_label' -> 'label'; the rest keep their names.
export_columns = {
    'ID': 'ID',
    'reviews': 'reviews',
    'date': 'date',
    'NPS': 'nps_score',
    'clinic': 'clinic',
    'city': 'city',
    'state': 'state',
    'polarity_score': 'polarity_score',
    'sentiment_label': 'label',
    'nps_label': 'nps_label',
}
reason_for_nps_score_complete_data = reason_for_nps_score_reviews[list(export_columns)].rename(columns=export_columns)
reason_for_nps_score_complete_data.to_csv("reason_for_nps_score_complete_data.csv")
reason_for_nps_score_complete_data

In [None]:
# Tally the rows per numeric sentiment code
sentiment_codes = reason_for_nps_score_reviews[['sentiment']]
reason_for_nps_score_sentiment_count = sentiment_codes.value_counts()
reason_for_nps_score_sentiment_count

In [None]:
# Write the labelled review dataframe (index included) to CSV
reason_for_nps_score_reviews.to_csv("reason_for_nps_score_reviews.csv")

In [None]:
# Share of each NPS bucket within every sentiment label (each row sums to 1)
reason_for_nps_score_crosstab = pd.crosstab(
    index=reason_for_nps_score_reviews['sentiment_label'],
    columns=reason_for_nps_score_reviews['nps_label'],
    normalize='index',
)
#reason_for_nps_score_crosstab.to_csv("reason_for_nps_score_crosstab.csv")
reason_for_nps_score_crosstab

In [None]:
# Histogram of how many reviews carry each sentiment label
figure = (
    pl.histogram(reason_for_nps_score_reviews, x="sentiment_label")
    .update_traces(marker_color="indianred", marker_line_color='yellow', marker_line_width=2.0)
    .update_layout(title_text='Feedback Sentiment Analysis')
)
figure.show()

In [None]:
# Exploratory pairwise plots of the dataframe's columns, coloured by sentiment label
sns.pairplot(data=reason_for_nps_score_reviews, hue='sentiment_label')

In [None]:
# Rows whose sentiment label contains "Positive"
reason_for_nps_score_positive = reason_for_nps_score_reviews.loc[
    lambda frame: frame['sentiment_label'].str.contains(r'Positive')
]
reason_for_nps_score_positive

In [None]:
# Positive reviews whose text mentions a clinic, ordered from highest to lowest NPS
reason_for_nps_score_positive_clinics = (
    reason_for_nps_score_positive
    .sort_values(by="NPS", ascending=False)
    .loc[lambda frame: frame['reviews'].str.contains(r'clinic|Clinic|CLINIC')]
)
reason_for_nps_score_positive_clinics

In [None]:
# Lookup tables clinic -> city / clinic -> state (for a repeated clinic the last row wins)
clinic_indexed = reason_for_nps_score_positive_clinics.set_index('clinic')
city = clinic_indexed['city'].to_dict()
state = clinic_indexed['state'].to_dict()

# One row per clinic with the number of matching reviews, then the location columns
top_clinics = (
    reason_for_nps_score_positive_clinics['clinic']
    .value_counts()
    .rename_axis('clinic')
    .reset_index(name='mention')
)
top_clinics['city'] = top_clinics['clinic'].apply(lambda name: city.get(name))
top_clinics['state'] = top_clinics['clinic'].apply(lambda name: state.get(name))
top_clinics

In [None]:
# Serialize the clinic ranking as a JSON array; each object lists these fields in this order
clinic_json_fields = ["clinic", "city", "state", "mention"]
clinic_json_rows = zip(*(top_clinics[field] for field in clinic_json_fields))
top_clinics_json = json.dumps([dict(zip(clinic_json_fields, row)) for row in clinic_json_rows])
top_clinics_json

In [None]:
# Save the serialized clinic ranking to disk.
# The with-block closes the file when it exits, so no separate close() call is needed.
with open("top_clinics.json", "w") as top_clinics_file:
    top_clinics_file.write(top_clinics_json)

In [None]:
# Positive reviews whose text contains "doctor" or "dr", highest NPS first
reason_for_nps_score_positive_doctors = (
    reason_for_nps_score_positive
    .sort_values(by="NPS", ascending=False)
    .loc[lambda frame: frame['reviews'].str.contains(r'doctor|dr')]
)
# Print every matching review text, one per line
for review_text in reason_for_nps_score_positive_doctors['reviews']:
    print(review_text)

In [None]:
# Rows whose sentiment label contains "Negative"
reason_for_nps_score_negative = reason_for_nps_score_reviews.loc[
    lambda frame: frame['sentiment_label'].str.contains(r'Negative')
]
reason_for_nps_score_negative

In [None]:
# Rows whose sentiment label contains "Neutral"
reason_for_nps_score_neutral = reason_for_nps_score_reviews.loc[
    lambda frame: frame['sentiment_label'].str.contains(r'Neutral')
]
reason_for_nps_score_neutral

In [None]:
# Rows whose sentiment label contains "Extreme"
reason_for_nps_score_extreme = reason_for_nps_score_reviews.loc[
    lambda frame: frame['sentiment_label'].str.contains(r'Extreme')
]
reason_for_nps_score_extreme

In [None]:
# Rows labelled as promoters; preview the first ten
reason_for_nps_score_promoters = reason_for_nps_score_reviews.loc[
    lambda frame: frame['nps_label'].str.contains(r'Promoter')
]
reason_for_nps_score_promoters.head(10)

In [None]:
# Number of promoter rows
len(reason_for_nps_score_promoters)

In [None]:
# Rows labelled as passives; preview the first ten
reason_for_nps_score_passive = reason_for_nps_score_reviews.loc[
    lambda frame: frame['nps_label'].str.contains(r'Passive')
]
reason_for_nps_score_passive.head(10)

In [None]:
# Rows labelled as detractors; preview the first ten
reason_for_nps_score_detractors = reason_for_nps_score_reviews.loc[
    lambda frame: frame['nps_label'].str.contains(r'Detractor')
]
reason_for_nps_score_detractors.head(10)

In [None]:
# Write the review dataframe to CSV again; this overwrites any existing file of the same name
reason_for_nps_score_reviews.to_csv("reason_for_nps_score_reviews.csv")

In [None]:
# Unigram frequency table over every review.
# stop_words is passed as a list: recent scikit-learn releases validate this parameter
# and accept only 'english', a list, or None (a set is rejected).
c_vec = CountVectorizer(stop_words=sorted(stopwords_set), ngram_range=(1,1))
# matrix of ngrams (one row per review, one column per vocabulary term)
ngrams = c_vec.fit_transform(reason_for_nps_score_reviews['reviews'])
# count frequency of ngrams; summing the sparse matrix avoids building a dense copy of it
count_values = np.asarray(ngrams.sum(axis=0)).ravel()
# mapping term -> column index
vocab = c_vec.vocabulary_
# (frequency, term) pairs ordered from most to least frequent
term_counts = sorted(((count_values[column], term) for term, column in vocab.items()), reverse=True)
reason_for_nps_score_grams = pd.DataFrame(term_counts).rename(columns={0: 'frequency', 1:'word_group'})
reason_for_nps_score_grams['question_type'] = "reason score"
reason_for_nps_score_grams.to_csv("reason_for_nps_score_frequency.csv", encoding='utf-8', index=False)
reason_for_nps_score_grams

In [None]:
# Term frequencies for the positive reviews only, refitting the existing c_vec vectorizer
ngrams = c_vec.fit_transform(reason_for_nps_score_positive['reviews'])
count_values = ngrams.toarray().sum(axis=0)
vocab = c_vec.vocabulary_
# (frequency, term) pairs ordered from most to least frequent
positive_term_counts = sorted(((count_values[column], term) for term, column in vocab.items()), reverse=True)
reason_for_nps_score_positive_grams = pd.DataFrame(positive_term_counts, columns=['frequency', 'word_group'])
reason_for_nps_score_positive_grams.to_csv("reason_for_nps_score_positive_frequency.csv")
reason_for_nps_score_positive_grams

In [None]:
# Term frequencies for the negative reviews only, refitting the existing c_vec vectorizer
ngrams = c_vec.fit_transform(reason_for_nps_score_negative['reviews'])
count_values = ngrams.toarray().sum(axis=0)
vocab = c_vec.vocabulary_
# (frequency, term) pairs ordered from most to least frequent
negative_term_counts = sorted(((count_values[column], term) for term, column in vocab.items()), reverse=True)
reason_for_nps_score_negative_grams = pd.DataFrame(negative_term_counts, columns=['frequency', 'word_group'])
reason_for_nps_score_negative_grams.to_csv("reason_for_nps_score_negative_frequency.csv")
reason_for_nps_score_negative_grams

In [None]:
# Term frequencies for the neutral reviews only, refitting the existing c_vec vectorizer
ngrams = c_vec.fit_transform(reason_for_nps_score_neutral['reviews'])
count_values = ngrams.toarray().sum(axis=0)
vocab = c_vec.vocabulary_
# (frequency, term) pairs ordered from most to least frequent
neutral_term_counts = sorted(((count_values[column], term) for term, column in vocab.items()), reverse=True)
reason_for_nps_score_neutral_grams = pd.DataFrame(neutral_term_counts, columns=['frequency', 'word_group'])
reason_for_nps_score_neutral_grams.to_csv("reason_for_nps_score_neutral_frequency.csv")
reason_for_nps_score_neutral_grams

In [None]:
# Serialize the overall term-frequency table as a JSON array of {"topic", "frequency"} objects
reason_for_nps_score_grams_json = json.dumps([
    {"topic": term, "frequency": count}
    for term, count in zip(reason_for_nps_score_grams['word_group'], reason_for_nps_score_grams['frequency'])
])
reason_for_nps_score_grams_json

In [None]:
# Save the overall term-frequency JSON to disk.
# The with-block closes the file when it exits, so no separate close() call is needed.
with open("reason_for_nps_score_frequency.json", "w") as reason_for_nps_score_topics_file:
    reason_for_nps_score_topics_file.write(reason_for_nps_score_grams_json)

In [None]:
# Serialize the positive-review term frequencies as a JSON array of {"word_group", "frequency"} objects
reason_for_nps_score_positive_grams_json = json.dumps([
    {"word_group": term, "frequency": count}
    for term, count in zip(reason_for_nps_score_positive_grams['word_group'], reason_for_nps_score_positive_grams['frequency'])
])
reason_for_nps_score_positive_grams_json

In [None]:
# Save the positive-review term-frequency JSON to disk.
# The with-block closes the file when it exits, so no separate close() call is needed.
with open("reason_for_nps_score_positive_frequency.json", "w") as reason_for_nps_score_positive_topics_file:
    reason_for_nps_score_positive_topics_file.write(reason_for_nps_score_positive_grams_json)

In [None]:
# Serialize the negative-review term frequencies as a JSON array of {"word_group", "frequency"} objects
reason_for_nps_score_negative_grams_json = json.dumps([
    {"word_group": term, "frequency": count}
    for term, count in zip(reason_for_nps_score_negative_grams['word_group'], reason_for_nps_score_negative_grams['frequency'])
])
reason_for_nps_score_negative_grams_json

In [None]:
# Save the negative-review term-frequency JSON to disk.
# The with-block closes the file when it exits, so no separate close() call is needed.
with open("reason_for_nps_score_negative_frequency.json", "w") as reason_for_nps_score_negative_topics_file:
    reason_for_nps_score_negative_topics_file.write(reason_for_nps_score_negative_grams_json)

In [None]:
# Serialize the neutral-review term frequencies as a JSON array of {"word_group", "frequency"} objects
reason_for_nps_score_neutral_grams_json = json.dumps([
    {"word_group": term, "frequency": count}
    for term, count in zip(reason_for_nps_score_neutral_grams['word_group'], reason_for_nps_score_neutral_grams['frequency'])
])
reason_for_nps_score_neutral_grams_json

In [None]:
# Save the neutral-review term-frequency JSON to disk.
# The with-block closes the file when it exits, so no separate close() call is needed.
with open("reason_for_nps_score_neutral_frequency.json", "w") as reason_for_nps_score_neutral_topics_file:
    reason_for_nps_score_neutral_topics_file.write(reason_for_nps_score_neutral_grams_json)

In [None]:
# The five reviews with the highest polarity score
scored_reviews = reason_for_nps_score_reviews[['reviews','polarity_score','sentiment_label']]
highest_first = scored_reviews.sort_values(by='polarity_score', ascending=False)
reason_for_nps_score_top_positive = highest_first.head(5)
reason_for_nps_score_top_positive

In [None]:
# Rows whose sentiment label contains "Extreme", followed by a count per label value
reason_for_nps_score_extreme_data = reason_for_nps_score_reviews.loc[
    lambda frame: frame['sentiment_label'].str.contains(r'Extreme')
]
reason_for_nps_score_extreme_data['sentiment_label'].value_counts()

In [None]:
# The five reviews with the lowest polarity score
scored_reviews = reason_for_nps_score_reviews[['reviews','polarity_score','sentiment_label']]
lowest_first = scored_reviews.sort_values(by='polarity_score', ascending=True)
reason_for_nps_score_top_negative = lowest_first.head(5)
reason_for_nps_score_top_negative

In [None]:
# Serialize every review as a JSON array of objects.
# Keys are the JSON field names (in output order); values are the dataframe columns they are read from.
review_json_fields = {"ID": "ID", "review": "reviews", "label": "sentiment_label", "polarity_score": "polarity_score", "nps_score": "NPS", "nps_label": "nps_label", "date": "date", "clinic": "clinic", "city": "city", "state": "state"}
review_json_columns = [reason_for_nps_score_reviews[column] for column in review_json_fields.values()]
reason_for_nps_score_json = json.dumps([dict(zip(review_json_fields, values)) for values in zip(*review_json_columns)])
reason_for_nps_score_json

In [None]:
# Save the full review JSON to disk.
# The with-block closes the file when it exits, so no separate close() call is needed.
with open("reason_for_nps_score_reviews.json", "w") as reason_for_nps_score_json_file:
    reason_for_nps_score_json_file.write(reason_for_nps_score_json)

In [None]:
# Row counts per sentiment label, per NPS bucket, and overall
reason_for_nps_score_positive_length = len(reason_for_nps_score_positive)
reason_for_nps_score_negative_length = len(reason_for_nps_score_negative)
reason_for_nps_score_neutral_length = len(reason_for_nps_score_neutral)
reason_for_nps_score_extreme_length = len(reason_for_nps_score_extreme)
reason_for_nps_score_total_promoters = len(reason_for_nps_score_promoters)
reason_for_nps_score_total_passive = len(reason_for_nps_score_passive)
reason_for_nps_score_total_detractors = len(reason_for_nps_score_detractors)
reason_for_nps_score_total_length = len(reason_for_nps_score_reviews)

# Summary object, wrapped in a one-element JSON array
totals_summary = {
    "total_positive": reason_for_nps_score_positive_length,
    "total_negative": reason_for_nps_score_negative_length,
    "total_neutral": reason_for_nps_score_neutral_length,
    "total_extreme": reason_for_nps_score_extreme_length,
    "total_promoters": reason_for_nps_score_total_promoters,
    "total_passive": reason_for_nps_score_total_passive,
    "total_detractors": reason_for_nps_score_total_detractors,
    "total_comments": reason_for_nps_score_total_length,
}
reason_for_nps_score_json_total = json.dumps([totals_summary])
reason_for_nps_score_json_total

In [None]:
# Save the totals JSON to disk.
# The with-block closes the file when it exits, so no separate close() call is needed.
with open("reason_for_nps_score_total_data.json", "w") as reason_for_nps_score_total_file:
    reason_for_nps_score_total_file.write(reason_for_nps_score_json_total)

In [None]:
# Serialize the five highest-scoring reviews; JSON field -> source column, in output order
top_positive_fields = {"review": "reviews", "label": "sentiment_label", "polarity_score": "polarity_score"}
top_positive_columns = [reason_for_nps_score_top_positive[column] for column in top_positive_fields.values()]
reason_for_nps_score_top_five_positive_json = json.dumps([dict(zip(top_positive_fields, values)) for values in zip(*top_positive_columns)])
reason_for_nps_score_top_five_positive_json

In [None]:
# Serialize the five lowest-scoring reviews; JSON field -> source column, in output order
top_negative_fields = {"review": "reviews", "label": "sentiment_label", "polarity_score": "polarity_score"}
top_negative_columns = [reason_for_nps_score_top_negative[column] for column in top_negative_fields.values()]
reason_for_nps_score_top_five_negative_json = json.dumps([dict(zip(top_negative_fields, values)) for values in zip(*top_negative_columns)])
reason_for_nps_score_top_five_negative_json

In [None]:
# Save both top-five JSON documents to disk.
# Each with-block closes its file when it exits, so no separate close() calls are needed.
with open("reason_for_nps_score_top_five_positive.json", "w") as reason_for_nps_score_top_positive_file:
    reason_for_nps_score_top_positive_file.write(reason_for_nps_score_top_five_positive_json)
with open("reason_for_nps_score_top_five_negative.json", "w") as reason_for_nps_score_top_negative_file:
    reason_for_nps_score_top_negative_file.write(reason_for_nps_score_top_five_negative_json)

In [None]:
# Serialize the positive reviews as a JSON array of objects.
# Keys are the JSON field names (in output order); values are the dataframe columns they are read from.
review_json_fields = {"ID": "ID", "review": "reviews", "label": "sentiment_label", "polarity_score": "polarity_score", "nps_score": "NPS", "nps_label": "nps_label", "date": "date", "clinic": "clinic", "city": "city", "state": "state"}
positive_json_columns = [reason_for_nps_score_positive[column] for column in review_json_fields.values()]
reason_for_nps_score_positive_json = json.dumps([dict(zip(review_json_fields, values)) for values in zip(*positive_json_columns)])
reason_for_nps_score_positive_json

In [None]:
# Save the positive-review JSON to disk.
# The with-block closes the file when it exits, so no separate close() call is needed.
with open("reason_for_nps_score_positive.json", "w") as reason_for_nps_score_positive_file:
    reason_for_nps_score_positive_file.write(reason_for_nps_score_positive_json)

In [None]:
# Serialize the negative reviews as a JSON array of objects.
# Keys are the JSON field names (in output order); values are the dataframe columns they are read from.
review_json_fields = {"ID": "ID", "review": "reviews", "label": "sentiment_label", "polarity_score": "polarity_score", "nps_score": "NPS", "nps_label": "nps_label", "date": "date", "clinic": "clinic", "city": "city", "state": "state"}
negative_json_columns = [reason_for_nps_score_negative[column] for column in review_json_fields.values()]
reason_for_nps_score_negative_json = json.dumps([dict(zip(review_json_fields, values)) for values in zip(*negative_json_columns)])
reason_for_nps_score_negative_json

In [None]:
# Save the negative-review JSON to disk.
# The with-block closes the file when it exits, so no separate close() call is needed.
with open("reason_for_nps_score_negative.json", "w") as reason_for_nps_score_negative_file:
    reason_for_nps_score_negative_file.write(reason_for_nps_score_negative_json)

In [None]:
# Serialize the neutral reviews as a JSON array of objects.
# Keys are the JSON field names (in output order); values are the dataframe columns they are read from.
review_json_fields = {"ID": "ID", "review": "reviews", "label": "sentiment_label", "polarity_score": "polarity_score", "nps_score": "NPS", "nps_label": "nps_label", "date": "date", "clinic": "clinic", "city": "city", "state": "state"}
neutral_json_columns = [reason_for_nps_score_neutral[column] for column in review_json_fields.values()]
reason_for_nps_score_neutral_json = json.dumps([dict(zip(review_json_fields, values)) for values in zip(*neutral_json_columns)])
reason_for_nps_score_neutral_json

In [None]:
# Save the neutral-review JSON to disk.
# The with-block closes the file when it exits, so no separate close() call is needed.
with open("reason_for_nps_score_neutral.json", "w") as reason_for_nps_score_neutral_file:
    reason_for_nps_score_neutral_file.write(reason_for_nps_score_neutral_json)

In [None]:
# Serialize the "Extreme" reviews as a JSON array of objects.
# Keys are the JSON field names (in output order); values are the dataframe columns they are read from.
review_json_fields = {"ID": "ID", "review": "reviews", "label": "sentiment_label", "polarity_score": "polarity_score", "nps_score": "NPS", "nps_label": "nps_label", "date": "date", "clinic": "clinic", "city": "city", "state": "state"}
extreme_json_columns = [reason_for_nps_score_extreme[column] for column in review_json_fields.values()]
reason_for_nps_score_extreme_json = json.dumps([dict(zip(review_json_fields, values)) for values in zip(*extreme_json_columns)])
reason_for_nps_score_extreme_json

In [None]:
# Save the "Extreme" review JSON to disk.
# The with-block closes the file when it exits, so no separate close() call is needed.
with open("reason_for_nps_score_extreme.json", "w") as reason_for_nps_score_extreme_file:
    reason_for_nps_score_extreme_file.write(reason_for_nps_score_extreme_json)

In [None]:
# Histogram of how many reviews carry each sentiment label
figure = (
    pl.histogram(reason_for_nps_score_reviews, x="sentiment_label")
    .update_traces(marker_color="indianred", marker_line_color='yellow', marker_line_width=2.0)
    .update_layout(title_text='Feedback Sentiment Analysis')
)
figure.show()

In [None]:
# Histogram of how many reviews fall into each nps_label bucket
figure = (
    pl.histogram(reason_for_nps_score_reviews, x="nps_label")
    .update_traces(marker_color="indianred", marker_line_color='yellow', marker_line_width=2.0)
    .update_layout(title_text='Feedback Sentiment Analysis - NPS Label')
)
figure.show()

### 2.4 Extracting Positive Reviews

In [None]:
# Extract the positive-review answers plus survey metadata, dropping 'No Response' rows.
# .copy() gives an independent frame, so the edits below never act on a slice of `data`
# (the in-place edits on slices used before are what triggers pandas' chained-assignment warnings).
satisfied_columns = ['ID','MEMBER_ID','POSITIVEREVIEWS','NPS','SURVEYDATE','SURVEYCLINIC','CLINICCITY','CLINICSTATE']
data_satisfied = data.loc[data['POSITIVEREVIEWS'] != 'No Response', satisfied_columns].copy()

# Rename the survey columns to the short names used throughout the notebook
data_satisfied = data_satisfied.rename(columns={
    'POSITIVEREVIEWS': 'reviews',
    'SURVEYDATE': 'date',
    'SURVEYCLINIC': 'clinic',
    'CLINICCITY': 'city',
    'CLINICSTATE': 'state',
})

# Replace every character that is not an ASCII letter with a space
remove_punctuations_satisfied = data_satisfied[['reviews']].replace("[^a-zA-Z]"," ",regex=True)
data_satisfied['reviews'] = remove_punctuations_satisfied['reviews']

data_satisfied = data_satisfied.reset_index(drop=True)
data_satisfied

In [None]:
# Second name for the same dataframe object (plain assignment, no copy is made)
satisfied_reviews = data_satisfied

In [None]:
satisfied_reviews.isna().sum() # Counting the number of missing values in each column

In [None]:
# Dropping every row that has a missing value in any column, then re-checking the per-column counts
satisfied_reviews = satisfied_reviews.dropna(axis=0, how='any')
satisfied_reviews.isna().sum()

In [None]:
# Counting the reviews that contain one of the listed English contractions
# NOTE(review): non-letter characters were replaced with spaces when data_satisfied was built,
# so apostrophes are presumably gone by now and this count is likely always 0 - confirm
contraction_pattern = r'can\'t|won\'t|isn\'t|doesn\'t|didn\'t|don\'t|haven\'t|couldn\'t|you\'ll|I\'ll|I\'ve|I\'m|it\'ll|it\'s|could\'ve|would\'ve|hadn\'t|hasn\'t|should\'ve|shouldn\'t|must\'ve|who\'s|they\'re|they\'ll|they\'ve|that\'s|there\'s'
has_contraction = satisfied_reviews['reviews'].str.contains(contraction_pattern)
satisfied_contractions = satisfied_reviews[has_contraction].value_counts().sum()
satisfied_contractions

In [None]:
# Converting reviews to lower case
# Series.str.lower is used instead of DataFrame.applymap(str.lower): applymap is deprecated in
# recent pandas releases, and this assigns a Series (not a one-column DataFrame) to the column
satisfied_reviews['reviews'] = satisfied_reviews['reviews'].str.lower()
satisfied_reviews

In [None]:
# Tokenizing every review; the result is one list of tokens per review
satisfied_token_list = [word_tokenize(review_text) for review_text in satisfied_reviews['reviews']]
satisfied_token_list

In [None]:
# Adding the token lists as a new 'tokens' column so reviews and tokens appear side by side
satisfied_reviews['tokens'] = satisfied_token_list
satisfied_reviews

In [None]:
# Finding token frequency per review and appending the result into a list
satisfied_frequency_list = []

# Kept as an explicit loop: `fdist` stays bound to the last distribution after the loop ends
for each in satisfied_token_list:
    fdist = FreqDist(each)
    satisfied_frequency_list.append(fdist)

# Plotting the 30 most frequent tokens for the first 15 reviews; slicing (instead of
# indexing 0..14) avoids an IndexError when fewer than 15 reviews are available
for review_fdist in satisfied_frequency_list[:15]:
    review_fdist.plot(30,cumulative=False)
    plt.show()

In [None]:
# Displaying the 4 most common tokens of `fdist`
# NOTE(review): `fdist` is not assigned in this cell; it looks like the loop variable left over
# from the frequency cell, in which case this covers only the last review - confirm
fdist.most_common(4)

In [None]:
# Lower-casing every token and keeping it only when it is not in the English stopword list
stopwords_set = set(stopwords.words('english'))
satisfied_tokens_without_stopwords = [
    [term.lower() for term in review_tokens if term.lower() not in stopwords_set]
    for review_tokens in satisfied_reviews['tokens']
]

# Storing the stopword-free tokens as a new column next to the reviews and raw tokens
satisfied_reviews['tokens_without_stopwords'] = satisfied_tokens_without_stopwords
satisfied_reviews

In [None]:
# Visualizing the text of all reviews as a word cloud (stopwords excluded)
feedbacks = " ".join(satisfied_reviews['reviews'])
wordcloud_options = dict(stopwords=stopwords_set, collocations=False, background_color='white', width=4000, height=3000)
satisfied_wordcloud = WordCloud(**wordcloud_options).generate(feedbacks)

fig = plt.figure(figsize=(15, 10))
plt.imshow(satisfied_wordcloud, interpolation='bilinear')
plt.axis("off")
plt.title('What Went Well Wordcloud - Tokens')
plt.show()

In [None]:
# Lemmatizing every stopword-free token with WordNet's default part of speech
lemmatizer = WordNetLemmatizer()
satisfied_tokens_after_lemmatizing = [
    [lemmatizer.lemmatize(term) for term in review_tokens]
    for review_tokens in satisfied_tokens_without_stopwords
]

# Storing the lemmatized tokens as a new column next to the earlier ones
satisfied_reviews['fresh_tokens'] = satisfied_tokens_after_lemmatizing
satisfied_reviews

In [None]:
# Part-of-speech tagging each review's lemmatized token list
satisfied_pos_tagged = list(map(pos_tag, satisfied_tokens_after_lemmatizing))
satisfied_pos_tagged

In [None]:
# Tokenize and POS-tag each review text.
# word_tokenize / pos_tag are the names imported at the top of the notebook, so this cell
# no longer depends on the `nltk` package name itself being in scope.
satisfied_reviews['feedback'] = satisfied_reviews['reviews'].apply(lambda v: pos_tag(word_tokenize(v)))


def _lemmatize_tagged(tagged_tokens):
    """Join (word, tag) pairs into one string, lemmatizing words whose tag maps to a WordNet POS."""
    lemmas = []
    for word, tag in tagged_tokens:
        wordnet_pos = get_wordnet_pos(tag)  # looked up once per token
        # Words without a WordNet POS mapping are kept unchanged
        lemmas.append(lemmatizer.lemmatize(word, pos=wordnet_pos) if wordnet_pos else word)
    return ' '.join(lemmas)


satisfied_reviews['lemma'] = satisfied_reviews['feedback'].apply(_lemmatize_tagged)
satisfied_reviews

In [None]:
# Finding polarity score for each feedback
# Each entry holds the polarity scores returned by the analyzer plus the review text under 'label'
# The entries are collected in a list which is accessed later to compute sentiment scores
satisfied_polarity_score_list = []

# Create the analyzer a single time and reuse it, instead of building a new one per review
analyzer = sia()

for each in satisfied_reviews.reviews:
    polarity_score = analyzer.polarity_scores(each)
    polarity_score['label'] = each
    satisfied_polarity_score_list.append(polarity_score)
    print(polarity_score)

In [None]:
# Attaching the polarity scores to the reviews.
# The score frame is built on satisfied_reviews' own index: rows were removed by dropna()
# after the index was reset, so a default 0..n-1 index would pair scores with the wrong
# rows and leave NaN wherever the labels do not line up.
satisfied_scores = pd.DataFrame(satisfied_polarity_score_list, index=satisfied_reviews.index)

# 'compound' is the aggregated score; 'pos' and 'neg' are the positive and negative parts
satisfied_reviews['polarity_score'] = satisfied_scores['compound']
satisfied_reviews['positive_score'] = satisfied_scores['pos']
# negative_score is read from 'neg' ('neu' is the neutral score)
satisfied_reviews['negative_score'] = satisfied_scores['neg']

# Any score still missing is treated as 0
score_columns = ['polarity_score', 'positive_score', 'negative_score']
satisfied_reviews[score_columns] = satisfied_reviews[score_columns].fillna(0)
satisfied_reviews

In [None]:
# Creating 'sentiment' and 'sentiment_label' columns by calling the helpers `sentiment` and
# `sentiment_label` once per row (axis=1)
# NOTE(review): both helpers are defined outside this cell; presumably they derive the code
# and label from the row's polarity score - confirm against their definitions
satisfied_reviews['sentiment'] = satisfied_reviews.apply(sentiment, axis=1)
satisfied_reviews['sentiment_label'] = satisfied_reviews.apply(sentiment_label, axis=1)
satisfied_reviews.head(10)

In [None]:
# Rule 1: reviews matching any of these phrases are labelled 'Extreme' (sentiment code 2)
extreme_pattern = '^(?=.*frustrated)|(?=.*angry)|(?=.*terrible)|(?=.*disrespectful)|(?=.*irresponsibly)|(?=.*irresponsible)|(?=.*improper)|(?=.*violate)|(?=.*violations)|(?=.*aren t worth)|(?=.*undress)|(?=.*didnt treat me)|(?=.*was horrible)|(?=kill)|(?=.*danger)|(?=.*dangerous)|(?=rough)|(?=.*upsetting)|(?=.*out of line)|(?=.*i felt uncomfortable)|(?=.*made me feel uncomfortable)|(?=.*i feel uncomfortable)|(?=.*was uncomfortable)|(?=.*i feel uncomfortable)|(?=.*made me feel uncomfortable)|(?=.*can t breathe)|(?=.*breast)|(?=.*was misdiagnosed)|(?=.*did not examine me)|(?=.*forced to participate)|(?=.*forced into it)'
is_extreme = satisfied_reviews['reviews'].str.match(extreme_pattern)
satisfied_reviews.loc[is_extreme, 'sentiment'] = 2
satisfied_reviews.loc[is_extreme, 'sentiment_label'] = 'Extreme'
# An 'Extreme' row with a positive polarity score gets its sign flipped
extreme_with_positive_score = (satisfied_reviews['sentiment_label'] == 'Extreme') & (satisfied_reviews['polarity_score'] > 0)
satisfied_reviews.loc[extreme_with_positive_score, 'polarity_score'] *= -1

# Rule 2: a 'Negative' label on a row with NPS of 7 or more is turned into 'Positive'
high_nps = satisfied_reviews['NPS'] >= 7
negative_with_high_nps = high_nps & (satisfied_reviews['sentiment_label'] == 'Negative')
satisfied_reviews.loc[negative_with_high_nps, 'sentiment'] = 1
satisfied_reviews.loc[negative_with_high_nps, 'sentiment_label'] = 'Positive'
# Any negative polarity score on a row with NPS of 7 or more gets its sign flipped
satisfied_reviews.loc[high_nps & (satisfied_reviews['polarity_score'] < 0), 'polarity_score'] *= -1

# Rule 3: reviews containing any of these phrases, with NPS of 5 or more, are labelled 'Positive'
positive_pattern = '^(?=.*good)|(?=.*great)|(?=.*awesome)|(?=.*very much)|(?=.*friendly)|(?=.*appreciate)|(?=.*compassionate)|(?=.*went well)|(?=.*very thorough)|(?=.*assisted)|(?=.*easy)|(?=.*enjoyed)|(?=.*i like)|(?=.*i liked)|(?=.*nice)|(?=.*love)|(?=.*caring)|(?=.*impressed)|(?=.*knowledgeable)|(?=.*very comfortable)|(?=.*very professional)|(?=.*achieved)|(?=.*courteous)|(?=.*listen to what i had to say)|(?=.*right amount of time)|(?=.*on time)|(?=.*personable)|(?=.*efficient)|(?=.*fantastic)|(?=.*punctual)|(?=.*go over issues)|(?=.*got me in same day)|(?=.*all my concerns)|(?=.*everything)|(?=.*sat and talked)'
is_positive = satisfied_reviews['reviews'].str.contains(positive_pattern) & (satisfied_reviews['NPS'] >= 5)
satisfied_reviews.loc[is_positive, 'sentiment'] = 1
satisfied_reviews.loc[is_positive, 'sentiment_label'] = 'Positive'
# A 'Positive' row with a negative polarity score gets its sign flipped
positive_with_negative_score = (satisfied_reviews['polarity_score'] < 0) & (satisfied_reviews['sentiment_label'] == 'Positive')
satisfied_reviews.loc[positive_with_negative_score, 'polarity_score'] *= -1
satisfied_reviews

In [None]:
# Fixing polarity scores and related labels
#satisfied_reviews.loc[satisfied_reviews['reviews'].str.contains('not sure|don t know'), 'sentiment'] = 0
#satisfied_reviews.loc[satisfied_reviews['reviews'].str.contains('nothing') & (satisfied_reviews['NPS']<=4), 'sentiment'] = 0
#satisfied_reviews.loc[satisfied_reviews['reviews'].str.contains('not sure|don t know'), 'sentiment_label'] = 'Neutral'
#satisfied_reviews.loc[(satisfied_reviews['sentiment_label']=='Neutral') & (satisfied_reviews['sentiment']==0), 'polarity_score']=0
#satisfied_reviews

In [None]:
# Phrase rules that force a review to be labelled negative.
#
# Each alternative in the patterns is a lookahead, so str.contains() reports a
# match when the review contains at least one of the listed phrases.
# The patterns are defined once and reused for both the 'sentiment' and the
# 'sentiment_label' assignment; the literals are written as adjacent string
# chunks so that no string literal is broken across a line.

# Base phrases: applied only to reviews with NPS <= 4.
negative_base_pattern = (
    '^(?=.*wasn t treated)|(?=.*not as caring)|(?=.*did not think)|(?=.*didn t think)|(?=.*did not get)|(?=.*didn t get)'
    '|(?=.*was rushed)|(?=.*seemed rushed)|(?=.*did not fix)|(?=.*didn t fix)|(?=.*didn t listen)|(?=.*did not listen)'
    '|(?=.*unprofessional)|(?=.*don t think)|(?=.*do not think)|(?=.*talked too much)|(?=.*took longer)|(?=.*didn t look)'
    '|(?=.*did not look)|(?=.*not being treated)|(?=.*confused)|(?=.*did not recognize)|(?=.*didn t recognize)'
    '|(?=.*do not like)|(?=.*don t like)|(?=.*wasn t greeted)|(?=.*was not greeted)|(?=.*need to improve)|(?=.*no solution)'
    '|(?=.*didn t understand)|(?=.*did not understand)|(?=.*unorganized)|(?=.*no follow up)|(?=.*doesn t seem)'
    '|(?=.*does not seem)|(?=.*would not call)|(?=.*wouldn t call)|(?=.*not answered)|(?=.*no answer)'
    '|(?=.*not near as caring)|(?=.*already expressed)'
)

# Extended phrases: the base list plus the alternatives below, applied to
# every review regardless of NPS.
negative_extended_pattern = negative_base_pattern + (
    '|(?=.*dirty)|(?=.*don t have resources)|(?=.*dont lie)|(?=.*give proper)|(?=.*not treating)|(?=.*worthless)'
    '|(?=.*doesn t look well)|(?=.*don t get)|(?=.*way to long)|(?=.*not willing)|(?=.*no examination)|(?=.*failed)'
    '|(?=.*didn t follow up)|(?=.*doesn t follow up)|(?=.*was cold)|(?=.*little compassion)|(?=.*inexperienced)'
    '|(?=.*didn t even)|(?=.*needs to be cleaned)|(?=.*forgot to call)|(?=.*did not hear)|(?=.*superficial)'
    '|(?=.*afraid to touch)|(?=.*did not trust)|(?=.*far too long)|(?=.*don t have any resources)|(?=.*not happy)'
    '|(?=.*made me wait)|(?=.*awful)|(?=.*dirty)|(?=.*nobody grteeted)|(?=.*won t call)|(?=.*did not work)'
    '|(?=.*entire problem was)|(?=.*didn t even acknowledge)|(?=.*doesn t care)|(?=.*incompetent)|(?=.*not knowledgeable)'
    '|(?=.*didn t make appointment right)|(?=.*should not have)|(?=.*had to wait awhile)|(?=.*way too long)'
    '|(?=.*frustrating)|(?=.*they don t do)|(?=.*than the person you have hired)|(?=.*wasn t very clean)'
    '|(?=.*made me very sick)|(?=.*we were told)|(?=.*what is going on)|(?=.*don t recommend)'
    '|(?=.*only thing they cared about)|(?=.*did not seem to care)|(?=.*pretty disappointed)|(?=.*mess up my medicine)'
    '|(?=.*very demeaning)|(?=.*unfriendly)|(?=.*very disappointed)|(?=.*didn t have any good)'
    '|(?=.*wasn t much true interest)|(?=.*not acceptable)|(?=.*does not listen)|(?=.*did not listen)'
    '|(?=.*would not refill)|(?=.*not happening)|(?=.*not great)|(?=.*new very little)|(?=.*to long of a time)'
    '|(?=.*poor instructions)|(?=.*poor service)|(?=.*was charged an excessive amount)|(?=.*thrilled i was able)'
    '|(?=.*little slow)'
)

# Rule 1: base phrases on low-NPS reviews.
# NOTE(review): every row matched here is also matched by rule 2 (superset
# pattern, no NPS filter), so rule 1 does not change the final result.
low_nps_negative = satisfied_reviews['reviews'].str.contains(negative_base_pattern) & (satisfied_reviews['NPS'] <= 4)
satisfied_reviews.loc[low_nps_negative, 'sentiment'] = -1
satisfied_reviews.loc[low_nps_negative, 'sentiment_label'] = 'Negative'

# Rule 2: extended phrases on all reviews.
any_nps_negative = satisfied_reviews['reviews'].str.contains(negative_extended_pattern)
satisfied_reviews.loc[any_nps_negative, 'sentiment'] = -1
satisfied_reviews.loc[any_nps_negative, 'sentiment_label'] = 'Negative'

# Make every 'Negative' row carry a non-positive polarity score.
satisfied_reviews.loc[(satisfied_reviews['polarity_score']>0) & (satisfied_reviews['sentiment_label']=='Negative'), 'polarity_score']*=-1

In [None]:
# Phrase rules that force a review to be labelled positive, regardless of NPS.
#
# Each alternative is a lookahead, so str.contains() reports a match when the
# review contains at least one of the listed phrases. The pattern is defined
# once and reused for both the 'sentiment' and the 'sentiment_label'
# assignment; it is written as adjacent string chunks so that no string
# literal is broken across a line. Repeated spaces inside phrases are
# intentional and must be preserved.
positive_phrase_pattern = (
    '^(?=.*amazing)|(?=.*can t think of anything)|(?=.*completely satisfied)|(?=.*no complaints)|(?=.*perfect)'
    '|(?=.*was fine)|(?=.*was nice)|(?=.*very happy)|(?=.*well communication)|(?=.*enough time with me)'
    '|(?=.*not a single thing)|(?=.*couldn t ask for anything more)|(?=.*very satisfied)|(?=.*nothing to say)'
    '|(?=.*nothing to report)|(?=.*very pleasant)|(?=.*was well)|(?=.*completely happy)|(?=.*was pleased)'
    '|(?=.*all employees use)|(?=.*super nice)|(?=.*never had a doctor spend)|(?=.*very pleased)|(?=.*it was okay)'
    '|(?=.*was quick)|(?=.*convenient high quality)|(?=.*very welcoming)|(?=.*provide great care)|(?=.*very nice)'
    '|(?=.*treat you like family)|(?=.*always on time)|(?=.*very helpful)|(?=.*i am fortunate)'
    '|(?=.*best coarse of action)|(?=.*very convenient)|(?=.*i iike the doctor)|(?=.*great service)'
    '|(?=.*wasn t too long)|(?=.*extremely convenient)|(?=.*service is good)|(?=.*i like being able to)'
    '|(?=.*overall i liked)|(?=.*paladinas the city s health)|(?=.*i enjoy the convenience)|(?=.*respect for my time)'
    '|(?=.*was easy to schedule)|(?=.*short waiting times)|(?=.*no waiting time)|(?=.*no hassle)|(?=.*very friendly)'
    '|(?=.*answered all my questions)|(?=.*very professional)|(?=.*so far so good)|(?=.*always seen on time)'
    '|(?=.*never feel rushed)|(?=.*friendly  timely)|(?=.*fast service)|(?=.*very accommodating)'
    '|(?=.*pleasent experience   nice staff)|(?=.*helpful and financially beneficial)|(?=.*ease of appointments)'
    '|(?=.*has a genuine concern)|(?=.*you feel that you are being heard)|(?=.*quick  affordable)'
    '|(?=.*convenient  fast  professional)|(?=.*excellent  thorough providers)|(?=.*very detailed check up)'
    '|(?=.*were kind)|(?=.*high quality of care)|(?=.*doctor listened to patient)|(?=.*doing a better job)'
    '|(?=.*ease of making an appointment)|(?=.*are just wonderful)|(?=.*very attentive)|(?=.*no wait times)'
    '|(?=.*felt comfortable)|(?=.*treated like a human a)|(?=.*felt welcoming)|(?=.*i am satisfied)'
    '|(?=.*quality of care and time)|(?=.*i enjoy)|(?=.*quick scheduling)|(?=.*was vey kind)|(?=.*was very kind)'
    '|(?=.*it was helpful)|(?=.*pretty thorough)|(?=.*was really thorough)|(?=.*listened to my concerns)'
    '|(?=.*listened to what i had to say)|(?=.*listened to all concerns)|(?=.*very responsive)|(?=.*very informative)'
    '|(?=.*cared about my well being)|(?=.*i feel welcomed)|(?=.*made me feel important)|(?=.*made me feel better)'
    '|(?=.*was very productive)|(?=.*gave me a lot of information)|(?=.*timely and kind)|(?=.*positive experience)'
    '|(?=.*personalized care)|(?=.*thank you)|(?=.*with professional courtesy)|(?=.*very positive environment)'
    '|(?=.*were wonderful)|(?=.*was thorough)|(?=.*non judgemental)|(?=.*very receptive)|(?=.*did not seem rushed)'
    '|(?=.*asked thorough questions)|(?=.* i felt confident)|(?=.*very little wait time)'
    '|(?=.*felt like an actual patient)|(?=.*she really cares)|(?=.*he really cares)|(?=.*treated me like a person)'
    '|(?=.*listened carefully and thoughtfully)|(?=.*was responsive)|(?=.*explained things really well)'
    '|(?=.*met me with respect)|(?=.*listened to my feedback)|(?=.*i felt heard)|(?=.*went very well)'
    '|(?=.*quick and thorough)|(?=.*made me comfortable)|(?=.*was explained in detail)|(?=.*kind and listened)'
    '|(?=.*got me in earlier)|(?=.*outstanding service)|(?=.*listened to my needs)|(?=.*very well prepared)'
    '|(?=.*i was welcomed)|(?=.*listened  welcoming)|(?=.*most important patient)'
    '|(?=.*spent a tremendous amount of time)|(?=.*very kind and polite)|(?=.*pleasant and thorough)'
    '|(?=.*pleasant and helpful)|(?=.*very prompt)|(?=.*very timely)|(?=.*asked good questions)|(?=.*is the best)'
    '|(?=.*pleasant and professional)|(?=.*was understanding)|(?=.*felt welcome and listened)'
    '|(?=.*everything went well)|(?=.*was pleasant)|(?=.*staff greeted me)|(?=.*everyone is sincere)'
    '|(?=.*polite and helpful)|(?=.*interactive and listened)|(?=.*found answers to my questions)'
    '|(?=.*kind and considerate)|(?=.*listened and responded)|(?=.*easy to schedule)'
    '|(?=.*always listen to my concerns)|(?=.*were professional)|(?=.*satisfied my needs)|(?=.*i was important)'
    '|(?=.*did not rush me)|(?=.*were addressed)|(?=.*was very respectful)|(?=.*didn t feel rushed)'
    '|(?=.*ease and timeliness)|(?=.*very patient and attentive)|(?=.*made sure i understood)'
    '|(?=.*took time to explain)|(?=.*very personal and understanding)|(?=.*very honest)|(?=.*very cared)'
    '|(?=.*always prepared)|(?=.*well cared)|(?=.*best dr visit)|(?=.*did not feel rushed)|(?=.*very quick)'
    '|(?=.*is a pleasure)|(?=.*patient centered)|(?=.*doctor really cares)|(?=.*spent as much time)'
    '|(?=.*expressed interested in me)|(?=.*took time to really talk)|(?=.*helped me with my issues)'
    '|(?=.*was incredibly calm)|(?=.*very warmly)|(?=.*listened to me first)|(?=.*without any wait time)'
    '|(?=.*always quick)|(?=.*was organized)|(?=.*very kind)|(?=.*i was able to make an appointment)'
    '|(?=.*helped clear up mysteries)|(?=.*was exceptional)|(?=.*we did not have to wait)|(?=.*i really like)'
    '|(?=.*made a personal connection)|(?=.*spent time in reviewing)|(?=.*spent quality time)'
    '|(?=.*gave me the information i needed)|(?=.*very personal)|(?=.*did not rush)|(?=.*have time to deal)'
    '|(?=.*was extremely thorough)|(?=.*has really helped)|(?=.*extra care was taken)|(?=.*took extra care)'
    '|(?=.*showed an interest)|(?=.*care and compassion)|(?=.*took his time)|(?=.*is nice  and caring)'
    '|(?=.*took the time to explain)|(?=.*talked to me and listened)|(?=.*made the patient feel comfortable)'
    '|(?=.*addressed the issue)|(?=.*listen and spent time)|(?=.*took her time with me)|(?=.*addressed my issue)'
    '|(?=.*i am so pleased)|(?=.*took the time to listen)|(?=.*i felt important  like a person)'
    '|(?=.*concise and responsive)|(?=.*didn t wait long)|(?=.*helped me understand)|(?=.*very well)'
    '|(?=.*very personal service)|(?=.*they followed the guidelines)|(?=.*started in a timely)|(?=.*was wonderful)'
    '|(?=.*very thoughtful)|(?=.*was genuinely interested)|(?=.*ran smoothly)'
    '|(?=.*i felt my concerns were important)|(?=.*assist was great)|(?=.*welcoming staff)'
    '|(?=.*they were very interested)|(?=.*i am treated)|(?=.*was treated with respect)'
    '|(?=.*can recommend to co workers)|(?=.*is wonderful)|(?=.*is great)|(?=.*got one  same day)'
    '|(?=.*nothing was rushed)|(?=.*got me in and out)|(?=.*very caring)|(?=.*was awesome)'
    '|(?=.*received a very in depth review)|(?=.*quick in and out)|(?=.*in a timely manner)'
    '|(?=.*very warm   friendly)|(?=.*courteous and professional)|(?=.*was very courteous)|(?=.*always so great)'
    '|(?=.*was awesome)|(?=.*seen by doctor and have)|(?=.*things were on time)'
    '|(?=.*established care  listened to my issues)|(?=.*clearly reviewed my information)'
    '|(?=.*everything was done well)|(?=.*i didn t even wait)|(?=.*was so friendly)'
    '|(?=.*took your time to take care)|(?=.*which is great)|(?=.*they are so nice)'
    '|(?=.*doctor discussed my concerns)|(?=.*staff was friendly)|(?=.*they make me feel welcome)'
    '|(?=.*was friendly)|(?=.*greeted me warmly)|(?=.*provider spent time asking questions)'
    '|(?=.*spoke about all of my concerns)|(?=.*no long waiting times)|(?=.*very personable)|(?=.*so smooth)'
    '|(?=.*down to earth)|(?=.*spent a lot of time with me)|(?=.*were so attentive)|(?=.*was caring)'
    '|(?=.*was gentle)|(?=.*was incredibly caring)|(?=.*was treated great)|(?=.*i m comfortable)'
    '|(?=.* helped me to understand)|(?=.*does a great job)|(?=.*is so kind)|(?=.*made me feel comfortable)'
    '|(?=.*i was being listened)|(?=.*i was greeted)|(?=.*when she took blood it was fast)|(?=.*didn t get pushy)'
    '|(?=.*gave me an accurate diagnosis)|(?=.*explain things that i didn t understand)|(?=.*is professional)'
    '|(?=.*willing to take the time with you)|(?=.*nice staff and everyone listened)|(?=.*took the time to answer)'
    '|(?=.*is friendly and listens)|(?=.*were able to get me in quickly)|(?=.*was fabulous)'
    '|(?=.*took the time to make me comfortable)|(?=.*she listened to)|(?=.*they were so caring)'
    '|(?=.*they were thorough)|(?=.*never felt that i was rushed)|(?=.*was extremely personable)'
    '|(?=.*is very patient and caring)|(?=.*is awesome)'
)

# Mark every review containing one of the phrases as positive.
phrase_positive = satisfied_reviews['reviews'].str.contains(positive_phrase_pattern)
satisfied_reviews.loc[phrase_positive, 'sentiment'] = 1
satisfied_reviews.loc[phrase_positive, 'sentiment_label'] = 'Positive'

# Reviews that start with "convenient" and have NPS == 6 are also positive.
# The numeric 'sentiment' is set together with the label so the two columns
# stay in sync, as in every other rule of this notebook section.
starts_with_convenient = satisfied_reviews['reviews'].str.contains('^(?=convenient)') & (satisfied_reviews['NPS']==6)
satisfied_reviews.loc[starts_with_convenient, 'sentiment'] = 1
satisfied_reviews.loc[starts_with_convenient, 'sentiment_label'] = 'Positive'

# Make every 'Positive' row carry a non-negative polarity score.
satisfied_reviews.loc[(satisfied_reviews['polarity_score']<0) & (satisfied_reviews['sentiment_label']=='Positive'), 'polarity_score']*=-1

In [None]:
satisfied_reviews.loc[(satisfied_reviews['NPS']>=9) & (satisfied_reviews['NPS']<=10), 'nps_label'] = 'Promoter'
satisfied_reviews.loc[(satisfied_reviews['NPS']>=7) & (satisfied_reviews['NPS']<=8), 'nps_label'] = 'Passive'
satisfied_reviews.loc[satisfied_reviews['NPS']<=6, 'nps_label'] = 'Detractors'

In [None]:
satisfied_reviews['nps_label'].value_counts()

In [None]:
# Build the export table: a fixed subset of the review columns, in this
# order, with 'NPS' exposed as 'nps_score' and 'sentiment_label' as 'label'.
export_columns = ['ID', 'reviews', 'date', 'NPS', 'clinic', 'city', 'state', 'polarity_score', 'sentiment_label', 'nps_label']
exported_names = {'NPS': 'nps_score', 'sentiment_label': 'label'}
satisfied_complete_data = satisfied_reviews[export_columns].rename(columns=exported_names)
satisfied_complete_data.to_csv("satisfied_complete_data.csv")
satisfied_complete_data

In [None]:
satisfied_sentiment_count = satisfied_reviews[['sentiment']].value_counts()
satisfied_sentiment_count

In [None]:
satisfied_reviews.to_csv("satisfied_reviews.csv")

In [None]:
satisfied_crosstab = pd.crosstab(satisfied_reviews.sentiment_label, satisfied_reviews.nps_label)
satisfied_crosstab.to_csv("satisfied_crosstab.csv")
satisfied_crosstab

In [None]:
# Creating visualization using sns for exploratory analysis
sns.pairplot(satisfied_reviews, hue = 'sentiment_label')

In [None]:
satisfied_positive = satisfied_reviews[satisfied_reviews['sentiment_label'].str.contains(r'Positive')]
satisfied_positive

In [None]:
satisfied_negative = satisfied_reviews[satisfied_reviews['sentiment_label'].str.contains(r'Negative')]
satisfied_negative

In [None]:
satisfied_neutral = satisfied_reviews[satisfied_reviews['sentiment_label'].str.contains(r'Neutral')]
satisfied_neutral

In [None]:
satisfied_extreme = satisfied_reviews[satisfied_reviews['sentiment_label'].str.contains(r'Extreme')]
satisfied_extreme

In [None]:
satisfied_promoters = satisfied_reviews[satisfied_reviews['nps_label'].str.contains(r'Promoter')]
satisfied_promoters.head(10)

In [None]:
satisfied_passive = satisfied_reviews[satisfied_reviews['nps_label'].str.contains(r'Passive')]
satisfied_passive.head(10)

In [None]:
satisfied_detractors = satisfied_reviews[satisfied_reviews['nps_label'].str.contains(r'Detractor')]
satisfied_detractors.head(10)

In [None]:
satisfied_reviews.to_csv("satisfied_reviews.csv")

In [None]:
ngrams = c_vec.fit_transform(satisfied_reviews['reviews'])
count_values = ngrams.toarray().sum(axis=0)
vocab = c_vec.vocabulary_
satisfied_grams = pd.DataFrame(sorted([(count_values[i],k) for k,i in vocab.items()], reverse=True)).rename(columns={0: 'frequency', 1:'word_group'})
satisfied_grams['question_type'] = "what went well"
satisfied_grams.to_csv("satisfied_frequency.csv")
satisfied_grams

In [None]:
ngrams = c_vec.fit_transform(satisfied_positive['reviews'])
count_values = ngrams.toarray().sum(axis=0)
vocab = c_vec.vocabulary_
satisfied_positive_grams = pd.DataFrame(sorted([(count_values[i],k) for k,i in vocab.items()], reverse=True)).rename(columns={0: 'frequency', 1:'word_group'})
satisfied_positive_grams.to_csv("satisfied_positive_frequency.csv")
satisfied_positive_grams

In [None]:
ngrams = c_vec.fit_transform(satisfied_negative['reviews'])
count_values = ngrams.toarray().sum(axis=0)
vocab = c_vec.vocabulary_
satisfied_negative_grams = pd.DataFrame(sorted([(count_values[i],k) for k,i in vocab.items()], reverse=True)).rename(columns={0: 'frequency', 1:'word_group'})
satisfied_negative_grams.to_csv("satisfied_negative_frequency.csv")
satisfied_negative_grams

In [None]:
ngrams = c_vec.fit_transform(satisfied_neutral['reviews'])
count_values = ngrams.toarray().sum(axis=0)
vocab = c_vec.vocabulary_
satisfied_neutral_grams = pd.DataFrame(sorted([(count_values[i],k) for k,i in vocab.items()], reverse=True)).rename(columns={0: 'frequency', 1:'word_group'})
satisfied_neutral_grams.to_csv("satisfied_neutral_frequency.csv")
satisfied_neutral_grams

In [None]:
satisfied_grams_json = json.dumps([{"word_group": topic, "frequency": frequency} for frequency,topic in zip(satisfied_grams['frequency'], satisfied_grams['word_group'])])
satisfied_grams_json

In [None]:
with open("satisfied_frequency.json", "w") as satisfied_topics_file:
    satisfied_topics_file.write(satisfied_grams_json)
satisfied_topics_file.close()

In [None]:
satisfied_positive_grams_json = json.dumps([{"word_group": topic, "frequency": frequency} for frequency,topic in zip(satisfied_positive_grams['frequency'], satisfied_positive_grams['word_group'])])
satisfied_positive_grams_json

In [None]:
with open("satisfied_positive_frequency.json", "w") as satisfied_positive_topics_file:
    satisfied_positive_topics_file.write(satisfied_positive_grams_json)
satisfied_positive_topics_file.close()

In [None]:
satisfied_negative_grams_json = json.dumps([{"word_group": topic, "frequency": frequency} for frequency,topic in zip(satisfied_negative_grams['frequency'], satisfied_negative_grams['word_group'])])
satisfied_negative_grams_json

In [None]:
with open("satisfied_negative_frequency.json", "w") as satisfied_negative_topics_file:
    satisfied_negative_topics_file.write(satisfied_negative_grams_json)
satisfied_negative_topics_file.close()

In [None]:
satisfied_neutral_grams_json = json.dumps([{"word_group": topic, "frequency": frequency} for frequency,topic in zip(satisfied_neutral_grams['frequency'], satisfied_neutral_grams['word_group'])])
satisfied_neutral_grams_json

In [None]:
with open("satisfied_neutral_frequency.json", "w") as satisfied_neutral_topics_file:
    satisfied_neutral_topics_file.write(satisfied_neutral_grams_json)
satisfied_neutral_topics_file.close()

In [None]:
satisfied_top_positive = satisfied_reviews[['reviews','polarity_score','sentiment_label']].sort_values(by='polarity_score', ascending=False).head(5)
satisfied_top_positive

In [None]:
satisfied_top_negative = satisfied_reviews[['reviews','polarity_score','sentiment_label']].sort_values(by='polarity_score', ascending=True).head(5)
satisfied_top_negative

In [None]:
satisfied_json = json.dumps([{"ID": id, "review": review, "label": label, "polarity_score": polarity_score, "nps_score": nps_score, "nps_label": nps_label, "date": date, "clinic": clinic, "city": city, "state": state} for id,review,label,polarity_score,nps_score,nps_label,date,clinic,city,state in zip(satisfied_reviews['ID'], satisfied_reviews['reviews'], satisfied_reviews['sentiment_label'], satisfied_reviews['polarity_score'], satisfied_reviews['NPS'], satisfied_reviews['nps_label'], satisfied_reviews['date'], satisfied_reviews['clinic'], satisfied_reviews['city'], satisfied_reviews['state'])])
satisfied_json

In [None]:
with open("satisfied_reviews.json", "w") as satisfied_json_file:
    satisfied_json_file.write(satisfied_json)
satisfied_json_file.close()

In [None]:
# Summary counts for the satisfied reviews, exported as a one-element JSON array.
# len() already returns a plain int, so no int() conversion is needed for json.dumps.
satisfied_positive_length = len(satisfied_positive)
satisfied_negative_length = len(satisfied_negative)
satisfied_neutral_length = len(satisfied_neutral)
satisfied_extreme_length = len(satisfied_extreme)
satisfied_total_promoters = len(satisfied_promoters)
satisfied_total_passive = len(satisfied_passive)
satisfied_total_detractors = len(satisfied_detractors)
satisfied_total_length = len(satisfied_reviews)

satisfied_json_total = json.dumps([{
    "total_positive": satisfied_positive_length,
    "total_negative": satisfied_negative_length,
    "total_neutral": satisfied_neutral_length,
    # Must be the satisfied count computed above, not the extreme count of the
    # reason_for_nps_score section.
    "total_extreme": satisfied_extreme_length,
    "total_promoters": satisfied_total_promoters,
    "total_passive": satisfied_total_passive,
    "total_detractors": satisfied_total_detractors,
    "total_comments": satisfied_total_length,
}])
satisfied_json_total

In [None]:
# Writing the satisfied_json_total string to disk.
# The `with` statement closes the file on exit, so no explicit close() is needed.
with open("satisfied_total_data.json", "w") as satisfied_total_file:
    satisfied_total_file.write(satisfied_json_total)

In [None]:
# Serialising the rows of satisfied_top_positive as a JSON array of objects.
# Each JSON key (left) is paired with the dataframe column it is read from (right).
top_positive_export_fields = {
    "review": "reviews",
    "label": "sentiment_label",
    "polarity_score": "polarity_score",
}
top_positive_export_rows = zip(*(satisfied_top_positive[column] for column in top_positive_export_fields.values()))
satisfied_top_five_positive_json = json.dumps(
    [dict(zip(top_positive_export_fields, row)) for row in top_positive_export_rows]
)
satisfied_top_five_positive_json

In [None]:
# Serialising the rows of satisfied_top_negative as a JSON array of objects.
# Each JSON key (left) is paired with the dataframe column it is read from (right).
top_negative_export_fields = {
    "review": "reviews",
    "label": "sentiment_label",
    "polarity_score": "polarity_score",
}
top_negative_export_rows = zip(*(satisfied_top_negative[column] for column in top_negative_export_fields.values()))
satisfied_top_five_negative_json = json.dumps(
    [dict(zip(top_negative_export_fields, row)) for row in top_negative_export_rows]
)
satisfied_top_five_negative_json

In [None]:
# Writing the two top-five JSON strings to disk.
# Each `with` statement closes its file on exit, so no explicit close() is needed.
with open("satisfied_top_five_positive.json", "w") as satisfied_top_positive_file:
    satisfied_top_positive_file.write(satisfied_top_five_positive_json)
with open("satisfied_top_five_negative.json", "w") as satisfied_top_negative_file:
    satisfied_top_negative_file.write(satisfied_top_five_negative_json)

In [None]:
# Exporting every row of satisfied_positive as a JSON array of objects.
# Each JSON key (left) is paired with the dataframe column it is read from (right).
positive_export_fields = {
    "ID": "ID",
    "review": "reviews",
    "label": "sentiment_label",
    "polarity_score": "polarity_score",
    "nps_score": "NPS",
    "nps_label": "nps_label",
    "date": "date",
    "clinic": "clinic",
    "city": "city",
    "state": "state",
}
positive_export_rows = zip(*(satisfied_positive[column] for column in positive_export_fields.values()))
satisfied_positive_json = json.dumps([dict(zip(positive_export_fields, row)) for row in positive_export_rows])
satisfied_positive_json

In [None]:
# Writing the satisfied_positive_json string to disk.
# The `with` statement closes the file on exit, so no explicit close() is needed.
with open("satisfied_positive.json", "w") as satisfied_positive_file:
    satisfied_positive_file.write(satisfied_positive_json)

In [None]:
# Exporting every row of satisfied_negative as a JSON array of objects.
# Each JSON key (left) is paired with the dataframe column it is read from (right).
negative_export_fields = {
    "ID": "ID",
    "review": "reviews",
    "label": "sentiment_label",
    "polarity_score": "polarity_score",
    "nps_score": "NPS",
    "nps_label": "nps_label",
    "date": "date",
    "clinic": "clinic",
    "city": "city",
    "state": "state",
}
negative_export_rows = zip(*(satisfied_negative[column] for column in negative_export_fields.values()))
satisfied_negative_json = json.dumps([dict(zip(negative_export_fields, row)) for row in negative_export_rows])
satisfied_negative_json

In [None]:
# Writing the satisfied_negative_json string to disk.
# The `with` statement closes the file on exit, so no explicit close() is needed.
with open("satisfied_negative.json", "w") as satisfied_negative_file:
    satisfied_negative_file.write(satisfied_negative_json)

In [None]:
# Exporting every row of satisfied_neutral as a JSON array of objects.
# Each JSON key (left) is paired with the dataframe column it is read from (right).
neutral_export_fields = {
    "ID": "ID",
    "review": "reviews",
    "label": "sentiment_label",
    "polarity_score": "polarity_score",
    "nps_score": "NPS",
    "nps_label": "nps_label",
    "date": "date",
    "clinic": "clinic",
    "city": "city",
    "state": "state",
}
neutral_export_rows = zip(*(satisfied_neutral[column] for column in neutral_export_fields.values()))
satisfied_neutral_json = json.dumps([dict(zip(neutral_export_fields, row)) for row in neutral_export_rows])
satisfied_neutral_json

In [None]:
# Writing the satisfied_neutral_json string to disk.
# The `with` statement closes the file on exit, so no explicit close() is needed.
with open("satisfied_neutral.json", "w") as satisfied_neutral_file:
    satisfied_neutral_file.write(satisfied_neutral_json)

In [None]:
# Exporting every row of satisfied_extreme as a JSON array of objects.
# Each JSON key (left) is paired with the dataframe column it is read from (right).
extreme_export_fields = {
    "ID": "ID",
    "review": "reviews",
    "label": "sentiment_label",
    "polarity_score": "polarity_score",
    "nps_score": "NPS",
    "nps_label": "nps_label",
    "date": "date",
    "clinic": "clinic",
    "city": "city",
    "state": "state",
}
extreme_export_rows = zip(*(satisfied_extreme[column] for column in extreme_export_fields.values()))
satisfied_extreme_json = json.dumps([dict(zip(extreme_export_fields, row)) for row in extreme_export_rows])
satisfied_extreme_json

In [None]:
# Writing the satisfied_extreme_json string to disk.
# The `with` statement closes the file on exit, so no explicit close() is needed.
with open("satisfied_extreme.json", "w") as satisfied_extreme_file:
    satisfied_extreme_file.write(satisfied_extreme_json)

In [None]:
# Histogram of how many satisfied reviews carry each sentiment label
figure = (
    pl.histogram(satisfied_reviews, x="sentiment_label")
    .update_traces(marker_color="indianred", marker_line_color='yellow', marker_line_width=2.0)
    .update_layout(title_text='Feedback Sentiment Analysis')
)
figure.show()

In [None]:
# Histogram of how many satisfied reviews carry each NPS label
figure = (
    pl.histogram(satisfied_reviews, x="nps_label")
    .update_traces(marker_color="indianred", marker_line_color='yellow', marker_line_width=2.0)
    .update_layout(title_text='Feedback Sentiment Analysis - NPS Label')
)
figure.show()

### 2.5 Extracting Negative Reviews

In [None]:
# Selecting the negative-feedback columns, keeping only rows whose answer is not
# the literal 'No Response', and giving the survey columns shorter names
dissatisfied_source_columns = ['ID', 'MEMBER_ID', 'NEGATIVEREVIEWS', 'NPS', 'SURVEYDATE', 'SURVEYCLINIC', 'CLINICCITY', 'CLINICSTATE']
dissatisfied_column_names = {
    'NEGATIVEREVIEWS': 'reviews',
    'SURVEYDATE': 'date',
    'SURVEYCLINIC': 'clinic',
    'CLINICCITY': 'city',
    'CLINICSTATE': 'state',
}
has_negative_response = data['NEGATIVEREVIEWS'] != 'No Response'
data_what_not_satisfied = data.loc[has_negative_response, dissatisfied_source_columns].rename(columns=dissatisfied_column_names)

# Replacing every non-letter character of the review text with a space
remove_punctuations_dissatisfied = data_what_not_satisfied[['reviews']].replace("[^a-zA-Z]", " ", regex=True)
data_what_not_satisfied[['reviews']] = remove_punctuations_dissatisfied[['reviews']]

data_what_not_satisfied = data_what_not_satisfied.reset_index(drop=True)
data_what_not_satisfied

In [None]:
# Binding a second name to the same dataframe object (plain assignment, no copy is made)
dissatisfied_reviews = data_what_not_satisfied

In [None]:
# Counting the number of missing values in each column
dissatisfied_reviews.isnull().sum()

In [None]:
# Dropping every row that has a missing value in any column, then re-counting
# the missing values per column. The index is not reset here, so row labels
# may have gaps afterwards.
dissatisfied_reviews = dissatisfied_reviews.dropna(axis=0, how='any')
dissatisfied_reviews.isnull().sum()

In [None]:
# Counting the reviews that contain a contraction.
# The extraction cell replaces every non-letter character (apostrophes included)
# with a space, so "can't" is stored as "can t" and a pattern that requires the
# apostrophe can never match. Each contraction is therefore accepted with either
# an apostrophe or a whitespace character between its two parts, and anchored on
# word boundaries so that e.g. "I m" does not match inside "I might".
contraction_forms = (
    "can't", "won't", "isn't", "doesn't", "didn't", "don't", "haven't", "couldn't",
    "you'll", "I'll", "I've", "I'm", "it'll", "it's", "could've", "would've",
    "hadn't", "hasn't", "should've", "shouldn't", "must've", "who's", "they're",
    "they'll", "they've", "that's", "there's",
)
contraction_pattern = "|".join(
    r"\b" + form.replace("'", r"['\s]") + r"\b" for form in contraction_forms
)
# Summing the boolean mask counts the matching rows directly; it does not rely on
# DataFrame.value_counts(), which cannot hash list-valued columns such as 'tokens'.
dissatisfied_contractions = dissatisfied_reviews['reviews'].str.contains(contraction_pattern).sum()
dissatisfied_contractions

In [None]:
# Converting reviews to lower case.
# Series.str.lower() is the vectorised string method and replaces the deprecated
# DataFrame.applymap(str.lower) call.
dissatisfied_lower = dissatisfied_reviews[['reviews']]
dissatisfied_reviews['reviews'] = dissatisfied_lower['reviews'].str.lower()
dissatisfied_reviews

In [None]:
# Tokenizing every review; the result holds one list of tokens per review
dissatisfied_token_list = [word_tokenize(review) for review in dissatisfied_reviews['reviews']]
dissatisfied_token_list

In [None]:
# Adding the token lists as a 'tokens' column so reviews and tokens show side by side
# (the list is assigned to the existing dataframe; no new dataframe is created)
dissatisfied_reviews['tokens'] = dissatisfied_token_list
dissatisfied_reviews

In [None]:
# Finding the token frequency of every review and collecting the results in a list
dissatisfied_frequency_list = []

for tokens in dissatisfied_token_list:
    # `fdist` is deliberately left bound after the loop: the following cell reads it
    fdist = FreqDist(tokens)
    dissatisfied_frequency_list.append(fdist)

# Plotting the 30 most frequent tokens for up to the first 15 reviews.
# Slicing (instead of indexing range(15)) avoids an IndexError when fewer than
# 15 reviews are available.
for review_distribution in dissatisfied_frequency_list[:15]:
    review_distribution.plot(30, cumulative=False)
    plt.show()
    

In [None]:
# Displaying the 4 most common tokens of `fdist`.
# NOTE(review): `fdist` is the variable left over from the frequency loop, i.e. the
# distribution of the LAST review only, not of the whole corpus — confirm this is intended.
fdist.most_common(4)

In [None]:
# Lower-casing every token and keeping only those that are not English stopwords
stopwords_set = set(stopwords.words('english'))
dissatisfied_tokens_without_stopwords = [
    [lowered for lowered in map(str.lower, tokens) if lowered not in stopwords_set]
    for tokens in dissatisfied_reviews.tokens
]

# Adding the filtered tokens as a column next to the reviews and raw tokens
dissatisfied_reviews['tokens_without_stopwords'] = dissatisfied_tokens_without_stopwords
dissatisfied_reviews

In [None]:
# Visualizing the dissatisfied review text as a word cloud (stopwords excluded).
# No max_words argument is passed, so WordCloud's own default word limit applies.
feedbacks = " ".join(dissatisfied_reviews.reviews)
wordcloud_options = dict(
    stopwords=stopwords_set,
    collocations=False,
    background_color='red',
    width=4000,
    height=3000,
)
dissatisfied_wordcloud = WordCloud(**wordcloud_options).generate(feedbacks)

fig = plt.figure(figsize=(15, 10))
plt.imshow(dissatisfied_wordcloud, interpolation='bilinear')
plt.axis("off")
plt.title('What Did Not Went Well Wordcloud - Tokens')
plt.show()

In [None]:
# Lemmatizing every stopword-free token with WordNet (default part of speech),
# keeping one list of lemmas per review
lemmatizer = WordNetLemmatizer()

dissatisfied_tokens_after_lemmatizing = [
    [lemmatizer.lemmatize(token) for token in tokens]
    for tokens in dissatisfied_tokens_without_stopwords
]

# Adding the lemmatized tokens as a column next to the earlier token columns
dissatisfied_reviews['fresh_tokens'] = dissatisfied_tokens_after_lemmatizing
dissatisfied_reviews

In [None]:
# Part-of-speech tagging every review's lemmatized tokens (verb, noun, etc.)
dissatisfied_pos_tagged = list(map(pos_tag, dissatisfied_tokens_after_lemmatizing))
dissatisfied_pos_tagged

In [None]:
def _lemmatize_tagged_dissatisfied(tagged_tokens):
    """Join one review's (word, tag) pairs into a string of lemmas.

    A word is lemmatized with the WordNet part of speech returned by
    get_wordnet_pos(tag); when that helper returns a falsy value the word is
    kept unchanged. The helper is called once per token.
    """
    lemmas = []
    for word, tag in tagged_tokens:
        wordnet_pos = get_wordnet_pos(tag)
        lemmas.append(lemmatizer.lemmatize(word, pos=wordnet_pos) if wordnet_pos else word)
    return ' '.join(lemmas)


# 'feedback' holds the POS-tagged tokens of each review. The directly imported
# pos_tag / word_tokenize are used, so the cell does not depend on a bare
# `nltk` module name being bound.
dissatisfied_reviews['feedback'] = dissatisfied_reviews['reviews'].apply(
    lambda text: pos_tag(word_tokenize(text))
)

# 'lemma' holds the POS-aware lemmatized review as a single space-joined string
dissatisfied_reviews['lemma'] = dissatisfied_reviews['feedback'].apply(_lemmatize_tagged_dissatisfied)
dissatisfied_reviews

In [None]:
# Finding the polarity scores for each review.
# Every score dict also receives the review text under the 'label' key and is
# printed; the dicts are collected in a list that later cells turn into score columns.
dissatisfied_polarity_score_list = []

# One analyzer is enough: constructing it inside the loop repeated the same
# set-up work for every single review.
dissatisfied_analyzer = sia()

for each in dissatisfied_reviews.reviews:
    polarity_score = dissatisfied_analyzer.polarity_scores(each)
    polarity_score['label'] = each
    dissatisfied_polarity_score_list.append(polarity_score)
    print(polarity_score)

In [None]:
# Adding the compound (aggregated), positive and negative scores as columns.
# The score list was built by iterating dissatisfied_reviews.reviews in order, so it
# matches the dataframe row by row. Giving the scores frame the dataframe's own index
# keeps that positional pairing: with a default RangeIndex the assignment aligns on
# labels, and rows after any dropped row would receive the wrong (or no) score.
dissatisfied_scores = pd.DataFrame(dissatisfied_polarity_score_list, index=dissatisfied_reviews.index)

dissatisfied_reviews['polarity_score'] = dissatisfied_scores['compound']
dissatisfied_reviews['positive_score'] = dissatisfied_scores['pos']
# 'neg' is the negative component; 'neu' (used before) is the neutral one
dissatisfied_reviews['negative_score'] = dissatisfied_scores['neg']

# Kept as a safeguard: any score that is still missing becomes 0
dissatisfied_score_columns = ['polarity_score', 'positive_score', 'negative_score']
dissatisfied_reviews[dissatisfied_score_columns] = dissatisfied_reviews[dissatisfied_score_columns].fillna(0)
dissatisfied_reviews

In [None]:
# Creating two new columns by calling the `sentiment_label` and `sentiment` helpers
# once per row (axis=1), then previewing the first 10 rows.
# NOTE(review): both helpers are defined outside this cell; presumably they derive a text
# label and a numeric code from each row's polarity score — confirm against their definitions.
dissatisfied_reviews['sentiment_label'] = dissatisfied_reviews.apply(sentiment_label, axis=1)
dissatisfied_reviews['sentiment'] = dissatisfied_reviews.apply(sentiment, axis=1)
dissatisfied_reviews.head(10)

In [None]:
# Keyword-based corrections of the sentiment columns of dissatisfied_reviews.
# Earlier cells lower-case the review text and replace every non-letter with a space,
# which is why phrases such as "aren t worth" or "can t breathe" carry no apostrophe.
# The statement order matters: later rules overwrite the result of earlier ones.

# NOTE(review): `(?=kill)` and `(?=rough)` have no leading `.*` and the pattern is used
# with str.match (anchored at the start of the text), so these two alternatives only
# fire when a review BEGINS with that word. Left unchanged on purpose: adding `.*`
# would also hit words such as "skill" or "thorough".
dissatisfied_extreme_pattern = (
    '^(?=.*rude)|(?=.*worst)|(?=.*frustrated)|(?=.*angry)|(?=.*terrible)'
    '|(?=.*disrespectful)|(?=.*irresponsibly)|(?=.*irresponsible)|(?=.*improper)'
    '|(?=.*wasted)|(?=.*violate)|(?=.*violations)|(?=.*aren t worth)|(?=.*undress)'
    '|(?=.*didnt treat me)|(?=.*horrible)|(?=kill)|(?=.*danger)|(?=.*dangerous)'
    '|(?=rough)|(?=.*dismissed)|(?=.*upsetting)|(?=.*out of line)|(?=.*uncomfortable)'
    '|(?=.*can t breathe)|(?=.*breast)|(?=.*was misdiagnosed)|(?=.*did not examine me)'
    '|(?=.*forced to participate)|(?=.*forced into it)'
)
dissatisfied_positive_pattern = (
    '^(?=.*good)|(?=.*great)|(?=.*awesome)|(?=.*very much)|(?=.*friendly)'
    '|(?=.*appreciate)|(?=.*compassionate)|(?=.*went well)|(?=.*very thorough)'
    '|(?=.*assisted)|(?=.*easy)|(?=.*enjoyed)|(?=.*i like)|(?=.*i liked)|(?=.*nice)'
    '|(?=.*love)|(?=.*caring)|(?=.*impressed)|(?=.*knowledgeable)|(?=.*very comfortable)'
    '|(?=.*very professional)|(?=.*achieved)|(?=.*courteous)|(?=.*listen to what i had to say)'
    '|(?=.*right amount of time)|(?=.*on time)|(?=.*personable)|(?=.*efficient)'
    '|(?=.*fantastic)|(?=.*punctual)|(?=.*go over issues)|(?=.*got me in same day)'
    '|(?=.*all my concerns)|(?=.*everything)|(?=.*sat and talked)'
)

# 1) Reviews containing an "extreme" keyword: sentiment 2 / label 'Extreme'
dissatisfied_is_extreme_text = dissatisfied_reviews['reviews'].str.match(dissatisfied_extreme_pattern)
dissatisfied_reviews.loc[dissatisfied_is_extreme_text, 'sentiment'] = 2
dissatisfied_reviews.loc[dissatisfied_is_extreme_text, 'sentiment_label'] = 'Extreme'
# Extreme reviews with a positive polarity get the sign flipped. The label is read from
# dissatisfied_reviews itself; a mask built from another dataframe's labels
# (satisfied_reviews) does not describe these rows.
dissatisfied_reviews.loc[(dissatisfied_reviews['polarity_score']>0) & (dissatisfied_reviews['sentiment_label']=='Extreme'), 'polarity_score'] *= -1

# 2) Reviews labelled 'Negative' although NPS >= 7 are relabelled as positive, and every
#    negative polarity among the NPS >= 7 rows is flipped to positive
dissatisfied_high_nps_negative = (dissatisfied_reviews['NPS']>=7) & (dissatisfied_reviews['sentiment_label']=='Negative')
dissatisfied_reviews.loc[dissatisfied_high_nps_negative, 'sentiment'] = 1
dissatisfied_reviews.loc[dissatisfied_high_nps_negative, 'sentiment_label'] = 'Positive'
dissatisfied_reviews.loc[(dissatisfied_reviews['NPS']>=7) & (dissatisfied_reviews['polarity_score']<0), 'polarity_score'] *= -1

# 3) Reviews containing a positive keyword with NPS >= 5: sentiment 1 / label 'Positive',
#    then any 'Positive' row with a negative polarity gets the sign flipped
dissatisfied_is_positive_text = dissatisfied_reviews['reviews'].str.contains(dissatisfied_positive_pattern) & (dissatisfied_reviews['NPS']>=5)
dissatisfied_reviews.loc[dissatisfied_is_positive_text, 'sentiment'] = 1
dissatisfied_reviews.loc[dissatisfied_is_positive_text, 'sentiment_label'] = 'Positive'
dissatisfied_reviews.loc[(dissatisfied_reviews['polarity_score']<0) & (dissatisfied_reviews['sentiment_label']=='Positive'), 'polarity_score'] *= -1
dissatisfied_reviews

In [None]:
# Fixing polarity scores and related labels
#dissatisfied_reviews.loc[dissatisfied_reviews['reviews'].str.contains('not sure|don t know'), 'sentiment'] = 0
#dissatisfied_reviews.loc[dissatisfied_reviews['reviews'].str.contains('nothing') & (dissatisfied_reviews['NPS']<=4), 'sentiment'] = 0
#dissatisfied_reviews.loc[dissatisfied_reviews['reviews'].str.contains('not sure|don t know'), 'sentiment_label'] = 'Neutral'
#dissatisfied_reviews.loc[(dissatisfied_reviews['sentiment_label']=='Neutral') & (dissatisfied_reviews['sentiment']==0), 'polarity_score']=0
#dissatisfied_reviews

In [None]:
# Phrases that flag a review as negative. They are written without punctuation
# (e.g. 'wasn t'), presumably matching how the review text was cleaned earlier -- verify.
#
# The original cell repeated one huge regex literal four times, and the literal was
# broken across physical lines in the middle of a phrase ('don t' / 'recommend'), which
# is not valid inside a single-quoted string. It also ran a first pass with a 44-phrase
# subset restricted to NPS <= 4; every phrase of that subset is in this list, which is
# applied without an NPS filter, so the restricted pass had no additional effect and is
# folded in here. Exact duplicate phrases ('dirty', 'did not listen') are listed once.
negative_review_phrases = (
    'wasn t treated', 'not as caring', 'did not think', 'didn t think', 'did not get',
    'didn t get', 'felt rushed', 'was rushed', 'seemed rushed', 'did not fix', 'didn t fix',
    'didn t listen', 'did not listen', 'unprofessional', 'don t think', 'do not think',
    'talked too much', 'took longer', 'didn t show', 'did not show', 'didn t look',
    'did not look', 'not being treated', 'confused', 'did not recognize', 'didn t recognize',
    'do not like', 'don t like', 'wasn t greeted', 'was not greeted', 'need to improve',
    'no solution', 'didn t understand', 'did not understand', 'unorganized', 'no follow up',
    'doesn t seem', 'does not seem', 'would not call', 'wouldn t call', 'not answered',
    'no answer', 'not near as caring', 'already expressed', 'dirty', 'don t have resources',
    'dont lie', 'too long', 'give proper', 'not treating', 'worthless', 'doesn t look well',
    'don t get', 'way to long', 'not willing', 'no examination', 'failed', 'didn t follow up',
    'doesn t follow up', 'was cold', 'little compassion', 'inexperienced', 'didn t even',
    'needs to be cleaned', 'forgot to call', 'did not hear', 'superficial', 'afraid to touch',
    'did not trust', 'far too long', 'don t have any resources', 'not happy', 'made me wait',
    'awful', 'nobody grteeted', 'won t call', 'did not work', 'entire problem was',
    'didn t even acknowledge', 'doesn t care', 'incompetent', 'not knowledgeable',
    'didn t make appointment right', 'should not have', 'had to wait awhile', 'way too long',
    'frustrating', 'they don t do', 'than the person you have hired', 'wasn t very clean',
    'made me very sick', 'we were told', 'what is going on', 'don t recommend',
    'only thing they cared about', 'did not seem to care', 'pretty disappointed',
    'mess up my medicine', 'very demeaning', 'unfriendly', 'very disappointed',
    'didn t have any good', 'wasn t much true interest', 'not acceptable', 'does not listen',
    'would not refill', 'not happening', 'not great', 'new very little', 'to long of a time',
    'poor instructions', 'poor service', 'was charged an excessive amount',
    'thrilled i was able', 'little slow',
)

# A plain alternation matches wherever any phrase occurs, which is what the original
# '(?=.*phrase)' lookaheads amounted to. re.escape keeps every phrase literal should a
# future entry contain a regex metacharacter.
negative_review_pattern = '|'.join(re.escape(phrase) for phrase in negative_review_phrases)

# na=False: a missing review simply does not match instead of poisoning the boolean mask
negative_review_mask = dissatisfied_reviews['reviews'].str.contains(negative_review_pattern, regex=True, na=False)

dissatisfied_reviews.loc[negative_review_mask, 'sentiment'] = -1
dissatisfied_reviews.loc[negative_review_mask, 'sentiment_label'] = 'Negative'

# A review labelled Negative must not keep a positive polarity score: flip its sign
dissatisfied_reviews.loc[(dissatisfied_reviews['polarity_score']>0) & (dissatisfied_reviews['sentiment_label']=='Negative'), 'polarity_score']*=-1

In [None]:
# Phrases that flag a review as positive; this cell runs after the negative tagging,
# so a review matching both lists ends up Positive. The phrases are written without
# punctuation and several contain runs of spaces, presumably mirroring how the review
# text was cleaned earlier -- verify. Spacing and spelling are kept exactly as authored.
#
# The original cell repeated one ~12 KB regex literal twice, and the literal was broken
# across physical lines in the middle of three phrases ('i am satisfied',
# 'always listen to my concerns', 'assist was great'), which is not valid inside a
# single-quoted string.
positive_review_phrases = (
    'amazing', 'can t think of anything', 'completely satisfied', 'no complaints', 'perfect',
    'was fine', 'was nice', 'very happy', 'well communication', 'enough time with me',
    'not a single thing', 'couldn t ask for anything more', 'very satisfied', 'nothing to say',
    'nothing to report', 'very pleasant', 'was well', 'completely happy', 'was pleased',
    'all employees use', 'super nice', 'never had a doctor spend', 'very pleased',
    'it was okay', 'was quick', 'convenient high quality', 'very welcoming',
    'provide great care', 'very nice', 'treat you like family', 'always on time',
    'very helpful', 'i am fortunate', 'best coarse of action', 'very convenient',
    'i iike the doctor', 'great service', 'wasn t too long', 'extremely convenient',
    'service is good', 'i like being able to', 'overall i liked',
    'paladinas the city s health', 'i enjoy the convenience', 'respect for my time',
    'was easy to schedule', 'short waiting times', 'no waiting time', 'no hassle',
    'very friendly', 'answered all my questions', 'very professional', 'so far so good',
    'always seen on time', 'never feel rushed', 'friendly  timely', 'fast service',
    'very accommodating', 'pleasent experience   nice staff',
    'helpful and financially beneficial', 'ease of appointments', 'has a genuine concern',
    'you feel that you are being heard', 'quick  affordable',
    'convenient  fast  professional', 'excellent  thorough providers',
    'very detailed check up', 'were kind', 'high quality of care',
    'doctor listened to patient', 'doing a better job', 'ease of making an appointment',
    'are just wonderful', 'very attentive', 'no wait times', 'felt comfortable',
    'treated like a human a', 'felt welcoming', 'i am satisfied',
    'quality of care and time', 'i enjoy', 'quick scheduling', 'was vey kind',
    'was very kind', 'it was helpful', 'pretty thorough', 'was really thorough',
    'listened to my concerns', 'listened to what i had to say', 'listened to all concerns',
    'very responsive', 'very informative', 'cared about my well being', 'i feel welcomed',
    'made me feel important', 'made me feel better', 'was very productive',
    'gave me a lot of information', 'timely and kind', 'positive experience',
    'personalized care', 'thank you', 'with professional courtesy',
    'very positive environment', 'were wonderful', 'was thorough', 'non judgemental',
    'very receptive', 'did not seem rushed', 'asked thorough questions',
    ' i felt confident', 'very little wait time', 'felt like an actual patient',
    'she really cares', 'he really cares', 'treated me like a person',
    'listened carefully and thoughtfully', 'was responsive', 'explained things really well',
    'met me with respect', 'listened to my feedback', 'i felt heard', 'went very well',
    'quick and thorough', 'made me comfortable', 'was explained in detail',
    'kind and listened', 'got me in earlier', 'outstanding service', 'listened to my needs',
    'very well prepared', 'i was welcomed', 'listened  welcoming', 'most important patient',
    'spent a tremendous amount of time', 'very kind and polite', 'pleasant and thorough',
    'pleasant and helpful', 'very prompt', 'very timely', 'asked good questions',
    'is the best', 'pleasant and professional', 'was understanding',
    'felt welcome and listened', 'everything went well', 'was pleasant', 'staff greeted me',
    'everyone is sincere', 'polite and helpful', 'interactive and listened',
    'found answers to my questions', 'kind and considerate', 'listened and responded',
    'easy to schedule', 'always listen to my concerns',
    'were professional', 'satisfied my needs', 'i was important', 'did not rush me',
    'were addressed', 'was very respectful', 'didn t feel rushed', 'ease and timeliness',
    'very patient and attentive', 'made sure i understood', 'took time to explain',
    'very personal and understanding', 'very honest', 'very cared', 'always prepared',
    'well cared', 'best dr visit', 'did not feel rushed', 'very quick', 'is a pleasure',
    'patient centered', 'doctor really cares', 'spent as much time',
    'expressed interested in me', 'took time to really talk', 'helped me with my issues',
    'was incredibly calm', 'very warmly', 'listened to me first', 'without any wait time',
    'always quick', 'was organized', 'very kind', 'i was able to make an appointment',
    'helped clear up mysteries', 'was exceptional', 'we did not have to wait',
    'i really like', 'made a personal connection', 'spent time in reviewing',
    'spent quality time', 'gave me the information i needed', 'very personal',
    'did not rush', 'have time to deal', 'was extremely thorough', 'has really helped',
    'extra care was taken', 'took extra care', 'showed an interest', 'care and compassion',
    'took his time', 'is nice  and caring', 'took the time to explain',
    'talked to me and listened', 'made the patient feel comfortable', 'addressed the issue',
    'listen and spent time', 'took her time with me', 'addressed my issue',
    'i am so pleased', 'took the time to listen', 'i felt important  like a person',
    'concise and responsive', 'didn t wait long', 'helped me understand', 'very well',
    'very personal service', 'they followed the guidelines', 'started in a timely',
    'was wonderful', 'very thoughtful', 'was genuinely interested', 'ran smoothly',
    'i felt my concerns were important', 'assist was great',
    'welcoming staff', 'they were very interested', 'i am treated',
    'was treated with respect', 'can recommend to co workers',
)

# A plain alternation matches wherever any phrase occurs, which is what the original
# '(?=.*phrase)' lookaheads amounted to. re.escape keeps every phrase literal.
positive_review_pattern = '|'.join(re.escape(phrase) for phrase in positive_review_phrases)

# na=False: a missing review simply does not match instead of poisoning the boolean mask
positive_review_mask = dissatisfied_reviews['reviews'].str.contains(positive_review_pattern, regex=True, na=False)

dissatisfied_reviews.loc[positive_review_mask, 'sentiment'] = 1
dissatisfied_reviews.loc[positive_review_mask, 'sentiment_label'] = 'Positive'

# Reviews that begin with "convenient" and scored exactly 6 are also labelled Positive.
# NOTE(review): only the label is set here, the numeric 'sentiment' column is left as is,
# exactly as in the original -- confirm that is intended.
dissatisfied_reviews.loc[dissatisfied_reviews['reviews'].str.startswith('convenient', na=False) & (dissatisfied_reviews['NPS']==6), 'sentiment_label'] = 'Positive'

# A review labelled Positive must not keep a negative polarity score: flip its sign
dissatisfied_reviews.loc[(dissatisfied_reviews['polarity_score']<0) & (dissatisfied_reviews['sentiment_label']=='Positive'), 'polarity_score']*=-1

In [None]:
dissatisfied_reviews.loc[(dissatisfied_reviews['NPS']>=9) & (dissatisfied_reviews['NPS']<=10), 'nps_label'] = 'Promoter'
dissatisfied_reviews.loc[(dissatisfied_reviews['NPS']>=7) & (dissatisfied_reviews['NPS']<=8), 'nps_label'] = 'Passive'
dissatisfied_reviews.loc[dissatisfied_reviews['NPS']<=6, 'nps_label'] = 'Detractors'

In [None]:
dissatisfied_reviews['nps_label'].value_counts()

In [None]:
# Export table: the reporting columns of dissatisfied_reviews, in export order,
# with the score and label columns renamed to their export names
dissatisfied_complete_data = dissatisfied_reviews[
    ['ID', 'reviews', 'date', 'NPS', 'clinic', 'city', 'state', 'polarity_score', 'sentiment_label', 'nps_label']
].rename(columns={'NPS': 'nps_score', 'sentiment_label': 'label'})
dissatisfied_complete_data.to_csv("dissatisfied_complete_data.csv")
dissatisfied_complete_data

In [None]:
dissatisfied_sentiment_count = dissatisfied_reviews[['sentiment']].value_counts()
dissatisfied_sentiment_count

In [None]:
dissatisfied_reviews.to_csv("dissatisfied_reviews.csv")

In [None]:
dissatisfied_crosstab = pd.crosstab(dissatisfied_reviews.sentiment_label, dissatisfied_reviews.nps_label)
dissatisfied_crosstab.to_csv("dissatisfied_crosstab.csv")
dissatisfied_crosstab

In [None]:
# Exploratory pair plot of the review columns, coloured by sentiment label
sns.pairplot(data=dissatisfied_reviews, hue='sentiment_label')

In [None]:
dissatisfied_positive = dissatisfied_reviews[dissatisfied_reviews['sentiment_label'].str.contains(r'Positive')]
dissatisfied_positive

In [None]:
dissatisfied_negative = dissatisfied_reviews[dissatisfied_reviews['sentiment_label'].str.contains(r'Negative')]
dissatisfied_negative

In [None]:
dissatisfied_neutral = dissatisfied_reviews[dissatisfied_reviews['sentiment_label'].str.contains(r'Neutral')]
dissatisfied_neutral

In [None]:
dissatisfied_extreme = dissatisfied_reviews[dissatisfied_reviews['sentiment_label'].str.contains(r'Extreme')]
dissatisfied_extreme

In [None]:
dissatisfied_promoters = dissatisfied_reviews[dissatisfied_reviews['nps_label'].str.contains(r'Promoter')]
dissatisfied_promoters.head(10)

In [None]:
dissatisfied_passive = dissatisfied_reviews[dissatisfied_reviews['nps_label'].str.contains(r'Passive')].head(10)
dissatisfied_passive

In [None]:
dissatisfied_detractors = dissatisfied_reviews[dissatisfied_reviews['nps_label'].str.contains(r'Detractor')]
dissatisfied_detractors.head(10)

In [None]:
# Write the labelled reviews to disk again (same target file as the export a few cells earlier)
dissatisfied_reviews.to_csv(path_or_buf="dissatisfied_reviews.csv")

In [None]:
ngrams = c_vec.fit_transform(dissatisfied_reviews['reviews'])
count_values = ngrams.toarray().sum(axis=0)
vocab = c_vec.vocabulary_
dissatisfied_grams = pd.DataFrame(sorted([(count_values[i],k) for k,i in vocab.items()], reverse=True)).rename(columns={0: 'frequency', 1:'word_group'})
dissatisfied_grams['question_type'] = "what did not went well"
dissatisfied_grams.to_csv("dissatisfied_frequency.csv")
dissatisfied_grams

In [None]:
ngrams = c_vec.fit_transform(dissatisfied_positive['reviews'])
count_values = ngrams.toarray().sum(axis=0)
vocab = c_vec.vocabulary_
dissatisfied_positive_grams = pd.DataFrame(sorted([(count_values[i],k) for k,i in vocab.items()], reverse=True)).rename(columns={0: 'frequency', 1:'word_group'})
dissatisfied_positive_grams.to_csv("dissatisfied_positive_frequency.csv")
dissatisfied_positive_grams

In [None]:
ngrams = c_vec.fit_transform(dissatisfied_negative['reviews'])
count_values = ngrams.toarray().sum(axis=0)
vocab = c_vec.vocabulary_
dissatisfied_negative_grams = pd.DataFrame(sorted([(count_values[i],k) for k,i in vocab.items()], reverse=True)).rename(columns={0: 'frequency', 1:'word_group'})
dissatisfied_negative_grams.to_csv("dissatisfied_negative_frequency.csv")
dissatisfied_negative_grams

In [None]:
ngrams = c_vec.fit_transform(dissatisfied_neutral['reviews'])
count_values = ngrams.toarray().sum(axis=0)
vocab = c_vec.vocabulary_
dissatisfied_neutral_grams = pd.DataFrame(sorted([(count_values[i],k) for k,i in vocab.items()], reverse=True)).rename(columns={0: 'frequency', 1:'word_group'})
dissatisfied_neutral_grams.to_csv("dissatisfied_neutral_frequency.csv")
dissatisfied_neutral_grams

In [None]:
dissatisfied_grams_json = json.dumps([{"word_group": topic, "frequency": frequency} for frequency,topic in zip(dissatisfied_grams['frequency'], dissatisfied_grams['word_group'])])
dissatisfied_grams_json

In [None]:
with open("dissatisfied_frequency.json", "w") as dissatisfied_topics_file:
    dissatisfied_topics_file.write(dissatisfied_grams_json)
dissatisfied_topics_file.close()

In [None]:
dissatisfied_positive_grams_json = json.dumps([{"word_group": topic, "frequency": frequency} for frequency,topic in zip(dissatisfied_positive_grams['frequency'], dissatisfied_positive_grams['word_group'])])
dissatisfied_positive_grams_json

In [None]:
with open("dissatisfied_positive_frequency.json", "w") as dissatisfied_positive_topics_file:
    dissatisfied_positive_topics_file.write(dissatisfied_positive_grams_json)
dissatisfied_positive_topics_file.close()

In [None]:
dissatisfied_negative_grams_json = json.dumps([{"word_group": topic, "frequency": frequency} for frequency,topic in zip(dissatisfied_negative_grams['frequency'], dissatisfied_negative_grams['word_group'])])
dissatisfied_negative_grams_json

In [None]:
with open("dissatisfied_negative_frequency.json", "w") as dissatisfied_negative_topics_file:
    dissatisfied_negative_topics_file.write(dissatisfied_negative_grams_json)
dissatisfied_negative_topics_file.close()

In [None]:
dissatisfied_neutral_grams_json = json.dumps([{"word_group": topic, "frequency": frequency} for frequency,topic in zip(dissatisfied_neutral_grams['frequency'], dissatisfied_neutral_grams['word_group'])])
dissatisfied_neutral_grams_json

In [None]:
with open("dissatisfied_neutral_frequency.json", "w") as dissatisfied_neutral_topics_file:
    dissatisfied_neutral_topics_file.write(dissatisfied_neutral_grams_json)
dissatisfied_neutral_topics_file.close()

In [None]:
dissatisfied_top_positive = dissatisfied_reviews[['reviews','polarity_score','sentiment_label']].sort_values(by='polarity_score', ascending=False).head(5)
dissatisfied_top_positive

In [None]:
dissatisfied_top_negative = dissatisfied_reviews[['reviews','polarity_score','sentiment_label']].sort_values(by='polarity_score', ascending=True).head(5)
dissatisfied_top_negative

In [None]:
dissatisfied_json = json.dumps([{"ID": id, "review": review, "label": label, "polarity_score": polarity_score, "nps_score": nps_score, "nps_label": nps_label, "date": date, "clinic": clinic, "city": city, "state": state} for id,review,label,polarity_score,nps_score,nps_label,date,clinic,city,state in zip(dissatisfied_reviews['ID'], dissatisfied_reviews['reviews'], dissatisfied_reviews['sentiment_label'], dissatisfied_reviews['polarity_score'], dissatisfied_reviews['NPS'], dissatisfied_reviews['nps_label'], dissatisfied_reviews['date'], dissatisfied_reviews['clinic'], dissatisfied_reviews['city'], dissatisfied_reviews['state'])])
dissatisfied_json

In [None]:
with open("dissatisfied_reviews.json", "w") as dissatisfied_json_file:
    dissatisfied_json_file.write(dissatisfied_json)
dissatisfied_json_file.close()

In [None]:
dissatisfied_positive_length = len(satisfied_positive)
dissatisfied_negative_length = len(satisfied_negative)
dissatisfied_neutral_length = len(satisfied_neutral)
dissatisfied_extreme_length = len(dissatisfied_extreme)
dissatisfied_total_promoters = int(len(dissatisfied_promoters))
dissatisfied_total_passive = int(len(dissatisfied_passive))
dissatisfied_total_detractors = int(len(dissatisfied_detractors))
dissatisfied_total_length = len(dissatisfied_reviews)

dissatisfied_json_total = json.dumps([{"total_positive": dissatisfied_positive_length, "total_negative": dissatisfied_negative_length, "total_neutral": dissatisfied_neutral_length, "total_extreme": dissatisfied_extreme_length, "total_promoters": dissatisfied_total_promoters, "total_passive": dissatisfied_total_passive, "total_detractors": dissatisfied_total_detractors, "total_comments": dissatisfied_total_length}])
dissatisfied_json_total

In [None]:
with open("dissatisfied_total_data.json", "w") as dissatisfied_total_file:
    dissatisfied_total_file.write(dissatisfied_json_total)
dissatisfied_total_file.close()

In [None]:
dissatisfied_top_five_positive_json = json.dumps([{"review": review, "label": label, "polarity_score": polarity_score} for review,label,polarity_score in zip(dissatisfied_top_positive['reviews'], dissatisfied_top_positive['sentiment_label'], dissatisfied_top_positive['polarity_score'])])
dissatisfied_top_five_positive_json

In [None]:
dissatisfied_top_five_negative_json = json.dumps([{"review": review, "label": label, "polarity_score": polarity_score} for review,label,polarity_score in zip(dissatisfied_top_negative['reviews'], dissatisfied_top_negative['sentiment_label'], dissatisfied_top_negative['polarity_score'])])
dissatisfied_top_five_negative_json

In [None]:
with open("dissatisfied_top_five_positive.json", "w") as dissatisfied_top_positive_file:
    dissatisfied_top_positive_file.write(dissatisfied_top_five_positive_json)
dissatisfied_top_positive_file.close()
with open("dissatisfied_top_five_negative.json", "w") as dissatisfied_top_negative_file:
    dissatisfied_top_negative_file.write(dissatisfied_top_five_negative_json)
dissatisfied_top_negative_file.close()

In [None]:
dissatisfied_positive_json = json.dumps([{"ID": id, "review": review, "label": label, "polarity_score": polarity_score, "nps_score": nps_score, "nps_label": nps_label, "date": date, "clinic": clinic, "city": city, "state": state} for id,review,label,polarity_score,nps_score,nps_label,date,clinic,city,state in zip(dissatisfied_positive['ID'], dissatisfied_positive['reviews'], dissatisfied_positive['sentiment_label'], dissatisfied_positive['polarity_score'], dissatisfied_positive['NPS'], dissatisfied_positive['nps_label'], dissatisfied_positive['date'], dissatisfied_positive['clinic'], dissatisfied_positive['city'], dissatisfied_positive['state'])])
dissatisfied_positive_json

In [None]:
with open("dissatisfied_positive.json", "w") as dissatisfied_positive_file:
    dissatisfied_positive_file.write(dissatisfied_positive_json)
dissatisfied_positive_file.close()

In [None]:
dissatisfied_negative_json = json.dumps([{"ID": id, "review": review, "label": label, "polarity_score": polarity_score, "nps_score": nps_score, "nps_label": nps_label, "date": date, "clinic": clinic, "city": city, "state": state} for id,review,label,polarity_score,nps_score,nps_label,date,clinic,city,state in zip(dissatisfied_negative['ID'], dissatisfied_negative['reviews'], dissatisfied_negative['sentiment_label'], dissatisfied_negative['polarity_score'], dissatisfied_negative['NPS'], dissatisfied_negative['nps_label'], dissatisfied_negative['date'], dissatisfied_negative['clinic'], dissatisfied_negative['city'], dissatisfied_negative['state'])])
dissatisfied_negative_json

In [None]:
with open("dissatisfied_negative.json", "w") as dissatisfied_negative_file:
    dissatisfied_negative_file.write(dissatisfied_negative_json)
dissatisfied_negative_file.close()

In [None]:
dissatisfied_neutral_json = json.dumps([{"ID": id, "review": review, "label": label, "polarity_score": polarity_score, "nps_score": nps_score, "nps_label": nps_label, "date": date, "clinic": clinic, "city": city, "state": state} for id,review,label,polarity_score,nps_score,nps_label,date,clinic,city,state in zip(dissatisfied_neutral['ID'], dissatisfied_neutral['reviews'], dissatisfied_neutral['sentiment_label'], dissatisfied_neutral['polarity_score'], dissatisfied_neutral['NPS'], dissatisfied_neutral['nps_label'], dissatisfied_neutral['date'], dissatisfied_neutral['clinic'], dissatisfied_neutral['city'], dissatisfied_neutral['state'])])
dissatisfied_neutral_json

In [None]:
with open("dissatisfied_neutral.json", "w") as dissatisfied_neutral_file:
    dissatisfied_neutral_file.write(dissatisfied_neutral_json)
dissatisfied_neutral_file.close()

In [None]:
dissatisfied_extreme_json = json.dumps([{"ID": id, "review": review, "label": label, "polarity_score": polarity_score, "nps_score": nps_score, "nps_label": nps_label, "date": date, "clinic": clinic, "city": city, "state": state} for id,review,label,polarity_score,nps_score,nps_label,date,clinic,city,state in zip(dissatisfied_extreme['ID'], dissatisfied_extreme['reviews'], dissatisfied_extreme['sentiment_label'], dissatisfied_extreme['polarity_score'], dissatisfied_extreme['NPS'], dissatisfied_extreme['nps_label'], dissatisfied_extreme['date'], dissatisfied_extreme['clinic'], dissatisfied_extreme['city'], dissatisfied_extreme['state'])])
dissatisfied_extreme_json

In [None]:
with open("dissatisfied_extreme.json", "w") as dissatisfied_extreme_file:
    dissatisfied_extreme_file.write(dissatisfied_extreme_json)
dissatisfied_extreme_file.close()

In [None]:
# Histogram of how many reviews carry each sentiment label
figure = pl.histogram(dissatisfied_reviews, x="sentiment_label")
figure.update_traces(marker=dict(color="indianred", line=dict(color='yellow', width=2.0)))
figure.update_layout(title=dict(text='Feedback Sentiment Analysis'))
figure.show()

In [None]:
# Histogram of how many reviews fall into each NPS category (promoter / passive / detractor).
# The comment and chart title were copied from the sentiment histogram; this chart plots
# nps_label, so the title now says so.
figure = pl.histogram(dissatisfied_reviews, x="nps_label")
figure.update_traces(marker_color="indianred",marker_line_color='yellow',marker_line_width=2.0)
figure.update_layout(title_text='Feedback NPS Label Distribution')
figure.show()

## 3. Topic Modelling

## 3.1 Topic Modelling For Reason Score Column

In [None]:
# Token lists that feed the topic model (only the reason-for-score answers are used for now)
topic_list = reason_for_nps_score_tokens_after_lemmatizing #+ satisfied_tokens_after_lemmatizing + dissatisfied_tokens_after_lemmatizing
# Token <-> id mapping built from those documents
topic_dictionary = gensim.corpora.Dictionary(documents=topic_list)
# Prune the vocabulary with the thresholds below, then re-pack the ids
topic_dictionary.filter_extremes(no_below=10, no_above=0.35)
topic_dictionary.compactify()
# Bag-of-words representation of every document
topic_doc_term_matrix = list(map(topic_dictionary.doc2bow, topic_list))

In [None]:
# Alias for gensim's LDA implementation
LDA = gensim.models.ldamodel.LdaModel

# Fit a 50-topic model on the bag-of-words corpus; random_state is fixed for reproducibility
lda_model = LDA(
    corpus=topic_doc_term_matrix,
    id2word=topic_dictionary,
    num_topics=50,
    random_state=0,
    chunksize=1000,
    passes=50,
    iterations=100,
)
lda_model.print_topics()

In [None]:
# Ask the model for 30 topics with 10 words each, as (topic number, [(word, weight), ...]) pairs
shown_topics = lda_model.show_topics(formatted=False, num_topics=30, num_words=10)

# Split the pairs into parallel lists: the topic numbers and, per topic, just the words
topic_number_list = [topic_number for topic_number, _ in shown_topics]
trending_topics_list = [[entry[0] for entry in word_weights] for _, word_weights in shown_topics]

trending_topics = pd.DataFrame({'topic_number':topic_number_list, 'topics':trending_topics_list})
trending_topics

In [None]:
#topic_data_list = reason_for_nps_score_topics.topics.values[1]
#topic_data_list = ','.join(map(str, topic_data_list)) 
#updated_topic_list  = str(topic_data_list).replace(',', '|')
#updated_topic_list

In [None]:
# One (topic id, word, weight) row for each of the top 3 words of every topic.
# NOTE(review): the third column is named 'coherence_score' but holds the weight returned by show_topic - confirm naming
top_words_per_topic = [
    (topic_id,) + word_weight
    for topic_id in range(lda_model.num_topics)
    for word_weight in lda_model.show_topic(topic_id, topn=3)
]

topics_words_based = pd.DataFrame(top_words_per_topic, columns=['topic_number', 'word', 'coherence_score'])
topics_words_based

In [None]:
# Stack the three review frames into one; data_combine is defined here for use by other cells
data_combine = pd.concat([reason_for_nps_score_reviews, satisfied_reviews, dissatisfied_reviews])

# Build the token vocabulary once: the original re-concatenated every token array for each word,
# which made the loop cost grow with (number of words) x (number of tokens)
reason_for_nps_score_vocabulary = set(np.concatenate(reason_for_nps_score_reviews.fresh_tokens.values, axis=0))

topic_sentiment_label = []
topic_sentiment_value = []
for index, word in enumerate(topics_words_based.word):
    if word in reason_for_nps_score_vocabulary:
        # NOTE(review): `index` is the position of the word in topics_words_based, yet it is used
        # as a row position in the reviews frame - confirm this is the intended lookup.
        # Words missing from the vocabulary append nothing, so these lists can end up shorter
        # than topics_words_based.
        topic_sentiment_label.append(reason_for_nps_score_reviews.sentiment_label.iloc[index])
        topic_sentiment_value.append(reason_for_nps_score_reviews.sentiment.iloc[index])

In [None]:
# Attach the collected labels to the word table (this adds the column to topics_words_based itself;
# pandas raises if the list length differs from the number of rows)
topics_words_based['sentiment'] = topic_sentiment_label

# Most frequent sentiment label among each topic's rows
topic_sentiments = (
    topics_words_based
    .groupby(['topic_number'])['sentiment']
    .apply(lambda labels: labels.value_counts().index[0])
    .reset_index()
)
topic_sentiments

In [None]:
# Sum the per-word scores of each topic into one score per topic_number
topic_score_group = topics_words_based.groupby('topic_number', as_index=False)['coherence_score'].sum()
topic_score_group

In [None]:
# topic_number -> summed score lookup
topic_wise_score = topic_score_group.set_index('topic_number')['coherence_score'].to_dict()

# Same frame object as trending_topics; the score column is added to it in place
top_trending_topics = trending_topics
top_trending_topics['coherence_score'] = top_trending_topics['topic_number'].apply(topic_wise_score.get)
top_trending_topics.to_csv("top_trending_topics.csv")
top_trending_topics

In [None]:
# Same frame object as top_trending_topics; the word lists are turned into plain text
topics_data = top_trending_topics
# Render each list as a string, then drop the leading '[' and trailing ']'
topics_as_text = topics_data['topics'].astype(str)
topics_data['topics'] = topics_as_text
topics_data['topics'] = topics_data['topics'].str.lstrip('[').str.rstrip(']')
topics_data

In [None]:
# Ordered tagging rules: (regex patterns that must ALL be found in the topic text, keyword to assign).
# Rules run top to bottom, so a later match overwrites an earlier one; rows matching nothing keep no keyword.
topic_keyword_rules = [
    (('need', 'help'), "Assistance"),
    (('listened|team|doctor|dr|welcoming|willing|accommodating|listening|listen|explain|knowledgeable|awesome|practitioner|nice|cared|great|good|fantastic|thorough|courteous|friendly',), "Provider"),
    (('prescription|pharmacy|medication|diagnosis|medicine|check|pain',), "Prescription"),
    (('clinic|parking|visit',), "Clinic"),
    (('received|happy|nothing|well|everything|pleased|pleasant|smooth',), "Satisfied"),
    (('quick|slow|response',), "Response"),
    (('busy|taking|waiting|time|hour|call|late|minute',), "Time"),
    (('blood|vaccine|vein|needle|draw',), "Blood"),
    (('rude|worst|bad|problem|never|bad',), "Dissatisfied"),
    (('x', 'ray'), "Prescription"),
]

for required_patterns, keyword in topic_keyword_rules:
    rule_mask = topics_data['topics'].str.contains(required_patterns[0])
    for extra_pattern in required_patterns[1:]:
        rule_mask = rule_mask & topics_data['topics'].str.contains(extra_pattern)
    topics_data.loc[rule_mask, 'topic_keyword'] = keyword

topics_data.to_csv("topics_data.csv")
topics_data

In [None]:
# Re-bind to the shared frame, make sure the topics column is text without surrounding brackets,
# and write the table to CSV again
topics_data = top_trending_topics
topics_data['topics'] = topics_data['topics'].astype(str)
bracketless_topics = topics_data['topics'].str.strip('[]')
topics_data['topics'] = bracketless_topics
topics_data.to_csv("topics_data.csv")
topics_data

In [None]:
# Number of topics assigned to each keyword
topics_data.topic_keyword.value_counts()

In [None]:
# Display the current topics table
topics_data

In [None]:
# One record per topic row: its words, its score and its assigned keyword
top_topics_records = [
    {"topic": topic, "coherence_score": coherence_score, "topic_keyword": topic_keyword}
    for topic, coherence_score, topic_keyword in zip(
        topics_data['topics'], topics_data['coherence_score'], topics_data['topic_keyword']
    )
]
top_topics_json = json.dumps(top_topics_records)
top_topics_json

In [None]:
# Write the topic records to disk; the with-block closes the file on exit,
# so no explicit close() is needed afterwards
with open("top_topics.json", "w") as top_topics_file:
    top_topics_file.write(top_topics_json)

In [None]:
# Count the distinct ID values across the combined review frames
distinct_survey_ids = data_combine['ID'].unique()
total_surveys = len(distinct_survey_ids)
total_surveys_json = json.dumps([{"total_surveys": total_surveys}])
total_surveys_json

In [None]:
# Write the survey count to disk; the with-block closes the file on exit,
# so no explicit close() is needed afterwards
with open("total_surveys.json", "w") as total_surveys_file:
    total_surveys_file.write(total_surveys_json)

In [None]:
# Render pyLDAvis output inline and build the interactive view for the current model
pyLDAvis.enable_notebook()
vis = pyLDAvis.gensim_models.prepare(
    topic_model=lda_model,
    corpus=topic_doc_term_matrix,
    dictionary=topic_dictionary,
)
vis

In [None]:
def format_topics_sentences(ldamodel=lda_model, corpus=topic_doc_term_matrix, texts=topic_list):
    """Return one row per document with its dominant topic, that topic's weight and keywords.

    Parameters:
        ldamodel: model indexed as ``ldamodel[corpus]``; each item is expected to be a list of
            (topic_number, weight) pairs for one document.
        corpus: the documents in the form accepted by ``ldamodel``.
        texts: the original documents, appended as the last column of the result.

    Returns:
        DataFrame with columns 'Dominant_Topic', 'Perc_Contribution', 'Topic_Keywords' followed by
        an unnamed column (label 0) holding ``texts``.

    Rows are collected in a list and converted once: DataFrame.append copied the whole frame on
    every call and no longer exists in pandas 2.x. A document for which the model returns no topic
    gets a placeholder row so the remaining rows stay aligned with ``texts``.
    """
    column_names = ['Dominant_Topic', 'Perc_Contribution', 'Topic_Keywords']
    keywords_by_topic = {}  # topic number -> joined keyword string, computed once per topic
    rows = []

    for doc_topics in ldamodel[corpus]:
        if not doc_topics:
            # No topic reported for this document: keep its slot so positions match `texts`
            rows.append((None, None, None))
            continue
        # First pair with the highest weight, i.e. what sorting by weight descending put first
        topic_num, prop_topic = max(doc_topics, key=lambda pair: pair[1])
        if topic_num not in keywords_by_topic:
            keywords_by_topic[topic_num] = ", ".join(word for word, _ in ldamodel.show_topic(topic_num))
        rows.append((int(topic_num), round(prop_topic, 4), keywords_by_topic[topic_num]))

    sent_topics_df = pd.DataFrame(rows, columns=column_names)

    # Put the original documents next to their topic information
    contents = pd.Series(texts)
    sent_topics_df = pd.concat([sent_topics_df, contents], axis=1)
    return sent_topics_df

In [None]:
# Dominant topic, weight and keywords for every reason-for-score document
reason_for_nps_score_topic_sents_keywords = format_topics_sentences(
    ldamodel=lda_model,
    corpus=topic_doc_term_matrix,
    texts=topic_list,
)

# Turn the row position into a column and give all five columns descriptive names
dominant_topic_columns = ['Document_No', 'Dominant_Topic', 'Topic_Perc_Contrib', 'Keywords', 'Text']
reason_for_nps_score_dominant_topic = reason_for_nps_score_topic_sents_keywords.reset_index()
reason_for_nps_score_dominant_topic.columns = dominant_topic_columns

reason_for_nps_score_dominant_topic.head(10)

In [None]:
# Same frame object as reason_for_nps_score_reviews; the topic columns are added to it in place
reason_for_nps_score_reviews_topics = reason_for_nps_score_reviews

# Reset to a 0..n-1 index BEFORE assigning: column assignment from a Series aligns on index labels,
# and the dominant-topic frame is indexed 0..n-1. Resetting afterwards (as before) left the topic
# columns misaligned or NaN whenever the reviews frame carried a non-default index.
# NOTE(review): presumably row k of the reviews frame is document k of topic_list - verify upstream.
reason_for_nps_score_reviews_topics.reset_index(drop=True, inplace=True)

reason_for_nps_score_reviews_topics['document_number'] = reason_for_nps_score_dominant_topic['Document_No']
reason_for_nps_score_reviews_topics['topic_number'] = reason_for_nps_score_dominant_topic['Dominant_Topic']
reason_for_nps_score_reviews_topics['topic_contribution'] = reason_for_nps_score_dominant_topic['Topic_Perc_Contrib']
reason_for_nps_score_reviews_topics['topic_keywords'] = reason_for_nps_score_dominant_topic['Keywords']
reason_for_nps_score_reviews_topics.head(10)

In [None]:
# Write the reviews together with their topic columns to CSV
reason_for_nps_score_reviews_topics.to_csv("reason_for_nps_score_reviews_topics.csv")

In [None]:
# Count reviews per (topic number, sentiment label) pair
reason_for_nps_score_topics_sentiment_crosstab = pd.crosstab(
    index=reason_for_nps_score_reviews.topic_number,
    columns=reason_for_nps_score_reviews_topics.sentiment_label,
)
reason_for_nps_score_topics_sentiment_crosstab

In [None]:
# Sentiment classes read from the crosstab (a missing class raises KeyError)
reason_for_nps_score_sentiment_columns = ['Extreme', 'Negative', 'Neutral', 'Positive']

# Keep the topic number (the crosstab index) as a regular column before the index is dropped
reason_for_nps_score_topics_sentiment_crosstab['topic_number'] = reason_for_nps_score_topics_sentiment_crosstab.index
reason_for_nps_score_topics_sentiment_crosstab['total'] = reason_for_nps_score_topics_sentiment_crosstab[reason_for_nps_score_sentiment_columns].sum(axis=1)
reason_for_nps_score_topics_sentiment_crosstab.reset_index(drop=True, inplace=True)

# Share of every sentiment class within a topic, in percent
for sentiment_name in reason_for_nps_score_sentiment_columns:
    reason_for_nps_score_topics_sentiment_crosstab[sentiment_name.lower() + '_percent'] = (
        reason_for_nps_score_topics_sentiment_crosstab[sentiment_name].values
        / reason_for_nps_score_topics_sentiment_crosstab['total'].values
    ) * 100

# Left-join review metadata on topic_number: the per-topic figures repeat once per matching review
reason_for_nps_score_review_details = reason_for_nps_score_reviews_topics[['date', 'state', 'clinic', 'topic_number', 'topic_keywords']]
reason_for_nps_score_sentiment_topics_merge = reason_for_nps_score_topics_sentiment_crosstab.merge(
    reason_for_nps_score_review_details, on='topic_number', how='left'
)
reason_for_nps_score_sentiment_topics_merge.to_csv("reason_for_nps_score_topics_sentiment_crosstab.csv")
reason_for_nps_score_sentiment_topics_merge

In [None]:
# Count reviews per (topic number, NPS label) pair
reason_for_nps_score_topics_nps_crosstab = pd.crosstab(
    index=reason_for_nps_score_reviews.topic_number,
    columns=reason_for_nps_score_reviews_topics.nps_label,
)
reason_for_nps_score_topics_nps_crosstab

In [None]:
# NPS classes read from the crosstab (a missing class raises KeyError)
reason_for_nps_score_nps_columns = ['Detractor', 'Passive', 'Promoter']
reason_for_nps_score_nps_percent_columns = ['detractor_percent', 'passive_percent', 'promoter_percent']

# Keep the topic number (the crosstab index) as a regular column before the index is dropped
reason_for_nps_score_topics_nps_crosstab['topic_number'] = reason_for_nps_score_topics_nps_crosstab.index
reason_for_nps_score_topics_nps_crosstab['total'] = reason_for_nps_score_topics_nps_crosstab[reason_for_nps_score_nps_columns].sum(axis=1)
reason_for_nps_score_topics_nps_crosstab.reset_index(drop=True, inplace=True)

# Share of every NPS class within a topic, in percent
for nps_name, percent_name in zip(reason_for_nps_score_nps_columns, reason_for_nps_score_nps_percent_columns):
    reason_for_nps_score_topics_nps_crosstab[percent_name] = (
        reason_for_nps_score_topics_nps_crosstab[nps_name].values
        / reason_for_nps_score_topics_nps_crosstab['total'].values
    ) * 100

# Left-join review metadata on topic_number: the per-topic figures repeat once per matching review
reason_for_nps_score_nps_review_details = reason_for_nps_score_reviews_topics[['date', 'state', 'clinic', 'topic_number', 'topic_keywords']]
reason_for_nps_score_nps_topics_merge = reason_for_nps_score_topics_nps_crosstab.merge(
    reason_for_nps_score_nps_review_details, how='left', on='topic_number'
)
reason_for_nps_score_nps_topics_merge.to_csv("reason_for_nps_score_topics_nps_crosstab.csv")
reason_for_nps_score_nps_topics_merge

### 3.2 Topic Modelling For Went Well Column

In [None]:
# Documents for this model: the lemmatized "went well" tokens
topic_list = satisfied_tokens_after_lemmatizing

# Token <-> id mapping, pruned with no_below=10 / no_above=0.35, then re-numbered without gaps
topic_dictionary = gensim.corpora.Dictionary(documents=topic_list)
topic_dictionary.filter_extremes(no_below=10, no_above=0.35)
topic_dictionary.compactify()

# Bag-of-words representation of every document
topic_doc_term_matrix = list(map(topic_dictionary.doc2bow, topic_list))

In [None]:
# Alias for the gensim LDA model class
LDA = gensim.models.ldamodel.LdaModel

# Fit a 50-topic LDA model on the "went well" corpus (seeded for repeatability)
lda_model = LDA(
    corpus=topic_doc_term_matrix,
    id2word=topic_dictionary,
    num_topics=50,
    random_state=0,
    chunksize=1000,
    passes=50,
    iterations=100,
)
lda_model.print_topics()

In [None]:
# Up to 30 topics, each as (topic id, [(word, weight), ...]) with 10 words per topic
shown_topics = lda_model.show_topics(formatted=False, num_topics=30, num_words=10)

# Split into parallel lists: topic ids and the bare word lists
topic_number_list = [topic_id for topic_id, _ in shown_topics]
trending_topics_list = [[term[0] for term in terms] for _, terms in shown_topics]

trending_topics = pd.DataFrame({'topic_number': topic_number_list, 'topics': trending_topics_list})
trending_topics

In [None]:
# One (topic id, word, weight) row for each of the top 3 words of every topic.
# NOTE(review): the third column is named 'coherence_score' but holds the weight returned by show_topic - confirm naming
top_words_per_topic = [
    (topic_id,) + word_weight
    for topic_id in range(lda_model.num_topics)
    for word_weight in lda_model.show_topic(topic_id, topn=3)
]

topics_words_based = pd.DataFrame(top_words_per_topic, columns=['topic_number', 'word', 'coherence_score'])
topics_words_based

In [None]:
# Dominant topic, weight and keywords for every "went well" document
satisfied_topic_sents_keywords = format_topics_sentences(
    ldamodel=lda_model,
    corpus=topic_doc_term_matrix,
    texts=topic_list,
)

# Turn the row position into a column and give all five columns descriptive names
dominant_topic_columns = ['Document_No', 'Dominant_Topic', 'Topic_Perc_Contrib', 'Keywords', 'Text']
satisfied_dominant_topic = satisfied_topic_sents_keywords.reset_index()
satisfied_dominant_topic.columns = dominant_topic_columns

satisfied_dominant_topic.head(10)

In [None]:
# Same frame object as satisfied_reviews; the topic columns are added to it in place
satisfied_reviews_topics = satisfied_reviews

# Reset to a 0..n-1 index BEFORE assigning: column assignment from a Series aligns on index labels,
# and the dominant-topic frame is indexed 0..n-1. Resetting afterwards (as before) left the topic
# columns misaligned or NaN whenever the reviews frame carried a non-default index.
# NOTE(review): presumably row k of the reviews frame is document k of topic_list - verify upstream.
satisfied_reviews_topics.reset_index(drop=True, inplace=True)

satisfied_reviews_topics['document_number'] = satisfied_dominant_topic['Document_No']
satisfied_reviews_topics['topic_number'] = satisfied_dominant_topic['Dominant_Topic']
satisfied_reviews_topics['topic_contribution'] = satisfied_dominant_topic['Topic_Perc_Contrib']
satisfied_reviews_topics['topic_keywords'] = satisfied_dominant_topic['Keywords']
satisfied_reviews_topics.head(10)

In [None]:
# Write the "went well" reviews together with their topic columns to CSV
satisfied_reviews_topics.to_csv("satisfied_reviews_topics.csv")

In [None]:
# Count reviews per (topic keywords, sentiment label) pair
satisfied_topics_sentiment_crosstab = pd.crosstab(
    index=satisfied_reviews_topics.topic_keywords,
    columns=satisfied_reviews_topics.sentiment_label,
)
satisfied_topics_sentiment_crosstab

In [None]:
# Sentiment classes read from the crosstab (a missing class raises KeyError)
satisfied_sentiment_columns = ['Extreme', 'Negative', 'Neutral', 'Positive']

# Keep the topic keywords (the crosstab index) as a regular column before the index is dropped
satisfied_topics_sentiment_crosstab['topic_keywords'] = satisfied_topics_sentiment_crosstab.index
satisfied_topics_sentiment_crosstab['total'] = satisfied_topics_sentiment_crosstab[satisfied_sentiment_columns].sum(axis=1)
satisfied_topics_sentiment_crosstab.reset_index(drop=True, inplace=True)

# Share of every sentiment class within a topic, in percent
for sentiment_name in satisfied_sentiment_columns:
    satisfied_topics_sentiment_crosstab[sentiment_name.lower() + '_percent'] = (
        satisfied_topics_sentiment_crosstab[sentiment_name].values
        / satisfied_topics_sentiment_crosstab['total'].values
    ) * 100

satisfied_topics_sentiment_crosstab.to_csv("satisfied_topics_sentiment_crosstab.csv")
satisfied_topics_sentiment_crosstab

In [None]:
# Count reviews per (topic keywords, NPS label) pair
satisfied_topics_nps_crosstab = pd.crosstab(
    index=satisfied_reviews_topics.topic_keywords,
    columns=satisfied_reviews_topics.nps_label,
)
satisfied_topics_nps_crosstab

In [None]:
# NPS classes read from the crosstab (a missing class raises KeyError).
# NOTE(review): this section uses 'Detractors' while the reason-score section uses 'Detractor' -
# confirm which spelling the nps_label column of satisfied_reviews actually holds.
satisfied_nps_columns = ['Detractors', 'Passive', 'Promoter']
satisfied_nps_percent_columns = ['detractor_percent', 'passive_percent', 'promoter_percent']

# Keep the topic keywords (the crosstab index) as a regular column before the index is dropped
satisfied_topics_nps_crosstab['topic_keywords'] = satisfied_topics_nps_crosstab.index
satisfied_topics_nps_crosstab['total'] = satisfied_topics_nps_crosstab[satisfied_nps_columns].sum(axis=1)
satisfied_topics_nps_crosstab.reset_index(drop=True, inplace=True)

# Share of every NPS class within a topic, in percent
for nps_name, percent_name in zip(satisfied_nps_columns, satisfied_nps_percent_columns):
    satisfied_topics_nps_crosstab[percent_name] = (
        satisfied_topics_nps_crosstab[nps_name].values
        / satisfied_topics_nps_crosstab['total'].values
    ) * 100

satisfied_topics_nps_crosstab.to_csv("satisfied_topics_nps_crosstab.csv")
satisfied_topics_nps_crosstab

### 3.3 Topic Modelling For Not Well Column

In [None]:
# Documents for this model: the lemmatized "not well" tokens
topic_list = dissatisfied_tokens_after_lemmatizing

# Token <-> id mapping, pruned with no_below=10 / no_above=0.35, then re-numbered without gaps
topic_dictionary = gensim.corpora.Dictionary(documents=topic_list)
topic_dictionary.filter_extremes(no_below=10, no_above=0.35)
topic_dictionary.compactify()

# Bag-of-words representation of every document
topic_doc_term_matrix = list(map(topic_dictionary.doc2bow, topic_list))

In [None]:
# Alias for the gensim LDA model class
LDA = gensim.models.ldamodel.LdaModel

# Fit a 50-topic LDA model on the "not well" corpus (seeded for repeatability)
lda_model = LDA(
    corpus=topic_doc_term_matrix,
    id2word=topic_dictionary,
    num_topics=50,
    random_state=0,
    chunksize=1000,
    passes=50,
    iterations=100,
)
lda_model.print_topics()

In [None]:
# Up to 30 topics, each as (topic id, [(word, weight), ...]) with 10 words per topic
shown_topics = lda_model.show_topics(formatted=False, num_topics=30, num_words=10)

# Split into parallel lists: topic ids and the bare word lists
topic_number_list = [topic_id for topic_id, _ in shown_topics]
trending_topics_list = [[term[0] for term in terms] for _, terms in shown_topics]

trending_topics = pd.DataFrame({'topic_number': topic_number_list, 'topics': trending_topics_list})
trending_topics

In [None]:
# One (topic id, word, weight) row for each of the top 3 words of every topic.
# NOTE(review): the third column is named 'coherence_score' but holds the weight returned by show_topic - confirm naming
top_words_per_topic = [
    (topic_id,) + word_weight
    for topic_id in range(lda_model.num_topics)
    for word_weight in lda_model.show_topic(topic_id, topn=3)
]

topics_words_based = pd.DataFrame(top_words_per_topic, columns=['topic_number', 'word', 'coherence_score'])
topics_words_based

In [None]:
# Dominant topic, weight and keywords for every "not well" document
dissatisfied_topic_sents_keywords = format_topics_sentences(
    ldamodel=lda_model,
    corpus=topic_doc_term_matrix,
    texts=topic_list,
)

# Turn the row position into a column and give all five columns descriptive names
dominant_topic_columns = ['Document_No', 'Dominant_Topic', 'Topic_Perc_Contrib', 'Keywords', 'Text']
dissatisfied_dominant_topic = dissatisfied_topic_sents_keywords.reset_index()
dissatisfied_dominant_topic.columns = dominant_topic_columns

dissatisfied_dominant_topic.head(10)

In [None]:
# Same frame object as dissatisfied_reviews; the topic columns are added to it in place
dissatisfied_reviews_topics = dissatisfied_reviews

# Reset to a 0..n-1 index BEFORE assigning: column assignment from a Series aligns on index labels,
# and the dominant-topic frame is indexed 0..n-1. Resetting afterwards (as before) left the topic
# columns misaligned or NaN whenever the reviews frame carried a non-default index.
# NOTE(review): presumably row k of the reviews frame is document k of topic_list - verify upstream.
dissatisfied_reviews_topics.reset_index(drop=True, inplace=True)

dissatisfied_reviews_topics['document_number'] = dissatisfied_dominant_topic['Document_No']
dissatisfied_reviews_topics['topic_number'] = dissatisfied_dominant_topic['Dominant_Topic']
dissatisfied_reviews_topics['topic_contribution'] = dissatisfied_dominant_topic['Topic_Perc_Contrib']
dissatisfied_reviews_topics['topic_keywords'] = dissatisfied_dominant_topic['Keywords']
dissatisfied_reviews_topics.head(10)

In [None]:
# Write the "not well" reviews together with their topic columns to CSV
dissatisfied_reviews_topics.to_csv("dissatisfied_reviews_topics.csv")

In [None]:
# Count reviews per (topic keywords, sentiment label) pair
dissatisfied_topics_sentiment_crosstab = pd.crosstab(
    index=dissatisfied_reviews_topics.topic_keywords,
    columns=dissatisfied_reviews_topics.sentiment_label,
)
dissatisfied_topics_sentiment_crosstab

In [None]:
# Sentiment classes read from the crosstab (a missing class raises KeyError)
dissatisfied_sentiment_columns = ['Extreme', 'Negative', 'Neutral', 'Positive']

# Keep the topic keywords (the crosstab index) as a regular column before the index is dropped
dissatisfied_topics_sentiment_crosstab['topic_keywords'] = dissatisfied_topics_sentiment_crosstab.index
dissatisfied_topics_sentiment_crosstab['total'] = dissatisfied_topics_sentiment_crosstab[dissatisfied_sentiment_columns].sum(axis=1)
dissatisfied_topics_sentiment_crosstab.reset_index(drop=True, inplace=True)

# Share of every sentiment class within a topic, in percent
for sentiment_name in dissatisfied_sentiment_columns:
    dissatisfied_topics_sentiment_crosstab[sentiment_name.lower() + '_percent'] = (
        dissatisfied_topics_sentiment_crosstab[sentiment_name].values
        / dissatisfied_topics_sentiment_crosstab['total'].values
    ) * 100

dissatisfied_topics_sentiment_crosstab.to_csv("dissatisfied_topics_sentiment_crosstab.csv")
dissatisfied_topics_sentiment_crosstab

In [None]:
# Count reviews per (topic keywords, NPS label) pair
dissatisfied_topics_nps_crosstab = pd.crosstab(
    index=dissatisfied_reviews_topics.topic_keywords,
    columns=dissatisfied_reviews_topics.nps_label,
)
dissatisfied_topics_nps_crosstab

In [None]:
# NPS classes read from the crosstab (a missing class raises KeyError).
# NOTE(review): this section uses 'Detractors' while the reason-score section uses 'Detractor' -
# confirm which spelling the nps_label column of dissatisfied_reviews actually holds.
dissatisfied_nps_columns = ['Detractors', 'Passive', 'Promoter']
dissatisfied_nps_percent_columns = ['detractor_percent', 'passive_percent', 'promoter_percent']

# Keep the topic keywords (the crosstab index) as a regular column before the index is dropped
dissatisfied_topics_nps_crosstab['topic_keywords'] = dissatisfied_topics_nps_crosstab.index
dissatisfied_topics_nps_crosstab['total'] = dissatisfied_topics_nps_crosstab[dissatisfied_nps_columns].sum(axis=1)
dissatisfied_topics_nps_crosstab.reset_index(drop=True, inplace=True)

# Share of every NPS class within a topic, in percent
for nps_name, percent_name in zip(dissatisfied_nps_columns, dissatisfied_nps_percent_columns):
    dissatisfied_topics_nps_crosstab[percent_name] = (
        dissatisfied_topics_nps_crosstab[nps_name].values
        / dissatisfied_topics_nps_crosstab['total'].values
    ) * 100

dissatisfied_topics_nps_crosstab.to_csv("dissatisfied_topics_nps_crosstab.csv")
dissatisfied_topics_nps_crosstab

In [None]:
# Order all reviews by NPS (highest first), then keep those whose text mentions
# "clinic", "Clinic" or "CLINIC"
nps_sorted_reviews = data_combine.sort_values(by="NPS", ascending=False)
mentions_clinic = nps_sorted_reviews['reviews'].str.contains(r'clinic|Clinic|CLINIC')
clinics_data = nps_sorted_reviews[mentions_clinic]
clinics_data

In [None]:
# Mean NPS per (clinic, city, state) among the clinic-mentioning reviews
clinics_group = (
    clinics_data[['clinic', 'city', 'state', 'NPS']]
    .groupby(['clinic', 'city', 'state'])['NPS']
    .mean()
    .reset_index()
)
clinics_group.to_csv("clinics_data.csv")
clinics_group

In [None]:
# Stack the gram tables of the three survey columns and save the result
gram_frames = [reason_for_nps_score_grams, satisfied_grams, dissatisfied_grams]
grams_combined = pd.concat(gram_frames)
grams_combined.to_csv("grams_combined.csv")
grams_combined

In [None]:
from sklearn.manifold import TSNE
from bokeh.plotting import figure, output_file, show
from bokeh.models import Label
from bokeh.io import output_notebook
import matplotlib.colors as mcolors

In [None]:
# Documents for this model: the token lists of all three survey columns, one after another
topic_list = (
    reason_for_nps_score_tokens_after_lemmatizing
    + satisfied_tokens_after_lemmatizing
    + dissatisfied_tokens_after_lemmatizing
)

# Token <-> id mapping, pruned with no_below=10 / no_above=0.35, then re-numbered without gaps
topic_dictionary = gensim.corpora.Dictionary(documents=topic_list)
topic_dictionary.filter_extremes(no_below=10, no_above=0.35)
topic_dictionary.compactify()

# Bag-of-words representation of every document
topic_doc_term_matrix = list(map(topic_dictionary.doc2bow, topic_list))

# Alias for the gensim LDA model class
LDA = gensim.models.ldamodel.LdaModel

# Fit a 50-topic LDA model on the combined corpus (seeded for repeatability)
lda_model = LDA(
    corpus=topic_doc_term_matrix,
    id2word=topic_dictionary,
    num_topics=50,
    random_state=0,
    chunksize=1000,
    passes=50,
    iterations=100,
)
lda_model.print_topics()

In [None]:
topic_weights = []
for i, row_list in enumerate(lda_model[topic_doc_term_matrix]):
    topic_weights.append([w for i, w in enumerate(row_list[0])])

In [None]:
arr = pd.DataFrame(topic_weights).fillna(0).values

# Keep the well separated points (optional)
arr = arr[np.amax(arr, axis=1) > 0.35]

# Dominant topic number in each doc
topic_num = np.argmax(arr, axis=1)

# tSNE Dimension Reduction
tsne_model = TSNE(n_components=2, verbose=1, random_state=0, angle=.99, init='pca')
tsne_lda = tsne_model.fit_transform(arr)

# Plot the Topic Clusters using Bokeh
output_notebook()
n_topics = 20
mycolors = np.array([color for name, color in mcolors.TABLEAU_COLORS.items()])
plot = figure(title="t-SNE Clustering of {} LDA Topics".format(n_topics), 
              plot_width=900, plot_height=700)
plot.scatter(x=tsne_lda[:,0], y=tsne_lda[:,1], color=mycolors[topic_num])
show(plot)

## 4. Predicting Sentiments

### 4.1 Preparing The Dataset

In [None]:
# Features are the raw review texts, targets the 'sentiment' column; 25% is held out for testing,
# stratified on the target and seeded for repeatability.
# NOTE(review): the target is 'sentiment', not 'sentiment_label' - confirm it holds class labels.
x_train, x_test, y_train, y_test = train_test_split(
    reason_for_nps_score_reviews['reviews'].values,
    reason_for_nps_score_reviews['sentiment'].values,
    stratify=reason_for_nps_score_reviews['sentiment'].values,
    test_size=0.25,
    random_state=0,
)

In [None]:
# Learn the vocabulary on the training texts only (English stop words removed),
# then encode both splits as dense word-count arrays
vec = CountVectorizer(stop_words='english')
vec.fit(x_train)
x_train = vec.transform(x_train).toarray()
x_test = vec.transform(x_test).toarray()

### 4.2 Model 1 - Logistic Regression Model

In [None]:
# Fit a logistic regression classifier and report its accuracy on the test split
logistic_model = LogisticRegression().fit(x_train, y_train)
logistic_model_score = logistic_model.score(x_test, y_test)
print("Model accuracy:", (round(logistic_model_score, 2)*100), "%")

In [None]:
# Fit a stochastic gradient descent classifier and report its accuracy on the test split.
# The score goes into its own variable (as in the other model cells) so that sgdc_model
# keeps referring to the fitted classifier and is not overwritten by a float.
sgdc_model = SGDClassifier()
sgdc_model.fit(x_train, y_train)
sgdc_model_score = sgdc_model.score(x_test, y_test)
print("Model accuracy:", (round(sgdc_model_score, 2)*100), "%")

In [None]:
# Predict test-set labels with the fitted logistic regression model
data = logistic_model.predict(x_test)
# Show the predictions at positions 1 to 599 (position 0 is not part of this slice)
data[1:600]

### 4.3 Model 2 - Multinomial Naive Bayes Model

In [None]:
# Fit a multinomial naive Bayes classifier and report its accuracy on the test split
multinomial_model = MultinomialNB().fit(x_train, y_train)
multinomial_model_score = multinomial_model.score(x_test, y_test)
print("Model accuracy:", (round(multinomial_model_score, 2)*100), "%")

### 4.4 Model 3 - Multinomial Naive Bayes Model With Scaling

In [None]:
# Min-max scale the count features, then fit a multinomial naive Bayes classifier;
# accuracy is reported on the test split
multinomial_pipeline_steps = [
    ('Normalizing', MinMaxScaler()),
    ('MultinomialNB', MultinomialNB()),
]
multinomial_pipeline = Pipeline(multinomial_pipeline_steps)
multinomial_pipeline.fit(x_train, y_train)
multinomial_pipeline_score = multinomial_pipeline.score(x_test, y_test)
print("Model accuracy:", (round(multinomial_pipeline_score, 2)*100), "%")

### 4.5 Model 4 - Complement Naive Bayes Model

In [None]:
# Fit a complement naive Bayes classifier and report its accuracy on the test split
complement_model = ComplementNB().fit(x_train, y_train)
complement_model_score = complement_model.score(x_test, y_test)
print("Model accuracy:", (round(complement_model_score, 2)*100), "%")

### 4.6 Model 5 - Gaussian Naive Bayes Model

In [None]:
# Fit a Gaussian naive Bayes classifier and report its accuracy on the test split
gaussian_model = GaussianNB().fit(x_train, y_train)
gaussian_model_score = gaussian_model.score(x_test, y_test)
print("Model accuracy:", (round(gaussian_model_score, 2)*100), "%")

### 4.7 Model 6 - Bernoulli Naive Bayes Model

In [None]:
# Fit a Bernoulli naive Bayes classifier and report its accuracy on the test split
bernoulli_model = BernoulliNB().fit(x_train, y_train)
bernoulli_model_score = bernoulli_model.score(x_test, y_test)
print("Model accuracy:", (round(bernoulli_model_score, 2)*100), "%")

### 4.8 Model 7 - Stochastic Gradient Descent Model 

In [None]:
# Fit a stochastic gradient descent classifier and report its accuracy on the test split
from sklearn.linear_model import LogisticRegression, SGDClassifier # For creating a logistic regression and a stochastic gradient descent model

sgdc_model = SGDClassifier()
sgdc_model.fit(x_train, y_train)
# The score goes into its own variable (as in the other model cells) so that sgdc_model
# keeps referring to the fitted classifier and is not overwritten by a float
sgdc_model_score = sgdc_model.score(x_test, y_test)
print("Model accuracy:", (round(sgdc_model_score, 2)*100), "%")

### 4.9 Model 8 - Linear Support Vector Classifier Model 

In [None]:
# Fit a linear support vector classifier and report its accuracy on the test split
lsvc_model = LinearSVC().fit(x_train, y_train)
lsvc_model_score = lsvc_model.score(x_test, y_test)
print("Model accuracy:", (round(lsvc_model_score, 2)*100), "%")

### 4.10 Model 9 - Decision Tree Classifier Model 

In [None]:
# Fit a decision tree classifier and report its accuracy on the test split
descion_tree_model = DecisionTreeClassifier().fit(x_train, y_train)
descion_tree_model_score = descion_tree_model.score(x_test, y_test)
print("Model accuracy:", (round(descion_tree_model_score, 2)*100), "%")

### 4.11 Model 10 - Random Forest Tree Classifier Model 

In [None]:
# Fit a random forest classifier and report its accuracy on the test split
random_forest_tree_model = RandomForestClassifier().fit(x_train, y_train)
random_forest_tree_model_score = random_forest_tree_model.score(x_test, y_test)
print("Model accuracy:", (round(random_forest_tree_model_score, 2)*100), "%")