references: https://www.kaggle.com/code/fengliplatform/customer-sentiment-analysis

In [1]:
# Generate a ticket priority for each complaint,
# based on subjectivity (TextBlob), compound polarity (VADER), and topic frequency.

# Complaint subjectivity ranges from 0 to 1, where 1 is extremely subjective.
# Complaint polarity ranges from -1 to 1.
# Topic frequency = topic count / total count.

# priority = subjectivity - polarity + topic_frequency

# Then apply ABC ranking (top 20% / next 30% / remaining 50%).

In [2]:
import pandas as pd
import json
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns

# import nltk
# nltk.download('vader_lexicon')

from textblob import TextBlob
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [3]:
def get_processed_df(csv_path):
    """Read the CSV at ``csv_path`` and return its contents as a DataFrame.

    ``csv_path`` is forwarded unchanged to ``pandas.read_csv`` with all
    default parsing options.
    """
    return pd.read_csv(csv_path)

In [4]:
# Load the stage-2 CSV into the working frame and display it.
df = get_processed_df(csv_path='process_csv_stage_2.csv')
df

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp
0,1,good morning name appreciate could help put st...,0,5,0,34
1,2,upgraded card tell agent upgrade anniversary d...,3,3,0,2
2,10,chase card report however fraudulent applicati...,6,2,0,16
3,11,try book ticket come across offer apply toward...,3,7,0,24
4,14,grand son give check deposit chase account fun...,4,4,0,0
...,...,...,...,...,...,...
21067,78303,chase card customer well decade offer multiple...,3,2,0,2
21068,78309,wednesday call chas visa credit card provider ...,8,5,0,39
21069,78310,familiar pay understand great risk provide con...,2,1,0,9
21070,78311,flawless credit chase credit card chase freedo...,7,7,0,6


In [5]:
# import ast

# def string_list(text):
#     text_list = ast.literal_eval(text)
#     return ' '.join(text_list)

# df['preprocessed_text'] = df['preprocessed_text'].apply(string_list)
# df

In [6]:
# subjectivity
def get_subjectivity(text):
    """Return the TextBlob subjectivity score of ``text``.

    The value is whatever ``TextBlob(text).sentiment.subjectivity`` reports
    (documented in this notebook as 0-1, where 1 is extremely subjective).
    """
    blob = TextBlob(text)
    return blob.sentiment.subjectivity

In [7]:
# polarity
# One module-level VADER analyzer instance; get_compound_polarity reads this
# global on every call instead of constructing a new analyzer each time.
sentiment_analyzer = SentimentIntensityAnalyzer()

def get_compound_polarity(text: str) -> float:
    """Return the ``'compound'`` entry of the VADER polarity scores for ``text``.

    Uses the module-level ``sentiment_analyzer``. The notebook documents the
    polarity range as -1 to 1.
    """
    scores = sentiment_analyzer.polarity_scores(text)
    return scores['compound']

In [8]:
# Score every complaint text element-wise: one TextBlob subjectivity value and
# one VADER compound polarity value per row.
df['subjectivity'] = df['preprocessed_text'].map(get_subjectivity)
df['compound_polarity'] = df['preprocessed_text'].map(get_compound_polarity)

In [9]:
# topic frequency
# we choose topic model nmf
# topic frequency = topic_count / total_count

# Drop any topic_frequency column left by a previous run of this cell;
# otherwise the merge below would emit topic_frequency_x / topic_frequency_y
# and the later priority_score cell would fail with a KeyError.
df = df.drop(columns='topic_frequency', errors='ignore')

# Name the value column and the key column explicitly instead of relying on
# value_counts() calling its result 'count' (true only for pandas >= 2.0;
# on older versions the original rename was a silent no-op).
topic_frequencies = (
    (df['topic_nmf'].value_counts() / len(df))
    .rename('topic_frequency')
    .rename_axis('topic_nmf')
    .reset_index()
)

# Attach each row's topic share, then restore the old_index ordering and a
# clean 0..n-1 positional index.
df = (
    pd.merge(left=df, right=topic_frequencies, on='topic_nmf', how='inner')
    .sort_values(by=['old_index'], ascending=True)
    .reset_index(drop=True)
)

In [10]:
# Display the frame, which now carries the subjectivity, compound_polarity
# and topic_frequency columns.
df

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency
0,1,good morning name appreciate could help put st...,0,5,0,34,0.445238,0.8402,0.172361
1,2,upgraded card tell agent upgrade anniversary d...,3,3,0,2,0.900000,-0.5812,0.149535
2,10,chase card report however fraudulent applicati...,6,2,0,16,0.000000,-0.1446,0.078398
3,11,try book ticket come across offer apply toward...,3,7,0,24,0.471429,0.9058,0.181948
4,14,grand son give check deposit chase account fun...,4,4,0,0,0.753333,0.9215,0.099279
...,...,...,...,...,...,...,...,...,...
21067,78303,chase card customer well decade offer multiple...,3,2,0,2,0.291288,0.6369,0.078398
21068,78309,wednesday call chas visa credit card provider ...,8,5,0,39,0.324318,-0.9377,0.172361
21069,78310,familiar pay understand great risk provide con...,2,1,0,9,0.468801,0.0641,0.113326
21070,78311,flawless credit chase credit card chase freedo...,7,7,0,6,0.475247,0.9716,0.181948


In [11]:
# Most subjective complaints first (display only; df is not modified).
df.sort_values(by='subjectivity', ascending=False)

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency
14154,47890,give authorization help bank dispute behalf un...,5,3,0,18,1.0,-0.6369,0.149535
7466,21889,chase bank mortgage account bing hold chase re...,2,6,0,38,1.0,-0.8689,0.104594
1105,3563,never own account,6,3,5,82,1.0,0.0000,0.149535
20614,75849,deal seller transfer fund though soon send mon...,0,4,0,9,1.0,-0.7367,0.099279
4436,12294,charge airport lounge service wonder included ...,8,1,0,60,1.0,0.8074,0.113326
...,...,...,...,...,...,...,...,...,...
4929,14986,allow vender charge account item order cause l...,8,7,0,1,0.0,0.2769,0.181948
20266,75037,buy fully unlock silver sell package deliver p...,8,5,0,20,0.0,-0.4215,0.172361
15516,54741,want add motion information file federal bankr...,2,5,0,35,0.0,0.4767,0.172361
11667,32897,account result fraud,6,3,5,21,0.0,-0.5859,0.149535


In [12]:
# Lowest compound polarity first (display only; df is not modified).
df.sort_values(by='compound_polarity')

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency
17655,64994,fraud ing scam use document fraud ing indiana ...,5,5,0,45,0.483770,-1.0000,0.172361
5295,15832,fraud ing scam use indiana document fraud ing ...,5,5,0,45,0.403480,-0.9999,0.172361
5289,15814,den ged credit card throw card company card co...,5,6,0,66,0.025000,-0.9996,0.104594
5580,17342,last vacation experience bank fraud begin shor...,5,1,0,48,0.342283,-0.9993,0.113326
9467,28689,open credit card chase approve amazon credit c...,5,2,0,7,0.448457,-0.9993,0.078398
...,...,...,...,...,...,...,...,...,...
4741,14476,forward message date wed subject fwd follow po...,0,5,0,46,0.504554,0.9999,0.172361
14251,48112,urgent president commissary majority owner pro...,2,5,0,64,0.360492,0.9999,0.172361
5880,18007,reason apply receive mile sign bonus chase den...,3,6,0,2,0.436166,0.9999,0.104594
4662,14281,apply approve brand card chase accord term con...,3,2,0,2,0.448617,0.9999,0.078398


In [13]:
# Show the first two rows for each distinct topic_frequency value, with the
# highest frequency listed first (display only).
(
    df.sort_values(by='topic_frequency', ascending=False)
    .groupby('topic_frequency')
    .head(2)
)

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency
21071,78312,roughly year ago close account morgan chase ba...,7,7,0,73,0.52,-0.2023,0.181948
14818,54000,finance new car purchase chase bank auto deale...,0,7,0,11,0.421372,0.9619,0.181948
9961,29553,line draw reference allege loan reference loan...,2,5,0,55,0.474053,-0.5423,0.172361
10033,29625,saw take account via authorize aware transacti...,0,5,0,0,0.25,-0.8416,0.172361
2193,6014,chase southwest rapid reward credit card check...,4,3,0,68,0.338636,0.9274,0.149535
11654,32864,chase bank close new check save account withou...,4,3,0,8,0.444805,0.8271,0.149535
12698,40200,chase bank contact repeatedly least time per d...,0,1,0,27,0.416667,-0.6705,0.113326
20476,75490,sometime middle get letter chase state card sh...,0,1,0,48,0.316705,0.8358,0.113326
16331,58074,lender fail respond notice error request send ...,2,6,0,23,0.383929,-0.9545,0.104594
5075,15454,applied chase home loan also bank chase since ...,0,6,0,91,0.54,0.9849,0.104594


In [14]:
# priority = subjectivity - polarity + topic_frequency
# A higher score means more subjective, lower polarity, and a more frequent topic.
df['priority_score'] = (
    df['subjectivity']
    .sub(df['compound_polarity'])
    .add(df['topic_frequency'])
)
df

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency,priority_score
0,1,good morning name appreciate could help put st...,0,5,0,34,0.445238,0.8402,0.172361,-0.222600
1,2,upgraded card tell agent upgrade anniversary d...,3,3,0,2,0.900000,-0.5812,0.149535,1.630735
2,10,chase card report however fraudulent applicati...,6,2,0,16,0.000000,-0.1446,0.078398,0.222998
3,11,try book ticket come across offer apply toward...,3,7,0,24,0.471429,0.9058,0.181948,-0.252424
4,14,grand son give check deposit chase account fun...,4,4,0,0,0.753333,0.9215,0.099279,-0.068888
...,...,...,...,...,...,...,...,...,...,...
21067,78303,chase card customer well decade offer multiple...,3,2,0,2,0.291288,0.6369,0.078398,-0.267214
21068,78309,wednesday call chas visa credit card provider ...,8,5,0,39,0.324318,-0.9377,0.172361,1.434380
21069,78310,familiar pay understand great risk provide con...,2,1,0,9,0.468801,0.0641,0.113326,0.518027
21070,78311,flawless credit chase credit card chase freedo...,7,7,0,6,0.475247,0.9716,0.181948,-0.314405


In [15]:
# Order complaints from highest to lowest priority_score; the original row
# labels are kept as the index of the sorted frame.
sort_by_priority_score = df.sort_values(by='priority_score', ascending=False)
sort_by_priority_score

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency,priority_score
19414,71999,attempt collect debt discharge harass workplac...,2,6,0,15,1.000,-0.8807,0.104594,1.985294
2339,6340,issue chase southwest credit card four fraudul...,5,2,0,-1,1.000,-0.9022,0.078398,1.980598
7466,21889,chase bank mortgage account bing hold chase re...,2,6,0,38,1.000,-0.8689,0.104594,1.973494
11084,31616,chase bank customer many year check pre pay ca...,6,3,0,-1,0.875,-0.9435,0.149535,1.968035
18751,70637,debt pay chase charge debt issue irs send wron...,6,5,0,38,0.900,-0.8934,0.172361,1.965761
...,...,...,...,...,...,...,...,...,...,...
227,534,close credit card merchant refund credit card ...,6,2,0,72,0.000,0.9313,0.078398,-0.852902
3957,11287,rec ved amazon chase credit card day get cut s...,0,2,0,7,0.000,0.9413,0.078398,-0.862902
367,923,chase credit card close without tell chase ref...,3,2,0,2,0.000,0.9595,0.078398,-0.881102
16597,61254,apply approve southwest credit card use card c...,3,2,0,2,0.000,0.9657,0.078398,-0.887302


In [16]:
def abc_ranking(priority_rank_score, high_cutoff=0.2, medium_cutoff=0.5):
    """Map a rank fraction to an ABC priority label.

    Parameters
    ----------
    priority_rank_score : float
        Position of a ticket in the descending-priority ordering, expressed
        as a fraction between 0 and 1 (smaller means higher priority).
    high_cutoff : float, default 0.2
        Largest fraction (inclusive) still labelled high priority.
    medium_cutoff : float, default 0.5
        Largest fraction (inclusive) still labelled medium priority.

    Returns
    -------
    str
        ``'1'`` for high, ``'2'`` for medium, ``'3'`` for low priority.
        Anything that fails both comparisons (values above
        ``medium_cutoff``, or NaN) is labelled ``'3'``.
    """
    if priority_rank_score <= high_cutoff:
        return '1'  # high priority
    # Reaching here already implies the score is above high_cutoff, so only
    # the upper bound of the medium band needs checking.
    if priority_rank_score <= medium_cutoff:
        return '2'  # medium priority
    return '3'  # low priority

In [17]:
# First reset: keep the pre-sort row label as the 'current_index' column.
# Second reset: expose the 0-based position in the priority ordering as 'sort_index'.
calculate_priority_df = (
    sort_by_priority_score
    .reset_index(names='current_index')
    .reset_index(names='sort_index')
)
calculate_priority_df

Unnamed: 0,sort_index,current_index,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency,priority_score
0,0,19414,71999,attempt collect debt discharge harass workplac...,2,6,0,15,1.000,-0.8807,0.104594,1.985294
1,1,2339,6340,issue chase southwest credit card four fraudul...,5,2,0,-1,1.000,-0.9022,0.078398,1.980598
2,2,7466,21889,chase bank mortgage account bing hold chase re...,2,6,0,38,1.000,-0.8689,0.104594,1.973494
3,3,11084,31616,chase bank customer many year check pre pay ca...,6,3,0,-1,0.875,-0.9435,0.149535,1.968035
4,4,18751,70637,debt pay chase charge debt issue irs send wron...,6,5,0,38,0.900,-0.8934,0.172361,1.965761
...,...,...,...,...,...,...,...,...,...,...,...,...
21067,21067,227,534,close credit card merchant refund credit card ...,6,2,0,72,0.000,0.9313,0.078398,-0.852902
21068,21068,3957,11287,rec ved amazon chase credit card day get cut s...,0,2,0,7,0.000,0.9413,0.078398,-0.862902
21069,21069,367,923,chase credit card close without tell chase ref...,3,2,0,2,0.000,0.9595,0.078398,-0.881102
21070,21070,16597,61254,apply approve southwest credit card use card c...,3,2,0,2,0.000,0.9657,0.078398,-0.887302


In [18]:
# abc ranking (20-30-50)
# Turn each row's 1-based position in the priority ordering into a fraction
# of the ranked frame, then bucket that fraction with abc_ranking.
# The denominator is the ranked frame's own length rather than df's, so the
# fraction cannot exceed 1 if df is changed between cells.
calculate_priority_df['priority_rank_score'] = (
    (calculate_priority_df['sort_index'] + 1) / len(calculate_priority_df)
)
calculate_priority_df['priority'] = calculate_priority_df['priority_rank_score'].apply(abc_ranking)

In [19]:
# Display the ranked frame, now including priority_rank_score and priority.
calculate_priority_df

Unnamed: 0,sort_index,current_index,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency,priority_score,priority_rank_score,priority
0,0,19414,71999,attempt collect debt discharge harass workplac...,2,6,0,15,1.000,-0.8807,0.104594,1.985294,0.000047,1
1,1,2339,6340,issue chase southwest credit card four fraudul...,5,2,0,-1,1.000,-0.9022,0.078398,1.980598,0.000095,1
2,2,7466,21889,chase bank mortgage account bing hold chase re...,2,6,0,38,1.000,-0.8689,0.104594,1.973494,0.000142,1
3,3,11084,31616,chase bank customer many year check pre pay ca...,6,3,0,-1,0.875,-0.9435,0.149535,1.968035,0.000190,1
4,4,18751,70637,debt pay chase charge debt issue irs send wron...,6,5,0,38,0.900,-0.8934,0.172361,1.965761,0.000237,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21067,21067,227,534,close credit card merchant refund credit card ...,6,2,0,72,0.000,0.9313,0.078398,-0.852902,0.999810,3
21068,21068,3957,11287,rec ved amazon chase credit card day get cut s...,0,2,0,7,0.000,0.9413,0.078398,-0.862902,0.999858,3
21069,21069,367,923,chase credit card close without tell chase ref...,3,2,0,2,0.000,0.9595,0.078398,-0.881102,0.999905,3
21070,21070,16597,61254,apply approve southwest credit card use card c...,3,2,0,2,0.000,0.9657,0.078398,-0.887302,0.999953,3


In [20]:
# Number of complaints in each priority bucket.
calculate_priority_df.value_counts(subset='priority')

priority
3    10536
2     6322
1     4214
Name: count, dtype: int64

In [21]:
# Share of complaints in each priority bucket, as a percentage of df's row count.
calculate_priority_df.value_counts(subset='priority').div(df.shape[0]).mul(100)

priority
3    50.000000
2    30.001898
1    19.998102
Name: count, dtype: float64

In [22]:
# Restore the pre-sort row labels as the index and drop the intermediate
# scoring columns, leaving the input columns plus the final 'priority' label.
# Written without inplace=True and with guards so the cell can be re-run:
# on a second run 'current_index' is no longer a column and the scoring
# columns are already gone, which previously raised KeyError.
if 'current_index' in calculate_priority_df.columns:
    calculate_priority_df = calculate_priority_df.set_index('current_index')
calculate_priority_df = calculate_priority_df.drop(
    columns=[
        'sort_index',
        'subjectivity',
        'compound_polarity',
        'topic_frequency',
        'priority_score',
        'priority_rank_score',
    ],
    errors='ignore',
)

In [23]:
# Clear the index label so the index has no name.
calculate_priority_df.index.names = [None]
# NOTE(review): sort_index() returns a new, sorted frame that is only displayed
# here; it is not assigned back, so calculate_priority_df keeps its current row
# order. Confirm whether the sorted order was meant to be kept.
calculate_priority_df.sort_index()

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,priority
0,1,good morning name appreciate could help put st...,0,5,0,34,3
1,2,upgraded card tell agent upgrade anniversary d...,3,3,0,2,1
2,10,chase card report however fraudulent applicati...,6,2,0,16,3
3,11,try book ticket come across offer apply toward...,3,7,0,24,3
4,14,grand son give check deposit chase account fun...,4,4,0,0,3
...,...,...,...,...,...,...,...
21067,78303,chase card customer well decade offer multiple...,3,2,0,2,3
21068,78309,wednesday call chas visa credit card provider ...,8,5,0,39,1
21069,78310,familiar pay understand great risk provide con...,2,1,0,9,2
21070,78311,flawless credit chase credit card chase freedo...,7,7,0,6,3


In [24]:
# Write the prioritised complaints to the stage-3 CSV. The index is written
# as the first column, and rows are saved in the frame's current order.
calculate_priority_df.to_csv('process_csv_stage_3.csv', index=True)