references: https://www.kaggle.com/code/fengliplatform/customer-sentiment-analysis

In [1]:
# generate ticket priority
# based on subjectivity (TextBlob), compound polarity (VADER), and topic frequency

# complaint subjectivity (0 to 1), where 1 is extremely subjective
# complaint polarity (-1 to 1), where -1 is the most negative
# topic frequency = topic count / total number of complaints

# priority = subjectivity - polarity + topic_frequency

# then ABC ranking (20-30-50): top 20% of scores -> priority 1, next 30% -> priority 2, remaining 50% -> priority 3

In [2]:
import pandas as pd
import json
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns

# import nltk
# nltk.download('vader_lexicon')

from textblob import TextBlob
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [3]:
def get_processed_df(csv_path):
    """Read the CSV at ``csv_path`` into a DataFrame.

    The file is parsed with ``pd.read_csv`` using its default options and the
    resulting frame is returned unchanged.
    """
    return pd.read_csv(csv_path)

In [4]:
# Load 'process_csv_stage_2.csv' (presumably the previous pipeline stage's output — confirm)
# and bind it to the notebook-wide name `df`.
df = get_processed_df('process_csv_stage_2.csv')
# Bare expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp
0,1,"['good', 'morning', 'name', 'appreciate', 'cou...",22,11,0,0
1,2,"['upgraded', 'card', 'tell', 'agent', 'upgrade...",26,10,0,44
2,10,"['chase', 'card', 'report', 'however', 'fraudu...",33,12,0,0
3,11,"['try', 'book', 'ticket', 'come', 'across', 'o...",26,7,0,0
4,14,"['grand', 'son', 'give', 'check', 'deposit', '...",25,4,0,56
...,...,...,...,...,...,...
21067,78303,"['chase', 'card', 'customer', 'well', 'decade'...",26,12,0,0
21068,78309,"['wednesday', 'call', 'chas', 'visa', 'credit'...",2,9,0,26
21069,78310,"['familiar', 'pay', 'understand', 'great', 'ri...",34,5,0,12
21070,78311,"['flawless', 'credit', 'chase', 'credit', 'car...",7,10,0,1


In [5]:
import ast

def string_list(text):
    """Convert a stringified list of tokens into one space-separated string.

    ``text`` is parsed as a Python literal with ``ast.literal_eval`` and the
    resulting items are joined with single spaces.
    """
    return ' '.join(ast.literal_eval(text))

# Replace each stringified token list with its space-joined text form.
df['preprocessed_text'] = df['preprocessed_text'].map(string_list)
df

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp
0,1,good morning name appreciate could help put st...,22,11,0,0
1,2,upgraded card tell agent upgrade anniversary d...,26,10,0,44
2,10,chase card report however fraudulent applicati...,33,12,0,0
3,11,try book ticket come across offer apply toward...,26,7,0,0
4,14,grand son give check deposit chase account fun...,25,4,0,56
...,...,...,...,...,...,...
21067,78303,chase card customer well decade offer multiple...,26,12,0,0
21068,78309,wednesday call chas visa credit card provider ...,2,9,0,26
21069,78310,familiar pay understand great risk provide con...,34,5,0,12
21070,78311,flawless credit chase credit card chase freedo...,7,10,0,1


In [6]:
# subjectivity
def get_subjectivity(text):
    """Return the TextBlob subjectivity score of ``text``.

    Per this notebook's notes the score ranges from 0 to 1, where 1 is
    extremely subjective.
    """
    blob = TextBlob(text)
    return blob.sentiment.subjectivity

In [7]:
# polarity
# A single VADER analyzer is created once and reused for every text.
sentiment_analyzer = SentimentIntensityAnalyzer()

def get_compound_polarity(text: str) -> float:
    """Return the VADER ``compound`` score of ``text`` (noted in this notebook as -1 to 1)."""
    scores = sentiment_analyzer.polarity_scores(text)
    return scores['compound']

In [8]:
# Score every complaint text: TextBlob subjectivity and VADER compound polarity.
df['subjectivity'] = df['preprocessed_text'].map(get_subjectivity)
df['compound_polarity'] = df['preprocessed_text'].map(get_compound_polarity)

In [9]:
# topic frequency
# we choose topic model nmf
# topic frequency = topic_count / total_count

# Build the per-topic share with explicit names instead of relying on the
# 'count' column name that only pandas >= 2.0 gives to value_counts results.
topic_frequencies = (
    df['topic_nmf']
    .value_counts()
    .div(df.shape[0])
    .rename_axis('topic_nmf')
    .rename('topic_frequency')
    .reset_index()
)

# Drop any topic_frequency left over from a previous run of this cell so the
# merge never produces topic_frequency_x / topic_frequency_y; this keeps the
# cell safe to re-run. Row order and index are restored exactly as before:
# ascending by old_index with a fresh 0..n-1 index.
df = (
    df.drop(columns='topic_frequency', errors='ignore')
    .merge(topic_frequencies, on='topic_nmf', how='inner')
    .sort_values(by=['old_index'], ascending=True)
    .reset_index(drop=True)
)

In [10]:
# Show the frame with the subjectivity, compound_polarity and topic_frequency columns added.
df

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency
0,1,good morning name appreciate could help put st...,22,11,0,0,0.445238,0.8402,0.030799
1,2,upgraded card tell agent upgrade anniversary d...,26,10,0,44,0.900000,-0.5812,0.102031
2,10,chase card report however fraudulent applicati...,33,12,0,0,0.000000,-0.1446,0.077496
3,11,try book ticket come across offer apply toward...,26,7,0,0,0.471429,0.9058,0.087415
4,14,grand son give check deposit chase account fun...,25,4,0,56,0.753333,0.9215,0.038535
...,...,...,...,...,...,...,...,...,...
21067,78303,chase card customer well decade offer multiple...,26,12,0,0,0.291288,0.6369,0.077496
21068,78309,wednesday call chas visa credit card provider ...,2,9,0,26,0.324318,-0.9377,0.030135
21069,78310,familiar pay understand great risk provide con...,34,5,0,12,0.468801,0.0641,0.270928
21070,78311,flawless credit chase credit card chase freedo...,7,10,0,1,0.475247,0.9716,0.102031


In [11]:
# Inspect the extremes of subjectivity, most subjective complaints first.
df.sort_values('subjectivity', ascending=False)

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency
14154,47890,give authorization help bank dispute behalf un...,11,5,0,12,1.0,-0.6369,0.270928
7466,21889,chase bank mortgage account bing hold chase re...,21,9,0,18,1.0,-0.8689,0.030135
1105,3563,never own account,6,14,5,16,1.0,0.0000,0.008352
20614,75849,deal seller transfer fund though soon send mon...,22,5,0,12,1.0,-0.7367,0.270928
4436,12294,charge airport lounge service wonder included ...,26,1,0,0,1.0,0.8074,0.078778
...,...,...,...,...,...,...,...,...,...
4929,14986,allow vender charge account item order cause l...,2,7,0,1,0.0,0.2769,0.087415
20266,75037,buy fully unlock silver sell package deliver p...,2,9,0,6,0.0,-0.4215,0.030135
15516,54741,want add motion information file federal bankr...,21,9,0,19,0.0,0.4767,0.030135
11667,32897,account result fraud,6,5,5,13,0.0,-0.5859,0.270928


In [12]:
# Inspect the extremes of polarity, lowest compound score first.
df.sort_values('compound_polarity')

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency
17655,64994,fraud ing scam use document fraud ing indiana ...,5,5,0,24,0.483770,-1.0000,0.270928
5295,15832,fraud ing scam use indiana document fraud ing ...,5,5,0,24,0.403480,-0.9999,0.270928
5289,15814,den ged credit card throw card company card co...,5,5,0,47,0.025000,-0.9996,0.270928
5580,17342,last vacation experience bank fraud begin shor...,28,5,0,-1,0.342283,-0.9993,0.270928
9467,28689,open credit card chase approve amazon credit c...,34,10,0,3,0.448457,-0.9993,0.102031
...,...,...,...,...,...,...,...,...,...
4741,14476,forward message date wed subject fwd follow po...,31,11,0,20,0.504554,0.9999,0.030799
14251,48112,urgent president commissary majority owner pro...,21,1,0,39,0.360492,0.9999,0.078778
5880,18007,reason apply receive mile sign bonus chase den...,26,10,0,5,0.436166,0.9999,0.102031
4662,14281,apply approve brand card chase accord term con...,26,10,0,0,0.448617,0.9999,0.102031


In [13]:
# First two rows for each distinct topic_frequency value, after ordering the
# frame from the highest topic_frequency to the lowest.
(
    df.sort_values('topic_frequency', ascending=False)
    .groupby('topic_frequency')
    .head(2)
)

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency
15730,55791,never bounce check insufficient fund bank bill...,22,5,0,31,0.465584,-0.6801,0.270928
5951,18170,make deposit chase high school check account s...,22,5,0,61,0.546667,0.7003,0.270928
13322,43022,morgan chase close credit card account without...,29,10,0,0,0.52914,0.5384,0.102031
1978,5566,update without warn chase take away rewards po...,3,10,0,0,0.565321,-0.9621,0.102031
12493,35320,another name list credit report also chase acc...,33,7,0,37,0.0,0.6369,0.087415
10484,30104,account close bankruptcy file,14,7,2,19,0.0,0.0,0.087415
13260,42918,receive letter state chase require submit inco...,31,1,0,-1,0.625,-0.296,0.078778
12606,39989,complaint regard chase failure remove dispute ...,9,1,0,6,0.351923,-0.9801,0.078778
1456,4382,hello open credit application rand credit twic...,29,12,0,4,0.45,0.8126,0.077496
8551,25119,well discover credit card use without knowledg...,29,12,0,16,0.4,0.8623,0.077496


In [14]:
# priority = subjectivity - polarity + topic_frequency
df['priority_score'] = (
    df['subjectivity']
    .sub(df['compound_polarity'])
    .add(df['topic_frequency'])
)
df

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency,priority_score
0,1,good morning name appreciate could help put st...,22,11,0,0,0.445238,0.8402,0.030799,-0.364163
1,2,upgraded card tell agent upgrade anniversary d...,26,10,0,44,0.900000,-0.5812,0.102031,1.583231
2,10,chase card report however fraudulent applicati...,33,12,0,0,0.000000,-0.1446,0.077496,0.222096
3,11,try book ticket come across offer apply toward...,26,7,0,0,0.471429,0.9058,0.087415,-0.346957
4,14,grand son give check deposit chase account fun...,25,4,0,56,0.753333,0.9215,0.038535,-0.129632
...,...,...,...,...,...,...,...,...,...,...
21067,78303,chase card customer well decade offer multiple...,26,12,0,0,0.291288,0.6369,0.077496,-0.268116
21068,78309,wednesday call chas visa credit card provider ...,2,9,0,26,0.324318,-0.9377,0.030135,1.292153
21069,78310,familiar pay understand great risk provide con...,34,5,0,12,0.468801,0.0641,0.270928,0.675629
21070,78311,flawless credit chase credit card chase freedo...,7,10,0,1,0.475247,0.9716,0.102031,-0.394322


In [15]:
# Order the tickets from the highest priority_score to the lowest.
sort_by_priority_score = df.sort_values('priority_score', ascending=False)
sort_by_priority_score

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency,priority_score
11084,31616,chase bank customer many year check pre pay ca...,6,5,0,-1,0.875,-0.9435,0.270928,2.089428
6871,20225,notice unusual unauthorized transaction accoun...,9,5,0,12,1.000,-0.8011,0.270928,2.072028
3760,10863,try contact dispute dept morgan chase bank pdt...,17,5,0,-1,0.850,-0.9501,0.270928,2.071028
6883,20250,chase card close charge card fraud likely use ...,9,5,0,0,1.000,-0.7910,0.270928,2.061928
17899,69422,noticed bank take money one account pay bill p...,22,5,0,1,0.900,-0.8885,0.270928,2.059428
...,...,...,...,...,...,...,...,...,...,...
14645,53822,around end receive chase amazon reward credit ...,29,8,0,3,0.000,0.9081,0.012291,-0.895809
9305,28248,inquiry credit report customer service could h...,33,12,0,4,0.000,0.9814,0.077496,-0.903904
14723,53903,dear chase bank submit claim credit card scamm...,22,12,0,51,0.000,0.9847,0.077496,-0.907204
18694,70579,apply amazon com reward visa card promise amaz...,26,8,0,3,0.000,0.9246,0.012291,-0.912309


In [16]:
def abc_ranking(priority_rank_score, high_cutoff=0.2, medium_cutoff=0.5):
    """Map a normalized rank position to an ABC priority label.

    Args:
        priority_rank_score: Position of a ticket in the descending priority
            ordering, expressed as a fraction of all tickets (expected range
            0 to 1; smaller means more urgent).
        high_cutoff: Inclusive upper bound of the high-priority band.
            Defaults to 0.2 (top 20%).
        medium_cutoff: Inclusive upper bound of the medium-priority band.
            Defaults to 0.5 (next 30%).

    Returns:
        '1' for high priority, '2' for medium priority, '3' for low priority.

    Raises:
        ValueError: If ``high_cutoff`` is greater than ``medium_cutoff``.
    """
    if high_cutoff > medium_cutoff:
        raise ValueError('high_cutoff must not exceed medium_cutoff')
    if priority_rank_score <= high_cutoff:
        return '1'  # high priority
    if priority_rank_score <= medium_cutoff:
        return '2'  # medium priority
    # Anything beyond medium_cutoff lands here; NaN also does, because it
    # fails both comparisons above.
    return '3'  # low priority

In [17]:
# First reset keeps the pre-sort row label as `current_index`; the second one
# exposes the 0-based position in the sorted order as `sort_index`.
calculate_priority_df = (
    sort_by_priority_score
    .reset_index(names='current_index')
    .reset_index(names='sort_index')
)
calculate_priority_df

Unnamed: 0,sort_index,current_index,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency,priority_score
0,0,11084,31616,chase bank customer many year check pre pay ca...,6,5,0,-1,0.875,-0.9435,0.270928,2.089428
1,1,6871,20225,notice unusual unauthorized transaction accoun...,9,5,0,12,1.000,-0.8011,0.270928,2.072028
2,2,3760,10863,try contact dispute dept morgan chase bank pdt...,17,5,0,-1,0.850,-0.9501,0.270928,2.071028
3,3,6883,20250,chase card close charge card fraud likely use ...,9,5,0,0,1.000,-0.7910,0.270928,2.061928
4,4,17899,69422,noticed bank take money one account pay bill p...,22,5,0,1,0.900,-0.8885,0.270928,2.059428
...,...,...,...,...,...,...,...,...,...,...,...,...
21067,21067,14645,53822,around end receive chase amazon reward credit ...,29,8,0,3,0.000,0.9081,0.012291,-0.895809
21068,21068,9305,28248,inquiry credit report customer service could h...,33,12,0,4,0.000,0.9814,0.077496,-0.903904
21069,21069,14723,53903,dear chase bank submit claim credit card scamm...,22,12,0,51,0.000,0.9847,0.077496,-0.907204
21070,21070,18694,70579,apply amazon com reward visa card promise amaz...,26,8,0,3,0.000,0.9246,0.012291,-0.912309


In [18]:
# abc ranking (20-30-50)
# 1-based position in the sorted order, expressed as a fraction of df's row count.
# NOTE(review): the denominator comes from `df`, not from calculate_priority_df;
# the two have the same number of rows here — confirm this stays true.
calculate_priority_df['priority_rank_score'] = (
    calculate_priority_df['sort_index'].add(1).div(df.shape[0])
)
calculate_priority_df['priority'] = calculate_priority_df['priority_rank_score'].map(abc_ranking)

In [19]:
calculate_priority_df

Unnamed: 0,sort_index,current_index,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency,priority_score,priority_rank_score,priority
0,0,11084,31616,chase bank customer many year check pre pay ca...,6,5,0,-1,0.875,-0.9435,0.270928,2.089428,0.000047,1
1,1,6871,20225,notice unusual unauthorized transaction accoun...,9,5,0,12,1.000,-0.8011,0.270928,2.072028,0.000095,1
2,2,3760,10863,try contact dispute dept morgan chase bank pdt...,17,5,0,-1,0.850,-0.9501,0.270928,2.071028,0.000142,1
3,3,6883,20250,chase card close charge card fraud likely use ...,9,5,0,0,1.000,-0.7910,0.270928,2.061928,0.000190,1
4,4,17899,69422,noticed bank take money one account pay bill p...,22,5,0,1,0.900,-0.8885,0.270928,2.059428,0.000237,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21067,21067,14645,53822,around end receive chase amazon reward credit ...,29,8,0,3,0.000,0.9081,0.012291,-0.895809,0.999810,3
21068,21068,9305,28248,inquiry credit report customer service could h...,33,12,0,4,0.000,0.9814,0.077496,-0.903904,0.999858,3
21069,21069,14723,53903,dear chase bank submit claim credit card scamm...,22,12,0,51,0.000,0.9847,0.077496,-0.907204,0.999905,3
21070,21070,18694,70579,apply amazon com reward visa card promise amaz...,26,8,0,3,0.000,0.9246,0.012291,-0.912309,0.999953,3


In [20]:
# Number of tickets assigned to each priority label.
calculate_priority_df.value_counts('priority')

priority
3    10536
2     6322
1     4214
Name: count, dtype: int64

In [21]:
# The same counts as a percentage; the denominator is the row count of `df`.
calculate_priority_df.value_counts('priority')/df.shape[0]*100

priority
3    50.000000
2    30.001898
1    19.998102
Name: count, dtype: float64

In [22]:
# Restore the pre-sort row labels as the index and keep only the input
# columns plus the final `priority` label.
calculate_priority_df = (
    calculate_priority_df
    .set_index('current_index')
    .drop(columns=[
        'sort_index',
        'subjectivity',
        'compound_polarity',
        'topic_frequency',
        'priority_score',
        'priority_rank_score',
    ])
)

In [26]:
# Clear the index name so the index is unlabeled again.
calculate_priority_df.index.name = None
# Displays a copy ordered by index. The result is not assigned, so
# calculate_priority_df itself keeps its priority-sorted row order.
# NOTE(review): if the CSV written afterwards is meant to be in index order,
# this result needs to be assigned — confirm intent.
calculate_priority_df.sort_index(axis=0)

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,priority
0,1,good morning name appreciate could help put st...,22,11,0,0,3
1,2,upgraded card tell agent upgrade anniversary d...,26,10,0,44,1
2,10,chase card report however fraudulent applicati...,33,12,0,0,3
3,11,try book ticket come across offer apply toward...,26,7,0,0,3
4,14,grand son give check deposit chase account fun...,25,4,0,56,3
...,...,...,...,...,...,...,...
21067,78303,chase card customer well decade offer multiple...,26,12,0,0,3
21068,78309,wednesday call chas visa credit card provider ...,2,9,0,26,1
21069,78310,familiar pay understand great risk provide con...,34,5,0,12,2
21070,78311,flawless credit chase credit card chase freedo...,7,10,0,1,3


In [27]:
# Persist the prioritized tickets. With default arguments to_csv also writes
# the index (the former current_index values) as the first column.
calculate_priority_df.to_csv('process_csv_stage_3.csv')