**Generating Ticket Priority**
- Factors:
    - **Subjectivity**: `sentiment.subjectivity` of the text, computed with the TextBlob API (0 = objective, 1 = subjective)
    - **Polarity**: compound polarity score from NLTK's `SentimentIntensityAnalyzer`, based on the VADER lexicon (-1 = most negative, +1 = most positive)
    - **Topic Frequency**: number of documents assigned to the topic divided by the total number of documents
- Priority Score: 
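    - Priority Score = Subjectivity - Polarity + Topic Frequency (polarity is subtracted so that more negative tickets score higher; more subjective tickets and tickets on more frequent topics also score higher)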
- Priority Level:
    - Order by Priority Score
    - Perform ABC Ranking: Top 20% have a priority of 1, Next 30% have a priority of 2, Last 50% have a priority of 3

In [1]:
import pandas as pd

# import nltk
# nltk.download('vader_lexicon')

from textblob import TextBlob
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# load dataset

In [2]:
def get_processed_df(csv_path):
    """Read the CSV file at `csv_path` and return its contents as a DataFrame.

    Parameters
    ----------
    csv_path : path or buffer accepted by `pandas.read_csv`.

    Returns
    -------
    pandas.DataFrame
        The parsed file, using `read_csv` defaults.
    """
    return pd.read_csv(csv_path)

In [3]:
# Load 'process_csv_stage_2.csv' into the working frame `df`, then display it.
df = get_processed_df('process_csv_stage_2.csv')
df

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp
0,1,good morning name appreciate could help put st...,2,2,0,34
1,2,upgraded card tell agent upgrade anniversary d...,5,6,0,2
2,10,chase card report however fraudulent applicati...,2,4,0,16
3,11,try book ticket come across offer apply toward...,5,4,0,24
4,14,grand son give check deposit chase account fun...,6,1,0,0
...,...,...,...,...,...,...
21067,78303,chase card customer well decade offer multiple...,5,6,0,2
21068,78309,wednesday call chas visa credit card provider ...,8,5,0,39
21069,78310,familiar pay understand great risk provide con...,3,2,0,9
21070,78311,flawless credit chase credit card chase freedo...,4,0,0,6


# calculate priority score

In [4]:
# subjectivity
def get_subjectivity(text):
    """Return the TextBlob subjectivity score of `text`.

    The value is read from `TextBlob(text).sentiment.subjectivity`.
    """
    blob = TextBlob(text)
    sentiment = blob.sentiment
    return sentiment.subjectivity

In [5]:
# polarity (adapted from https://www.kaggle.com/code/fengliplatform/customer-sentiment-analysis)
# One shared analyzer is created here and reused by every call below.
try:
    sentiment_analyzer = SentimentIntensityAnalyzer()
except LookupError:
    # The VADER lexicon is missing (e.g. fresh environment): download it once
    # and retry, so the notebook survives Restart Kernel -> Run All.
    import nltk
    nltk.download('vader_lexicon')
    sentiment_analyzer = SentimentIntensityAnalyzer()

def get_compound_polarity(text: str) -> float:
    """Return the VADER compound polarity score of `text`.

    Parameters
    ----------
    text : str
        The text to score.

    Returns
    -------
    float
        The 'compound' entry of `sentiment_analyzer.polarity_scores(text)`.
    """
    return sentiment_analyzer.polarity_scores(text)['compound']

In [6]:
# Score every preprocessed ticket text once per sentiment measure.
texts = df['preprocessed_text']
df['subjectivity'] = texts.map(get_subjectivity)
df['compound_polarity'] = texts.map(get_compound_polarity)

In [7]:
# topic frequency of topics generated by LDA
# topic frequency = topic_count / total_count
chosen_topic_model = 'topic_lda'

# Lookup table with one row per topic: [chosen_topic_model, 'topic_frequency'].
# Both column names are set explicitly instead of relying on the name pandas
# gives the counts ('count' only exists from pandas 2.0 onwards).
topic_frequencies = (
    df[chosen_topic_model]
    .value_counts()
    .div(df.shape[0])
    .rename_axis(chosen_topic_model)
    .rename('topic_frequency')
    .reset_index()
)

# Drop any 'topic_frequency' column left by a previous run of this cell so that
# re-running does not produce 'topic_frequency_x' / 'topic_frequency_y'.
df = (
    df.drop(columns='topic_frequency', errors='ignore')
    .merge(topic_frequencies, on=chosen_topic_model, how='inner')
    .sort_values(by=['old_index'], ascending=True)  # merge may reorder rows; restore old_index order
    .reset_index(drop=True)
)

In [8]:
# Display the frame, now carrying subjectivity, compound_polarity and topic_frequency.
df

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency
0,1,good morning name appreciate could help put st...,2,2,0,34,0.445238,0.8402,0.089550
1,2,upgraded card tell agent upgrade anniversary d...,5,6,0,2,0.900000,-0.5812,0.092113
2,10,chase card report however fraudulent applicati...,2,4,0,16,0.000000,-0.1446,0.089550
3,11,try book ticket come across offer apply toward...,5,4,0,24,0.471429,0.9058,0.092113
4,14,grand son give check deposit chase account fun...,6,1,0,0,0.753333,0.9215,0.163392
...,...,...,...,...,...,...,...,...,...
21067,78303,chase card customer well decade offer multiple...,5,6,0,2,0.291288,0.6369,0.092113
21068,78309,wednesday call chas visa credit card provider ...,8,5,0,39,0.324318,-0.9377,0.204822
21069,78310,familiar pay understand great risk provide con...,3,2,0,9,0.468801,0.0641,0.035592
21070,78311,flawless credit chase credit card chase freedo...,4,0,0,6,0.475247,0.9716,0.069618


In [9]:
# Inspect the rows ordered from highest to lowest subjectivity (display only; df is not modified).
df.sort_values(by=['subjectivity'],ascending=False)

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency
14154,47890,give authorization help bank dispute behalf un...,1,2,0,18,1.0,-0.6369,0.086655
7466,21889,chase bank mortgage account bing hold chase re...,7,2,0,38,1.0,-0.8689,0.067720
1105,3563,never own account,0,3,5,82,1.0,0.0000,0.190537
20614,75849,deal seller transfer fund though soon send mon...,8,2,0,9,1.0,-0.7367,0.204822
4436,12294,charge airport lounge service wonder included ...,5,0,0,60,1.0,0.8074,0.092113
...,...,...,...,...,...,...,...,...,...
4929,14986,allow vender charge account item order cause l...,1,4,0,1,0.0,0.2769,0.086655
20266,75037,buy fully unlock silver sell package deliver p...,5,5,0,20,0.0,-0.4215,0.092113
15516,54741,want add motion information file federal bankr...,7,5,0,35,0.0,0.4767,0.067720
11667,32897,account result fraud,1,3,5,21,0.0,-0.5859,0.086655


In [10]:
# Inspect the rows ordered from lowest (most negative) to highest compound polarity (display only).
df.sort_values(by=['compound_polarity'],ascending=True)

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency
17655,64994,fraud ing scam use document fraud ing indiana ...,3,5,0,45,0.483770,-1.0000,0.035592
5295,15832,fraud ing scam use indiana document fraud ing ...,3,5,0,45,0.403480,-0.9999,0.035592
5289,15814,den ged credit card throw card company card co...,3,5,0,66,0.025000,-0.9996,0.035592
5580,17342,last vacation experience bank fraud begin shor...,1,6,0,48,0.342283,-0.9993,0.086655
9467,28689,open credit card chase approve amazon credit c...,1,0,0,7,0.448457,-0.9993,0.086655
...,...,...,...,...,...,...,...,...,...
4741,14476,forward message date wed subject fwd follow po...,8,0,0,46,0.504554,0.9999,0.204822
14251,48112,urgent president commissary majority owner pro...,2,4,0,64,0.360492,0.9999,0.089550
5880,18007,reason apply receive mile sign bonus chase den...,5,6,0,2,0.436166,0.9999,0.092113
4662,14281,apply approve brand card chase accord term con...,5,6,0,2,0.448617,0.9999,0.092113


In [11]:
# Show up to two example rows per distinct topic_frequency value, highest frequency first.
# NOTE(review): the grouping key is the frequency value, not the topic id, so two topics
# with exactly the same frequency would fall into one group — confirm this is intended.
df.sort_values(by=['topic_frequency'],ascending=False).groupby('topic_frequency').head(2)

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency
10536,30202,charge buying game unable download game repeat...,8,0,0,36,0.4,-0.2732,0.204822
12867,40579,charge account make purchase chase branch tell...,8,5,0,5,0.2,-0.9231,0.204822
8945,26050,try remove authorize user chase account chase ...,0,3,0,81,0.4,-0.7269,0.190537
19833,72447,wife mortgage home chase property address chas...,0,0,0,11,0.418531,0.9612,0.190537
2037,5696,chase bank delete phone number account could c...,6,3,0,0,0.2,-0.4939,0.163392
1988,5584,owner operator chase bank problem pretty much ...,6,2,0,0,0.5625,0.7506,0.163392
20107,73714,purchase side side unit include half windshiel...,5,0,0,60,0.388889,0.802,0.092113
4016,11399,respectfully ask cfp intervene prevent chase c...,5,6,0,5,0.323214,0.6164,0.092113
21061,78293,hire local attorney help restructure personal ...,2,5,0,15,0.274781,0.4215,0.08955
17702,65084,violate usc continue collection activity cease...,2,4,6,88,0.444444,-0.6249,0.08955


In [12]:
# Priority score = subjectivity - compound polarity + topic frequency
df['priority_score'] = (
    df['subjectivity']
    .sub(df['compound_polarity'])
    .add(df['topic_frequency'])
)
df

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency,priority_score
0,1,good morning name appreciate could help put st...,2,2,0,34,0.445238,0.8402,0.089550,-0.305412
1,2,upgraded card tell agent upgrade anniversary d...,5,6,0,2,0.900000,-0.5812,0.092113,1.573313
2,10,chase card report however fraudulent applicati...,2,4,0,16,0.000000,-0.1446,0.089550,0.234150
3,11,try book ticket come across offer apply toward...,5,4,0,24,0.471429,0.9058,0.092113,-0.342259
4,14,grand son give check deposit chase account fun...,6,1,0,0,0.753333,0.9215,0.163392,-0.004774
...,...,...,...,...,...,...,...,...,...,...
21067,78303,chase card customer well decade offer multiple...,5,6,0,2,0.291288,0.6369,0.092113,-0.253499
21068,78309,wednesday call chas visa credit card provider ...,8,5,0,39,0.324318,-0.9377,0.204822,1.466840
21069,78310,familiar pay understand great risk provide con...,3,2,0,9,0.468801,0.0641,0.035592,0.440293
21070,78311,flawless credit chase credit card chase freedo...,4,0,0,6,0.475247,0.9716,0.069618,-0.426735


In [13]:
# Order tickets from highest to lowest priority score; this returns a new frame
# and keeps each row's original index label.
sort_by_priority_score = df.sort_values('priority_score', ascending=False)
sort_by_priority_score

Unnamed: 0,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency,priority_score
3810,10981,make shipment chase bank say would arrive busi...,8,1,0,0,1.000,-0.8625,0.204822,2.067322
11084,31616,chase bank customer many year check pre pay ca...,0,2,0,-1,0.875,-0.9435,0.190537,2.009037
3760,10863,try contact dispute dept morgan chase bank pdt...,8,5,0,-1,0.850,-0.9501,0.204822,2.004922
650,1601,account cancel identity problem closing accoun...,8,3,0,66,1.000,-0.7845,0.204822,1.989322
2339,6340,issue chase southwest credit card four fraudul...,1,4,0,-1,1.000,-0.9022,0.086655,1.988855
...,...,...,...,...,...,...,...,...,...,...
4145,11663,receive copy credit report notice information ...,2,4,1,79,0.000,0.9313,0.089550,-0.841750
11623,32807,approve chase ink prefer credit promotion poin...,5,4,0,2,0.000,0.9341,0.092113,-0.841987
20684,76015,response letter attach state initiate inquiry ...,2,4,0,3,0.000,0.9501,0.089550,-0.860550
16597,61254,apply approve southwest credit card use card c...,5,6,0,2,0.000,0.9657,0.092113,-0.873587


# calculate priority level

In [14]:
def abc_ranking(priority_rank_score):
    """Map a rank fraction to an ABC priority label.

    Parameters
    ----------
    priority_rank_score : number
        Relative rank position, expected to range from 0 to 1.

    Returns
    -------
    str
        '1' (high priority) for scores <= 0.2,
        '2' (medium priority) for scores in (0.2, 0.5],
        '3' (low priority) for everything else.
    """
    # Upper bounds are checked in ascending order, so the first match wins.
    for upper_bound, level in ((0.2, '1'), (0.5, '2')):
        if priority_rank_score <= upper_bound:
            return level
    return '3'

In [15]:
# First reset: move each row's existing index label into the column 'current_index'.
calculate_priority_df = sort_by_priority_score.reset_index(names='current_index')
# Second reset: move the fresh 0..n-1 positional index (the position in the
# score-sorted order) into the column 'sort_index'.
calculate_priority_df = calculate_priority_df.reset_index(names='sort_index')
calculate_priority_df

Unnamed: 0,sort_index,current_index,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency,priority_score
0,0,3810,10981,make shipment chase bank say would arrive busi...,8,1,0,0,1.000,-0.8625,0.204822,2.067322
1,1,11084,31616,chase bank customer many year check pre pay ca...,0,2,0,-1,0.875,-0.9435,0.190537,2.009037
2,2,3760,10863,try contact dispute dept morgan chase bank pdt...,8,5,0,-1,0.850,-0.9501,0.204822,2.004922
3,3,650,1601,account cancel identity problem closing accoun...,8,3,0,66,1.000,-0.7845,0.204822,1.989322
4,4,2339,6340,issue chase southwest credit card four fraudul...,1,4,0,-1,1.000,-0.9022,0.086655,1.988855
...,...,...,...,...,...,...,...,...,...,...,...,...
21067,21067,4145,11663,receive copy credit report notice information ...,2,4,1,79,0.000,0.9313,0.089550,-0.841750
21068,21068,11623,32807,approve chase ink prefer credit promotion poin...,5,4,0,2,0.000,0.9341,0.092113,-0.841987
21069,21069,20684,76015,response letter attach state initiate inquiry ...,2,4,0,3,0.000,0.9501,0.089550,-0.860550
21070,21070,16597,61254,apply approve southwest credit card use card c...,5,6,0,2,0.000,0.9657,0.092113,-0.873587


In [16]:
# abc ranking (20-30-50)
# Rank fraction = 1-based position in the sorted order divided by the row count of df.
total_tickets = df.shape[0]
calculate_priority_df['priority_rank_score'] = calculate_priority_df['sort_index'].add(1).div(total_tickets)
# Translate each rank fraction into its priority label.
calculate_priority_df['priority'] = calculate_priority_df['priority_rank_score'].map(abc_ranking)

In [17]:
# Display the ranked frame, now including priority_rank_score and priority.
calculate_priority_df

Unnamed: 0,sort_index,current_index,old_index,preprocessed_text,topic_lda,topic_nmf,topic_t2v,topic_btp,subjectivity,compound_polarity,topic_frequency,priority_score,priority_rank_score,priority
0,0,3810,10981,make shipment chase bank say would arrive busi...,8,1,0,0,1.000,-0.8625,0.204822,2.067322,0.000047,1
1,1,11084,31616,chase bank customer many year check pre pay ca...,0,2,0,-1,0.875,-0.9435,0.190537,2.009037,0.000095,1
2,2,3760,10863,try contact dispute dept morgan chase bank pdt...,8,5,0,-1,0.850,-0.9501,0.204822,2.004922,0.000142,1
3,3,650,1601,account cancel identity problem closing accoun...,8,3,0,66,1.000,-0.7845,0.204822,1.989322,0.000190,1
4,4,2339,6340,issue chase southwest credit card four fraudul...,1,4,0,-1,1.000,-0.9022,0.086655,1.988855,0.000237,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21067,21067,4145,11663,receive copy credit report notice information ...,2,4,1,79,0.000,0.9313,0.089550,-0.841750,0.999810,3
21068,21068,11623,32807,approve chase ink prefer credit promotion poin...,5,4,0,2,0.000,0.9341,0.092113,-0.841987,0.999858,3
21069,21069,20684,76015,response letter attach state initiate inquiry ...,2,4,0,3,0.000,0.9501,0.089550,-0.860550,0.999905,3
21070,21070,16597,61254,apply approve southwest credit card use card c...,5,6,0,2,0.000,0.9657,0.092113,-0.873587,0.999953,3


In [18]:
# Number of tickets per priority label.
calculate_priority_df.value_counts('priority')

priority
3    10536
2     6322
1     4214
Name: count, dtype: int64

In [19]:
# Tickets per priority label as a percentage of df's row count (checks the 20-30-50 split).
calculate_priority_df.value_counts('priority')/df.shape[0]*100

priority
3    50.000000
2    30.001898
1    19.998102
Name: count, dtype: float64

# export dataframe as the final dataset

In [20]:
# Helper columns that were only needed to derive the priority label.
intermediate_columns = ['sort_index','subjectivity','compound_polarity','topic_frequency','priority_score','priority_rank_score']
# Topic assignments from the models that were not chosen (topic_lda is kept).
abandoned_topic_models = ['topic_nmf','topic_t2v','topic_btp']

# Make 'current_index' the row index again, then remove both column groups in one in-place drop.
calculate_priority_df.set_index('current_index', inplace=True)
calculate_priority_df.drop(columns=intermediate_columns + abandoned_topic_models, inplace=True)

In [21]:
# Clear the index name (it was 'current_index' after the earlier set_index call).
calculate_priority_df.index.name = None
# Sort the rows by index label, in place, so they are no longer in priority-score order.
calculate_priority_df.sort_index(axis=0,inplace=True)

In [22]:
# Write the final dataset to 'process_csv_stage_3.csv'; index=False leaves the row index out of the file.
calculate_priority_df.to_csv('process_csv_stage_3.csv',index=False)