In [1]:
import pandas as pd
import numpy as np
import nltk
import spacy
# Load the spaCy pipeline once, up front; `nlp` is the callable that
# get_pos_tags applies to every query string.
nlp = spacy.load('en_core_web_lg')
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
%matplotlib inline

In [2]:
# Read the dataset, then continue on a one-column copy so the frame loaded
# from disk is never modified by the steps below.
df = pd.read_csv('Customer_Service_Dataset.csv')
df_text = df.loc[:, ['query']].copy()
df_text.head()

Unnamed: 0,query
0,I have problems with canceling an order
1,how can I find information about canceling ord...
2,I need help with canceling the last order
3,could you help me cancelling the last order I ...
4,problem with cancelling an order I made


In [3]:
#verbs = []
#nouns = []
def get_pos_tags(x):
    """Extract the verbs and nouns of a text as two comma-joined strings.

    Parameters
    ----------
    x : str
        Text to run through the module-level ``nlp`` pipeline.

    Returns
    -------
    tuple of (str, str)
        ``(verbs, nouns)``: the texts of all tokens whose ``pos_`` is
        ``"VERB"`` and ``"NOUN"`` respectively, in document order, joined
        with ``','``. Either string is empty when no such token exists, so
        a text that yields no tokens returns ``('', '')``.
    """
    doc = nlp(x)
    # Build each string exactly once. Doing this inside a per-token loop
    # repeated the same work for every token and left both names unbound
    # for a document with no tokens.
    verbs = ','.join(token.text for token in doc if token.pos_ == "VERB")
    nouns = ','.join(token.text for token in doc if token.pos_ == "NOUN")
    return verbs, nouns

In [4]:
# Tag each query once and split the (verbs, nouns) tuple into two columns.
# Calling get_pos_tags separately for each column ran the spaCy pipeline
# twice per row.
pos_tags = [get_pos_tags(text) for text in df_text['query']]
df_text['verbs'] = [verbs for verbs, _ in pos_tags]
df_text['nouns'] = [nouns for _, nouns in pos_tags]

In [5]:
# Combined key: the verbs string, a comma, then the nouns string.
df_text['verb_noun'] = df_text['verbs'] + ',' + df_text['nouns']
#l_cancel = ['canceling','cancelling','cancel','cancellations','cancelations','cancellation','cancelation']
#df_text['cancel_noun_verb'] = df_text['verb_noun'].apply(lambda x: any([(i in l_cancel) for i in x.split(',')]))
df_text.head()

Unnamed: 0,query,verbs,nouns,verb_noun
0,I have problems with canceling an order,"have,canceling","problems,order","have,canceling,problems,order"
1,how can I find information about canceling ord...,"find,canceling","information,orders","find,canceling,information,orders"
2,I need help with canceling the last order,"need,canceling","help,order","need,canceling,help,order"
3,could you help me cancelling the last order I ...,"help,cancelling,made",order,"help,cancelling,made,order"
4,problem with cancelling an order I made,"cancelling,made","problem,order","cancelling,made,problem,order"


In [47]:
# Load the annotated verb_noun mapping, discard the 'Unnamed: 0' column,
# and keep only rows with no missing values.
mapping_file = (
    pd.read_csv('verb_noun_counts_annotated.csv')
    .drop(columns='Unnamed: 0')
    .dropna()
)
mapping_file.shape

(5982, 4)

In [50]:
# Attach the annotated Subject/Intent labels to each query via its verb_noun
# key. `how` is left at the pandas default (inner join), so queries whose key
# is missing from mapping_file are dropped at this step.
# Subject/Intent columns left over from an earlier run of this cell are
# removed first. Without that, re-running the cell would produce suffixed
# Subject_x/Subject_y columns and any later lookup of df_text['Subject']
# would fail.
df_text = (
    df_text
    .drop(columns=['Subject', 'Intent'], errors='ignore')
    .merge(mapping_file[['verb_noun', 'Subject', 'Intent']], on='verb_noun')
)
df_text.head()

Unnamed: 0,query,verbs,nouns,verb_noun,Subject,Intent
0,could you help me cancelling the last order I ...,"help,cancelling,made",order,"help,cancelling,made,order",order,cancellation
1,could you help me cancelling an order I made?,"help,cancelling,made",order,"help,cancelling,made,order",order,cancellation
2,help me cancelling an order I made,"help,cancelling,made",order,"help,cancelling,made,order",order,cancellation
3,help me cancelling the last order I have made,"help,cancelling,made",order,"help,cancelling,made,order",order,cancellation
4,help with cancelling the order I have made,"help,cancelling,made",order,"help,cancelling,made,order",order,cancellation


In [54]:
# Keep only rows whose Subject and Intent both fall in the lists below.
subject_mask = df_text['Subject'].isin(['account','order','purchase'])
intent_mask = df_text['Intent'].isin(['set_up','restoration','enquiry','cancellation','modification'])
clause1 = subject_mask & intent_mask

# Independent copy of the selected rows, renumbered from 0.
df_text_new = df_text[clause1].copy().reset_index(drop=True)
df_text_new.head()

Unnamed: 0,query,verbs,nouns,verb_noun,Subject,Intent
0,could you help me cancelling the last order I ...,"help,cancelling,made",order,"help,cancelling,made,order",order,cancellation
1,could you help me cancelling an order I made?,"help,cancelling,made",order,"help,cancelling,made,order",order,cancellation
2,help me cancelling an order I made,"help,cancelling,made",order,"help,cancelling,made,order",order,cancellation
3,help me cancelling the last order I have made,"help,cancelling,made",order,"help,cancelling,made,order",order,cancellation
4,help with cancelling the order I have made,"help,cancelling,made",order,"help,cancelling,made,order",order,cancellation


In [67]:
si_obj = SentimentIntensityAnalyzer()

# Compound polarity score per query, held in a local Series rather than a
# temporary column on the frame.
compound_scores = df_text_new['query'].map(
    lambda text: si_obj.polarity_scores(text)["compound"]
)

# Sign of the compound score decides the label; exactly zero is "Neutral".
df_text_new['Sentiment'] = compound_scores.map(
    lambda score: "Positive" if score > 0 else ("Negative" if score < 0 else "Neutral")
)

In [68]:
# Write the labelled queries to disk without the row index.
df_text_new.to_csv(path_or_buf='df_text_new.csv', index=False)