### Politeness Features and Markers in ConvoKit

In [2]:
# Note that I ran this using Python 3.9
import convokit
import spacy
import pandas as pd
import re

In [3]:
from convokit import Corpus, Speaker, Utterance
from convokit import download
from convokit import TextParser

In [4]:
from convokit import PolitenessStrategies

# spaCy English pipeline with the named-entity recognizer disabled; it is
# passed explicitly to the politeness transformer for each message.
spacy_nlp = spacy.load("en_core_web_sm", disable=["ner"])

# ConvoKit transformer used to annotate text with politeness strategies.
ps = PolitenessStrategies()

# Jury conversation messages together with their outcome variables.
full_jury_data = pd.read_csv(
    '../../data/raw_data/jury_conversations_with_outcome_var.csv'
)

In [5]:
# Preview the first rows of the loaded jury conversation data.
full_jury_data.head()

Unnamed: 0,batch_num,round_num,speaker_hash,speaker_nickname,timestamp,message,majority_pct,num_flipped,flipped_pct,num_votes
0,0,0,5e7e1e0031f4e454e196c30b,niceRhino,2020-04-20T18:27:20.125Z,Hello!,1.0,1,0.333333,3
1,0,0,5e31d6e4e31c5304c46f1413,culturedCow,2020-04-20T18:27:23.764Z,Hi!,1.0,1,0.333333,3
2,0,0,5e7e4f4c31f4e454e196c9c4,spryBison,2020-04-20T18:27:27.724Z,Hello,1.0,1,0.333333,3
3,0,0,5d482ea421c9be351f762255,youngLion,2020-04-20T18:27:30.410Z,Hi,1.0,1,0.333333,3
4,0,0,5e84cc3c50f6e364321d6265,smallGiraffe,2020-04-20T18:27:35.506Z,hi,1.0,1,0.333333,3


In [6]:
def get_politeness_stragies(text):
    """Return the politeness-strategy annotations for a single message.

    Missing values (anything ``pd.isnull`` reports as null) are treated as an
    empty string before being handed to the module-level ``ps`` transformer,
    which is run with the shared ``spacy_nlp`` pipeline.

    Parameters
    ----------
    text : str or null-like
        The raw message text.

    Returns
    -------
    The value stored under ``"politeness_strategies"`` in the transformed
    utterance's metadata (presumably a mapping of feature name to indicator;
    the caller expands it into one column per key).
    """
    message = "" if pd.isnull(text) else text
    annotated = ps.transform_utterance(message, spacy_nlp=spacy_nlp)
    return annotated.meta["politeness_strategies"]
# Compute the politeness-strategy annotations for every message, then expand
# them into one column per feature. Building the frame once from the list of
# per-message records avoids constructing a separate Series for every row
# (which is what `.apply(pd.Series)` does), while keeping the original index
# so the result still lines up row-for-row with `full_jury_data`.
strategy_records = full_jury_data['message'].apply(get_politeness_stragies)
transformed_df = pd.DataFrame(strategy_records.tolist(), index=strategy_records.index)

# Feature names arrive wrapped as "feature_politeness_==<Name>=="; keep only
# <Name>. The pattern is anchored at both ends, so a column that does not
# follow the wrapper format is left unchanged rather than having its last two
# characters silently cut off.
transformed_df = transformed_df.rename(
    columns=lambda name: re.sub(r'^feature_politeness_==(.*)==$', r'\1', name)
)
transformed_df

Unnamed: 0,Please,Please_start,HASHEDGE,Indirect_(btw),Hedges,Factuality,Deference,Gratitude,Apologizing,1st_person_pl.,...,1st_person_start,2nd_person,2nd_person_start,Indirect_(greeting),Direct_question,Direct_start,HASPOSITIVE,HASNEGATIVE,SUBJUNCTIVE,INDICATIVE
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14883,0,0,1,0,1,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
14884,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
14885,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
14886,0,0,1,0,1,0,0,0,0,0,...,1,0,0,0,0,0,1,1,0,0


In [7]:
# Attach the politeness feature columns to the original conversation data.
# This is a column-wise concatenation, so rows are matched on the index the
# two frames share.
df_featurized = pd.concat(
    objs=[full_jury_data, transformed_df],
    axis="columns",
)
df_featurized

Unnamed: 0,batch_num,round_num,speaker_hash,speaker_nickname,timestamp,message,majority_pct,num_flipped,flipped_pct,num_votes,...,1st_person_start,2nd_person,2nd_person_start,Indirect_(greeting),Direct_question,Direct_start,HASPOSITIVE,HASNEGATIVE,SUBJUNCTIVE,INDICATIVE
0,0,0,5e7e1e0031f4e454e196c30b,niceRhino,2020-04-20T18:27:20.125Z,Hello!,1.000000,1,0.333333,3,...,0,0,0,1,0,0,0,0,0,0
1,0,0,5e31d6e4e31c5304c46f1413,culturedCow,2020-04-20T18:27:23.764Z,Hi!,1.000000,1,0.333333,3,...,0,0,0,1,0,0,0,0,0,0
2,0,0,5e7e4f4c31f4e454e196c9c4,spryBison,2020-04-20T18:27:27.724Z,Hello,1.000000,1,0.333333,3,...,0,0,0,1,0,0,0,0,0,0
3,0,0,5d482ea421c9be351f762255,youngLion,2020-04-20T18:27:30.410Z,Hi,1.000000,1,0.333333,3,...,0,0,0,1,0,0,0,0,0,0
4,0,0,5e84cc3c50f6e364321d6265,smallGiraffe,2020-04-20T18:27:35.506Z,hi,1.000000,1,0.333333,3,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14883,177,3,5f1f466846a7f03dda9b37c3,eagerElephant,2020-07-28T22:36:22.226Z,I think he is the asshole,0.666667,1,0.333333,3,...,1,0,0,0,0,0,0,0,0,0
14884,177,3,5f1a214246a7f03dda9a6aad,inspiredGiraffe,2020-07-28T22:36:46.155Z,He didn't vent to them or take up frustrations...,0.666667,1,0.333333,3,...,0,0,0,0,0,0,0,1,0,0
14885,177,3,5f2096dab0aafc741f6865bb,inquisitiveBison,2020-07-28T22:36:49.238Z,very interested,0.666667,1,0.333333,3,...,0,0,0,0,0,0,0,0,0,0
14886,177,3,5f1f472646a7f03dda9b3895,excitedDolphin,2020-07-28T22:36:51.487Z,"I'm fine with venting on Reddit, I just don't ...",0.666667,1,0.333333,3,...,1,0,0,0,0,0,1,1,0,0
