In [3]:
import pandas as pd
import numpy as np

# Switchboard

In [24]:
swda = pd.read_csv('/nas/home/jwei/biases/dialog-bias/Switchboard-Corpus/swda_data/full_set.txt', \
                   delimiter='|', header=None)
swda.columns = ['speaker', 'utterance', 'act']

swda.utterance = swda.utterance.astype(str)
swda['is_qy'] = swda['act'] == 'qy'

swda

Unnamed: 0,speaker,utterance,act,is_qy
0,A,Okay.,"fo_o_fw_""_by_bc",False
1,A,"So, What kind of experience do you, do you have, then with child care?",qw,False
2,B,"I guess, I think, uh, I wonder if that worked.",qy^d,False
3,A,Does it say something?,qy,True
4,B,I think it usually does.,sd,False
...,...,...,...,...
199757,B,"and I know they've, there's a lot of refineries down there",sd,False
199758,B,"and that, that's some pretty potent stuff they put up in the air.",sv,False
199759,B,"I, but I don't know how, uh, you know,",sd,False
199760,B,"there's a difference in what you can smell and what you, uh, you know, what's bad.",sv,False


In [56]:
swda[swda['is_qy']].sample(30)

Unnamed: 0,speaker,utterance,act,is_qy,does_rule,keyword_startswith
32382,B,is it along the coast or or inland.,qy,True,False,False
101597,A,"well, uh, have there been significant changes, uh, do you think in the employment place, especially, uh, say at T I.",qy,True,False,False
74708,B,Do you want to start?,qy,True,False,True
171604,A,"Uh, did they make the movies after that",qy,True,False,False
161404,B,"Doesn't it just, uh, do they have to go back and do a whole new trial",qy,True,False,False
36230,B,"You didn't have to mix paint then, right.",qy,True,False,False
185678,A,Are you there?,qy,True,False,True
120286,B,"Doesn't this, doesn't this encourage, Uh, More .",qy,True,False,False
165218,A,"Well, do you think the U S should even be involved? I mean, as the world leader in the sense trying to resolve these issues",qy,True,False,False
31146,A,"Do you get, do you work at home all the time",qy,True,False,False


### Rules

In [65]:
def rule_1(sentence):
    sentence = sentence.lower().strip()
    return 'does' in sentence and sentence.endswith('?')

def rule_2(sentence):
    keywords = ['do', 'does', 'did', 'are', 'is', 'was', 'should']
    startswith = any(sentence.lower().startswith(i) for i in keywords)
    return startswith and sentence.endswith('?')

def rule_3(sentence):
    return sentence.endswith('?')

In [66]:
rules = [ ('does_rule', rule_1), ('keyword_startswith', rule_2), ('endswith_?', rule_3) ]

### Labeling

In [67]:
for name, rule in rules:
    swda[name] = swda['utterance'].apply(rule)

In [68]:
swda

Unnamed: 0,speaker,utterance,act,is_qy,does_rule,keyword_startswith,endswith_?
0,A,Okay.,"fo_o_fw_""_by_bc",False,False,False,False
1,A,"So, What kind of experience do you, do you have, then with child care?",qw,False,False,False,True
2,B,"I guess, I think, uh, I wonder if that worked.",qy^d,False,False,False,False
3,A,Does it say something?,qy,True,True,True,True
4,B,I think it usually does.,sd,False,False,False,False
...,...,...,...,...,...,...,...
199757,B,"and I know they've, there's a lot of refineries down there",sd,False,False,False,False
199758,B,"and that, that's some pretty potent stuff they put up in the air.",sv,False,False,False,False
199759,B,"I, but I don't know how, uh, you know,",sd,False,False,False,False
199760,B,"there's a difference in what you can smell and what you, uh, you know, what's bad.",sv,False,False,False,False


### Evaluation

In [69]:
for name, rule in rules:
    predicted_true = swda[swda[name]]
    precision = predicted_true['is_qy'].mean()
    
    true_examples = swda[swda['is_qy']]
    recall = true_examples[name].mean()
    
    print(name)
    print(len(predicted_true))
    print(precision)
    print(recall)
    print()

does_rule
213
0.5727699530516432
0.02582010582010582

keyword_startswith
1715
0.8303206997084548
0.30137566137566135

endswith_?
7956
0.4356460532931121
0.7335449735449735



### Manual analysis

In [72]:
swda[swda['keyword_startswith']].sample(20)

Unnamed: 0,speaker,utterance,act,is_qy,does_rule,keyword_startswith,endswith_?
91164,A,Do you have a boat?,qy,True,False,True,True
70948,B,Is that right?,qy,True,False,True,True
123016,A,Do you know anybody that's ever done this?,qy,True,False,True,True
14245,B,Do you like the job sharing?,qy,True,False,True,True
58755,B,Do they require you to presort it?,qy,True,False,True,True
69589,A,do you have a P C?,qy,True,False,True,True
153953,A,Do you?,bh,False,False,True,True
33391,A,Did you?,bh,False,False,True,True
186683,B,"Are you a Vietnam veteran, Dudley?",qy,True,False,True,True
53377,A,"Do you have a lot of property, I mean a lot of yard around your house now, in Denton?",qy,True,False,True,True
