In [161]:
import pandas as pd
import numpy as np
import glob
import os
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

In [162]:
##### load Odin extractions #####

combined_ie = pd.read_csv("combined_data_ie_label.csv")
ie_extraction = combined_ie[['message_id', 'ie_extraction']]

# keep nan values for ie_extraction to have the original sequence of utterances
# annotations may have CLC labels on utt that does not have ie extraction
ie_extraction

Unnamed: 0,message_id,ie_extraction
0,70cfac19-1171-44a8-b1c1-6d3069941118,
1,10a34235-9c1c-4a46-a2b6-1b553f58271d,
2,cec24808-49b9-43d1-a703-ac6fd5c7c202,
3,aa13fe0f-b4a8-4b22-a0eb-4ba9abebacbf,
4,c1f6b2ce-4476-4e6c-b913-a26fdd89ada1,
...,...,...
6722,d2b9850c-79a0-41ca-a91c-fc765ce7cb6b,KnowledgeSharing
6723,912ee2c8-0dbc-45cf-8e06-4f1c512dc4ca,"DeliberatePlan, TimeUnit"
6724,25d58291-6c08-46aa-9d1c-b39175370745,
6725,4602f1ba-7c39-4587-af49-a616eff03e1e,


In [163]:
len(ie_extraction)

6727

In [164]:
##### load annotation #####

def readFile(dir):
    dataAll_df = pd.read_csv(dir,index_col=None, header=0).\
        fillna("NA")\
        [["message_id", "Label", "Score", "Category", "Note"]]
    # exclude pre-game and post-game lines
    # exclude no annnotation lines
    data = dataAll_df[dataAll_df["Note"] == "NA"]\
                [dataAll_df["Label"] != "NA"]\
                [["message_id", "Label", "Score", "Category"]]
    return data

In [165]:
dir_csv = glob.glob(os.path.join("/Users/yuwei/Desktop/RA/CLC-new-ann-model/new_ann", 
                                "*.csv"))
dir_csv.sort()

dfs = (readFile(dir) for dir in dir_csv)
ann_df = pd.concat(dfs, ignore_index=True)
ann_df

  # Remove the CWD from sys.path while we load stuff.


Unnamed: 0,message_id,Label,Score,Category
0,34f36874-be3a-4187-bd66-1fa69d4e0c28,1a,,assert
1,50de2843-270f-47d8-9e23-b41fb82ac3e5,1b,1,acknowledge
2,342f31d6-388d-4e33-a257-4d4314b20cdb,2b,1,accept
3,b1952888-cc6f-4f4b-9843-fde04c1708a0,2a,,action-directive
4,8047c7fa-89d2-447b-bcb8-d1c2e6c98c5b,3a,,action-directive
...,...,...,...,...
5337,9fda7355-30c7-4363-9666-4f0347070a60,66c.67a,,acknowledge.assert
5338,0f240579-dcb2-49aa-8758-138e8a8a392d,67b.68a,3,info-provide.assert
5339,8521d8bc-fc19-4ff3-bc77-0e1133f993fe,68a+,,assert
5340,37dfcb89-3e16-41a3-ac94-b90e870b21fd,68a++,,assert


In [166]:
##### map ie extractions to annotation df #####
ie_dict = dict(zip(ie_extraction.message_id, ie_extraction.ie_extraction))
ann_df["ie_extraction"] = ie_extraction["message_id"].map(ie_dict)
ann_df

Unnamed: 0,message_id,Label,Score,Category,ie_extraction
0,34f36874-be3a-4187-bd66-1fa69d4e0c28,1a,,assert,
1,50de2843-270f-47d8-9e23-b41fb82ac3e5,1b,1,acknowledge,
2,342f31d6-388d-4e33-a257-4d4314b20cdb,2b,1,accept,
3,b1952888-cc6f-4f4b-9843-fde04c1708a0,2a,,action-directive,
4,8047c7fa-89d2-447b-bcb8-d1c2e6c98c5b,3a,,action-directive,
...,...,...,...,...,...
5337,9fda7355-30c7-4363-9666-4f0347070a60,66c.67a,,acknowledge.assert,
5338,0f240579-dcb2-49aa-8758-138e8a8a392d,67b.68a,3,info-provide.assert,
5339,8521d8bc-fc19-4ff3-bc77-0e1133f993fe,68a+,,assert,
5340,37dfcb89-3e16-41a3-ac94-b90e870b21fd,68a++,,assert,ReportLocation


In [19]:
# check manually if all trials has ie labels attached on them
# yes.
ann_df.to_csv("ann_extend_ie.csv", index=False)

In [167]:
###### Find Call-out label set #####

# filtering the rows where Label contains 'a'
callout_df = ann_df[ann_df['Label'].str.contains('a')]

# filtering the callout_df where Category is 'assert'
callout_assert = callout_df[callout_df['Category'].str.contains('assert')]
# filtering the callout_df where Category is 'commit'
callout_commit = callout_df[callout_df['Category'].str.contains('commit')]
# filtering the callout_df where Category is 'action-directive'
callout_action = callout_df[callout_df['Category'].str.contains('action-directive')]
# filtering the callout_df where Category is 'info-request'
callout_info = callout_df[callout_df['Category'].str.contains('info-request')]

In [168]:
###### Find Check-back label set #####

# filtering the rows where Label contains 'b'
checkback_df = ann_df[ann_df['Label'].str.contains('b')]

# filtering the checkback_df where Category is 'acknowledge'
checkback_acknowledge = checkback_df[checkback_df['Category'].str.contains('acknowledge')]
# filtering the checkback_df where Category is 'accept'
checkback_accept = checkback_df[checkback_df['Category'].str.contains('accept')]
# filtering the checkback_df where Category is 'follow-up-question'
checkback_question = checkback_df[checkback_df['Category'].str.contains('follow-up-question')]
# filtering the checkback_df where Category is 'info-request'
checkback_info = checkback_df[checkback_df['Category'].str.contains('info-provide')]

In [169]:
###### Find Closing-of-the-loop label set #####

# filtering the rows where Label contains 'c'
closing_df = ann_df[ann_df['Label'].str.contains('c')]

# filtering the checkback_df where Category is 'acknowledge'
closing_acknowledge = closing_df[closing_df['Category'].str.contains('acknowledge')]
# filtering the checkback_df where Category is 'accept'
closing_accept = closing_df[closing_df['Category'].str.contains('accept')]
# filtering the checkback_df where Category is 'info-request'
closing_info = closing_df[closing_df['Category'].str.contains('info-provide')]

In [171]:
##### get label frequency for each phase and category #####

def find_label_freq(label_cat_df):
    '''takes in df of each label/label_category, e.g.callout_df/callout_assert
    and returns a list of the freq for each Odin label for that clc lable/label_category.
    e.g. [(647, 'Agreement'), (197, 'KnowledgeSharing')]'''
    label_cat_list = label_cat_df['ie_extraction'].dropna().to_list()
    label_cat_freq = {}
    for labels in label_cat_list:
        labels = labels.split(', ')
        for label in labels:
            if label not in label_cat_freq:
                label_cat_freq[label] = 1
            else:
                label_cat_freq[label] += 1
    label_cat_freq_list = sorted([(value, key) for (key, value) in label_cat_freq.items()],
                           reverse=True)
    return label_cat_freq_list

In [232]:
def all_part_bool(df_all, part_id_list):
    '''for each id in df_all, if the id in part, give a 1, else 0
    This function is used to prepare the tokenized golden and detection for acc'''
    res = []
    for id in df_all["message_id"]:
        if id in part_id_list:
            res.append(1)
        else:
            res.append(0)
    return res

def find_labelset_scores(labelset, label_cat_df):
    '''takes in a label set that selected from the Odin label freq list, and the 
    label/label category df. 
    Golden ann is a list of bool values for the all utts on whether an utt is marked with a certain label/label_cat.
    Detected is a list of bool values for all utts on whether an utt contains the Odin label that in the selected Odin label list for that clc label/label_cat.
    Returns acc, precision, recall, and f1 scores for that clc label cat'''
    ann = all_part_bool(ann_df, label_cat_df["message_id"].to_list())
    label_cat_dict = dict(zip(label_cat_df.message_id, label_cat_df.ie_extraction))
    detected_id = []
    for id, labels in label_cat_dict.items():
        found = False
        if labels == labels: ## filter out nan
            labels = labels.split(', ')
            for label in labels:
                if label in labelset:
                    found = True
        if found:
            detected_id.append(id)
    detected = all_part_bool(ann_df, detected_id)
    print("acc: ", round(accuracy_score(ann, detected), 3))
    print("precision: ", precision_score(ann, detected, zero_division=1))
    print("recall: ", round(recall_score(ann, detected, zero_division=1), 3))
    print("f1: ", round(f1_score(ann, detected, zero_division=1), 3))

    

In [235]:
##### Call-out: assert #####
assert_labels = find_label_freq(callout_assert)
assert_labels = [i[1] for i in assert_labels]

In [263]:
assert_labels[0]

'Agreement'

In [236]:
##### Call-out: commit #####
commit_labels = find_label_freq(callout_commit)
commit_labels = [i[1] for i in commit_labels]

In [237]:
##### Call-out: action-directive #####
action_labels = find_label_freq(callout_action)
action_labels = [i[1] for i in action_labels]

In [238]:
##### Call-out: info-request #####
info_labels = find_label_freq(callout_info)
info_labels = [i[1] for i in info_labels]

In [262]:
ann = all_part_bool(ann_df, callout_assert["message_id"].to_list())
ann

[1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,


In [264]:
label_cat_dict = dict(zip(callout_assert.message_id, callout_assert.ie_extraction))
detected_id = []
for id, labels in label_cat_dict.items():
    found = False
    if labels == labels: ## filter out nan
        labels = labels.split(', ')
        for label in labels:
            if label == 'Agreement':
                found = True
    if found:
        detected_id.append(id)
detected = all_part_bool(ann_df, detected_id)
detected

[0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,


In [267]:
for i in range(len(ann)):
    if ann[i] > detected[i]:
        print("FN")
    elif ann[i] > detected[i]:
        print("FP")

FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
FN
F

In [261]:
##### Call-out: assert accuracy #####
find_labelset_scores(assert_labels[0], callout_assert)

acc:  0.748
precision:  1.0
recall:  0.293
f1:  0.453


In [234]:
##### Call-out: commit accuracy #####
find_labelset_scores(commit_labels, callout_commit)

acc:  0.981
precision:  1.0
recall:  0.615
f1:  0.762


In [239]:
##### Call-out: action-directive accuracy #####
find_labelset_scores(action_labels, callout_action)

acc:  0.946
precision:  1.0
recall:  0.632
f1:  0.774


In [242]:
##### Call-out: info-request accuracy #####
find_labelset_scores(info_labels, callout_info)

acc:  0.968
precision:  1.0
recall:  0.623
f1:  0.768


In [241]:
##### Call-out: whole labelset accuracy #####
callout_labelset = set(assert_labels + commit_labels + action_labels + info_labels)
find_labelset_scores(callout_labelset, callout_df)

acc:  0.767
precision:  1.0
recall:  0.615
f1:  0.761


In [244]:
##### Check-back: acknowledge #####
cb_acknowledge = find_label_freq(checkback_acknowledge)
cb_acknowledge = [i[1] for i in cb_acknowledge]

In [245]:
##### Check-back: accept #####
cb_accept = find_label_freq(checkback_accept)
cb_accept = [i[1] for i in cb_accept]

In [247]:
##### Check-back: follow-up-question #####
cb_question = find_label_freq(checkback_question)
cb_question = [i[1] for i in cb_question]

In [248]:
##### Check-back: info-provide #####
cb_info = find_label_freq(checkback_info)
cb_info = [i[1] for i in cb_info]

In [249]:
##### Check-back: acknowledge accuracy #####
find_labelset_scores(cb_acknowledge, checkback_acknowledge)

acc:  0.955
precision:  1.0
recall:  0.643
f1:  0.782


In [250]:
##### Check-back: accept accuracy #####
find_labelset_scores(cb_accept, checkback_accept)

acc:  0.956
precision:  1.0
recall:  0.631
f1:  0.774


In [251]:
##### Check-back: follow-up-question accuracy #####
find_labelset_scores(cb_question, checkback_question)

acc:  0.992
precision:  1.0
recall:  0.624
f1:  0.768


In [252]:
##### Check-back: info-provide accuracy #####
find_labelset_scores(cb_info, checkback_info)

acc:  0.957
precision:  1.0
recall:  0.602
f1:  0.752


In [253]:
##### Check-back: whole labelset accuracy #####
cb_labelset = set(cb_acknowledge + cb_accept + cb_question + cb_info)
find_labelset_scores(cb_labelset, checkback_df)

acc:  0.86
precision:  1.0
recall:  0.625
f1:  0.769


In [254]:
##### Closing-of-the-loop: acknowledge #####
cl_acknowledge = find_label_freq(closing_acknowledge)
cl_acknowledge = [i[1] for i in cl_acknowledge]

In [255]:
##### Closing-of-the-loop: accept #####
cl_accept = find_label_freq(closing_accept)
cl_accept = [i[1] for i in cl_accept]

In [256]:
##### Closing-of-the-loop: info-provide #####
cl_info = find_label_freq(closing_info)
cl_info = [i[1] for i in cl_info]

In [257]:
##### Closing-of-the-loop: acknowledge accuracy #####
find_labelset_scores(cl_acknowledge, closing_acknowledge)

acc:  0.98
precision:  1.0
recall:  0.639
f1:  0.779


In [258]:
##### Closing-of-the-loop: accept accuracy #####
find_labelset_scores(cl_accept, closing_accept)

acc:  0.999
precision:  1.0
recall:  0.619
f1:  0.765


In [259]:
##### Closing-of-the-loop: info-provide accuracy #####
find_labelset_scores(cl_info, closing_info)

acc:  0.992
precision:  1.0
recall:  0.598
f1:  0.749


In [260]:
##### Closing-of-the-loop: whole labelset accuracy #####
cl_labelset = set(cl_acknowledge + cl_accept + cl_info)
find_labelset_scores(cl_labelset, closing_df)

acc:  0.97
precision:  1.0
recall:  0.629
f1:  0.772
