In [12]:
import json
import os

# read lines from a json file, each line is a json object
def read_json_lines(file):
    """Lazily parse a JSON Lines file, yielding one decoded object per line.

    Args:
        file: Path of the file to read. Every non-blank line must contain one
            complete JSON document.

    Yields:
        The Python object decoded from each non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8 by specification; do not rely on the platform's
    # default text encoding.
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            # A blank line (typically a trailing newline at end of file) holds
            # no record and would make json.loads raise.
            if not line.strip():
                continue
            yield json.loads(line)

# write a json object to a file
def write_json(file, obj):
    """Serialize ``obj`` as JSON with a two-space indent and write it to ``file``.

    Args:
        file: Destination path. It is opened in 'w' mode, so any existing
            content is replaced.
        obj: The object to serialize; it must be encodable by ``json``.
    """
    encoder = json.JSONEncoder(indent=2)
    with open(file, 'w') as sink:
        # Stream the encoded pieces to the file one by one.
        for piece in encoder.iterencode(obj):
            sink.write(piece)

def select_longest_dialogue(data):
    """Return all records of the conversation that has the most records.

    The length of a conversation is measured by how many items in ``data``
    carry its 'convo_id'.

    Args:
        data: A re-iterable collection (e.g. a list) of dicts, each with a
            'convo_id' key.

    Returns:
        The records whose 'convo_id' is the most frequent one, in their
        original order. When several ids tie, the one seen first wins.
        An empty list is returned when ``data`` is empty.
    """
    records_per_convo = {}
    for record in data:
        convo_id = record['convo_id']
        records_per_convo[convo_id] = records_per_convo.get(convo_id, 0) + 1

    # max() raises ValueError on an empty mapping, so handle "no data" first.
    if not records_per_convo:
        return []

    longest_convo_id = max(records_per_convo, key=records_per_convo.get)
    return [record for record in data if record['convo_id'] == longest_convo_id]

def main(path='test_AST_abcd_woaction_flow_10p.json'):
    """Print the longest dialogue found in a JSON Lines file of action records.

    Args:
        path: JSON Lines file to read. Defaults to the file this cell was
            originally written against.

    Prints the selected records, how many there are, and then the 'target'
    value of each one.
    """
    # Materialize the generator: the selection step iterates the data twice.
    data = list(read_json_lines(path))

    longest_dialogue = select_longest_dialogue(data)
    print(longest_dialogue)
    print(len(longest_dialogue))
    for record in longest_dialogue:
        print(record['target'])
    

# Entry point: call main() when this code runs as the top-level module.
if __name__ == '__main__':
    main()

[{'sample_id': 2404, 'convo_id': 367, 'turn_id': 6, 'flow': 'single_item_query', 'target': 'single_item_query:search-faq [none]', 'input': 'Context: hello, what i can help you with today? i wanna know more about the product is it leather? hello? which product are you referring to?', 'target_data': '["single_item_query:search-faq", ["none"]]'}, {'sample_id': 2405, 'convo_id': 367, 'turn_id': 8, 'flow': 'single_item_query', 'target': 'single_item_query:search-boots [none]', 'input': 'Context: hello, what i can help you with today? i wanna know more about the product is it leather? hello? which product are you referring to? tommy hilfiger boots', 'target_data': '["single_item_query:search-boots", ["none"]]'}, {'sample_id': 2406, 'convo_id': 367, 'turn_id': 12, 'flow': 'single_item_query', 'target': 'single_item_query:verify-identity [crystal minh, n/a, n/a]', 'input': "Context: hello, what i can help you with today? i wanna know more about the product is it leather? hello? which product a

In [52]:
import json
import os

# read lines from a json file, each line is a json object
def read_json_lines(file):
    """Iterate over the records of a JSON Lines file.

    Args:
        file: Path of the file to read; each non-blank line is one JSON
            document.

    Yields:
        The decoded object for every non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 (the JSON interchange encoding) rather than
    # with whatever the platform default happens to be.
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            # Skip empty / whitespace-only lines; json.loads would reject them.
            if not line.strip():
                continue
            yield json.loads(line)

def read_json(file):
    """Load and return the single JSON document stored in ``file``.

    Args:
        file: Path of the JSON file to read.

    Returns:
        The decoded Python object.

    Raises:
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Explicit UTF-8: without it the decoding depends on the platform's
    # default encoding and non-ASCII text may be misread or rejected.
    with open(file, 'r', encoding='utf-8') as f:
        return json.load(f)
    
def write_json(file, obj):
    """Dump ``obj`` to ``file`` as JSON indented by two spaces.

    Args:
        file: Path to write to ('w' mode: previous content is overwritten).
        obj: JSON-encodable object.
    """
    with open(file, mode='w') as sink:
        json.dump(
            obj,
            fp=sink,
            indent=2,
        )

def select_longest_dialogue_raw(data):
    """Return the raw record(s) of the conversation with the most turns.

    A conversation's length is the total number of entries in the 'original'
    sequence(s) of the records that share its 'convo_id' (every entry counts,
    whatever its speaker label).

    Args:
        data: A re-iterable collection (e.g. a list) of dicts with the keys
            'convo_id' and 'original'.

    Returns:
        The records whose 'convo_id' has the highest turn count, in their
        original order; ties go to the id counted first. An empty list is
        returned when ``data`` is empty or no record has any turn.
    """
    turns_per_convo = {}
    for dialogue in data:
        convo_id = dialogue['convo_id']
        n_turns = sum(1 for _ in dialogue['original'])
        # Conversations without turns are never registered, so they cannot be
        # selected and do not influence tie-breaking.
        if n_turns:
            turns_per_convo[convo_id] = turns_per_convo.get(convo_id, 0) + n_turns

    # max() raises ValueError on an empty mapping.
    if not turns_per_convo:
        return []

    longest_convo_id = max(turns_per_convo, key=turns_per_convo.get)
    return [dialogue for dialogue in data if dialogue['convo_id'] == longest_convo_id]

def select_longest_dialogue_actions(data):
    """Return all action records of the conversation with the most of them.

    Args:
        data: A re-iterable collection (e.g. a list) of dicts, each with a
            'convo_id' key.

    Returns:
        The records sharing the most frequent 'convo_id', in original order;
        ties go to the id seen first. An empty list is returned for empty
        input.
    """
    actions_per_convo = {}
    for record in data:
        convo_id = record['convo_id']
        actions_per_convo[convo_id] = actions_per_convo.get(convo_id, 0) + 1

    # Guard the empty case: max() on an empty mapping raises ValueError.
    if not actions_per_convo:
        return []

    longest_convo_id = max(actions_per_convo, key=actions_per_convo.get)
    return [record for record in data if record['convo_id'] == longest_convo_id]

def select_longest_dialogue(data_raw, data_actions, min_actions=7, min_raw_turns=15):
    """Pick a conversation that is long in both action records and raw turns.

    Two counts are built per 'convo_id':
      * the number of records in ``data_actions``;
      * the number of entries in the 'original' sequences of ``data_raw``
        whose first element (the speaker label) does not start with 'action'.

    The largest value of each count, the number of qualifying conversations
    and their ids are printed.

    Args:
        data_raw: Re-iterable collection of dicts with 'convo_id' and
            'original' (a sequence of turns whose first element is a string).
        data_actions: Iterable of dicts with a 'convo_id' key.
        min_actions: A conversation qualifies only with strictly more action
            records than this.
        min_raw_turns: A conversation qualifies only with strictly more
            non-action turns than this.

    Returns:
        The records of ``data_raw`` belonging to the first qualifying
        conversation, or an empty list when none qualifies.
    """
    action_counts = {}
    for record in data_actions:
        convo_id = record['convo_id']
        action_counts[convo_id] = action_counts.get(convo_id, 0) + 1

    raw_turn_counts = {}
    for dialogue in data_raw:
        convo_id = dialogue['convo_id']
        for turn in dialogue['original']:
            # Turns labelled 'action...' are not counted as raw turns.
            if turn[0].startswith('action'):
                continue
            raw_turn_counts[convo_id] = raw_turn_counts.get(convo_id, 0) + 1

    # Largest counts seen; default=0 keeps empty inputs from raising.
    max_action = max(action_counts.values(), default=0)
    max_raw = max(raw_turn_counts.values(), default=0)
    print(max_action, max_raw)

    # Keep conversations present in both sources that exceed both thresholds.
    # The candidates come out of a set, so which one ends up first is not
    # guaranteed by the language.
    convo_ids = set(action_counts.keys()).intersection(set(raw_turn_counts.keys()))
    convo_ids = [
        convo_id for convo_id in convo_ids
        if action_counts[convo_id] > min_actions and raw_turn_counts[convo_id] > min_raw_turns
    ]
    print(len(convo_ids))
    print(convo_ids)

    # Nothing qualified: return an empty selection instead of indexing an
    # empty list.
    if not convo_ids:
        return []

    convo_id = convo_ids[0]
    return [dialogue for dialogue in data_raw if dialogue['convo_id'] == convo_id]

def main(raw_path='../raw/abcd_v1.1.json',
         actions_path='test_AST_abcd_woaction_flow_10p.json',
         convo_id=8623):
    """Report long dialogues, then print one conversation turn by turn.

    Args:
        raw_path: JSON file whose 'test' entry holds the raw dialogues.
        actions_path: JSON Lines file with the action records.
        convo_id: Id of the conversation to print in full.

    For every raw test dialogue whose 'convo_id' equals ``convo_id`` this
    prints the whole record, each turn as "<speaker> :  <text>", and finally
    the number of turns whose speaker label does not start with 'action'.
    """
    data_raw = read_json(raw_path)
    test_data_raw = data_raw['test']
    data_actions = list(read_json_lines(actions_path))

    # Called for the summary it prints; the returned selection is not used here.
    select_longest_dialogue(test_data_raw, data_actions)

    for dialogue in test_data_raw:
        if dialogue['convo_id'] != convo_id:
            continue
        print(dialogue)
        counter = 0
        for turn in dialogue['original']:
            print(turn[0], ": ", turn[1])
            # Only non-action turns are counted.
            if not turn[0].startswith('action'):
                counter += 1
        print(counter)
    

# Entry point: call main() when this code runs as the top-level module.
if __name__ == '__main__':
    main()

24 53
11
[159, 367, 8623, 433, 436, 4730, 2691, 1064, 1135, 1916, 10127]
{'convo_id': 8623, 'scenario': {'personal': {'account_id': 'jmpwbxr49h', 'customer_name': 'alessandro phoenix', 'member_level': 'bronze', 'phone': '(291) 457-4572'}, 'order': {'street_address': '1623 lincoln ave', 'full_address': '1623 lincoln ave  raleigh, wa 69297', 'city': 'raleigh', 'order_id': '6210757252', 'state': 'wa', 'zip_code': '69297'}, 'product': {'names': [], 'amounts': []}, 'flow': 'subscription_inquiry', 'subflow': 'status_due_amount'}, 'original': [['agent', 'Hi! How can I help you today?'], ['customer', 'hey have some questions about the status of my account and how to keep it active'], ['customer', 'also if I owe anything to you guys towards that at the moment'], ['agent', 'Okay, I can help you with that. Can you give me your name and account number, please?'], ['customer', 'Alessandro Phoenix'], ['customer', 'JMPWBXR49H'], ['action', 'Identity verification in progress ...'], ['agent', 'Okay - d

### Select the longest dialogue while maintaining good prediction performance

In [12]:
import json
import os

# read lines from a json file, each line is a json object
def read_json_lines(file):
    """Yield the decoded object of every record in a JSON Lines file.

    Args:
        file: Path of the file to read; one JSON document per non-blank line.

    Yields:
        One decoded Python object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default encoding.
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            # Whitespace-only lines carry no record; json.loads would fail on them.
            if not line.strip():
                continue
            yield json.loads(line)

def read_json(file):
    """Read ``file`` and return the JSON document it contains.

    Args:
        file: Path of the JSON file to read.

    Returns:
        The decoded Python object.

    Raises:
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Decode as UTF-8 explicitly; otherwise the platform default encoding
    # decides how non-ASCII characters are interpreted.
    with open(file, 'r', encoding='utf-8') as f:
        return json.load(f)
    
def write_json(file, obj):
    """Write ``obj`` to ``file`` as two-space-indented JSON.

    Args:
        file: Output path; opened with mode 'w', so an existing file is
            overwritten.
        obj: JSON-encodable object to store.
    """
    with open(file, 'w') as sink:
        # Encode incrementally and hand each piece to the file as it is produced.
        chunks = json.JSONEncoder(indent=2).iterencode(obj)
        for chunk in chunks:
            sink.write(chunk)

def select_longest_dialogue_raw(data):
    """Return the raw record(s) of the conversation with the most turns.

    The turn count of a conversation is the total number of entries in the
    'original' sequence(s) of the records carrying its 'convo_id'; every
    entry counts, regardless of its speaker label.

    Args:
        data: A re-iterable collection (e.g. a list) of dicts with the keys
            'convo_id' and 'original'.

    Returns:
        The records whose 'convo_id' has the highest turn count, in original
        order; ties go to the id counted first. An empty list is returned
        when ``data`` is empty or no record has any turn.
    """
    turns_per_convo = {}
    for dialogue in data:
        convo_id = dialogue['convo_id']
        n_turns = sum(1 for _ in dialogue['original'])
        # Only conversations with at least one turn are registered.
        if n_turns:
            turns_per_convo[convo_id] = turns_per_convo.get(convo_id, 0) + n_turns

    # Avoid max() on an empty mapping, which raises ValueError.
    if not turns_per_convo:
        return []

    longest_convo_id = max(turns_per_convo, key=turns_per_convo.get)
    return [dialogue for dialogue in data if dialogue['convo_id'] == longest_convo_id]

def select_longest_dialogue_actions(data):
    """Return the action records of the conversation that has the most of them.

    Args:
        data: A re-iterable collection (e.g. a list) of dicts, each with a
            'convo_id' key.

    Returns:
        All records sharing the most frequent 'convo_id', in original order;
        on a tie the id seen first is chosen. An empty list is returned for
        empty input.
    """
    actions_per_convo = {}
    for record in data:
        convo_id = record['convo_id']
        actions_per_convo[convo_id] = actions_per_convo.get(convo_id, 0) + 1

    # With no records there is nothing to select (and max() would raise).
    if not actions_per_convo:
        return []

    longest_convo_id = max(actions_per_convo, key=actions_per_convo.get)
    return [record for record in data if record['convo_id'] == longest_convo_id]

def select_longest_dialogue(data_raw, data_actions, min_correct_actions=6, min_raw_turns=15):
    """Pick a long conversation whose actions were mostly predicted correctly.

    Two counts are built per 'convo_id':
      * the number of records in ``data_actions`` whose 'target' and
        'pred_action' agree on the part before the first ' [' (a
        conversation with no such record gets no entry at all);
      * the number of entries in the 'original' sequences of ``data_raw``
        whose first element (the speaker label) does not start with 'action'.

    The largest value of each count, the number of qualifying conversations
    and their ids are printed.

    Args:
        data_raw: Re-iterable collection of dicts with 'convo_id' and
            'original' (a sequence of turns whose first element is a string).
        data_actions: Iterable of dicts with the keys 'convo_id', 'target'
            and 'pred_action'.
        min_correct_actions: A conversation qualifies only with strictly more
            correctly predicted actions than this.
        min_raw_turns: A conversation qualifies only with strictly more
            non-action turns than this.

    Returns:
        The records of ``data_raw`` belonging to the first qualifying
        conversation, or an empty list when none qualifies.
    """
    correct_action_counts = {}
    for record in data_actions:
        convo_id = record['convo_id']
        # Compare only the text preceding the first ' [' of each field.
        target = record['target'].split(' [')[0]
        pred = record['pred_action'].split(' [')[0]
        if target == pred:
            correct_action_counts[convo_id] = correct_action_counts.get(convo_id, 0) + 1

    raw_turn_counts = {}
    for dialogue in data_raw:
        convo_id = dialogue['convo_id']
        for turn in dialogue['original']:
            # Turns labelled 'action...' are not counted as raw turns.
            if turn[0].startswith('action'):
                continue
            raw_turn_counts[convo_id] = raw_turn_counts.get(convo_id, 0) + 1

    # Largest counts seen; default=0 keeps empty mappings from raising.
    max_action = max(correct_action_counts.values(), default=0)
    max_raw = max(raw_turn_counts.values(), default=0)
    print(max_action, max_raw)

    # Keep conversations present in both mappings that exceed both thresholds.
    # The candidates come out of a set, so which one ends up first is not
    # guaranteed by the language.
    convo_ids = set(correct_action_counts.keys()).intersection(set(raw_turn_counts.keys()))
    convo_ids = [
        convo_id for convo_id in convo_ids
        if correct_action_counts[convo_id] > min_correct_actions
        and raw_turn_counts[convo_id] > min_raw_turns
    ]
    print(len(convo_ids))
    print(convo_ids)

    # Nothing qualified: return an empty selection instead of indexing an
    # empty list.
    if not convo_ids:
        return []

    convo_id = convo_ids[0]
    return [dialogue for dialogue in data_raw if dialogue['convo_id'] == convo_id]

def select_dialogue_success_rate(data_actions):
    """Tally how many action records each conversation has.

    NOTE(review): this looks unfinished -- the tally is built and then
    discarded, no success rate is computed, and the function implicitly
    returns None.

    Args:
        data_actions: Iterable of dicts, each with a 'convo_id' key.
    """
    records_per_convo = {}
    for record in data_actions:
        key = record['convo_id']
        records_per_convo[key] = records_per_convo.get(key, 0) + 1
    
    


def main(raw_path='../raw/abcd_v1.1.json',
         actions_path='/home/xiangyu/project/AST_abcd_part/dialogues/abcdASTWOActionFlowAll_with_chainedPrior.json',
         convo_id=2691):
    """Report long, well-predicted dialogues, then print one conversation.

    Args:
        raw_path: JSON file whose 'test' entry holds the raw dialogues.
        actions_path: JSON Lines file with the action records. The default is
            a machine-specific absolute path; pass your own location to run
            this elsewhere.
        convo_id: Id of the conversation to print in full.

    For every raw test dialogue whose 'convo_id' equals ``convo_id`` this
    prints the whole record, each turn as "<speaker> :  <text>", and finally
    the number of turns whose speaker label does not start with 'action'.
    """
    data_raw = read_json(raw_path)
    test_data_raw = data_raw['test']
    data_actions = list(read_json_lines(actions_path))

    # Called for the summary it prints; the returned selection is not used here.
    select_longest_dialogue(test_data_raw, data_actions)

    for dialogue in test_data_raw:
        if dialogue['convo_id'] != convo_id:
            continue
        print(dialogue)
        counter = 0
        for turn in dialogue['original']:
            print(turn[0], ": ", turn[1])
            # Only non-action turns are counted.
            if not turn[0].startswith('action'):
                counter += 1
        print(counter)
    

# Entry point: call main() when this code runs as the top-level module.
if __name__ == '__main__':
    main()

8 53
4
[2691, 1135, 9530, 10127]
{'convo_id': 2691, 'scenario': {'personal': {'customer_name': 'crystal minh', 'member_level': 'guest', 'phone': '(560) 856-5455'}, 'order': {'street_address': '8422 kennedy st', 'full_address': '8422 kennedy st  jacksonville, tx 98919', 'city': 'jacksonville', 'num_products': '1', 'products': "[{'brand': 'calvin_klein', 'product_type': 'jacket', 'amount': 59, 'image_url': 'images/calvin_klein-jacket.jpeg'}]", 'state': 'tx', 'zip_code': '98919'}, 'product': {'names': ['calvin_klein jacket'], 'amounts': [59]}, 'flow': 'single_item_query', 'subflow': 'jacket_how_4'}, 'original': [['agent', 'Hello, how can I help you today?'], ['customer', 'Yes on the Calvin_Klein jacket, could you tell me how the hood detaches'], ['agent', 'Happy to help, may I have your name please?'], ['action', 'Searching the FAQ pages ...'], ['action', 'System Action: search jacket'], ['customer', 'Crystal Minh'], ['action', 'FAQ answer related to jacket (how4) was selected.'], ['actio