In [1]:
import os
import sys
import json 

# sys.path.append("../")
# from src.utils import read_json, save_json

In [2]:
def save_json(data: dict, filename: str) -> None:
    with open(filename, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4, ensure_ascii=False)


def read_json(path):
    """Load and return the JSON document stored at ``path``.

    The file is decoded as UTF-8 explicitly rather than with the platform's
    default encoding, so files written by ``save_json`` (UTF-8,
    ``ensure_ascii=False``) are read back correctly on every OS.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The decoded JSON value (dict, list, str, number, bool or None).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(path, mode="r", encoding="utf-8") as file:
        return json.load(file)

In [3]:
def prep_graph(graph: dict):
    """Make sure every edge and node stores its ``'utterances'`` as a list.

    A bare string found under ``'utterances'`` is replaced by a one-element
    list containing it; any other value is left as it is. The graph is
    modified in place and the very same object is returned.

    Raises:
        KeyError: If ``graph`` lacks ``'edges'`` or ``'nodes'``, or one of
            their items lacks ``'utterances'``.
    """
    # Edges are handled first, then nodes.
    for section in ('edges', 'nodes'):
        for element in graph[section]:
            value = element['utterances']
            element['utterances'] = [value] if isinstance(value, str) else value
    return graph

In [4]:
def form_graph_file(
    dialog_id: int,
    samping_method: str,
    dialog_path: str,
    init_graph_path: str,
    gen_graph_path: str,
    incomplete_graph_path: str | None = None,
    dialog_field: str = 'dialog'
) -> dict:
    """Assemble one record that bundles a dialog with its graphs.

    Args:
        dialog_id: Identifier stored in the record as ``"dialog_id"``.
        samping_method: Label stored as ``"samping_method"`` (the spelling is
            part of the output schema and is kept as is).
        dialog_path: JSON file that holds the dialog under ``dialog_field``.
        init_graph_path: JSON file of the graph stored as ``"target_graph"``.
        gen_graph_path: JSON file of the graph stored as ``"predicted_graph"``.
        incomplete_graph_path: Optional JSON file of the graph stored as
            ``"base_graph"``; when falsy, ``"base_graph"`` is an empty dict.
        dialog_field: Key of the dialog inside the dialog file.

    Returns:
        Dict with the keys ``dialog_id``, ``samping_method``, ``dialog``,
        ``target_graph``, ``predicted_graph`` and ``base_graph``. Every graph
        that was loaded has been passed through ``prep_graph``.
    """
    dialog = read_json(dialog_path)[dialog_field]
    target_graph = prep_graph(read_json(init_graph_path))
    predicted_graph = prep_graph(read_json(gen_graph_path))

    if incomplete_graph_path:
        base_graph = prep_graph(read_json(incomplete_graph_path))
    else:
        # No partial graph for this run: keep the key, with an empty value.
        base_graph = {}

    return {
        "dialog_id": dialog_id,
        "samping_method": samping_method,
        "dialog": dialog,
        "target_graph": target_graph,
        "predicted_graph": predicted_graph,
        "base_graph": base_graph,
    }

In [5]:
# Collect one record per experiment run into ``res``.
# Each record's dialog_id is its position in ``res`` (0, 1, 2, ...), so
# ``len(res)`` taken just before appending is always the next free id.
res = []


def _list_subdirs(root: str) -> list[str]:
    """Return the names of the directories directly inside ``root``, sorted.

    ``os.listdir`` yields entries in arbitrary order; sorting makes the
    dialog_id assigned to each run reproducible across machines and re-runs.
    """
    return sorted(
        name for name in os.listdir(root)
        if os.path.isdir(os.path.join(root, name))
    )


# --- graph_complement: one record per sub-directory -------------------------
exp_path = '../experiments/graph_complement'
for method_dir in _list_subdirs(exp_path):
    res.append(form_graph_file(
        dialog_id=len(res),
        samping_method=method_dir,
        dialog_path=f'{exp_path}/{method_dir}/dialog_to_give.json',
        init_graph_path=f'{exp_path}/{method_dir}/initial_graph.json',
        gen_graph_path=f'{exp_path}/{method_dir}/gpt_response.json',
        incomplete_graph_path=f'{exp_path}/{method_dir}/graph_to_give_gpt.json',
    ))

# --- graph_construction: one or two records per sub-directory ---------------
exp_path = '../experiments/graph_construction'
for method_dir in _list_subdirs(exp_path):
    partial_graph_path = f'{exp_path}/{method_dir}/partial_gt_graph.json'
    if os.path.isfile(partial_graph_path):
        # A partial graph exists: a single record built from the default
        # 'dialog' field and gpt_response1.json, with the partial graph as base.
        variants = [('gpt_response1.json', 'dialog', partial_graph_path)]
    else:
        # No partial graph: the dialog file holds two dialogs, each paired
        # with its own response file; base_graph stays empty.
        variants = [
            ('gpt_response1.json', 'dialog1', None),
            ('gpt_response2.json', 'dialog2', None),
        ]

    for response_file, dialog_field, base_graph_path in variants:
        res.append(form_graph_file(
            dialog_id=len(res),
            samping_method=method_dir,
            dialog_path=f'{exp_path}/{method_dir}/dialog_to_give.json',
            init_graph_path=f'{exp_path}/{method_dir}/initial_graph.json',
            gen_graph_path=f'{exp_path}/{method_dir}/{response_file}',
            incomplete_graph_path=base_graph_path,
            dialog_field=dialog_field,
        ))

In [6]:
from collections import defaultdict

def combine_utterances(data):
    """Merge entries that share the same ``(source, target)`` pair.

    Args:
        data: Iterable of dicts, each with ``"source"``, ``"target"`` and an
            iterable ``"utterances"``.

    Returns:
        A list with one dict per distinct ``(source, target)`` pair, in order
        of first appearance. Each dict holds only ``"utterances"`` (the
        concatenation of the group's utterances, in input order),
        ``"source"`` and ``"target"``; any other keys of the input entries
        are not carried over. The input entries are not modified.
    """
    merged = {}
    for entry in data:
        source, target = entry["source"], entry["target"]
        pair = (source, target)

        group = merged.get(pair)
        if group is None:
            # "utterances" is inserted first so that the key order of each
            # output dict is utterances, source, target.
            group = merged[pair] = {"utterances": []}

        group["source"] = source
        group["target"] = target
        group["utterances"].extend(entry["utterances"])

    # Plain list of dicts, one per (source, target) pair.
    return list(merged.values())

In [8]:
# Merge edges that share the same (source, target) pair in every graph of
# every collected record.
for dialog_info in res:
    for graph_name in ("target_graph", "predicted_graph", "base_graph"):
        graph = dialog_info[graph_name]
        if not graph:
            # Empty graph (form_graph_file stores {} as base_graph when no
            # incomplete graph path is given): nothing to merge.
            continue
        graph['edges'] = combine_utterances(graph['edges'])

In [9]:
# Write all collected records to a single JSON file.
save_json(data=res, filename='../data/data.json')