In [4]:
import pandas as pd
import re
import json


In [5]:
def parse_thought_action(dict_str):
    """Extract the ``thought`` and ``action`` values from a dict-like string.

    The text is searched with regular expressions rather than parsed, so it
    only has to contain ``'thought': ... , 'action': ... }`` in that order.
    Backslashes and single/double quotes are removed from both values.

    Args:
        dict_str: Text containing single-quoted ``'thought'`` and
            ``'action'`` keys.

    Returns:
        ``{"thought": ..., "action": ...}``. A value is ``None`` when its
        key pattern is not found in ``dict_str``.
    """
    def _clean(match):
        # A failed search yields None; report it as None instead of
        # crashing on None.replace(...).
        if match is None:
            return None
        return match.group(1).replace("\\", "").replace("\"", "").replace("'", "")

    thought_match = re.search(r"'thought':\s*(.+?)\s*,\s*'action'", dict_str)
    action_match = re.search(r"'action':\s*(.+?)\s*}", dict_str)
    return {"thought": _clean(thought_match), "action": _clean(action_match)}

In [6]:
def to_dict(input_string):
    """Convert the textual form of an action dict into a compact action string.

    The fields ``'action_type'``, ``'element_id'``, ``'url'`` and
    ``'fill_text'`` are pulled out of ``input_string`` with a regular
    expression. A value may be an enum repr (``<Enum.NAME: 1>``), a bare
    integer, or a single- or double-quoted non-empty string.

    Args:
        input_string: Text containing the single-quoted field names above.

    Returns:
        One of ``google_search[text]``, ``fill_search[element_id,text]``,
        ``goto[url]``, ``click[element_id]``, ``"None"``, or ``""`` when the
        action type is missing or not recognised. Fields that were not
        captured (including empty quoted values such as ``'url': ''``, which
        the pattern cannot match) are rendered as an empty string.
    """
    # Regular expression pattern: field name, then enum repr | integer | quoted string.
    pattern = r"('action_type'|'element_id'|'url'|'fill_text'):\s*(<[^>]+>|\d+|'[^']+'|\"[^\"]+\")"
    extracted_fields = {}
    for field_name, field_value in re.findall(pattern, input_string):
        key = field_name.strip("'")
        if field_value.startswith('<') and field_value.endswith('>'):
            # Enum repr: keep what follows the last dot, e.g. "CLICK: 1".
            extracted_fields[key] = field_value.split('.')[-1].strip('<> ')
        elif field_value[0] in "'\"" and field_value[-1] == field_value[0]:
            # Drop exactly the surrounding quote pair, whichever kind it is.
            extracted_fields[key] = field_value[1:-1]
        else:
            extracted_fields[key] = field_value

    action_type = extracted_fields.get("action_type", "").lower()
    element_id = str(extracted_fields.get("element_id", ""))
    fill_text = extracted_fields.get("fill_text", "")
    url = extracted_fields.get("url", "")

    if "google_search" in action_type:
        return "google_search[" + fill_text + "]"
    # NOTE(review): fill_form has always been emitted under the
    # "fill_search" label; kept as-is so existing consumers are unaffected.
    if "fill_search" in action_type or "fill_form" in action_type:
        # Second component is the typed text (it used to repeat element_id).
        return "fill_search[" + element_id + "," + fill_text + "]"
    if "goto" in action_type:
        return "goto[" + url + "]"
    if "click" in action_type:
        return "click[" + element_id + "]"
    if "none" in action_type:
        return "None"
    return ""

In [7]:
def score_rate(score):
    """Return the quotient of a ``"numerator/denominator"`` score string."""
    numerator, denominator = (int(part) for part in score.split("/"))
    return numerator / denominator

In [8]:
def parse_step_reward(dict_str):
    """Extract the ``score`` and ``description`` values from a dict-like string.

    The text is searched with regular expressions rather than parsed, so it
    only has to contain ``'score': ... , 'description': ... }`` in that
    order. Backslashes and single/double quotes are removed from both values.

    Args:
        dict_str: Text containing single-quoted ``'score'`` and
            ``'description'`` keys.

    Returns:
        ``{"score": ..., "description": ...}`` with string values. A value is
        ``None`` when its key pattern is not found in ``dict_str``.
    """
    def _clean(match):
        # A failed search yields None; report it as None instead of
        # crashing on None.replace(...).
        if match is None:
            return None
        return match.group(1).replace("\\", "").replace("\"", "").replace("'", "")

    score_match = re.search(r"'score':\s*(.+?)\s*,\s*'description'", dict_str)
    description_match = re.search(r"'description':\s*(.+?)\s*}", dict_str)
    return {
        "score": _clean(score_match),
        "description": _clean(description_match),
    }


def process_step_reward(dict_str):
    """Normalise a raw step-reward cell into a dict.

    Args:
        dict_str: The raw cell value. ``"x"`` and ``"finished"`` (any letter
            case) are special markers; any other string is handed to
            ``parse_step_reward``.

    Returns:
        ``{}`` for ``"x"`` or for non-string input (for example a missing
        cell that pandas reads as NaN), ``{"score": 10, "description":
        "finished"}`` for ``"finished"``, otherwise the result of
        ``parse_step_reward(dict_str)``.
    """
    # Non-strings have no .lower(); treat them like the "no reward" marker.
    if not isinstance(dict_str, str):
        return {}
    marker = dict_str.lower()
    if marker == "x":
        return {}
    if marker == "finished":
        # Key is "score" (the stray colon in "score:" was a typo), matching
        # the key produced by parse_step_reward.
        return {"score": 10, "description": "finished"}
    return parse_step_reward(dict_str)

In [9]:
def parse_selector(selector):
    """Return ``selector`` unchanged, or the string ``"None"`` when it is ``None``."""
    return "None" if selector is None else selector

In [10]:
def write_to_json(file_path):
    """Read one per-task CSV and return its steps as a list of dicts.

    Despite the name, nothing is written here: the CSV is loaded, a few
    derived columns are computed, and one summary dict per row is returned.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list with one dict per CSV row, holding the keys ``step_index``,
        ``trace_description``, ``selector``, ``action``, ``task_score``,
        ``task_score_rate``, ``current_reward_score_description`` and ``url``.
    """
    frame = pd.read_csv(file_path, index_col=False)
    # The first column is discarded (presumably a saved index -- confirm).
    frame = frame.drop(frame.columns[0], axis=1)
    frame["step_index"] += 1

    # Derived columns, each produced by the matching helper.
    frame["trace_to_dict"] = frame["trace"].apply(parse_thought_action)
    frame["action_to_str"] = frame["action"].apply(to_dict)
    frame["score_rate"] = frame["score"].apply(score_rate)
    frame["step_reward"] = frame["step_reward"].apply(process_step_reward)
    # Missing selectors become the sentinel "None", mapped to "" below.
    frame["selector"] = frame["selector"].fillna("None")

    wanted_columns = [
        "step_index",
        "trace_to_dict",
        "selector",
        "action_to_str",
        "score",
        "score_rate",
        "step_reward",
        "step url",
    ]
    selected = frame[wanted_columns]

    def summary(row):
        """Build the output dict for a single row."""
        selector = row["selector"]
        step_url = row["step url"]
        return {
            "step_index": row["step_index"],
            "trace_description": row["trace_to_dict"] or {},
            "selector": "" if selector == "None" else selector,
            "action": row["action_to_str"] or "",
            "task_score": row["score"],
            "task_score_rate": row["score_rate"],
            "current_reward_score_description": row["step_reward"],
            "url": "" if step_url == "finished" else step_url,
        }

    step_list = []

    def collect(row):
        # Called once per row by DataFrame.apply; only the side effect matters.
        step_list.append(summary(row))

    selected.apply(collect, axis=1)
    return step_list

In [13]:
import os
import pandas as pd

# Convert every per-task CSV under folder_path into a single JSON file.
# File names are split on "_": the first part is the integer task id, the
# second the task name, and the second-to-last the task status.
folder_path = './csv_results/Dom-based'
task_list = []
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    # Check for a regular file BEFORE parsing the name, so sub-directories
    # (such as .ipynb_checkpoints) are skipped instead of crashing int().
    if not os.path.isfile(file_path):
        continue
    name_parts = filename.split("_")
    task_name = name_parts[1]
    out_json = {}
    out_json["task_id"] = int(name_parts[0])
    out_json["task_name"] = task_name
    out_json["task_status"] = name_parts[-2]
    out_json["step_list"] = write_to_json(file_path)
    task_list.append(out_json)
print(task_list)
# os.listdir order is arbitrary; order the output by task id.
task_list = sorted(task_list, key=lambda x: x['task_id'])
os.makedirs("./results/Dom-based", exist_ok=True)
out_json_file_path = './results/Dom-based/out.json'
with open(out_json_file_path, 'w') as json_file:
    json.dump(task_list, json_file)



In [136]:
import json5


def read_file(file_path="./data/group1.json"):
    """Read the labelled task data.

    Args:
        file_path: Path of the JSON5 file holding a list of task records.

    Returns:
        A list with one ``[task_name, reference_task_length,
        reference_evaluate_steps]`` entry per task. Each evaluate step is a
        dict whose keys depend on the kind of match function (url,
        element_path or element_value) and whose ``"score"`` starts at 0.
        Evaluations whose match function name contains none of those three
        substrings are left out.
    """
    with open(file_path) as handle:
        tasks = json5.load(handle)

    parsed_tasks = []
    for task in tasks:
        task_name = task["task"]
        evaluation_data = task["evaluation"]
        reference_task_length = task["reference_task_length"]

        steps = []
        for evaluation in evaluation_data:
            match_function = evaluation["match_function_name"]
            step = None
            if "url" in match_function:
                content = evaluation["content"]
                step = {
                    "match_function": match_function,
                    "key": content["key"],
                    "reference_answer": content["reference_answer"],
                }
            elif "element_path" in match_function:  # TODO
                reference_answer = evaluation["content"]["reference_answer"]
                step = {
                    "match_function": match_function,
                    "method": evaluation["method"],
                    "reference_answer": reference_answer,
                    "netloc": evaluation["content"]["netloc"],
                }
            elif "element_value" in match_function:
                content = evaluation["content"]
                step = {
                    "match_function": match_function,
                    "reference_answer": content["reference_answer"],
                    "netloc": content["netloc"],
                }
                # "path" is optional for element_value checks.
                if "path" in content:
                    step["path"] = content["path"]

            if step is not None:
                # Added last so "score" stays the final key of every step.
                step["score"] = 0
                steps.append(step)

        parsed_tasks.append([task_name, reference_task_length, steps])
    return parsed_tasks

In [140]:
import os
import pandas as pd

# Build the group1 JSON export, attaching the reference data from read_file()
# to each task found under folder_path.
# NOTE(review): './results/Dom-based' is also where the earlier export cell
# writes out.json -- confirm this should not point at the CSV folder instead.
folder_path = './results/Dom-based'
# Keep the reference entries separate from the output list: mixing both in
# one list made the final sort fail on the non-dict reference entries.
reference_tasks = read_file()
task_list = []
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    # Check for a regular file BEFORE parsing the name, so sub-directories
    # are skipped instead of crashing int().
    if not os.path.isfile(file_path):
        continue
    name_parts = filename.split("_")
    out_json = {}
    out_json["task_id"] = int(name_parts[0])
    # NOTE(review): this stores the whole reference entry
    # ([task_name, reference_task_length, reference_evaluate_steps]) under
    # "task_name", as before -- confirm whether only element [0] is wanted.
    out_json["task_name"] = reference_tasks[out_json["task_id"]]
    task_status = name_parts[-2]
    out_json["task_status"] = task_status
    out_json["step_list"] = write_to_json(file_path)
    task_list.append(out_json)
print(task_list)
# os.listdir order is arbitrary; order the output by task id.
task_list = sorted(task_list, key=lambda x: x['task_id'])
os.makedirs("./results/group1/Dom-based", exist_ok=True)
out_json_file_path = './results/group1/Dom-based/out.json'
with open(out_json_file_path, 'w') as json_file:
    json.dump(task_list, json_file)

In [113]:
# Placeholder record: one task named "s" with a single step whose text
# fields are all empty strings (presumably a sketch of the output layout).
data_json = {
    "task_name": "s",
    "step_list": [
        {
            "step_index": 1,
            **dict.fromkeys(
                (
                    "trace_description",
                    "selector",
                    "action",
                    "score",
                    "score_description",
                    "score_rate",
                ),
                "",
            ),
        }
    ],
}