In [1]:
import pandas as pd
import ast
import json

from pydantic import BaseModel, Field
from typing import List
import openai
from dotenv import load_dotenv

from langchain.utils.openai_functions import convert_pydantic_to_openai_function
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser


import sys
import os
from pathlib import Path

try:
    # If running as a script, use __file__ to find the directory
    __file__
    base_path = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
    sys.path.append(base_path)
    
except NameError:
    # If running in an interactive environment (e.g., Jupyter Notebook)
    base_path = Path().resolve().parent.parent
    sys.path.append(str(base_path))


In [5]:


load_dotenv()
model = ChatOpenAI(
    model="gpt-4o",
  temperature=0,
  api_key=os.getenv("OPENAI_API_KEY"),
)

from utils.llm import call_llm
# Local
# filepath_apiinfo = "/home/UNT/ae0589/Desktop/HPCC/AutomaticWorkflowGeneration/ActionEngine/db/api_info/api_information.json"
#filepath = "/home/UNT/ae0589/Desktop/HPCC/AutomaticWorkflowGeneration/ActionEngine/eval/answers/"

#Cloud
filepath_apiinfo = filepath_apiinfo = "/home/cc/AutomaticWorkflowGeneration/ActionEngine/db/api_info/api_information.json"
filepath = "/home/cc/AutomaticWorkflowGeneration/ActionEngine/eval/answers/"

filename_query_1to2 = filepath + 'test_queries/query_answer_1-2_nodes.json'
filename_query_3to5 = filepath + 'test_queries/query_answer_3-5_nodes.json'
filename_query_6to10 = filepath + 'test_queries/query_answer_6-10_nodes.json'

def read_json_to_dict(filename):
    with open(filename, 'r') as file:
        data = json.load(file)
    return data

def escape_json(text: str) -> str:
    return text.replace('{', '{{').replace('}', '}}')

## Generate GT for Easy Dataset (1-2 Nodes)

### Generate TaskList

In [2]:
"""
SubTask Division
"""
from utils.schemas.workflow import Tasks
from utils.subtask_div import subtask_diviser


  warn_deprecated(


#### 1-2 nodes

In [26]:
#1-2 nodes
q_1to2 = read_json_to_dict(filename_query_1to2)
# Extract specific fields
extracted_data = [
    {
        'id': item['id'],
        'user_request': item['user_request'],
        'selected_apis': item['selected_apis']
    }
    for item in q_1to2["test_data"]
]

tasklists = []
for i in range(len(extracted_data)):
    tasklist = subtask_diviser(extracted_data[i]["user_request"])
    t = {"id":extracted_data[i]["id"], "node_number": len(tasklist), "user_query": extracted_data[i]["user_request"] ,"task_list":tasklist, "selected_apis":extracted_data[i]["selected_apis"]}
    tasklists.append(t)
with open('./test_tasklist/tasklist_GTlabel_1-2_nodes.json', 'w') as json_file:
    json.dump(tasklists, json_file, indent=4)

### ->Check the generated tasklist and selected functions manually before proceed further

### Generate topological order

In [9]:
"""
Workflow Optimizer
"""
from utils.schemas.workflow import Workflow
from utils.wf_optimizer import wf_optimizer

#### 1-2 Nodes

In [9]:
"""
Generate Topological Order
1. Confirm the correctness of topological order manually
"""
filename_tasklist_1to2 = filepath + 'test_tasklist/tasklist_GTlabel_1-2_nodes.json'

top_orders = read_json_to_dict(filename_tasklist_1to2)
for i in range(len(extracted_data)):
    topological_order = wf_optimizer(extracted_data[i]["user_request"], top_orders[i]["task_list"])
    top_orders[i]["topological_order"] = topological_order

with open('./test_topologicalorder/topologicalorder_GTlabel_1-2_nodes.json', 'w') as json_file:
    json.dump(top_orders, json_file, indent=4)

##### Manually confirm the correctness of topological order manually before proceed nexr step


In [10]:
"""
Generate Ground Truth for Topological Order
"""
# Read
filename_topologiacal_1to2 = filepath + 'test_topologicalorder/topologicalorder_GTlabel_1-2_nodes.json'
top_orders = read_json_to_dict(filename_topologiacal_1to2)

# Generate order list 
for i in range(len(top_orders)):
    # print(top_order[i]["id"])
    node = top_orders[i]["topological_order"]  
    order_list = [d["task_nums"] for d in node]
    top_orders[i]["list_of_orders"] = order_list
    # print(order_list)

for i in range(len(top_orders)):
    # Flatten the list_of_orders into a single list of numbers
    flattened_orders = [num for sublist in top_orders[i]['list_of_orders'] for num in sublist]

    # Initialize the result list
    result = []

    # Generate the pairs for each number
    for j, current_num in enumerate(flattened_orders):
        pairs = []
        for k in range(j + 1, len(flattened_orders)):
            next_num = flattened_orders[k]
            pairs.append(f"{current_num} < {next_num}")
        
        # Add the result for the current number
        result.append({"num": current_num, "pairs": pairs})
    top_orders[i]["pairs"] = result
    print(top_orders[i]["pairs"])

    topological_order_gt_1to2_nodes = [
    {
        'id': item["id"],
        'topological_order': item["topological_order"],
        'list_of_orders': item["list_of_orders"],
        'label': item["pairs"],
    } for item in top_orders]

    with open('./test_topologicalorder/topologicalorder_GTlabel_1-2_nodes.json', 'w') as json_file:
        json.dump(topological_order_gt_1to2_nodes, json_file, indent=4)

### ->Check the generated topological orders manually before proceed further

### Generate Data Dependency Management

In [3]:
"""
Prepare selected function list to feed into function of data dependency management
"""
filename_tasklist = filepath + 'test_tasklist/tasklist_GTlabel_1-2_nodes.json'
filename_topologicalorder = filepath + 'test_topologicalorder/topologicalorder_GTlabel_1-2_nodes.json'

def read_apiinfo(filename):
    api_info = []
    with open(filename, 'r') as file:
        for line in file:
            api_info.append(json.loads(line))

    return api_info

api_info = read_apiinfo(filepath_apiinfo)
top_orders = read_json_to_dict(filename_topologicalorder)
data = read_json_to_dict(filename_tasklist)

# Loop over both lists and add 'topological_order' to the corresponding item in data
for i in range(len(top_orders)):
    # Assuming the length of top_orders and data is the same
    data[i]['topological_order'] = top_orders[i]['topological_order']

extracted_api_names = [
    {
        'id': item['id'],
        'task_list': item["task_list"],
        'selected_apis': item['selected_apis']
    }
    for item in data
]

all_task_info = []
# Concatenate by same index
for i in range(len(extracted_api_names)):
    concatenated_list = [[task, api] for task, api in zip(extracted_api_names[i]["task_list"], extracted_api_names[i]["selected_apis"])]
    combined_dicts = [{**dict1, **dict2} for dict1, dict2 in concatenated_list]
    all_task_info.append(combined_dicts)

# Extract all api names from api repositories
all_api_names = [item["name"] for item in api_info]

# Retrieve apis' infomation
for i in range(len(all_task_info)):
    selected_functions = []
    for task in all_task_info[i]:
        if task["name"] in all_api_names:
            for func in api_info: 
                if task["name"] == func["name"]:
                    selected_func = func.copy()  # Make a copy to avoid mutating the original
                    selected_func["task_num"] = int(task["task_number"])
                    selected_func["task_description"] = task["task_description"]
                    selected_functions.append(selected_func)
        data[i]["selected_functions"] = selected_functions


In [5]:
"""
Dependency Management
"""
from utils.data_dependency import confirm_dependency
from utils.schemas.workflow import TaskOutputDescription, DependentParams

In [7]:
"""
For classification of data dependency 
"""
for i in range(len(data)):
    selected_functions, user_inputs, depended_params = confirm_dependency(data[i]["topological_order"], data[i]["selected_functions"])
    func_list = []
    for api in selected_functions:
        all_params = [component['name'] for component in api["input_parameters_with_datatype"]]
        depended_params = [list(item.keys())[0] for item in api["depended_params"]]
        user_inputs = [param for param in all_params if param not in depended_params]
        func_list.append({"name": api["name"], "all_params": all_params, "user_input": user_inputs, "dependent_params": depended_params})
    data[i]["param_dependency_management"] = func_list

  warn_deprecated(


In [9]:
"""
Save Data Dependency Management test data
- classification of user_input and dependent_param
- correctness of dependednt_param 
"""
dd_data = [
    {
        'id': item['id'],
        "number_of_node": len(item["task_list"]),
        'task_list': item["task_list"],
        "topological_order": item["topological_order"],
        'selected_apis': [{
            "name": api["name"], 
            "input_params": api["input_parameters_with_datatype"],
            "dependencies": api["dependencies"],
            "depended_params": api["depended_params"],
            } for api in item['selected_functions']],
        'param_dependency_management': item["param_dependency_management"]
    }
    for item in data
]

with open('./test_datadependency/datadep_GTlabel_1-2_nodes.json', 'w') as json_file:
    json.dump(dd_data, json_file, indent=4)


## Generate Intermidiate 3-5 Nodes Data

### Generate TaskList

In [None]:
"""
SubTask Division
"""
from utils.schemas.workflow import Tasks
from utils.subtask_div import subtask_diviser


  warn_deprecated(


In [None]:
#1-2 nodes
q_3to5 = read_json_to_dict(filename_query_3to5)
extracted_data = [
    {
        'id': item['id'],
        'user_request': item['user_request'],
        'selected_apis': item['selected_apis']
    }
    for item in q_3to5["test_data"]
]


tasklists = []
for i in range(len(extracted_data)):
    tasklist = subtask_diviser(extracted_data[i]["user_request"])
    t = {"id":extracted_data[i]["id"], "node_number": len(tasklist), "user_query": extracted_data[i]["user_request"] ,"task_list":tasklist, "selected_apis":extracted_data[i]["selected_apis"]}
    tasklists.append(t)
with open('./test_tasklist/tasklist_GTlabel_3-5_nodes.json', 'w') as json_file:
    json.dump(tasklists, json_file, indent=4)

### ->Check the generated tasklist and selected functions manually before proceed further

### Generate topological order

In [None]:
"""
Workflow Optimizer
"""
from utils.schemas.workflow import Workflow
from utils.wf_optimizer import wf_optimizer

In [None]:
"""
Generate Topological Order
"""
filename_tasklist_3to5 = filepath + 'test_tasklist/tasklist_GTlabel_3-5_nodes.json'
top_orders = read_json_to_dict(filename_tasklist_3to5)
for i in range(len(extracted_data)):
    topological_order = wf_optimizer(extracted_data[i]["user_request"], top_orders[i]["task_list"])
    top_orders[i]["topological_order"] = topological_order

with open('./test_topologicalorder/topologicalorder_GTlabel_3-5_nodes.json', 'w') as json_file:
    json.dump(top_orders, json_file, indent=4)


##### Manually confirm the correctness of topological order manually before proceed nexr step


In [None]:

filename_topologiacal_3to5 = filepath + 'test_topologicalorder/topologicalorder_GTlabel_3-5_nodes.json'
top_orders = read_json_to_dict(filename_topologiacal_3to5)

# Generate order list 
for i in range(len(top_orders)):
    # print(top_order[i]["id"])
    node = top_orders[i]["topological_order"]  
    order_list = [d["task_nums"] for d in node]
    top_orders[i]["list_of_orders"] = order_list
    # print(order_list)

for i in range(len(top_orders)):
    # Flatten the list_of_orders into a single list of numbers
    flattened_orders = [num for sublist in top_orders[i]['list_of_orders'] for num in sublist]

    # Initialize the result list
    result = []

    # Generate the pairs for each number
    for j, current_num in enumerate(flattened_orders):
        pairs = []
        for k in range(j + 1, len(flattened_orders)):
            next_num = flattened_orders[k]
            pairs.append(f"{current_num} < {next_num}")
        
        # Add the result for the current number
        result.append({"num": current_num, "pairs": pairs})
    top_orders[i]["pairs"] = result
    print(top_orders[i]["pairs"])

topological_order_gt_3to5_nodes = [
{
    'id': item["id"],
    'topological_order': item["topological_order"],
    'list_of_orders': item["list_of_orders"],
    'label': item["pairs"],
} for item in top_orders]

with open('./test_topologicalorder/topologicalorder_GTlabel_3-5_nodes.json', 'w') as json_file:
    json.dump(topological_order_gt_3to5_nodes, json_file, indent=4)

### ->Check the generated topological orders manually before proceed further

### Generate Data Dependency Management

In [None]:
"""
Prepare selected function list to feed into function of data dependency management
"""
def read_apiinfo(filename):
    api_info = []
    with open(filename, 'r') as file:
        for line in file:
            api_info.append(json.loads(line))

    return api_info

api_info = read_apiinfo(filepath_apiinfo)
top_orders = read_json_to_dict(filename_topologiacal_3to5)
data = read_json_to_dict(filename_tasklist_3to5)

# Loop over both lists and add 'topological_order' to the corresponding item in data
for i in range(len(top_orders)):
    # Assuming the length of top_orders and data is the same
    data[i]['topological_order'] = top_orders[i]['topological_order']

extracted_api_names = [
    {
        'id': item['id'],
        'task_list': item["task_list"],
        'selected_apis': item['selected_apis']
    }
    for item in data
]

all_task_info = []
# Concatenate by same index
for i in range(len(extracted_api_names)):
    concatenated_list = [[task, api] for task, api in zip(extracted_api_names[i]["task_list"], extracted_api_names[i]["selected_apis"])]
    combined_dicts = [{**dict1, **dict2} for dict1, dict2 in concatenated_list]
    all_task_info.append(combined_dicts)

# Extract all api names from api repositories
all_api_names = [item["name"] for item in api_info]

# Retrieve apis' infomation
for i in range(len(all_task_info)):
    selected_functions = []
    for task in all_task_info[i]:
        if task["name"] in all_api_names:
            for func in api_info: 
                if task["name"] == func["name"]:
                    selected_func = func.copy()  # Make a copy to avoid mutating the original
                    selected_func["task_num"] = int(task["task_number"])
                    selected_func["task_description"] = task["task_description"]
                    selected_functions.append(selected_func)
        data[i]["selected_functions"] = selected_functions


In [None]:
"""
Dependency Management
"""
from utils.data_dependency import confirm_dependency
from utils.schemas.workflow import TaskOutputDescription, DependentParams

In [None]:
"""
For classification of data dependency 
"""
for i in range(len(data)):
    selected_functions, user_inputs, depended_params = confirm_dependency(data[i]["topological_order"], data[i]["selected_functions"])
    func_list = []
    for api in selected_functions:
        all_params = [component['name'] for component in api["input_parameters_with_datatype"]]
        depended_params = [list(item.keys())[0] for item in api["depended_params"]]
        user_inputs = [param for param in all_params if param not in depended_params]
        func_list.append({"name": api["name"], "all_params": all_params, "user_input": user_inputs, "dependent_params": depended_params})
    data[i]["param_dependency_management"] = func_list

++++++++++++++++++++++++++
{2: [1], 3: [2], 1: []}
++++++++++++++++++++++++++


  warn_deprecated(


++++++++++++++++++++++++++
{2: [1], 3: [2], 1: []}
++++++++++++++++++++++++++
++++++++++++++++++++++++++
{2: [1], 3: [2], 1: []}
++++++++++++++++++++++++++
++++++++++++++++++++++++++
{2: [1], 3: [2], 1: []}
++++++++++++++++++++++++++
++++++++++++++++++++++++++
{2: [1], 3: [2], 4: [3], 5: [4], 1: []}
++++++++++++++++++++++++++
++++++++++++++++++++++++++
{2: [1], 3: [2], 4: [3], 5: [4], 1: []}
++++++++++++++++++++++++++
++++++++++++++++++++++++++
{2: [1], 3: [2], 4: [3], 5: [4], 1: []}
++++++++++++++++++++++++++
++++++++++++++++++++++++++
{2: [1], 3: [2], 4: [3], 5: [4], 1: []}
++++++++++++++++++++++++++
++++++++++++++++++++++++++
{2: [1], 3: [2], 4: [3], 5: [4], 1: []}
++++++++++++++++++++++++++
++++++++++++++++++++++++++
{2: [1], 3: [2], 4: [3], 5: [4], 1: []}
++++++++++++++++++++++++++


In [None]:
"""
Save Data Dependency Management test data
- classification of user_input and dependent_param
- correctness of dependednt_param 
"""
dd_data = [
    {
        'id': item['id'],
        "number_of_node": len(item["task_list"]),
        'task_list': item["task_list"],
        "topological_order": item["topological_order"],
        'selected_apis': [{
            "name": api["name"], 
            "input_params": api["input_parameters_with_datatype"],
            "dependencies": api["dependencies"],
            "depended_params": api["depended_params"],
            } for api in item['selected_functions']],
        'param_dependency_management': item["param_dependency_management"]
    }
    for item in data
]

with open('./test_datadependency/datadep_GTlabel_3-5_nodes.json', 'w') as json_file:
    json.dump(dd_data, json_file, indent=4)


## Generate GT for Hard Dataset (6-10 Nodes)

### Generate TaskList

In [3]:
"""
SubTask Division
"""
from utils.schemas.workflow import Tasks
from utils.subtask_div import subtask_diviser


In [6]:
#1-2 nodes
q_6to10 = read_json_to_dict(filename_query_6to10)
extracted_data = [
    {
        'id': item['id'],
        'user_request': item['user_request'],
        'selected_apis': item['selected_apis']
    }
    for item in q_6to10["test_data"]
]


tasklists = []
for i in range(len(extracted_data)):
    tasklist = subtask_diviser(extracted_data[i]["user_request"])
    t = {"id":extracted_data[i]["id"], "node_number": len(tasklist), "user_query": extracted_data[i]["user_request"] ,"task_list":tasklist, "selected_apis":extracted_data[i]["selected_apis"]}
    tasklists.append(t)
with open('./test_tasklist/tasklist_GTlabel_6-10_nodes.json', 'w') as json_file:
    json.dump(tasklists, json_file, indent=4)

  warn_deprecated(


### ->Check the generated tasklist and selected functions manually before proceed further

### Generate topological order

In [7]:
"""
Workflow Optimizer
"""
from utils.schemas.workflow import Workflow
from utils.wf_optimizer import wf_optimizer

In [8]:
"""
Generate Topological Order
"""
filename_tasklist_6to10 = filepath + 'test_tasklist/tasklist_GTlabel_6-10_nodes.json'
top_orders = read_json_to_dict(filename_tasklist_3to5)
for i in range(len(extracted_data)):
    topological_order = wf_optimizer(extracted_data[i]["user_request"], top_orders[i]["task_list"])
    top_orders[i]["topological_order"] = topological_order

with open('./test_topologicalorder/topologicalorder_GTlabel_6-10_nodes.json', 'w') as json_file:
    json.dump(top_orders, json_file, indent=4)


##### Manually confirm the correctness of topological order manually before proceed nexr step


In [28]:

filename_topologiacal_6to10 = filepath + 'test_topologicalorder/topologicalorder_GTlabel_6-10_nodes.json'
top_orders = read_json_to_dict(filename_topologiacal_3to5)

# Generate order list 
for i in range(len(top_orders)):
    node = top_orders[i]["topological_order"]  
    order_list = [d["task_nums"] for d in node]
    top_orders[i]["list_of_orders"] = order_list

for i in range(len(top_orders)):
    # Flatten the list_of_orders into a single list of numbers
    flattened_orders = [num for sublist in top_orders[i]['list_of_orders'] for num in sublist]
    result = []

    # Generate the pairs for each number, but only with elements from subsequent sublists
    for j, current_num in enumerate(flattened_orders):
        pairs = []
        current_index = None
        for idx, sublist in enumerate(top_orders[i]['list_of_orders']):
            if current_num in sublist:
                current_index = idx
                break
        
        # Start pairing with elements in sublists after the current one
        if current_index is not None:
            for subsequent_sublist in top_orders[i]['list_of_orders'][current_index + 1:]:
                for next_num in subsequent_sublist:
                    pairs.append(f"{current_num} < {next_num}")
        
        # Add the result for the current number
        result.append({"num": current_num, "pairs": pairs})
    top_orders[i]["pairs"] = result
    print(top_orders[i]["pairs"])



topological_order_gt_3to5_nodes = [
{
    'id': item["id"],
    'topological_order': item["topological_order"],
    'list_of_orders': item["list_of_orders"],
    'label': item["pairs"],
} for item in top_orders]

with open('./test_topologicalorder/topologicalorder_GTlabel_6-10_nodes.json', 'w') as json_file:
    json.dump(topological_order_gt_3to5_nodes, json_file, indent=4)

[{'num': 1, 'pairs': ['1 < 4', '1 < 5', '1 < 6']}, {'num': 2, 'pairs': ['2 < 4', '2 < 5', '2 < 6']}, {'num': 3, 'pairs': ['3 < 4', '3 < 5', '3 < 6']}, {'num': 4, 'pairs': ['4 < 5', '4 < 6']}, {'num': 5, 'pairs': ['5 < 6']}, {'num': 6, 'pairs': []}]
[{'num': 1, 'pairs': ['1 < 4', '1 < 5', '1 < 6']}, {'num': 2, 'pairs': ['2 < 4', '2 < 5', '2 < 6']}, {'num': 3, 'pairs': ['3 < 4', '3 < 5', '3 < 6']}, {'num': 4, 'pairs': []}, {'num': 5, 'pairs': []}, {'num': 6, 'pairs': []}]
[{'num': 1, 'pairs': ['1 < 4', '1 < 5', '1 < 6', '1 < 7']}, {'num': 2, 'pairs': ['2 < 4', '2 < 5', '2 < 6', '2 < 7']}, {'num': 3, 'pairs': ['3 < 4', '3 < 5', '3 < 6', '3 < 7']}, {'num': 4, 'pairs': ['4 < 5', '4 < 6', '4 < 7']}, {'num': 5, 'pairs': ['5 < 6', '5 < 7']}, {'num': 6, 'pairs': ['6 < 7']}, {'num': 7, 'pairs': []}]
[{'num': 1, 'pairs': ['1 < 4', '1 < 5', '1 < 6', '1 < 7']}, {'num': 2, 'pairs': ['2 < 4', '2 < 5', '2 < 6', '2 < 7']}, {'num': 3, 'pairs': ['3 < 4', '3 < 5', '3 < 6', '3 < 7']}, {'num': 4, 'pairs': [

### ->Check the generated topological orders manually before proceed further

### Generate Data Dependency Management

In [34]:
"""
Prepare selected function list to feed into function of data dependency management
"""
def read_apiinfo(filename):
    api_info = []
    with open(filename, 'r') as file:
        for line in file:
            api_info.append(json.loads(line))

    return api_info

api_info = read_apiinfo(filepath_apiinfo)
top_orders = read_json_to_dict(filename_topologiacal_6to10)
data = read_json_to_dict(filename_tasklist_6to10)

# Loop over both lists and add 'topological_order' to the corresponding item in data
for i in range(len(top_orders)):
    # Assuming the length of top_orders and data is the same
    data[i]['topological_order'] = top_orders[i]['topological_order']

extracted_api_names = [
    {
        'id': item['id'],
        'task_list': item["task_list"],
        'selected_apis': item['selected_apis']
    }
    for item in data
]

all_task_info = []
# Concatenate by same index
for i in range(len(extracted_api_names)):
    concatenated_list = [[task, api] for task, api in zip(extracted_api_names[i]["task_list"], extracted_api_names[i]["selected_apis"])]
    combined_dicts = [{**dict1, **dict2} for dict1, dict2 in concatenated_list]
    all_task_info.append(combined_dicts)

# Extract all api names from api repositories
all_api_names = [item["name"] for item in api_info]

# Retrieve apis' infomation
for i in range(len(all_task_info)):
    selected_functions = []
    for task in all_task_info[i]:
        if task["name"] in all_api_names:
            for func in api_info: 
                if task["name"] == func["name"]:
                    selected_func = func.copy()  # Make a copy to avoid mutating the original
                    selected_func["task_num"] = int(task["task_number"])
                    selected_func["task_description"] = task["task_description"]
                    selected_functions.append(selected_func)
        data[i]["selected_functions"] = selected_functions


In [35]:
"""
Dependency Management
"""
from utils.data_dependency import confirm_dependency
from utils.schemas.workflow import TaskOutputDescription, DependentParams

In [36]:
"""
For classification of data dependency 
"""
for i in range(len(data)):
    selected_functions, user_inputs, depended_params = confirm_dependency(data[i]["topological_order"], data[i]["selected_functions"])
    func_list = []
    for api in selected_functions:
        all_params = [component['name'] for component in api["input_parameters_with_datatype"]]
        depended_params = [list(item.keys())[0] for item in api["depended_params"]]
        user_inputs = [param for param in all_params if param not in depended_params]
        func_list.append({"name": api["name"], "all_params": all_params, "user_input": user_inputs, "dependent_params": depended_params})
    data[i]["param_dependency_management"] = func_list

In [None]:
"""
Save Data Dependency Management test data
- classification of user_input and dependent_param
- correctness of dependednt_param 
"""
dd_data = [
    {
        'id': item['id'],
        "number_of_node": len(item["task_list"]),
        'task_list': item["task_list"],
        "topological_order": item["topological_order"],
        'selected_apis': [{
            "name": api["name"], 
            "input_params": api["input_parameters_with_datatype"],
            "dependencies": api["dependencies"],
            "depended_params": api["depended_params"],
            } for api in item['selected_functions']],
        'param_dependency_management': item["param_dependency_management"]
    }
    for item in data
]

with open('./test_datadependency/datadep_GTlabel_6-10_nodes.json', 'w') as json_file:
    json.dump(dd_data, json_file, indent=4)
