In [2]:
import subprocess
from typing import Dict, List, Any
from component_mapping import COMPONENT_MAPPING
from demo_definition import FLOW_DEFINITION
from pprint import pprint

# Import statements emitted at the top of every generated script, in this order.
# Kept as a list of source lines (not real imports) because they are written
# into the generated text rather than executed here.
BASE_IMPORTS_FOR_CODE = [
    "from coai_datasets.dataset_manager import DatasetManager",
    "from coai_datasets.standard import *",
    "from config.config import Config",
    "from evaluation.metrics import BLEUMetric, Rouge1Metric",
    "from evaluation import *",
    "from exporter.EvalExporter import EvalExporter",
    "from models.openai_model import OpenAIModel",
]

def run_format(line, variables):
    """Fill the ``{placeholders}`` of a template line from ``variables``.

    Only entries whose key occurs somewhere in ``line`` (plain substring
    match) are offered to ``str.format_map``.

    Args:
        line: Template string using ``str.format`` placeholder syntax.
        variables: Mapping of placeholder name to replacement value.

    Returns:
        The formatted line, or ``None`` when the template references a
        placeholder that ``variables`` does not provide.
    """
    try:
        usable = {}
        for name, value in variables.items():
            # Substring test, not placeholder parsing: a key is kept as soon
            # as its text appears anywhere in the template.
            if name in line:
                usable[name] = value
        return line.format_map(usable)
    except KeyError:
        # A placeholder had no matching variable; signal "not formattable".
        return None

def group_edges(edges):
    """Group flow edges by target component and annotate each with variable names.

    Every edge is copied, its ``source`` and ``target`` ids are stripped of
    ``-`` (the same normalisation used for generated variable names), and the
    copy is tagged according to its handles:

    * ``dataset``   - source id, when the source handle mentions "dataset"
    * ``exporter``  - target id, when the target handle mentions "exporter";
      such an edge also inherits ``dataset`` from the edges already grouped
      under its source (the last one that has a dataset wins)
    * ``framework`` - source id, when the source handle mentions "framework"
    * ``model``     - currently always the placeholder ``"dummy"``

    Edges are processed in order, so an exporter edge only sees upstream
    edges that appear before it in ``edges``.

    Args:
        edges: Iterable of edge dicts with at least ``source`` and ``target``;
            ``sourceHandle`` / ``targetHandle`` may be missing or ``None``.

    Returns:
        Dict mapping each normalised target id to the list of annotated edge
        copies pointing at it. The input dicts are left unmodified.
    """
    grouped_data = {}
    for original_edge in edges:
        # Shallow copy: the annotations below must not leak into the caller's
        # flow definition.
        item = dict(original_edge)
        item["target"] = item["target"].replace('-', '')
        item["source"] = item["source"].replace('-', '')
        target = item["target"]

        # A handle can be absent or null; treat both as "matches nothing".
        source_handle = item.get("sourceHandle") or ""
        target_handle = item.get("targetHandle") or ""

        if "dataset" in source_handle:
            item["dataset"] = item["source"]
        if "model" in source_handle:
            item["model"] = item["source"]
        if "exporter" in target_handle:
            item["exporter"] = item["target"]
            # The exporter needs the dataset that fed its source component.
            # The source may have no grouped inputs yet, and an input edge may
            # not carry a dataset, so both lookups are tolerant.
            for upstream_edge in grouped_data.get(item["source"], []):
                if "dataset" in upstream_edge:
                    item["dataset"] = upstream_edge["dataset"]
        if "framework" in source_handle:
            item["framework"] = item["source"]
        # dummy for now: overrides any model taken from the source handle
        item["model"] = "dummy"

        grouped_data.setdefault(target, []).append(item)
    return grouped_data


def generate_python_script(components: Dict[Any,Any]) -> str:
    """Render a flow definition into the source text of a Python script.

    The script starts with ``BASE_IMPORTS_FOR_CODE``. It is then built in two
    phases:

    1. Creation - for every node, the ``creation_lines`` templates of its
       ``COMPONENT_MAPPING`` entry are formatted with the node's field values
       (plus ``id`` when the mapping is flagged ``instance``).
    2. Execution - for every node whose mapping has ``execution_lines``, the
       templates are formatted once per grouped edge. A rendering is kept only
       if more than two of its lines could be formatted.

    NOTE(review): node ids with ``-`` removed are used as variable names in
    the generated code; an id whose first character is a digit produces an
    invalid Python identifier. ``group_edges`` derives the same names, so
    changing the naming scheme has to be done in both places together.

    Args:
        components: Flow definition with a ``"nodes"`` list and an ``"edges"``
            list. A node carries its component type either in ``id`` or, when
            a ``data`` dict is present, in ``data["id"]`` (``id`` is then the
            unique instance id).

    Returns:
        The generated script, one line per entry, joined with newlines.

    Raises:
        KeyError: If a component type is missing from ``COMPONENT_MAPPING`` or
            an edge targets an id that is not among the nodes.
    """
    # Copy the template list: appending to BASE_IMPORTS_FOR_CODE itself would
    # make every later call start with the lines generated by earlier calls.
    script_lines = list(BASE_IMPORTS_FOR_CODE)
    config_values = {}

    # --- Phase 1: instantiate every component -----------------------------
    for component in components["nodes"]:
        # Remember the unique id first; below `component` may be swapped for
        # its `data` dict, whose id is the (non-unique) component type.
        variable_name = component["id"].replace('-', '')
        config_values[variable_name] = {}
        if "data" in component:
            component = component["data"]
        mapping = COMPONENT_MAPPING[component["id"]]

        # Collect field_name -> field_value for this instance.
        for field in component.get("fields") or []:
            config_values[variable_name][field["name"]] = field["value"]

        creation_lines = mapping.get("creation_lines") or []
        if creation_lines and mapping.get("instance"):
            config_values[variable_name]["id"] = variable_name
        for template in creation_lines:
            line = run_format(template, config_values[variable_name])
            # run_format returns None for a template it cannot fill; skip it
            # (as the execution phase does) instead of breaking the join below.
            if line is not None:
                script_lines.append(line)

    # --- Phase 2: wire components together --------------------------------
    # Grouping by target gives all inputs of a component in one place.
    edges_dict = group_edges(components["edges"])
    for component in components["nodes"]:
        definition = component["data"] if "data" in component else component
        mapping = COMPONENT_MAPPING[definition["id"]]
        execution_lines = mapping.get("execution_lines")
        if not execution_lines:
            continue

        # Every target's edges are tried, not only this component's own.
        # Targets the templates do not apply to format at most two lines and
        # are filtered out by the length check below.
        for variable, edges in edges_dict.items():
            for edge in edges:
                # The merge is persistent: keys from earlier edges of the same
                # target remain available to later ones.
                config_values[variable].update(edge)
                formatted = []
                for template in execution_lines:
                    line = run_format(template, config_values[variable])
                    if line:
                        formatted.append(line)
                # Decide once per edge, after all templates were tried, so a
                # rendering with more than three lines is emitted exactly once.
                if len(formatted) > 2:
                    script_lines.extend(formatted)

    return "\n".join(script_lines)

# Render the imported demo flow (FLOW_DEFINITION) and print the generated script.
print(generate_python_script(components=FLOW_DEFINITION))



from coai_datasets.dataset_manager import DatasetManager
from coai_datasets.standard.mlsum_dataset import MLSUMDataset
from coai_datasets.standard.mmlu_dataset import MMLUDataset
from config.config import Config
from evaluation.metrics import BLEUMetric, Rouge1Metric
from evaluation.MMLUEvaluationFramework import MMLUFramework
from evaluation.SUMEvalFramework import SUMEvalFramework
from evaluation.MTBenchEvaluationFramework import MTBenchEvaluationFramework
from exporter.EvalExporter import EvalExporter
from models.openai_model import OpenAIModel
Config.set_user_id('github|149702207')
Config.set_project_id('5cc73596-ad33-4659-afbd-b1972826e79b')
Config.set_tracing_key('dummy')
c6ab1f7378ed4d62a3dce1bd5ed61fa0 = MLSUMDataset()
c6ab1f7378ed4d62a3dce1bd5ed61fa0.load(nb_items=2)
f20527638ea94753b9c6f17232fc7c9b = COAIDataset()
f20527638ea94753b9c6f17232fc7c9b.load(dataset_id='331d506e-d7d0-40e9-b2e0-2a67d626af84')
80d67ff67c1d412c8cd6b2eefeea3128 = SUMEvalFramework(device='cuda')
17361a93

In [None]:
# Sample edge list for experimenting with the grouping step: two datasets feed
# two framework nodes, and both frameworks feed the same exporter node.
edges = [
    {
        "source": "75b51ac4-ecac-48ea-ad8b-5c43e93350cf",
        "sourceHandle": "output-mlsum_dataset-0",
        "target": "58c9b4f3-8536-4a23-8ecf-affbc26bd4a8",
        "targetHandle": "input-sum_eval_framework-1",
        "id": "reactflow__edge-75b51ac4-ecac-48ea-ad8b-5c43e93350cfoutput-mlsum_dataset-0-58c9b4f3-8536-4a23-8ecf-affbc26bd4a8input-sum_eval_framework-1",
    },
    {
        "source": "8773cbd6-f85b-4749-8539-e230592dd987",
        "sourceHandle": "output-custom_dataset-0",
        "target": "ada41bce-dfa6-43e3-8035-c89b3bb4dbdb",
        "targetHandle": "input-sum_eval_framework-1",
        "id": "reactflow__edge-8773cbd6-f85b-4749-8539-e230592dd987output-custom_dataset-0-ada41bce-dfa6-43e3-8035-c89b3bb4dbdbinput-sum_eval_framework-1",
    },
    {
        "source": "ada41bce-dfa6-43e3-8035-c89b3bb4dbdb",
        "sourceHandle": "output-sum_eval_framework-0",
        "target": "8bdc917c-bc19-414b-86cc-0cd612bca657",
        "targetHandle": "input-eval_exporter-0",
        "id": "reactflow__edge-ada41bce-dfa6-43e3-8035-c89b3bb4dbdboutput-sum_eval_framework-0-8bdc917c-bc19-414b-86cc-0cd612bca657input-eval_exporter-0",
    },
    {
        "source": "58c9b4f3-8536-4a23-8ecf-affbc26bd4a8",
        "sourceHandle": "output-sum_eval_framework-0",
        "target": "8bdc917c-bc19-414b-86cc-0cd612bca657",
        "targetHandle": "input-eval_exporter-0",
        "id": "reactflow__edge-58c9b4f3-8536-4a23-8ecf-affbc26bd4a8output-sum_eval_framework-0-8bdc917c-bc19-414b-86cc-0cd612bca657input-eval_exporter-0",
    },
]

# Bucket the edges by the raw (un-normalised) id of the component they point to.
grouped_data = {}
for item in edges:
    target = item["target"]
    # setdefault creates the bucket on first sight of a target.
    grouped_data.setdefault(target, []).append(item)

pprint(grouped_data)