In [1]:
from common import *

In [2]:
# Build the ontology graph that the lookup helpers in this notebook query for
# component implementations, parameter names and input/output classes.
# `get_ontology_graph` is not defined in this notebook — presumably it comes from
# the `from common import *` star import; verify there.
ontology = get_ontology_graph()

In [4]:
# Load the workflow description into its own graph, separate from `ontology`.
# The path is relative to the notebook's working directory.
graph = Graph()
# Left as the last expression on purpose: the cell displays the value returned by parse().
graph.parse(r'./w1.ttl')

<Graph identifier=N58bfe7a6d57f4156ab1905ae348c3d16 (<class 'rdflib.graph.Graph'>)>

In [10]:
# Debug aid: dump every predicate/object pair attached to one hard-coded workflow step.
inspected_step = dw['workflow_0_DescriptionIntent_a7dde3bb_f894_4db2_bc1b_677d226ed568-step_0_implementation_train_test_split']
for predicate, value in graph.predicate_objects(inspected_step):
    print(predicate, value)

http://www.w3.org/1999/02/22-rdf-syntax-ns#type https://diviloper.dev/ontology#Step
https://diviloper.dev/ontology#followedBy https://diviloper.dev/ontology/Workflow#workflow_0_DescriptionIntent_a7dde3bb_f894_4db2_bc1b_677d226ed568-step_1_implementation_min_max_scaling
https://diviloper.dev/ontology#followedBy https://diviloper.dev/ontology/Workflow#workflow_0_DescriptionIntent_a7dde3bb_f894_4db2_bc1b_677d226ed568-step_2_implementation_min_max_scaling_applier
https://diviloper.dev/ontology#hasInput nc5e866e8da8a4742be72ecb39ccb496fb8
https://diviloper.dev/ontology#hasOutput nc5e866e8da8a4742be72ecb39ccb496fb9
https://diviloper.dev/ontology#hasOutput nc5e866e8da8a4742be72ecb39ccb496fb10
https://diviloper.dev/ontology#hasParameterValue nc5e866e8da8a4742be72ecb39ccb496fb11
https://diviloper.dev/ontology#hasParameterValue nc5e866e8da8a4742be72ecb39ccb496fb12
https://diviloper.dev/ontology#hasParameterValue nc5e866e8da8a4742be72ecb39ccb496fb13
https://diviloper.dev/ontology#has_position 0
h

In [6]:
# Collect the workflow steps, ordered by their `dt:order` value, together with the
# component each step runs and that component's implementation.
#
# The projection must name `?task`: that is the variable the pattern binds and the
# key the script-generation cell reads (`entry['task']`). The previous projection
# listed `?step`, which is never bound by the pattern.
#
# NOTE(review): the recorded output of this cell is `[]`. The step dumped earlier in
# this notebook shows predicates such as `followedBy` / `has_position`, not
# `order` / `runs` — confirm the predicate names (and the `dtbox` namespace) match
# the graph loaded from w1.ttl.
workflow_tasks = graph.query(f"""
    PREFIX dt: <{dtbox}>
    SELECT ?task ?component ?implementation
    WHERE {{
        ?task a dt:Step ;
            dt:order ?order ;
            dt:runs ?component .
        ?component a dt:Component ;
            dt:hasImplementation ?implementation .
    }}
    ORDER BY ?order
""").bindings
workflow_tasks

[]

In [6]:
def get_implementation(component):
    """Look up the python implementation of a component in the ontology graph.

    Args:
        component: IRI of the component; interpolated into the SPARQL query as
            ``<component>``.

    Returns:
        The first result binding of the query, exposing ``implementation``,
        ``module`` and ``function``.

    Raises:
        IndexError: if the ontology has no implementation for ``component`` whose
            ``big_w:implementationLanguage`` is ``"python"`` with both a module and
            a function. (Same exception type as before, now with an explanatory
            message instead of a bare "list index out of range".)
    """
    # Only the big_w prefix is used by the pattern, so it is the only one declared.
    matches = ontology.query(f"""
        PREFIX big_w: <{bigowl_workflows}>
        SELECT ?implementation ?module ?function
        WHERE {{
            <{component}> big_w:hasImplementation ?implementation .
            ?implementation big_w:implementationLanguage "python" ;
                big_w:module ?module ;
                big_w:function ?function .
        }}
    """).bindings
    if not matches:
        raise IndexError(f'No python implementation found in the ontology for component <{component}>')
    return matches[0]


def get_previous_tasks(task):
    """Return the bindings of every task in the workflow graph connected to ``task``.

    Each binding exposes ``previousTask``, a subject ``s`` for which the workflow
    graph holds ``s big_w:connectedTo <task>``.
    """
    sparql = f"""
        PREFIX big_w: <{bigowl_workflows}>
        PREFIX do: <{do}>
        SELECT ?previousTask
        WHERE {{
            ?previousTask big_w:connectedTo <{task}> .
        }}
    """
    predecessors = graph.query(sparql)
    return predecessors.bindings


def get_parameters(task):
    """Return the parameter/value bindings attached to ``task`` in the workflow graph.

    Follows ``do:hasParameterValue`` from the task to each parameter-value node and
    reads that node's ``do:forParameter`` and ``do:hasValue``. Each binding exposes
    ``parameter`` and ``value``.
    """
    sparql = f"""
        PREFIX big_w: <{bigowl_workflows}>
        PREFIX do: <{do}>
        SELECT ?parameter ?value
        WHERE {{
            <{task}> do:hasParameterValue ?parameterValue .
            ?parameterValue do:forParameter ?parameter ;
                do:hasValue ?value .
        }}
    """
    assignments = graph.query(sparql)
    return assignments.bindings


def get_parameter_name(parameter):
    """Return the ``hasName`` value (BIGOWL workflows namespace) of ``parameter`` from the ontology."""
    name_predicate = bigowl_workflows.hasName
    return ontology.value(subject=parameter, predicate=name_predicate)


def get_inputs(component):
    """Return the input classes the ontology declares for ``component``.

    Each binding exposes ``input``, an object of ``big_w:specifiesInputClass``.
    """
    sparql = f"""
        PREFIX big_w: <{bigowl_workflows}>
        SELECT ?input
        WHERE {{
            <{component}> big_w:specifiesInputClass ?input .
        }}
    """
    declared_inputs = ontology.query(sparql)
    return declared_inputs.bindings


def get_outputs(component):
    """Return the output classes the ontology declares for ``component``.

    Each binding exposes ``output``, an object of ``big_w:specifiesOutputClass``.
    """
    sparql = f"""
        PREFIX big_w: <{bigowl_workflows}>
        SELECT ?output
        WHERE {{
            <{component}> big_w:specifiesOutputClass ?output .
        }}
    """
    declared_outputs = ontology.query(sparql)
    return declared_outputs.bindings


def get_python_literal(literal):
    """Convert a literal into something that can be pasted into generated Python code.

    Args:
        literal: any object exposing ``toPython()``.

    Returns:
        For string values, a ``str`` holding valid Python source for that string
        (quotes, backslashes and newlines are escaped). For every other value, the
        object returned by ``toPython()`` unchanged, so callers can format it
        themselves.
    """
    representation = literal.toPython()
    if isinstance(representation, str):
        # The previous r"..." wrapping produced invalid source for strings that
        # contain a double quote, end with a backslash or span several lines.
        # str() first: a str subclass may carry its own repr that is not a plain
        # string literal.
        return repr(str(representation))
    return representation

In [19]:
# Turn the workflow steps into a flat Python script: one import line per distinct
# implementation function and one call statement per task, wired together through
# the variables produced by the tasks connected to it.
imports = []  # 'from <module> import <function>' lines, de-duplicated, in first-use order
script = []  # one generated call statement per task, in workflow order

outputs = {}  # task -> [(name, type)*]

for entry in workflow_tasks:
    task = entry['task']
    component = entry['component']
    previous_tasks = get_previous_tasks(task)
    implementation = get_implementation(component)
    parameters = get_parameters(task)
    task_input_types = get_inputs(component)
    task_output_types = get_outputs(component)

    task_python_var = task.split('#')[-1]

    # One variable per declared output class, named '<output class>_<task>'.
    # Every character that cannot appear inside a Python identifier ('-', '/', '.',
    # ':' ...) becomes '_', so the generated script stays syntactically valid.
    task_outputs = [
        (
            ''.join(
                ch if ('_' + ch).isidentifier() else '_'
                for ch in out['output'].split('#')[-1].lower() + '_' + task_python_var
            ),
            out['output'],
        )
        for out in task_output_types
    ]
    outputs[task] = task_outputs

    # Map each output type of the connected previous tasks to the variable holding it.
    previous_outputs = {
        ptype: pname
        for previous in previous_tasks
        for pname, ptype in outputs[previous['previousTask']]
    }
    # Raises KeyError when no previous task produces a required input type.
    task_inputs = [previous_outputs[ptype['input']] for ptype in task_input_types]

    # Positional inputs first, then keyword parameters; joining a single list avoids
    # a dangling ', ' when a task has inputs but no parameters.
    arguments = list(task_inputs)
    arguments.extend(
        f'{get_parameter_name(param["parameter"])}={get_python_literal(param["value"])}'
        for param in parameters
    )
    call_string = f'{implementation["function"]}({", ".join(arguments)})'
    if task_outputs:
        # Only emit an assignment when there is something to assign to; a bare
        # ' = f(...)' would be a syntax error in the generated script.
        result_string = ', '.join(name for name, _ in task_outputs)
        call_string = f'{result_string} = {call_string}'

    import_string = f'from {implementation["module"]} import {implementation["function"]}'
    if import_string not in imports:
        imports.append(import_string)
    script.append(call_string)

pipeline_script = '\n'.join(imports) + '\n\n' + '\n'.join(script)

In [20]:
# Persist the generated script. Python reads source files as UTF-8 by default, so
# write it as UTF-8 explicitly instead of relying on the platform's locale encoding
# (string parameters may contain non-ASCII characters).
with open('pipeline.py', 'w', encoding='utf-8') as f:
    f.write(pipeline_script)

In [130]:
# Debug aid: print every `hasImplementation` triple recorded in the ontology.
has_implementation = bigowl_workflows.hasImplementation
for component, implementation in ontology.subject_objects(has_implementation):
    print(component, has_implementation, implementation)

https://diviloper.dev/ontology#KMeansComponent https://w3id.org/BIGOWLWorkflows/hasImplementation https://diviloper.dev/ontology#SklearnKMeans
https://diviloper.dev/ontology#Import_CSV https://w3id.org/BIGOWLWorkflows/hasImplementation https://diviloper.dev/ontology#PandasReadCSV


In [132]:
import pandas as pd

# Read the penguins CSV (path is relative to the notebook's working directory)
# and preview the first rows.
penguins_csv = '../dataset_annotator/penguins.csv'
dataset = pd.read_csv(penguins_csv)
dataset.head()

Unnamed: 0,species,island,culmen_length_mm,culmen_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,FEMALE
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,FEMALE
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,FEMALE


In [134]:
# Keep only the numeric columns, then drop every row that still has a missing value.
# NOTE(review): the preview shows `Unnamed: 0` surviving as a numeric column; it
# looks like a row index saved into the CSV — confirm whether it should be kept.
numeric_only = dataset.select_dtypes(include=['number'])
dataset = numeric_only.dropna()
dataset.head()

Unnamed: 0,culmen_length_mm,culmen_depth_mm,flipper_length_mm,body_mass_g
0,39.1,18.7,181.0,3750.0
1,39.5,17.4,186.0,3800.0
2,40.3,18.0,195.0,3250.0
4,36.7,19.3,193.0,3450.0
5,39.3,20.6,190.0,3650.0
