In [1]:
import os 
from collections import defaultdict


In [2]:
def extract_steps(line):
    """Record one ``program(...)`` fact in the module-level ``steps`` dict.

    Fact layout:
    ``program(program_id, program_name, qualified_program_name,
    begin_annotation_id, end_annotation_id).``

    Args:
        line: A single fact line containing "(" and ")".

    Side effects:
        Sets ``steps[program_id]`` to a dict with the keys ``program_name``
        and ``qualified_program_name``; surrounding whitespace and single
        quotes are removed from both values.

    Raises:
        ValueError: If the line has no "(" or ")".
        IndexError: If fewer than three comma-separated fields are present.
    """
    # Keep only the argument list between the first "(" and the first ")".
    fields = line[line.index("(") + 1:line.index(")")].split(',')

    # Strip the id: the ids used to index ``steps`` elsewhere in this notebook
    # (e.g. the child ids stored by has_subprogram) are whitespace-stripped.
    steps[fields[0].strip()] = {
        "program_name": fields[1].strip().strip("'"),
        "qualified_program_name": fields[2].strip().strip("'"),
    }
    return

In [3]:
def extract_workflows(line):
    """Record one ``workflow(...)`` fact in the module-level ``workflow`` dict.

    The first field of the fact is taken as the program id; the program's
    name is looked up in ``steps`` and stored under that id.

    Args:
        line: A single fact line containing "(" and ")".

    Raises:
        ValueError: If the line has no "(" or ")".
        KeyError: If the program id has not been recorded in ``steps`` yet.
    """
    arguments = line[line.index("(") + 1:line.index(")")]
    # Split before indexing: indexing the raw string would return only its
    # first character and truncate multi-digit ids such as "12" to "1".
    program_id = arguments.split(',')[0].strip()
    workflow[program_id] = steps[program_id]['program_name']
    return

In [4]:
def extract_ports(line):
    """Store one ``port(...)`` fact in the module-level ``ports`` dict.

    Fact layout:
    ``port(port_id, port_type, port_name, qualified_port_name,
    port_annotation_id, data_id).``

    The entry is keyed by the integer port id. ``port_type`` is upper-cased;
    every stored value has surrounding whitespace and single quotes removed.
    The annotation id (fifth field) is not stored.

    Args:
        line: A single fact line containing "(" and ")".
    """
    start = line.index("(") + 1
    stop = line.index(")")
    fields = [field.strip() for field in line[start:stop].split(',')]

    # Build the record before converting the id, so a short line still fails
    # on the missing field first.
    record = {
        "port_type": fields[1].strip("'").upper(),
        "port_name": fields[2].strip("'"),
        "qualified_port_name": fields[3].strip("'"),
        "data_id": fields[5].strip("'"),
    }
    ports[int(fields[0])] = record
    return

In [5]:

def input_ports(line, in_ports):
    """Append one ``has_in_port(step_id, port_id)`` fact to ``in_ports``.

    Args:
        line: A single fact line containing "(" and ")".
        in_ports: Mapping of step id to a list of ``(step_id, port_id)``
            string tuples; it must create or already hold the list for the
            step id (the notebook passes a ``defaultdict(list)``).

    Raises:
        ValueError: If the line has no "(" or ")".
        IndexError: If fewer than two comma-separated fields are present.
    """
    fields = line[line.index("(") + 1:line.index(")")].split(',')
    step_id = fields[0].strip()
    port_id = fields[1].strip()

    # Key by the stripped id: get_in_ports looks entries up with
    # program_id.strip(), so an unstripped key would never be found.
    in_ports[step_id].append((step_id, port_id))
    return

In [6]:
def output_ports(line, out_ports):
    """Append one ``has_out_port(...)`` fact to ``out_ports``.

    The first field is used as the step id and the second as the port id.

    Args:
        line: A single fact line containing "(" and ")".
        out_ports: Mapping of step id to a list of ``(step_id, port_id)``
            string tuples; it must create or already hold the list for the
            step id (the notebook passes a ``defaultdict(list)``).

    Raises:
        ValueError: If the line has no "(" or ")".
        IndexError: If fewer than two comma-separated fields are present.
    """
    fields = line[line.index("(") + 1:line.index(")")].split(',')
    step_id = fields[0].strip()
    port_id = fields[1].strip()

    # Key by the stripped id: get_out_ports looks entries up with
    # program_id.strip(), so an unstripped key would never be found.
    out_ports[step_id].append((step_id, port_id))
    return

In [7]:
def has_subprogram(line):
    """Record one ``has_subprogram(...)`` fact in ``sub_programs``.

    The first field is used as the key (presumably the enclosing program id)
    and the second field is appended to that key's list of sub-program ids.

    Args:
        line: A single fact line containing "(" and ")".

    Raises:
        ValueError: If the line has no "(" or ")".
        IndexError: If fewer than two comma-separated fields are present.
    """
    fields = line[line.index("(") + 1:line.index(")")].split(',')
    # Strip both ids so keys and values share one normalised form.
    parent_id = fields[0].strip()
    child_id = fields[1].strip()

    # setdefault creates the list on first sight of the parent.
    sub_programs.setdefault(parent_id, []).append(child_id)
    return

In [8]:
def port_alias(line):
    """Register an alternate name for a port in ``port_alt_name``.

    The first field is a port id, resolved to its ``port_name`` through the
    module-level ``ports`` dict; the second field (whitespace and single
    quotes removed) is stored as that name's alias.

    Args:
        line: A single fact line containing "(" and ")".
    """
    start = line.index("(") + 1
    stop = line.index(")")
    fields = line[start:stop].split(',')

    # The port must already be known; an unknown id raises KeyError.
    original_name = ports[int(fields[0].strip())]["port_name"]
    alias = fields[1].strip().strip("'")
    port_alt_name[original_name] = alias
    return

In [9]:
# Module-level tables filled while reading the facts file:
#   steps         program id -> {"program_name", "qualified_program_name"}
#   ports         int port id -> {"port_type", "port_name",
#                                 "qualified_port_name", "data_id"}
#   workflow      program id -> program name
#   in_ports      step id -> [(step_id, port_id), ...]
#   out_ports     step id -> [(step_id, port_id), ...]
#   sub_programs  program id -> [sub-program id, ...]
#   port_alt_name port name -> alias
steps = {}
ports = {}
workflow = {}
in_ports = defaultdict(list)
out_ports = defaultdict(list)
sub_programs = {}
port_alt_name = {}
cnt = 0  # not used by the code below; kept in case other cells read it
with open("models/exec_paleocar.P", "r") as yw_struct:
    # Iterate the file directly instead of calling readline() by hand.
    for line in yw_struct:
        # "% FACT" lines are schema comments. Skipping them once keeps every
        # branch consistent (the port branch used to test "% FACT:" only).
        if "% FACT" in line:
            continue

        # Order matters: "program(" is also a substring of "has_subprogram("
        # and "port(" of "has_in_port(" / "has_out_port(", hence the
        # explicit exclusions.
        if "program(" in line and "has_subprogram" not in line:
            extract_steps(line)
        elif "workflow(" in line:
            extract_workflows(line)
        elif "has_subprogram(" in line:
            has_subprogram(line)
        elif "port(" in line and "has_in_port(" not in line and "has_out_port" not in line:
            extract_ports(line)
        elif "has_in_port(" in line:
            input_ports(line, in_ports)
        elif "has_out_port(" in line:
            output_ports(line, out_ports)
        elif "port_alias(" in line:
            port_alias(line)

In [10]:
def get_in_ports(program_id):
    """Return the names of the input ports attached to a program.

    Args:
        program_id: Program id as a string; surrounding whitespace is ignored.

    Returns:
        A list of ``port_name`` values, one per ``(step_id, port_id)`` entry
        recorded in ``in_ports`` for this program, in recorded order.
    """
    links = in_ports[program_id.strip()]
    # Each link is (step_id, port_id); ``ports`` is keyed by integer port id.
    return [ports[int(link[1])]['port_name'] for link in links]

In [11]:
def get_out_ports(program_id):
    """Return the names of the output ports attached to a program.

    Args:
        program_id: Program id as a string; surrounding whitespace is ignored.

    Returns:
        A list of ``port_name`` values, one per ``(step_id, port_id)`` entry
        recorded in ``out_ports`` for this program, in recorded order.
    """
    links = out_ports[program_id.strip()]
    # Each link is (step_id, port_id); ``ports`` is keyed by integer port id.
    return [ports[int(link[1])]['port_name'] for link in links]

In [12]:
def create_file(filename, program_id):
    """Write a CWL ``CommandLineTool`` skeleton for one program.

    The file lists every input and output port of the program as a
    ``string``-typed parameter and ends with an empty ``baseCommand:`` entry.

    Args:
        filename: Path of the file to create (overwritten if it exists).
        program_id: Id of the program whose ports are listed.

    Side effects:
        Prints "No output ports" for every output port with an empty name.
    """
    # The shebang must be the very first line of the file to take effect, so
    # the header no longer starts with a blank line.
    header = (
        "#!/usr/bin/env cwl-runner\n"
        "\n"
        "\n"
        "cwlVersion: v1.0\n"
        "class: CommandLineTool\n"
    )

    input_buffer = 'inputs: \n'
    for p in get_in_ports(program_id):
        input_buffer += ' ' + p + ': \n' + '  type: string \n \n'

    output_buffer = 'outputs: \n'
    for p in get_out_ports(program_id):
        if p == '':
            print("No output ports")
        else:
            output_buffer += ' ' + p + ': \n' + '  type: string \n \n'

    # Assemble everything first so a lookup failure cannot leave a
    # half-written file behind; plain "w" suffices because nothing is read.
    with open(filename, "w") as writer:
        writer.write(header)
        writer.write(input_buffer)
        writer.write(output_buffer)
        writer.write("\n")
        writer.write("baseCommand: ")

    return

In [13]:
def create_cwl_files(program_id):
    """Create ``cwl_files/<program_name>.cwl`` for the given program.

    Args:
        program_id: Key into the module-level ``steps`` dict.

    Raises:
        KeyError: If ``program_id`` is not present in ``steps``.
    """
    filename = "cwl_files/" + steps[program_id]['program_name'] + ".cwl"
    # Make sure the output directory exists; on a fresh checkout open()
    # would otherwise fail with FileNotFoundError.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    create_file(filename, program_id)

    return

In [14]:
def get_wf_steps(workflowId):
    """Build the ``steps:`` section of a CWL workflow as text.

    One entry is emitted per sub-program of the workflow. Each entry names
    the tool file to run, wires every input either to the upstream source
    recorded in ``in_out_ports`` or, when none is recorded, to an input of
    the same name, and lists the step's outputs.

    Args:
        workflowId: Key into the module-level ``sub_programs`` dict.

    Returns:
        The YAML text of the section, starting with ``"steps: \\n"``.
    """
    wf_steps = "steps: \n"

    for s in sub_programs[workflowId]:
        step_name = steps[s]["program_name"]
        filename = 'cwl_files/' + step_name + '.cwl'

        # Every step name gets the same one-space indent. Previously only the
        # first step was indented, so later steps landed at column 0 and were
        # parsed as top-level keys instead of children of "steps:".
        step_buffer = ' ' + step_name + ': \n' + '  run: ' + filename + ' \n' + '  in: \n'
        for i in get_in_ports(s):
            # in_out_ports[name] is [consumer name, "<producer>/<port>"].
            source = in_out_ports[i][1] if i in in_out_ports else i
            step_buffer += '   ' + i + ': ' + source + '\n'
        step_buffer += "  out: [" + ' , '.join(get_out_ports(s)) + '] \n'

        wf_steps += step_buffer

    return wf_steps

In [15]:
# Write one CommandLineTool file for every sub-program of each workflow.
# A workflow id without an entry in sub_programs raises KeyError here.
for wf in workflow:
    for sp in sub_programs[wf]:
        create_cwl_files(sp)

In [16]:
# Map every port name that one step writes and some step reads to
# [consumer program name, "<producer program name>/<port name>"].
# If several producer/consumer pairs share a name, the last pair visited wins.
in_out_ports = {}
for sid in steps.keys():
    produced = set(get_out_ports(sid))
    # Walk the real step ids rather than range(0, len(steps) + 1): that
    # assumed ids are exactly 1..N and shadowed the builtin ``id``.
    for consumer_id in steps.keys():
        shared = produced.intersection(get_in_ports(consumer_id))
        for elem in shared:
            in_out_ports[elem] = [
                steps[consumer_id]["program_name"],
                steps[sid]["program_name"] + "/" + elem,
            ]
                

In [17]:
def port_name(port_name):
    """Return the alias registered for a port name, or the name itself.

    Args:
        port_name: Port name to look up in the module-level
            ``port_alt_name`` dict.

    Returns:
        The alias stored in ``port_alt_name`` when one exists, otherwise
        ``port_name`` unchanged.
    """
    # The body used to read an undefined name ``p`` instead of the parameter,
    # raising NameError (or silently using an unrelated global ``p``).
    return port_alt_name.get(port_name, port_name)

In [18]:
def get_in_out_ports(program_id):
    """Resolve the first input port of a program to its source.

    Returns the ``"<producer>/<port>"`` entry recorded in ``in_out_ports``
    for the first input port, or the port name itself when no entry exists.
    Returns ``None`` when the program has no input ports.

    NOTE(review): only the first input port is ever examined; confirm that
    this is intended.
    """
    first = next(iter(get_in_ports(program_id)), None)
    if first is None:
        return None
    if first in in_out_ports:
        return in_out_ports[first][1]
    return first

In [19]:
def link_wf_ports(workflow_id):
    """List the sources of a workflow's outputs as ``"<step>/<port>"``.

    A sub-program output is linked when its port name also appears among the
    workflow's own output port names.

    Args:
        workflow_id: Id of the workflow; key into ``sub_programs``.

    Returns:
        A list of ``"<program_name>/<port_name>"`` strings, ordered by
        sub-program and then by that sub-program's output port order.
    """
    workflow_outputs = set(get_out_ports(workflow_id))

    wf_out_ports = []
    for sp in sub_programs[workflow_id]:
        # dict.fromkeys de-duplicates while keeping a deterministic order
        # (iterating a set of strings varies between runs).
        for p in dict.fromkeys(get_out_ports(sp)):
            if p in workflow_outputs:
                # Use the producing sub-program's own name; this was
                # hard-coded to steps['2'], mislabelling every other step.
                wf_out_ports.append(steps[sp]['program_name'] + '/' + p)
    return wf_out_ports

In [20]:
def workflow_detail(program_id):
    """Assemble the four text sections of a CWL ``Workflow`` document.

    Args:
        program_id: Id of the workflow program.

    Returns:
        A ``(header, inputs, outputs, steps)`` tuple of strings, meant to be
        written to a file in that order.
    """
    # The shebang must be the very first line of the file to take effect, so
    # the header no longer starts with a blank line.
    header = (
        "#!/usr/bin/env cwl-runner\n"
        "cwlVersion: v1.0\n"
        "class: Workflow\n"
        "requirements:\n"
        "    - class: SubworkflowFeatureRequirement\n"
    )

    input_buffer = 'inputs: \n'
    for p in get_in_ports(program_id):
        # The old ``.strip('')`` call removed nothing and has been dropped.
        input_buffer += '  ' + p + ': \n' + '   type: string \n'

    output_buffer = 'outputs: \n'
    for port in link_wf_ports(program_id):
        # ``port`` is "<step>/<port name>"; the name part labels the output
        # and the full value is its outputSource.
        output_buffer += (
            '  ' + port.split('/')[1] + ': \n'
            + '   type: string \n   outputSource: ' + port + '\n'
        )

    wf_step = get_wf_steps(program_id)

    return header, input_buffer, output_buffer, wf_step

In [21]:
# Write the CWL workflow document for program '1' as wf_<program name>.cwl
# in the current working directory.
filename = ''.join(('wf_', steps['1']["program_name"], '.cwl'))

with open(filename, 'w+') as file_writer:
    hdr, inp_buff, out_buff, wf_step = workflow_detail('1')
    # Sections are written back to back in the order they are returned.
    file_writer.writelines((hdr, inp_buff, out_buff, wf_step))

In [None]:
# Display the (step_id, port_id) pairs recorded as outputs of program '1'.
out_ports['1']