In [1]:
import os
import re
from collections import defaultdict


### Functions for parsing the yw models file. 

In [2]:
### function extracting the programs.

def extract_steps(line):
    
    #% FACT: program(program_id, program_name, qualified_program_name, begin_annotation_id, end_annotation_id).
    #print("Extracting steps.")
    ## Extract data between "(" and ")".
    data = line[line.index("(")+1:line.index(")")]
    
    data = data.split(',')
    #print(data)
    steps[data[0]] = {
                      "program_name" :data[1].strip().strip("'"),
                      "qualified_program_name": data[2].strip().strip("'")
                    }
    
    #print(steps)
    return

In [3]:
### function extracting the workflow.

def extract_workflows(line):
    
    #print(line)
    data = line[line.index("(")+1:line.index(")")]
    workflow[data[0]] = steps[data[0]]['program_name']
    return 

In [4]:
### function extracting the ports

def extract_ports(line):
    #print("Extracting ports.")
    ## Extract data between "(" and ")".
    data = line[line.index("(")+1:line.index(")")]
    
    #% FACT: port(port_id, port_type, port_name, qualified_port_name, port_annotation_id, data_id).
    
    data = data.split(',')
    
    ports[data[0]] ={
                     "port_type": data[1].strip().strip("'").upper(),
                     "port_name": data[2].strip().strip("'"),
                     "qualified_port_name" : data[3].strip().strip("'"), 
                     "data_id":data[5].strip().strip("'")
                    }
    return 

In [5]:
### function extracting the input ports

def input_ports(line,in_ports):
    #print("Extracting Input ports.")
    
    ## Extract data between "(" and ")".
    ## % FACT: has_in_port(step_id, port_id).

    data = line[line.index("(")+1:line.index(")")]
    
    data = data.split(',')
    
    in_ports[data[0]].append((data[0].strip(), data[1].strip()))
    
    return

In [6]:
### function extracting the output ports 

def output_ports(line,out_ports): 
    #print("Extracting output ports.")
    ## Extract data between "(" and ")".
    data = line[line.index("(")+1:line.index(")")]
    data = data.split(',')
    out_ports[data[0]].append((data[0].strip(), data[1].strip()))
    #print(data)
    return

In [7]:
### function extracting the output ports 

def get_port_data(line): 
    #print("Extracting output ports.")
    ## Extract data between "(" and ")".
    data = line[line.index("(")+1:line.index(")")]
    data = data.split(',')
    port_data[data[0]]= { 
                          "data_name" : data[1].strip(), 
                          "qual_data_name" : data[2].strip()
                        } 
    #print(data)
    return

In [8]:
### function extracting the output ports 

def get_channel(line): 
    #print("Extracting output ports.")
    ## Extract data between "(" and ")".
    data = line[line.index("(")+1:line.index(")")]
    data = data.split(',')
    channel[data[0]]= data[1].strip()
    #print(data)
    return

In [9]:
### function extracting the output ports 

def get_port_connects_to_channel(line): 
     
    data = line[line.index("(")+1 : line.index(")")]
    data = data.split(',')
    port_connects_to_channel[data[0]]= data[1].strip()
    #print(data)
    return

In [10]:
def get_inflow_connections(line): 
     
    #print(line)
    data = line[line.index("(")+1:line.index(")")]
    data = data.split(',')
    inflow_conn[data[0]]= data[1].strip()
    #print(data)
    return

In [11]:
def get_outflow_connections(line): 
    #print("Extracting output ports.")
    ## Extract data between "(" and ")".
    data = line[line.index("(")+1:line.index(")")]
    data = data.split(',')
    outflow_conn[data[0]]= data[1].strip()
    #print(data)
    return 

In [12]:
### function extracting the subprograms 

def has_subprogram(line): 
    #print("Extracting output ports.")
    ## Extract data between "(" and ")".
    data = line[line.index("(")+1:line.index(")")]
    data = data.split(',')
    list1 = []
    #print(data[0],data[1])
    if data[0] in sub_programs.keys():
        list1 = sub_programs[data[0]]
        list1.append(data[1].strip())
        sub_programs[data[0]] = list1
    else:
        list1.append(data[1].strip())
        sub_programs[data[0]]= list1
    #print(data)
    return

In [13]:
### function extracting the port alias

def port_alias(line):
    data = line[line.index("(")+1:line.index(")")]
    data = data.split(',')
    port_id = data[0].strip()
    alias = data[1].strip().strip("'")
    
    qualified_name = ports[port_id]['qualified_port_name']
    port_name = ports[port_id]['port_name'] 
    
    port_alias_name[port_id] = alias
    
    #print(port_name, port_id,qualified_name)
    
    ### regex for splitting the string and getting qualified program name
    regex = re.compile(r'>|<')
    qname = regex.split(qualified_name)
    
    pname = qname[0].split('.')[-1] 
    
    if pname.find('-') > -1:
        port_alt_name[alias] = pname[:-1]+ '/' +port_name
    else:
        port_alt_name[alias] = pname + '/' +port_name
        
    return 



In [14]:
## Method for getting the input ports 
## for a specific program 

def get_in_ports(program_id):
    #print("inside in ports")
    input_ports =[]
    for s in in_ports[program_id]:
        #print(s)
        input_ports.append(ports[s[1]]['port_name'])
        #print(ports[int(s[1])]['port_name'])
    return input_ports

In [15]:
## Method for getting the output ports 
## for a specific program 

def get_out_ports(program_id):
    output_ports =[]
    for s in out_ports[program_id]:
        output_ports.append(ports[s[1]]['port_name'])
    return output_ports

In [16]:
## Method for create a cwl file 
## It has the required header and formatting

def create_file(filename, program_id):
    header = '''
#!/usr/bin/env cwl-runner


cwlVersion: v1.0
class: CommandLineTool
'''
    with open(filename, "w+") as writer:
        input_buffer = 'inputs: \n'
        for p in get_in_ports(program_id):
            input_buffer = input_buffer + ' '+ p + ': \n'+ '  type: string \n \n'

        #print(input_buffer)
        output_buffer = 'outputs: \n'
        output_buffer.strip()

        for p in get_out_ports(program_id):
            #print(len(p))
            if p =='' : 
                print("No output ports")
            else:
                output_buffer = output_buffer + ' '+ p + ': \n'+ '  type: string \n \n'

        writer.write(header)    
        writer.write(input_buffer)
        writer.write(output_buffer)
        writer.write("\n")
        writer.write("baseCommand: ")
    
    return

In [17]:
### This function passes file name 
### for creating cwl files. 

def create_cwl_files(program_id,wf_name): 
    pname= steps[program_id]['program_name']
    filename = "Examples/" + wf_name +'/' + pname + ".cwl"
    create_file(filename, program_id)
    
    return 

In [18]:
### Function for exrtacting the steps in a wf. 

def get_wf_steps(wf_id, wf_name):
    
    wf_steps = "steps: \n " 
    
    ## first get the sub-programs of a workflow: 
    for s in sub_programs[wf_id]:
        filename =  steps[s]["program_name"] + '.cwl'
        step_buffer = steps[s]["program_name"] + ': \n ' + ' run: ' + filename + ' \n ' + ' in: \n'
        
        for pid in in_ports[s]:
            pname, qual_pname= get_inports_subprograms(pid[1])
            step_buffer = step_buffer + '   ' + pname + ': ' + qual_pname + '\n' 
            
        step_buffer =  step_buffer + "  out: [" + ' , '.join(get_out_ports(s)) + '] \n ' 
        
        wf_steps = wf_steps + step_buffer
    
    return wf_steps

In [19]:
def get_qual_port_name(port_name):
    regex = re.compile(r'>|<')
    for port in sorted(ports):
        #print(ports[port]["port_name"])
        if ports[port]["port_name"] == port_name:
            
            temp = regex.split(ports[port]['qualified_port_name'])
            qname = temp[0].split('.')[-1]
            qname = qname[:-1]
            #print(qname)
            #print(steps['1']["program_name"])
            
            if qname != steps['1']["program_name"]:          
                 return qname

In [20]:
## Get the inports of the subprograms

def get_inports_subprograms(port_id):
    
    pname = ports[port_id]["port_name"]
    #print(pname)
    a = set(port_id).intersection(set(list(port_alias_name.keys())))
    if a:
        for e in a:
            #print(port_alias_name[e])
            if port_alias_name[e] in get_in_ports('1'):
                qualified_port = port_alias_name[e]
            else:
                qualified_port = get_qual_port_name(port_alias_name[e])
    
    elif pname in get_in_ports('1'):
        qualified_port = pname
    
    elif pname in in_out_ports.keys():
        qualified_port = in_out_ports[pname][1]
    
    return pname,qualified_port 

In [21]:
def get_wf_outports(port_id):
    
    pname = ports[port_id]["port_name"]   
    
    if pname in port_alt_name.keys():
        qname = port_alt_name[pname]
    else: 
        print(pname)
        qname = get_qual_port_name(pname) +'/'+pname
        
    
    return pname, qname

In [22]:
def get_in_out_ports():
    for sid in steps.keys(): 
        for id in range(0,len(steps)+1):
            a = set(get_in_ports(str(id))).intersection(set(get_out_ports(str(sid))))
            #print(a)
            if len(a) > 0 : 
                for elem in a: 
                    #print(steps[str(id)]["program_name"] , "\t\t", steps[sid]["program_name"]+"/"+ elem )
                    in_out_ports[elem] = [steps[str(id)]["program_name"], steps[sid]["program_name"]+"/"+ elem]
                

In [23]:
## function for getting the ports with the program name
def link_wf_ports(workflow_id):
    wf_out_ports=[]
    
    for sp in sub_programs[workflow_id]:
        for p in set(get_out_ports(workflow_id)).intersection(set(get_out_ports(sp))):
            wf_out_ports.append(steps[sp]['program_name']+ '/' + p)
    
    return wf_out_ports

In [24]:
def workflow_detail(program_id, wf_name):
    header = '''
#!/usr/bin/env cwl-runner
cwlVersion: v1.0
class: Workflow
requirements:
    - class: SubworkflowFeatureRequirement
'''
    input_buffer = 'inputs: \n'

    for p in get_in_ports(program_id):
        input_buffer = input_buffer.strip('') + '  '+ p + ': \n'+ '   type: string \n'
        
    output_buffer = 'outputs: \n'
    #out_wf_ports = link_wf_ports(program_id)
    
    for port in out_ports[program_id]:
        pname, qname = get_wf_outports(port[1])
        output_buffer = output_buffer + '  ' +  pname  + ': \n'+ '   type: string \n   outputSource: '+ qname +'\n'

    #print(output_buffer)
    #print(header.strip(''), input_buffer.strip(''), output_buffer.strip())
    #print(wf_name)
    wf_step = get_wf_steps(program_id, wf_name)
    
    #print(wf_step)
    return header, input_buffer, output_buffer, wf_step 

In [25]:
def create_wf_file(filename, program_id,wf_name):
    #filename = 'wf_' + steps['1']["program_name"] + '.cwl'

    with open(filename, 'w+') as file_writer:
        hdr, inp_buff, out_buff, wf_step = workflow_detail(program_id,wf_name)
        file_writer.write(hdr)
        file_writer.write(inp_buff)
        file_writer.write(out_buff)
        file_writer.write(wf_step)

In [26]:
def read_yw_model(filename):
    regex = re.compile(r'^%')
    chk_channel = re.compile(r'^ch')
    with open(filename, "r") as yw_struct:
        line = yw_struct.readline()
        while line:
            #print(line)
            comments = regex.match(line)
            if comments: 
                #print(line)
                line = yw_struct.readline()
            else:  
                if("program(" in line and "has_subprogram" not in line):
                    extract_steps(line)
                elif("workflow(" in line):
                    extract_workflows(line)            
                elif("has_subprogram(" in line):
                    has_subprogram(line)            
                elif("port(" in line  and "has_in_port(" not in line and "has_out_port" not in line):
                #    print(line)
                    extract_ports(line)
                elif("has_in_port(" in line):
                    input_ports(line,in_ports)
                elif("has_out_port(" in line ):
                    output_ports(line,out_ports)
                elif ("port_alias(" in line ): 
                    port_alias(line)
                elif("data(" in line):
                    get_port_data(line)
                elif("port_connects_to_channel(" in line):
                    #print(line)
                    get_port_connects_to_channel(line)
                elif("inflow_connects_to_channel("in line):
                    #print(line)
                    get_inflow_connections(line)                    
                elif("outflow_connects_to_channel("in line):
                    get_outflow_connections(line)
                elif( chk_channel.match(line) ):
                    get_channel(line)

                line = yw_struct.readline()
            get_in_out_ports()

In [27]:
steps = {}
ports = {}
workflow = {}
in_ports = defaultdict(list)
out_ports = defaultdict(list)
sub_programs = {}
port_alias_name= {}
port_alt_name={}
cnt =0

qualified_port=''
channel={}
port_data={}
port_connects_to_channel={}
inflow_conn={}
outflow_conn={}

yw_model_file = "Examples/get_itrdb_data/get_itrdb_data.P"

read_yw_model(yw_model_file)



for w in sorted(workflow, reverse=True): 
    wf_name = steps[w]["program_name"]
    #print(wf_name)
    #wf_name ='main'
    for sp in sub_programs[w]:
        create_cwl_files(sp,wf_name)

    filename = 'Examples/' + wf_name + '/wf_' + steps[w]["program_name"] + '.cwl'
    create_wf_file(filename,w, wf_name)
    

In [28]:
for i in in_ports['2']:
    print(i[1])
    data_id = ports[i[1]]["data_id"]
    p2c = inflow_conn[i[1]]
    ch = channel[p2c]
    

5


KeyError: '5'

In [31]:
def get_portid_frm_data(data_id):
    for port in ports:
        if ports[port]["data_id"] == data_id:
            return port

In [32]:
port_connects_to_channel

{'10': '6',
 '11': '5',
 '12': '8',
 '13': '7',
 '17': '7',
 '18': '8',
 '19': '9',
 '21': '9',
 '23': '10',
 '24': '10',
 '28': '14',
 '33': '15',
 '5': '13',
 '6': '5',
 '7': '12',
 '8': '4',
 '9': '6'}

In [33]:
inflow_conn

{'1': '12', '2': '13', '3': '14'}

In [30]:
outflow_conn

{'4': '15'}