In [171]:
import yaml
import xmltodict

In [201]:
with open("./activities_test.yaml") as f:
    flow = yaml.load(f, Loader=yaml.FullLoader)

__toolshed_file__ = "/Users/alexanderostrovsky/Desktop/galaxy/config/integrated_tool_panel.xml"
with open(__toolshed_file__) as f:
    tools_have = xmltodict.parse(f.read())
tools_have = tools_have["toolbox"]

In [202]:
def step_select(workflow, place):
    '''
    For steps with multiple possiblities, return just the tools from selected option
    '''
    options = list(workflow[place])[2:]
    for x in options:
        print(x + ":", workflow[place][x]["description"])
    selection = input("Which version of this step would you like to use?")
    return(selection)

In [203]:
def flow_parse(workflow):
    '''
    From activities yaml, returns list of viable tools for each step
    '''
    steps = {}
    for step in list(workflow.keys()):
        if workflow[step]["multi"] == True:
            choice = step_select(workflow, step)        
            tools = workflow[step][choice]
            tools.pop("description")
        else:
            tools = workflow[step]
            tools.pop("multi")
            tools.pop("description")
        steps[step] = tools
    return(steps)

In [204]:
def tool_select(options):
    '''
    Takes in output from flow_parse() to allow user to select a final tool for each step
    Format of final dictionary:
    key: step_name
    value: galaxy tool_id
    '''
    final_list = {}
    for x in options:
        if len(options[x]) > 1:
            print("There are multiple tool choices for", x)
            for y in options[x]:
                print(y, options[x][y]["description"])
            tool_choice = input("Which tool to use?")
            tool_choice = options[x][tool_choice]["tool_id"]
        else:
            tool_choice = "".join(list(options[x].keys()))
            tool_choice = options[x][tool_choice]["tool_id"]
        final_list[x] = tool_choice
    return(final_list)

In [214]:
def tools_list(tools_have):
    '''
    Extract list of currently installed tools against which activity list can be compared
    '''
    installed = []
    for section in tools_have["section"]:
        print(section)
        print(section["tool"])
#         if "tool" in list(section):
#             for tool in section["tool"]:
#                 try:
#                     installed.append(str(tool["@id"]))
#                 except:
#                     try:
#                         # Strange parsing error with xmltodict failing on certain sections for no reason
#                         installed.append(str(section["tool"]["@id"]))
#                     except:
#                         print("parsing failed")
#                         break
#     return(installed)

In [215]:
tools_list(tools_have)

OrderedDict([('@id', 'getext'), ('@name', 'Get Data'), ('@version', ''), ('tool', [OrderedDict([('@id', 'upload1')]), OrderedDict([('@id', 'ucsc_table_direct1')]), OrderedDict([('@id', 'ucsc_table_direct_archaea1')]), OrderedDict([('@id', 'ebi_sra_main')]), OrderedDict([('@id', 'modENCODEfly')]), OrderedDict([('@id', 'intermine')]), OrderedDict([('@id', 'flymine')]), OrderedDict([('@id', 'modmine')]), OrderedDict([('@id', 'mousemine')]), OrderedDict([('@id', 'ratmine')]), OrderedDict([('@id', 'yeastmine')]), OrderedDict([('@id', 'modENCODEworm')]), OrderedDict([('@id', 'wormbase')]), OrderedDict([('@id', 'zebrafishmine')]), OrderedDict([('@id', 'eupathdb')]), OrderedDict([('@id', 'genomespace_importer')]), OrderedDict([('@id', 'genomespace_push')])])])
[OrderedDict([('@id', 'upload1')]), OrderedDict([('@id', 'ucsc_table_direct1')]), OrderedDict([('@id', 'ucsc_table_direct_archaea1')]), OrderedDict([('@id', 'ebi_sra_main')]), OrderedDict([('@id', 'modENCODEfly')]), OrderedDict([('@id', 

KeyError: 'tool'

In [206]:
def tools_need(flow_file):
    '''
    List of all possible tool ids from the yaml file
    '''
    need = []
    for step in flow_file:
        if flow_file[step]["multi"] == True:
            versions = list(flow_file[step].keys())[2:]
            for version in versions:
                for x in flow_file[step][version]:
                    if x == "description":
                        pass
                    else:
                        need.append(flow_file[step][version][x]["tool_id"])
        else:
            for x in flow_file[step]:
                if x == "description" or x == "multi":
                    pass
                else:
                    need.append(flow_file[step][x]["tool_id"])
    return(need)

In [207]:
def missing(have, need):
    '''
    Return list of tools not present in toolshed
    '''
    miss = []
    for tool in need:
        if tool in have:
            pass
        else:
            miss.append(tool)
    return(list(miss))

In [208]:
def tool_remove(missing, flow_file):
    '''
    Removes all tools not installed on the instance from the activities dict and tells user what is not avaliable
    '''
    remove = []
    for step in flow_file:
        if flow_file[step]["multi"] == True:
            versions = list(flow_file[step].keys())[2:]
            for version in versions:
                for x in flow_file[step][version]:
                    if x == "description":
                        pass
                    else:
                        if flow_file[step][version][x]["tool_id"] in missing:
                            print("Missing tool:", x + ". Retrieve from toolshed to make available.")
                            remove.append([step, version, x])
        else:
            for x in flow_file[step]:
                    if x == "description" or x == "multi":
                        pass
                    else:
                        if flow_file[step][x]["tool_id"] in missing:
                            print("Missing tool:", x + ". Retrieve from toolshed to make available.")
                            remove.append([step, x])
    for tool in remove:
        if len(tool) == 3:
            del(flow_file[tool[0]][tool[1]][tool[2]])
        else:
             del(flow_file[tool[0]][tool[1]])
    return(flow_file)

In [209]:
flow_all = tools_need(flow)
miss = missing(installed, flow_all)
tool_remove(miss, flow)

Missing tool: alevin. Retrieve from toolshed to make available.
Missing tool: star. Retrieve from toolshed to make available.
Missing tool: scanpy. Retrieve from toolshed to make available.
Missing tool: RaceID. Retrieve from toolshed to make available.
Missing tool: rtsne. Retrieve from toolshed to make available.
Missing tool: rtsne. Retrieve from toolshed to make available.


{'alignmap': {'description': 'asdfasdf',
  'multi': True,
  'align': {'description': 'TEST123',
   'kallisto': {'tool_id': 'kallistobus', 'description': 'qwerty'}},
  'map': {'description': 'TEST345'}},
 'quant': {'description': 'asdfasdf2',
  'multi': False,
  'alevin': {'tool_id': 'alevin', 'description': 'qwerty3'}},
 'filter': {'description': 'asdfasdf3', 'multi': False},
 'dimredux': {'description': 'asdfasdf4',
  'multi': True,
  'umap': {'description': 'randomwriting'},
  'tsne': {'description': 'random2'},
  'test': {'description': 'random2'}}}

In [36]:
test = flow_parse(flow)
testing = tool_select(test)
installed = tools_list(tools_have)

align: TEST123
map: TEST345
Which version of this step would you like to use?align
umap: randomwriting
tsne: random2
test: random2
Which version of this step would you like to use?umap
There are multiple tool choices for alignmap
kallisto qwerty
alevin qwerty1
Which tool to use?kallisto


In [210]:
testing

{'alignmap': 'kallistobus',
 'quant': 'alevin',
 'filter': 'scanpy_filter',
 'dimredux': 'raceid'}

In [None]:
#Optional?

def tool_pull(missing_tools):
    '''
    Admin option for local instances:
    if tools not installed, auto-install all tools in a downloaded activity
    '''
    