## Load model and compute recommended tools

In [5]:
import os
import numpy as np
import json
import h5py

def load_model(model_path):
    """Load the tool-recommendation data stored in an HDF5 model file.

    Every dataset read here is expected to hold one JSON document.

    Args:
        model_path: Path of the HDF5 file to open (read-only).

    Returns:
        A tuple ``(paths, dictionary, rev_dict, c_tools, class_weights,
        standard_connections)``. The first, second, fourth, fifth and sixth
        items are the decoded ``multilabels_paths``, ``data_dictionary``,
        ``compatible_next_tools``, ``class_weights`` and
        ``standard_connections`` datasets. ``rev_dict`` is ``dictionary``
        inverted, with the original values converted to ``str`` and used
        as keys.

    Raises:
        KeyError: If one of the expected datasets is missing from the file.
    """
    def read_json(h5_file, name):
        # ``dataset[()]`` replaces ``Dataset.value``, which is deprecated and
        # no longer available in h5py 3.
        raw = h5_file[name][()]
        # Newer h5py versions hand back string data as bytes.
        if isinstance(raw, bytes):
            raw = raw.decode("utf-8")
        return json.loads(raw)

    # The context manager closes the file even if a dataset fails to decode.
    with h5py.File(model_path, 'r') as model:
        dictionary = read_json(model, 'data_dictionary')
        paths = read_json(model, 'multilabels_paths')
        c_tools = read_json(model, 'compatible_next_tools')
        class_weights = read_json(model, 'class_weights')
        standard_connections = read_json(model, 'standard_connections')

    rev_dict = {str(v): k for k, v in dictionary.items()}
    return paths, dictionary, rev_dict, c_tools, class_weights, standard_connections

def predict_tools(dict_paths, d_dict, c_tools, class_weights, test_path="bowtie2", rev_dict=None):
    """Look up the tools recorded for a tool sequence.

    Args:
        dict_paths: Maps a comma-separated string of tool ids to a
            comma-separated string of predicted tool ids.
        d_dict: Maps a tool name to its id.
        c_tools: Unused; kept so existing calls keep working.
        class_weights: Unused; kept so existing calls keep working.
        test_path: One tool name, or several separated by commas.
        rev_dict: Optional map from tool id (as ``str``) to tool name. When
            omitted it is derived by inverting ``d_dict``.

    Returns:
        A tuple ``(predicted_tools, pred_names)``: the predicted tool ids as
        strings and the matching tool names. Both lists are empty when
        ``dict_paths`` has no entry for the sequence.

    Raises:
        KeyError: If a name in ``test_path`` is not a key of ``d_dict``.
    """
    if rev_dict is None:
        # Build the reverse mapping locally instead of relying on a global.
        rev_dict = {str(tool_id): name for name, tool_id in d_dict.items()}

    tool_ids = []
    for name in test_path.split(","):
        if name not in d_dict:
            raise KeyError(
                "tool '{}' is not in the model dictionary".format(name)
            )
        tool_ids.append(str(d_dict[name]))
    path_key = ",".join(tool_ids)

    # Direct dictionary lookup; no need to scan every key.
    recommended = dict_paths.get(path_key)
    predicted_tools = recommended.split(",") if recommended is not None else []
    pred_names = [rev_dict[tool] for tool in predicted_tools]
    return predicted_tools, pred_names

In [6]:
# Trained model file and the query: one tool name, or several separated by commas.
model_path = "data/tool_recommendation_model_statistical_model.hdf5"
test_path = "umi_tools_extract"

# Unpack every structure returned for the model file.
(
    dict_paths,
    d_dict,
    rev_dict,
    c_tools,
    class_weights,
    standard_connections,
) = load_model(model_path)

# Ids and names of the tools predicted for `test_path`.
pred_ids, pred_names = predict_tools(
    dict_paths=dict_paths,
    d_dict=d_dict,
    c_tools=c_tools,
    class_weights=class_weights,
    test_path=test_path,
)

{'Grep1': ['Remove beginning1', 'computeMatrix'], 'Cut1': ['addValue', 'Paste1', 'wig_to_bigWig', 'sort1'], 'Remove beginning1': ['Cut1'], 'addValue': ['Paste1', 'cat1'], 'Paste1': ['addValue', 'Add_a_column1'], 'Add_a_column1': ['addValue', 'Cut1'], 'cat1': ['barchart_gnuplot'], 'Filter1': ['Summary_Statistics1', 'Add_a_column1', 'seq_filter_by_id'], 'gops_coverage_1': ['Filter1', 'cshl_grep_tool'], 'cshl_grep_tool': ['Summary_Statistics1'], 'Summary_Statistics1': ['Cut1'], 'bedtools_intersectbed_bam': ['wc_gnu', 'sort1'], 'wc_gnu': ['addValue'], 'sort1': ['cshl_awk_tool', 'cshl_uniq_tool'], 'cshl_awk_tool': ['cshl_uniq_tool'], 'Count1': ['cat1', 'barchart_gnuplot'], 'cshl_uniq_tool': ['Count1'], 'bamCompare_deepTools': ['computeMatrix'], 'computeMatrix': ['heatmapper_deepTools', 'dt_profiler'], 'bamCoverage_deepTools': ['computeMatrix'], 'rmcontamination': ['scaffold2fasta'], 'scaffold2fasta': ['eukaryotic_ncbi_submission', 'prokaryotic_ncbi_submission'], 'eukaryotic_ncbi_submission'

KeyError: 'umi_tools_extract'

## Fetch top recommended tools (sorted in descending order based on their usage)

In [None]:
# How many of the top-ranked tools are shown when the list is printed.
topk = 20

# Pair each predicted tool's name with its class weight.
c_wt_names = {rev_dict[t_id]: class_weights[t_id] for t_id in pred_ids}

# Rank the (name, weight) pairs by weight, largest first, then keep the names.
sorted_pred_tools = sorted(c_wt_names.items(), key=lambda pair: pair[1], reverse=True)
sorted_names = [name for name, _ in sorted_pred_tools]

## Top recommended tools

In [None]:
# Show at most the first `topk` names of the ranked list.
print(sorted_names[:topk])

In [None]:
# Display the tool dictionary, e.g. to check which tool names are valid in `test_path`.
d_dict