In [1]:
import numpy as np
import json
import warnings
import operator

import h5py
from keras.models import model_from_json
from keras import backend as K

warnings.filterwarnings("ignore")


def read_file(file_path):
    """Parse the JSON document stored at ``file_path`` and return the result.

    Args:
        file_path: Path of a text file containing a single JSON document.

    Returns:
        The decoded Python object (dict, list, str, number, bool or None).
    """
    with open(file_path, 'r') as handle:
        # json.load reads the whole stream and decodes it in one step.
        return json.load(handle)


def create_model(model_path):
    """Load the stored layer weights into the already-built Keras model.

    This function reads three notebook-level globals rather than its argument:
    ``trained_model`` (the open HDF5 file), ``dictionary`` (tool name -> id)
    and ``loaded_model`` (the model whose weights are set). They must exist
    before the call.

    Args:
        model_path: Not read by this function; kept so existing calls still work.

    Returns:
        A tuple ``(loaded_model, dictionary, reverse_dictionary)`` where
        ``reverse_dictionary`` maps ``str(id)`` back to the tool name.
    """
    reverse_dictionary = dict((str(v), k) for k, v in dictionary.items())
    model_weights = list()
    weight_ctr = 0
    for item in trained_model.keys():
        if "weight_" in item:
            # The running counter, not the iterated key, names the dataset to
            # read, so weights are collected as weight_0, weight_1, ...
            # NOTE(review): presumably because the file lists keys
            # alphabetically (weight_10 before weight_2) - confirm.
            d_key = "weight_" + str(weight_ctr)
            # Dataset.value was removed in h5py 3.0; indexing with an empty
            # tuple reads the full dataset on both h5py 2.x and 3.x.
            weights = trained_model.get(d_key)[()]
            model_weights.append(weights)
            weight_ctr += 1
    # set the model weights
    loaded_model.set_weights(model_weights)
    return loaded_model, dictionary, reverse_dictionary


def compute_recommendations(model, tool_sequence, labels, dictionary, reverse_dictionary, class_weights, topk=20, max_seq_len=25):
    """Print the top-k tools the model recommends after a tool sequence.

    The raw prediction is multiplied element-wise by the class weights,
    divided by its maximum and reported as truncated integer percentages.
    Nothing is returned; all results are printed.

    Args:
        model: Object exposing ``predict(array, verbose=0)``; the result is
            reshaped to one score per class using its second dimension.
        tool_sequence: Comma-separated tool ids, e.g. ``"22,1023"``.
        labels: Unused; kept so existing calls still work.
        dictionary: Mapping of tool name -> tool id.
        reverse_dictionary: Mapping of ``str(tool id)`` -> tool name.
        class_weights: Mapping of ``str(tool id)`` -> weight. When its keys are
            exactly ``"0" .. "n-1"`` for the ``n`` predicted classes, weights
            are aligned by key; otherwise the mapping's value order is used.
        topk: Maximum number of recommendations to print.
        max_seq_len: Length of the zero-padded input vector given to the model.

    Raises:
        KeyError: If an id in ``tool_sequence`` is missing from
            ``reverse_dictionary``.
        IndexError: If ``tool_sequence`` holds more than ``max_seq_len`` ids.
    """
    tool_ids = tool_sequence.split(",")
    tool_sequence_names = [reverse_dictionary[str(tool_id)] for tool_id in tool_ids]

    # Ids fill the vector from index 0; the remaining slots stay 0.
    sample = np.zeros(max_seq_len)
    for idx, tool_id in enumerate(tool_ids):
        sample[idx] = int(tool_id)
    sample_reshaped = np.reshape(sample, (1, max_seq_len))

    # predict next tools for a test path
    prediction = model.predict(sample_reshaped, verbose=0)
    prediction = np.reshape(prediction, (prediction.shape[1],))

    # Align each weight with its prediction index by key, so the result does
    # not depend on the insertion order of the class_weights mapping.
    n_classes = prediction.shape[0]
    index_keys = [str(pos) for pos in range(n_classes)]
    if len(class_weights) == n_classes and all(key in class_weights for key in index_keys):
        weight_val = np.array([class_weights[key] for key in index_keys])
    else:
        # Keys do not cover every class index: fall back to the value order.
        weight_val = list(class_weights.values())
        weight_val = np.reshape(weight_val, (len(weight_val),))

    prediction = prediction * weight_val

    # Scale so the best score is 1.0; skip when the maximum is 0 because the
    # division would produce NaN and int(NaN) raises.
    peak = float(np.max(prediction))
    if peak != 0:
        prediction = prediction / peak

    prediction_pos = np.argsort(prediction, axis=-1)

    # get topk prediction
    topk_prediction_pos = prediction_pos[-topk:]

    # read tool names using reverse dictionary
    pred_tool_ids_sorted = dict()
    for tool_pos in topk_prediction_pos:
        tool_name = reverse_dictionary.get(str(tool_pos))
        if tool_name is None:
            # No tool is registered for this position (e.g. the padding id 0).
            continue
        pred_tool_ids_sorted[tool_name] = int(prediction[tool_pos] * 100)
    pred_tool_ids_sorted = dict(sorted(pred_tool_ids_sorted.items(), key=lambda kv: kv[1], reverse=True))

    tool_seq_name = ",".join(tool_sequence_names)
    print("Current tool sequence: ")
    print()
    print(tool_seq_name)
    print()
    print("Recommended tools for the tool sequence '%s' with their scores in decreasing order:" % tool_seq_name)
    print()
    for i in pred_tool_ids_sorted:
        print(i + "(" + str(pred_tool_ids_sorted[i]) + "%)")

    ids_tools = dict()
    for key in pred_tool_ids_sorted:
        ids_tools[key] = dictionary[key]
    print()
    print("Tool ids:")
    print()
    for i in ids_tools:
        print(i + "(" + str(ids_tools[i]) + ")")


model_path = "../output_files/data/models/model_18_10.hdf5"

# The HDF5 file is read for the model configuration, the tool dictionary and
# the class weights here; create_model() later reads the layer weights from the
# same open handle, so it is not closed in this cell.
trained_model = h5py.File(model_path, 'r')
# Dataset.value was removed in h5py 3.0; indexing with an empty tuple reads the
# stored value on both h5py 2.x and 3.x.
model_config = json.loads(trained_model.get('model_config')[()])
dictionary = json.loads(trained_model.get('data_dictionary')[()])
class_weights = json.loads(trained_model.get('class_weights')[()])
loaded_model = model_from_json(model_config)

# create_model() uses the globals defined above and returns the model with its
# weights set, plus the name -> id and str(id) -> name mappings.
model, dictionary, reverse_dictionary = create_model(model_path)

# Printed so tool ids can be looked up when composing a tool sequence.
print(reverse_dictionary)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Instructions for updating:
Colocations handled automatically by placer.
{'1': 'rseqc_read_distribution', '2': 'random_lines1', '3': 'rseqc_bam_stat', '4': 'query_tabular', '5': 'cuffdiff', '6': 'generic_filter', '7': 'bctools_remove_tail', '8': 'hcluster_sg', '9': 'minfi_dmr', '10': 'mtbls520_08d_concentration', '11': 'FROGS_affiliation_OTU', '12': '__APPLY_RULES__', '13': 'seurat_scale_data', '14': 'remove_tail.py', '15': 'vcftools_merge', '16': 'iReport', '17': 'hicexplorer_hictransform', '18': 'correct_barcodes', '19': 'mtbls520_10_species_varpart', '20': 'bcftools_call', '21': 'minfi_getM', '22': 'fastqc', '23': 'minimap2', '24': 'ia_coordinates_of_roi', '25': 'ctb_sdf2fps', '26': 'tp_sorted_uniq', '27': 'antismash', '28': 'IDScoreSwitcher', '29': 'tp_cat', '30': 'bedtools_intersectbed_bam', '31': 'IDFileConverter', '32': 'cshl_fastx_clipper', '33': 'mummer_dnadiff', '34': 'krona-text', '35': 'samtools_flagstat', '36': 'eukaryotic_ncbi_submission', '37': 'qiime_assign_taxonomy', '3

In [13]:
# Maximum number of recommendations to list.
topk = 20
# Comma-separated tool ids describing the sequence to extend. Print the
# variable 'reverse_dictionary' to look up the id of any tool.
tool_seq = "22,1023"
compute_recommendations(model, tool_seq, "", dictionary, reverse_dictionary, class_weights, topk=topk)

Current tool sequence: 

fastqc,CONVERTER_gz_to_uncompressed

Recommended tools for the tool sequence 'fastqc,CONVERTER_gz_to_uncompressed' with their scores in decreasing order:

fastqc(100%)
bowtie2(80%)
fastq_groomer(78%)
tophat2(52%)
macs2_callpeak(41%)
fastq_to_fasta_python(27%)
bwa_mem(23%)
trimmomatic(14%)
featurecounts(13%)
bowtie_wrapper(12%)
hisat2(10%)
cshl_fastx_reverse_complement(9%)
rna_star(9%)
gffcompare(7%)
fastq_filter(6%)
deeptools_multi_bam_summary(6%)
trim_galore(5%)
unicycler(4%)
__FLATTEN__(4%)
cutadapt(4%)

Tool ids:

fastqc(22)
bowtie2(989)
fastq_groomer(1234)
tophat2(908)
macs2_callpeak(580)
fastq_to_fasta_python(136)
bwa_mem(997)
trimmomatic(246)
featurecounts(1056)
bowtie_wrapper(858)
hisat2(188)
cshl_fastx_reverse_complement(911)
rna_star(613)
gffcompare(794)
fastq_filter(227)
deeptools_multi_bam_summary(187)
trim_galore(1074)
unicycler(438)
__FLATTEN__(399)
cutadapt(633)


In [9]:
# Look up the class weight stored for tool id "1023", the last tool of the
# sequence used in the recommendation cell.
class_weights["1023"]

8.174907483683684