In [58]:
import numpy as np
import json
import warnings
import operator

import h5py
from keras.models import model_from_json
from keras import backend as K

from matplotlib import pyplot as plt

warnings.filterwarnings("ignore")

size_title = 18
size_label = 14
n_pred = 2

path_data_dict = "data/generated_files/data_dict.txt"
path_inverted_wt = "data/generated_files/inverted_weights.txt"
path_usage_wt = "data/generated_files/usage_prediction.txt"
path_class_wt = "data/generated_files/class_weights.txt"
path_test_data = "data/generated_files/test_data.txt"
model_path = "data/generated_files/trained_model_86.hdf5"
#model_path = "data/generated_files/trained_model_86.hdf5"

def read_file(file_path):
    with open(file_path, 'r') as data_file:
        data = json.loads(data_file.read())
    return data

class_weights = read_file(path_class_wt)
usage_weights = read_file(path_usage_wt)
inverted_weights = read_file(path_inverted_wt)
data_dict = read_file(path_data_dict)

def create_model(model_path):
    trained_model = h5py.File(model_path, 'r')
    model_config = json.loads(trained_model.get('model_config').value)
    loaded_model = model_from_json(model_config)
    dictionary = json.loads(trained_model.get('data_dictionary').value)
    compatibile_tools = json.loads(trained_model.get('compatible_tools').value)
    reverse_dictionary = dict((str(v), k) for k, v in dictionary.items())
    model_weights = list()
    weight_ctr = 0
    while True:
        try:
            d_key = "weight_" + str(weight_ctr)
            weights = trained_model.get(d_key).value
            model_weights.append(weights)
            weight_ctr += 1
        except Exception as exception:
            break
    # set the model weights
    loaded_model.set_weights(model_weights)
    return loaded_model, dictionary, reverse_dictionary, compatibile_tools

model, dictionary, reverse_dictionary, compatibile_tools = create_model(model_path)

In [61]:
def verify_model(model, tool_sequence, labels, dictionary, reverse_dictionary, compatible_tools, topk=5, max_seq_len=25):
    tl_seq = tool_sequence.split(",")
    last_tool_name = reverse_dictionary[str(tl_seq[-1])]
    last_compatible_tools = compatible_tools[last_tool_name]
    sample = np.zeros(max_seq_len)
    for idx, tool_id in enumerate(tl_seq):
        sample[-(idx + 1)] = int(tool_id)
        sample_reshaped = np.reshape(sample, (1, max_seq_len))

        # predict next tools for a test path
        prediction = model.predict(sample_reshaped, verbose=0)
        prediction = np.reshape(prediction, (prediction.shape[1],))
        prediction_pos = np.argsort(prediction, axis=-1)

        # get topk prediction
        topk_prediction_pos = prediction_pos[-topk:]
        topk_prediction_val = [np.round(prediction[pos] * 100, 2) for pos in topk_prediction_pos]        

        # read tool names using reverse dictionary
        pred_tool_ids = [reverse_dictionary[str(tool_pos)] for tool_pos in topk_prediction_pos]
        actual_next_tool_ids = list(set(pred_tool_ids).intersection(set(last_compatible_tools.split(","))))      
        
        print("Actual tools: %s" % ",".join(actual_next_tool_ids))
        #print("Predicted tools: %s" % ",".join(pred_tool_ids))
        print()
        pred_tool_ids_sorted = dict()
        for (tool_pos, tool_pred_val) in zip(topk_prediction_pos, topk_prediction_val):
            tool_name = reverse_dictionary[str(tool_pos)]
            #if tool_name in actual_next_tool_ids:
            pred_tool_ids_sorted[tool_name] = tool_pred_val
        pred_tool_ids_sorted = dict(sorted(pred_tool_ids_sorted.items(), key=lambda kv: kv[1], reverse=True))
        cls_wt = dict()
        usg_wt = dict()
        inv_wt = dict()
        keys = list(pred_tool_ids_sorted.keys())
        for k in keys:
            cls_wt[k] = class_weights[str(data_dict[k])]
            usg_wt[k] = usage_weights[k]
            inv_wt[k] = inverted_weights[str(data_dict[k])]
        print("Predicted tools: \n")
        print(pred_tool_ids_sorted)
        print()
        print("Class weights: \n")
        print(cls_wt)
        print()
        print("Usage weights: \n")
        print(usg_wt)
        print()
        print("Inverted weights: \n")
        print(inv_wt)
        print("======================================")
        # find false positives
        #false_positives = [ tool_name for tool_name in top_predicted_next_tool_names if tool_name not in actual_next_tool_names ]
        #absolute_precision = 1 - ( len( false_positives ) / float( len( actual_classes_pos ) ) )
        #ave_abs_precision.append(absolute_precision)
    #mean_precision = np.mean(ave_abs_precision)
    #print("Absolute precision on test data using current model is: %0.6f" % mean_precision)
    #return mean_precision

def get_predictions(model, dictionary, reverse_dictionary, compatibile_tools):
    t_data = read_file(path_test_data)
    ctr = 1
    for ph, cl in t_data.items():
        verify_model(model, ph, cl, dictionary, reverse_dictionary, compatibile_tools)
        ctr += 1
        if ctr == 400:
            break
verify_model(model, tool_seq, "", dictionary, reverse_dictionary, compatibile_tools)
get_predictions(model, dictionary, reverse_dictionary, compatibile_tools)

Actual tools: multiqc

Predicted tools: 

{'multiqc': 21.11, 'Filter1': 8.39, 'htseq_count': 4.23, 'featurecounts': 3.85, 'tp_awk_tool': 2.22}

Class weights: 

{'multiqc': 607.7372072437983, 'Filter1': 187.2361639657165, 'htseq_count': 840.4553052741519, 'featurecounts': 1389.358749404951, 'tp_awk_tool': 258.68538806820925}

Usage weights: 

{'multiqc': 434.45555556, 'Filter1': 1570.20016933, 'htseq_count': 538.7081783, 'featurecounts': 1397.2983265, 'tp_awk_tool': 224.9}

Inverted weights: 

{'multiqc': 850.1318681318681, 'Filter1': 22.326695526695527, 'htseq_count': 1311.2203389830509, 'featurecounts': 1381.4642857142858, 'tp_awk_tool': 297.54615384615386}
Actual tools: fastqc,rna_star

Predicted tools: 

{'fastqc': 72.55, 'bowtie2': 9.09, 'rna_star': 7.64, 'htseq_count': 6.58, 'samtools_sort': 5.91}

Class weights: 

{'fastqc': 2303.1236832075983, 'bowtie2': 1615.6868628096963, 'rna_star': 1173.7924874363764, 'htseq_count': 840.4553052741519, 'samtools_sort': 424.0077368038999}

Us

Actual tools: Convert characters1,tp_easyjoin_tool,Grouping1,join1

Predicted tools: 

{'join1': 14.94, 'tp_easyjoin_tool': 4.73, 'Grouping1': 3.63, 'Cut1': 3.23, 'Convert characters1': 2.78}

Class weights: 

{'join1': 105.03701179940524, 'tp_easyjoin_tool': 314.7650430911095, 'Grouping1': 53.82802413400234, 'Cut1': 122.80379255282038, 'Convert characters1': 25.528665312695974}

Usage weights: 

{'join1': 418.28191742, 'tp_easyjoin_tool': 489.22502467, 'Grouping1': 273.44597588, 'Cut1': 2455.24051351, 'Convert characters1': 48.71713307}

Inverted weights: 

{'join1': 26.376406409819296, 'tp_easyjoin_tool': 202.51832460732984, 'Grouping1': 10.596082728393371, 'Cut1': 6.1422786820166735, 'Convert characters1': 13.377485734048072}
Actual tools: Add_a_column1,Convert characters1,Grouping1,join1

Predicted tools: 

{'join1': 72.6, 'Grouping1': 19.49, 'Convert characters1': 10.35, 'Cut1': 7.4, 'Add_a_column1': 6.8}

Class weights: 

{'join1': 105.03701179940524, 'Grouping1': 53.828024134002

Actual tools: Cut1,join1,Convert characters1

Predicted tools: 

{'tp_easyjoin_tool': 13.14, 'Cut1': 10.16, 'Convert characters1': 7.72, 'join1': 0.86, 'Add_a_column1': 0.82}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'Cut1': 122.80379255282038, 'Convert characters1': 25.528665312695974, 'join1': 105.03701179940524, 'Add_a_column1': 144.8935771484877}

Usage weights: 

{'tp_easyjoin_tool': 489.22502467, 'Cut1': 2455.24051351, 'Convert characters1': 48.71713307, 'join1': 418.28191742, 'Add_a_column1': 365.0}

Inverted weights: 

{'tp_easyjoin_tool': 202.51832460732984, 'Cut1': 6.1422786820166735, 'Convert characters1': 13.377485734048072, 'join1': 26.376406409819296, 'Add_a_column1': 57.5182156133829}
Actual tools: Cut1,join1,addValue,Convert characters1

Predicted tools: 

{'Cut1': 21.38, 'Convert characters1': 12.84, 'join1': 3.68, 'collapse_dataset': 1.52, 'addValue': 0.61}

Class weights: 

{'Cut1': 122.80379255282038, 'Convert characters1': 25.528665312695974, 'join1

Actual tools: 

Predicted tools: 

{'tp_easyjoin_tool': 16.85, 'Paste1': 4.9, 'join1': 2.0, 'fastqc': 0.85, 'Convert characters1': 0.82}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'Paste1': 92.32608425880143, 'join1': 105.03701179940524, 'fastqc': 2303.1236832075983, 'Convert characters1': 25.528665312695974}

Usage weights: 

{'tp_easyjoin_tool': 489.22502467, 'Paste1': 55.42288507, 'join1': 418.28191742, 'fastqc': 6993.70010361, 'Convert characters1': 48.71713307}

Inverted weights: 

{'tp_easyjoin_tool': 202.51832460732984, 'Paste1': 153.80119284294236, 'join1': 26.376406409819296, 'fastqc': 758.4509803921569, 'Convert characters1': 13.377485734048072}
Actual tools: 

Predicted tools: 

{'tp_easyjoin_tool': 13.85, 'join1': 1.45, 'fastqc': 0.51, 'Convert characters1': 0.49, 'Paste1': 0.41}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'join1': 105.03701179940524, 'fastqc': 2303.1236832075983, 'Convert characters1': 25.528665312695974, 'Paste1': 92.326084258

Actual tools: join1,tp_easyjoin_tool,Remove beginning1

Predicted tools: 

{'fastqc': 24.87, 'join1': 9.17, 'tp_easyjoin_tool': 5.92, 'Grouping1': 5.15, 'Remove beginning1': 2.57}

Class weights: 

{'fastqc': 2303.1236832075983, 'join1': 105.03701179940524, 'tp_easyjoin_tool': 314.7650430911095, 'Grouping1': 53.82802413400234, 'Remove beginning1': 63.235244558058824}

Usage weights: 

{'fastqc': 6993.70010361, 'join1': 418.28191742, 'tp_easyjoin_tool': 489.22502467, 'Grouping1': 273.44597588, 'Remove beginning1': 295.70772083}

Inverted weights: 

{'fastqc': 758.4509803921569, 'join1': 26.376406409819296, 'tp_easyjoin_tool': 202.51832460732984, 'Grouping1': 10.596082728393371, 'Remove beginning1': 13.522461108197868}
Actual tools: Cut1,join1,tp_easyjoin_tool

Predicted tools: 

{'join1': 14.94, 'tp_easyjoin_tool': 4.73, 'Grouping1': 3.63, 'Cut1': 3.23, 'Convert characters1': 2.78}

Class weights: 

{'join1': 105.03701179940524, 'tp_easyjoin_tool': 314.7650430911095, 'Grouping1': 53.828

Actual tools: Paste1,join1,tp_easyjoin_tool

Predicted tools: 

{'tp_easyjoin_tool': 8.29, 'join1': 1.99, 'Convert characters1': 0.49, 'Grouping1': 0.33, 'Paste1': 0.32}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'join1': 105.03701179940524, 'Convert characters1': 25.528665312695974, 'Grouping1': 53.82802413400234, 'Paste1': 92.32608425880143}

Usage weights: 

{'tp_easyjoin_tool': 489.22502467, 'join1': 418.28191742, 'Convert characters1': 48.71713307, 'Grouping1': 273.44597588, 'Paste1': 55.42288507}

Inverted weights: 

{'tp_easyjoin_tool': 202.51832460732984, 'join1': 26.376406409819296, 'Convert characters1': 13.377485734048072, 'Grouping1': 10.596082728393371, 'Paste1': 153.80119284294236}
Actual tools: join1,tp_easyjoin_tool

Predicted tools: 

{'tp_easyjoin_tool': 15.39, 'join1': 3.54, 'Convert characters1': 1.39, 'collapse_dataset': 0.79, 'Grouping1': 0.72}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'join1': 105.03701179940524, 'Convert characters

Actual tools: join1,addValue,Convert characters1

Predicted tools: 

{'Remove beginning1': 7.33, 'join1': 1.22, 'Convert characters1': 0.53, 'addValue': 0.48, 'tp_easyjoin_tool': 0.1}

Class weights: 

{'Remove beginning1': 63.235244558058824, 'join1': 105.03701179940524, 'Convert characters1': 25.528665312695974, 'addValue': 68.8574344458798, 'tp_easyjoin_tool': 314.7650430911095}

Usage weights: 

{'Remove beginning1': 295.70772083, 'join1': 418.28191742, 'Convert characters1': 48.71713307, 'addValue': 463.02927967, 'tp_easyjoin_tool': 489.22502467}

Inverted weights: 

{'Remove beginning1': 13.522461108197868, 'join1': 26.376406409819296, 'Convert characters1': 13.377485734048072, 'addValue': 10.239841164791528, 'tp_easyjoin_tool': 202.51832460732984}
Actual tools: join1,addValue,Convert characters1

Predicted tools: 

{'Remove beginning1': 5.18, 'join1': 0.44, 'addValue': 0.35, 'Convert characters1': 0.28, 'tp_easyjoin_tool': 0.04}

Class weights: 

{'Remove beginning1': 63.2352445

Actual tools: join1,tp_easyjoin_tool

Predicted tools: 

{'tp_easyjoin_tool': 6.39, 'fastqc': 2.95, 'join1': 1.8, 'Convert characters1': 0.34, 'collapse_dataset': 0.32}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'fastqc': 2303.1236832075983, 'join1': 105.03701179940524, 'Convert characters1': 25.528665312695974, 'collapse_dataset': 37.9852280938911}

Usage weights: 

{'tp_easyjoin_tool': 489.22502467, 'fastqc': 6993.70010361, 'join1': 418.28191742, 'Convert characters1': 48.71713307, 'collapse_dataset': 40.82700763}

Inverted weights: 

{'tp_easyjoin_tool': 202.51832460732984, 'fastqc': 758.4509803921569, 'join1': 26.376406409819296, 'Convert characters1': 13.377485734048072, 'collapse_dataset': 35.341251713111006}
Actual tools: Paste1,join1,tp_easyjoin_tool

Predicted tools: 

{'tp_easyjoin_tool': 3.95, 'fastqc': 2.64, 'join1': 2.33, 'tp_sort_header_tool': 1.01, 'Paste1': 0.66}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'fastqc': 2303.1236832075983, 'join

Actual tools: Paste1,fastqc,join1,Convert characters1

Predicted tools: 

{'tp_easyjoin_tool': 2.98, 'Convert characters1': 0.53, 'fastqc': 0.13, 'join1': 0.04, 'Paste1': 0.04}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'Convert characters1': 25.528665312695974, 'fastqc': 2303.1236832075983, 'join1': 105.03701179940524, 'Paste1': 92.32608425880143}

Usage weights: 

{'tp_easyjoin_tool': 489.22502467, 'Convert characters1': 48.71713307, 'fastqc': 6993.70010361, 'join1': 418.28191742, 'Paste1': 55.42288507}

Inverted weights: 

{'tp_easyjoin_tool': 202.51832460732984, 'Convert characters1': 13.377485734048072, 'fastqc': 758.4509803921569, 'join1': 26.376406409819296, 'Paste1': 153.80119284294236}
Actual tools: Paste1,fastqc,join1,Convert characters1

Predicted tools: 

{'tp_easyjoin_tool': 3.2, 'fastqc': 0.47, 'Convert characters1': 0.25, 'Paste1': 0.12, 'join1': 0.07}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'fastqc': 2303.1236832075983, 'Convert characte

Actual tools: fastqc,join1,Convert characters1

Predicted tools: 

{'join1': 31.4, 'tp_easyjoin_tool': 25.21, 'Grouping1': 8.4, 'Convert characters1': 2.58, 'fastqc': 1.81}

Class weights: 

{'join1': 105.03701179940524, 'tp_easyjoin_tool': 314.7650430911095, 'Grouping1': 53.82802413400234, 'Convert characters1': 25.528665312695974, 'fastqc': 2303.1236832075983}

Usage weights: 

{'join1': 418.28191742, 'tp_easyjoin_tool': 489.22502467, 'Grouping1': 273.44597588, 'Convert characters1': 48.71713307, 'fastqc': 6993.70010361}

Inverted weights: 

{'join1': 26.376406409819296, 'tp_easyjoin_tool': 202.51832460732984, 'Grouping1': 10.596082728393371, 'Convert characters1': 13.377485734048072, 'fastqc': 758.4509803921569}
Actual tools: Paste1,fastqc,join1

Predicted tools: 

{'join1': 46.52, 'tp_easyjoin_tool': 29.39, 'Grouping1': 11.39, 'Paste1': 2.33, 'fastqc': 2.04}

Class weights: 

{'join1': 105.03701179940524, 'tp_easyjoin_tool': 314.7650430911095, 'Grouping1': 53.82802413400234, 'Paste

Actual tools: Paste1,join1,tp_easyjoin_tool

Predicted tools: 

{'tp_easyjoin_tool': 13.47, 'join1': 6.06, 'fastqc': 4.67, 'Convert characters1': 1.51, 'Paste1': 1.33}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'join1': 105.03701179940524, 'fastqc': 2303.1236832075983, 'Convert characters1': 25.528665312695974, 'Paste1': 92.32608425880143}

Usage weights: 

{'tp_easyjoin_tool': 489.22502467, 'join1': 418.28191742, 'fastqc': 6993.70010361, 'Convert characters1': 48.71713307, 'Paste1': 55.42288507}

Inverted weights: 

{'tp_easyjoin_tool': 202.51832460732984, 'join1': 26.376406409819296, 'fastqc': 758.4509803921569, 'Convert characters1': 13.377485734048072, 'Paste1': 153.80119284294236}
Actual tools: tp_easyjoin_tool,Remove beginning1

Predicted tools: 

{'tp_easyjoin_tool': 11.53, 'Convert characters1': 2.24, 'fastqc': 0.84, 'Remove beginning1': 0.76, 'collapse_dataset': 0.64}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'Convert characters1': 25.52866531269

Actual tools: join1,tp_easyjoin_tool

Predicted tools: 

{'Convert characters1': 2.72, 'collapse_dataset': 1.92, 'join1': 0.4, 'tp_easyjoin_tool': 0.15, 'tp_sort_header_tool': 0.05}

Class weights: 

{'Convert characters1': 25.528665312695974, 'collapse_dataset': 37.9852280938911, 'join1': 105.03701179940524, 'tp_easyjoin_tool': 314.7650430911095, 'tp_sort_header_tool': 715.2672298711229}

Usage weights: 

{'Convert characters1': 48.71713307, 'collapse_dataset': 40.82700763, 'join1': 418.28191742, 'tp_easyjoin_tool': 489.22502467, 'tp_sort_header_tool': 753.90013126}

Inverted weights: 

{'Convert characters1': 13.377485734048072, 'collapse_dataset': 35.341251713111006, 'join1': 26.376406409819296, 'tp_easyjoin_tool': 202.51832460732984, 'tp_sort_header_tool': 678.6140350877193}
Actual tools: htseq_count

Predicted tools: 

{'fastqc': 72.55, 'bowtie2': 9.09, 'rna_star': 7.64, 'htseq_count': 6.58, 'samtools_sort': 5.91}

Class weights: 

{'fastqc': 2303.1236832075983, 'bowtie2': 1615.68

Actual tools: fastqc,rna_star,Cut1

Predicted tools: 

{'Add_a_column1': 10.23, 'Cut1': 7.85, 'rna_star': 4.39, 'fastqc': 4.08, 'melt': 2.44}

Class weights: 

{'Add_a_column1': 144.8935771484877, 'Cut1': 122.80379255282038, 'rna_star': 1173.7924874363764, 'fastqc': 2303.1236832075983, 'melt': 0.2567115768021768}

Usage weights: 

{'Add_a_column1': 365.0, 'Cut1': 2455.24051351, 'rna_star': 641.14677656, 'fastqc': 6993.70010361, 'melt': 0.00471925}

Inverted weights: 

{'Add_a_column1': 57.5182156133829, 'Cut1': 6.1422786820166735, 'rna_star': 2148.9444444444443, 'fastqc': 758.4509803921569, 'melt': 13.964259927797833}
Actual tools: Cut1,join1,Convert characters1

Predicted tools: 

{'Add_a_column1': 14.99, 'join1': 13.4, 'Grouping1': 9.64, 'Cut1': 9.33, 'Convert characters1': 1.94}

Class weights: 

{'Add_a_column1': 144.8935771484877, 'join1': 105.03701179940524, 'Grouping1': 53.82802413400234, 'Cut1': 122.80379255282038, 'Convert characters1': 25.528665312695974}

Usage weights: 

{'

Actual tools: join1,tp_easyjoin_tool

Predicted tools: 

{'join1': 10.41, 'tp_easyjoin_tool': 6.32, 'collapse_dataset': 0.36, 'Grouping1': 0.33, 'fastqc': 0.24}

Class weights: 

{'join1': 105.03701179940524, 'tp_easyjoin_tool': 314.7650430911095, 'collapse_dataset': 37.9852280938911, 'Grouping1': 53.82802413400234, 'fastqc': 2303.1236832075983}

Usage weights: 

{'join1': 418.28191742, 'tp_easyjoin_tool': 489.22502467, 'collapse_dataset': 40.82700763, 'Grouping1': 273.44597588, 'fastqc': 6993.70010361}

Inverted weights: 

{'join1': 26.376406409819296, 'tp_easyjoin_tool': 202.51832460732984, 'collapse_dataset': 35.341251713111006, 'Grouping1': 10.596082728393371, 'fastqc': 758.4509803921569}
Actual tools: Paste1,join1,tp_easyjoin_tool

Predicted tools: 

{'tp_easyjoin_tool': 11.92, 'join1': 6.27, 'collapse_dataset': 0.42, 'Paste1': 0.35, 'bedtools_mergebed': 0.1}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'join1': 105.03701179940524, 'collapse_dataset': 37.9852280938911

Actual tools: fastqc,tp_sort_header_tool,join1

Predicted tools: 

{'join1': 12.96, 'fastqc': 12.0, 'tp_easyjoin_tool': 7.63, 'collapse_dataset': 5.63, 'tp_sort_header_tool': 1.94}

Class weights: 

{'join1': 105.03701179940524, 'fastqc': 2303.1236832075983, 'tp_easyjoin_tool': 314.7650430911095, 'collapse_dataset': 37.9852280938911, 'tp_sort_header_tool': 715.2672298711229}

Usage weights: 

{'join1': 418.28191742, 'fastqc': 6993.70010361, 'tp_easyjoin_tool': 489.22502467, 'collapse_dataset': 40.82700763, 'tp_sort_header_tool': 753.90013126}

Inverted weights: 

{'join1': 26.376406409819296, 'fastqc': 758.4509803921569, 'tp_easyjoin_tool': 202.51832460732984, 'collapse_dataset': 35.341251713111006, 'tp_sort_header_tool': 678.6140350877193}
Actual tools: fastqc,join1,Convert characters1

Predicted tools: 

{'tp_easyjoin_tool': 6.39, 'fastqc': 2.95, 'join1': 1.8, 'Convert characters1': 0.34, 'collapse_dataset': 0.32}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'fastqc': 23

Actual tools: Cut1,join1

Predicted tools: 

{'collapse_dataset': 3.62, 'join1': 3.51, 'Cut1': 3.4, 'Convert characters1': 1.55, 'addValue': 0.46}

Class weights: 

{'collapse_dataset': 37.9852280938911, 'join1': 105.03701179940524, 'Cut1': 122.80379255282038, 'Convert characters1': 25.528665312695974, 'addValue': 68.8574344458798}

Usage weights: 

{'collapse_dataset': 40.82700763, 'join1': 418.28191742, 'Cut1': 2455.24051351, 'Convert characters1': 48.71713307, 'addValue': 463.02927967}

Inverted weights: 

{'collapse_dataset': 35.341251713111006, 'join1': 26.376406409819296, 'Cut1': 6.1422786820166735, 'Convert characters1': 13.377485734048072, 'addValue': 10.239841164791528}
Actual tools: Cut1,join1

Predicted tools: 

{'Cut1': 9.74, 'join1': 7.6, 'collapse_dataset': 1.85, 'Convert characters1': 0.4, 'tp_cat': 0.33}

Class weights: 

{'Cut1': 122.80379255282038, 'join1': 105.03701179940524, 'collapse_dataset': 37.9852280938911, 'Convert characters1': 25.528665312695974, 'tp_cat': 2

Actual tools: join1,tp_easyjoin_tool

Predicted tools: 

{'join1': 23.71, 'addValue': 3.66, 'collapse_dataset': 2.59, 'Convert characters1': 1.19, 'tp_easyjoin_tool': 0.92}

Class weights: 

{'join1': 105.03701179940524, 'addValue': 68.8574344458798, 'collapse_dataset': 37.9852280938911, 'Convert characters1': 25.528665312695974, 'tp_easyjoin_tool': 314.7650430911095}

Usage weights: 

{'join1': 418.28191742, 'addValue': 463.02927967, 'collapse_dataset': 40.82700763, 'Convert characters1': 48.71713307, 'tp_easyjoin_tool': 489.22502467}

Inverted weights: 

{'join1': 26.376406409819296, 'addValue': 10.239841164791528, 'collapse_dataset': 35.341251713111006, 'Convert characters1': 13.377485734048072, 'tp_easyjoin_tool': 202.51832460732984}
Actual tools: Paste1,join1,tp_easyjoin_tool

Predicted tools: 

{'join1': 9.9, 'Paste1': 1.49, 'collapse_dataset': 1.34, 'tp_easyjoin_tool': 1.34, 'fastqc': 0.55}

Class weights: 

{'join1': 105.03701179940524, 'Paste1': 92.32608425880143, 'collapse_da

Actual tools: 

Predicted tools: 

{'tp_sort_header_tool': 2.38, 'Convert characters1': 2.14, 'datamash_transpose': 0.8, 'fastqc': 0.06, 'htseq_count': 0.06}

Class weights: 

{'tp_sort_header_tool': 715.2672298711229, 'Convert characters1': 25.528665312695974, 'datamash_transpose': 6.561140733883205, 'fastqc': 2303.1236832075983, 'htseq_count': 840.4553052741519}

Usage weights: 

{'tp_sort_header_tool': 753.90013126, 'Convert characters1': 48.71713307, 'datamash_transpose': 17.41986263, 'fastqc': 6993.70010361, 'htseq_count': 538.7081783}

Inverted weights: 

{'tp_sort_header_tool': 678.6140350877193, 'Convert characters1': 13.377485734048072, 'datamash_transpose': 2.471234627056381, 'fastqc': 758.4509803921569, 'htseq_count': 1311.2203389830509}
Actual tools: 

Predicted tools: 

{'tp_sort_header_tool': 1.89, 'Convert characters1': 1.59, 'datamash_transpose': 0.24, 'fastqc': 0.03, 'htseq_count': 0.03}

Class weights: 

{'tp_sort_header_tool': 715.2672298711229, 'Convert characters1'

Actual tools: 

Predicted tools: 

{'tp_easyjoin_tool': 5.24, 'join1': 2.48, 'Convert characters1': 0.23, 'tp_cat': 0.21, 'Paste1': 0.16}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'join1': 105.03701179940524, 'Convert characters1': 25.528665312695974, 'tp_cat': 296.12249653747506, 'Paste1': 92.32608425880143}

Usage weights: 

{'tp_easyjoin_tool': 489.22502467, 'join1': 418.28191742, 'Convert characters1': 48.71713307, 'tp_cat': 154.15372511, 'Paste1': 55.42288507}

Inverted weights: 

{'tp_easyjoin_tool': 202.51832460732984, 'join1': 26.376406409819296, 'Convert characters1': 13.377485734048072, 'tp_cat': 568.8382352941177, 'Paste1': 153.80119284294236}
Actual tools: 

Predicted tools: 

{'tp_easyjoin_tool': 6.0, 'join1': 0.24, 'Convert characters1': 0.14, 'collapse_dataset': 0.06, 'Paste1': 0.03}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'join1': 105.03701179940524, 'Convert characters1': 25.528665312695974, 'collapse_dataset': 37.9852280938911, 'Paste

Actual tools: fastqc,rna_star

Predicted tools: 

{'fastqc': 72.55, 'bowtie2': 9.09, 'rna_star': 7.64, 'htseq_count': 6.58, 'samtools_sort': 5.91}

Class weights: 

{'fastqc': 2303.1236832075983, 'bowtie2': 1615.6868628096963, 'rna_star': 1173.7924874363764, 'htseq_count': 840.4553052741519, 'samtools_sort': 424.0077368038999}

Usage weights: 

{'fastqc': 6993.70010361, 'bowtie2': 877.3240739, 'rna_star': 641.14677656, 'htseq_count': 538.7081783, 'samtools_sort': 106.9}

Inverted weights: 

{'fastqc': 758.4509803921569, 'bowtie2': 2975.4615384615386, 'rna_star': 2148.9444444444443, 'htseq_count': 1311.2203389830509, 'samtools_sort': 1681.7826086956522}
Actual tools: fastqc,join1

Predicted tools: 

{'fastqc': 38.7, 'multiqc': 4.07, 'join1': 2.52, 'tp_easyjoin_tool': 2.47, 'cutadapt': 2.36}

Class weights: 

{'fastqc': 2303.1236832075983, 'multiqc': 607.7372072437983, 'join1': 105.03701179940524, 'tp_easyjoin_tool': 314.7650430911095, 'cutadapt': 3434.6919588205656}

Usage weights: 

{'

Actual tools: Remove beginning1

Predicted tools: 

{'FileInfo': 6.15, 'PeptideIndexer': 1.31, 'Remove beginning1': 1.09, 'tp_sort_header_tool': 0.6, 'datamash_transpose': 0.31}

Class weights: 

{'FileInfo': 180.43705727415227, 'PeptideIndexer': 116.80686465416075, 'Remove beginning1': 63.235244558058824, 'tp_sort_header_tool': 715.2672298711229, 'datamash_transpose': 6.561140733883205}

Usage weights: 

{'FileInfo': 56.81428571, 'PeptideIndexer': 6.17272727, 'Remove beginning1': 295.70772083, 'tp_sort_header_tool': 753.90013126, 'datamash_transpose': 17.41986263}

Inverted weights: 

{'FileInfo': 573.0518518518519, 'PeptideIndexer': 2210.342857142857, 'Remove beginning1': 13.522461108197868, 'tp_sort_header_tool': 678.6140350877193, 'datamash_transpose': 2.471234627056381}
Actual tools: Remove beginning1

Predicted tools: 

{'FileInfo': 4.52, 'Remove beginning1': 0.83, 'tp_sort_header_tool': 0.58, 'datamash_transpose': 0.25, 'melt': 0.24}

Class weights: 

{'FileInfo': 180.4370572741

Actual tools: fastqc,join1

Predicted tools: 

{'fastqc': 38.7, 'multiqc': 4.07, 'join1': 2.52, 'tp_easyjoin_tool': 2.47, 'cutadapt': 2.36}

Class weights: 

{'fastqc': 2303.1236832075983, 'multiqc': 607.7372072437983, 'join1': 105.03701179940524, 'tp_easyjoin_tool': 314.7650430911095, 'cutadapt': 3434.6919588205656}

Usage weights: 

{'fastqc': 6993.70010361, 'multiqc': 434.45555556, 'join1': 418.28191742, 'tp_easyjoin_tool': 489.22502467, 'cutadapt': 762.46147023}

Inverted weights: 

{'fastqc': 758.4509803921569, 'multiqc': 850.1318681318681, 'join1': 26.376406409819296, 'tp_easyjoin_tool': 202.51832460732984, 'cutadapt': 15472.4}
Actual tools: fastqc,tp_sort_header_tool,Cut1

Predicted tools: 

{'fastqc': 13.96, 'tp_easyjoin_tool': 8.31, 'Cut1': 7.31, 'tp_cat': 6.29, 'tp_sort_header_tool': 4.6}

Class weights: 

{'fastqc': 2303.1236832075983, 'tp_easyjoin_tool': 314.7650430911095, 'Cut1': 122.80379255282038, 'tp_cat': 296.12249653747506, 'tp_sort_header_tool': 715.2672298711229}

U

Actual tools: tp_easyjoin_tool,Cut1,join1,addValue

Predicted tools: 

{'join1': 26.68, 'fastqc': 13.02, 'Cut1': 12.5, 'tp_easyjoin_tool': 9.32, 'addValue': 2.93}

Class weights: 

{'join1': 105.03701179940524, 'fastqc': 2303.1236832075983, 'Cut1': 122.80379255282038, 'tp_easyjoin_tool': 314.7650430911095, 'addValue': 68.8574344458798}

Usage weights: 

{'join1': 418.28191742, 'fastqc': 6993.70010361, 'Cut1': 2455.24051351, 'tp_easyjoin_tool': 489.22502467, 'addValue': 463.02927967}

Inverted weights: 

{'join1': 26.376406409819296, 'fastqc': 758.4509803921569, 'Cut1': 6.1422786820166735, 'tp_easyjoin_tool': 202.51832460732984, 'addValue': 10.239841164791528}
Actual tools: Paste1,Cut1,join1,tp_easyjoin_tool

Predicted tools: 

{'Cut1': 26.69, 'join1': 22.02, 'fastqc': 21.27, 'tp_easyjoin_tool': 14.26, 'Paste1': 2.63}

Class weights: 

{'Cut1': 122.80379255282038, 'join1': 105.03701179940524, 'fastqc': 2303.1236832075983, 'tp_easyjoin_tool': 314.7650430911095, 'Paste1': 92.3260842588014

Actual tools: join1,tp_easyjoin_tool

Predicted tools: 

{'fastqc': 9.11, 'join1': 6.52, 'tp_easyjoin_tool': 4.71, 'collapse_dataset': 1.48, 'multiqc': 0.84}

Class weights: 

{'fastqc': 2303.1236832075983, 'join1': 105.03701179940524, 'tp_easyjoin_tool': 314.7650430911095, 'collapse_dataset': 37.9852280938911, 'multiqc': 607.7372072437983}

Usage weights: 

{'fastqc': 6993.70010361, 'join1': 418.28191742, 'tp_easyjoin_tool': 489.22502467, 'collapse_dataset': 40.82700763, 'multiqc': 434.45555556}

Inverted weights: 

{'fastqc': 758.4509803921569, 'join1': 26.376406409819296, 'tp_easyjoin_tool': 202.51832460732984, 'collapse_dataset': 35.341251713111006, 'multiqc': 850.1318681318681}
Actual tools: Cut1,join1,tp_easyjoin_tool

Predicted tools: 

{'join1': 25.84, 'fastqc': 8.72, 'tp_easyjoin_tool': 7.53, 'collapse_dataset': 5.3, 'Cut1': 2.5}

Class weights: 

{'join1': 105.03701179940524, 'fastqc': 2303.1236832075983, 'tp_easyjoin_tool': 314.7650430911095, 'collapse_dataset': 37.985228093

Actual tools: Paste1,tp_easyjoin_tool

Predicted tools: 

{'fastqc': 0.61, 'collapse_dataset': 0.29, 'tp_easyjoin_tool': 0.13, 'Convert characters1': 0.04, 'Paste1': 0.02}

Class weights: 

{'fastqc': 2303.1236832075983, 'collapse_dataset': 37.9852280938911, 'tp_easyjoin_tool': 314.7650430911095, 'Convert characters1': 25.528665312695974, 'Paste1': 92.32608425880143}

Usage weights: 

{'fastqc': 6993.70010361, 'collapse_dataset': 40.82700763, 'tp_easyjoin_tool': 489.22502467, 'Convert characters1': 48.71713307, 'Paste1': 55.42288507}

Inverted weights: 

{'fastqc': 758.4509803921569, 'collapse_dataset': 35.341251713111006, 'tp_easyjoin_tool': 202.51832460732984, 'Convert characters1': 13.377485734048072, 'Paste1': 153.80119284294236}
Actual tools: tp_easyjoin_tool

Predicted tools: 

{'collapse_dataset': 7.23, 'fastqc': 0.44, 'tp_easyjoin_tool': 0.12, 'Convert characters1': 0.07, 'cat1': 0.05}

Class weights: 

{'collapse_dataset': 37.9852280938911, 'fastqc': 2303.1236832075983, 'tp_ea

Actual tools: Paste1,join1,Convert characters1

Predicted tools: 

{'tp_easyjoin_tool': 23.19, 'Convert characters1': 1.27, 'collapse_dataset': 0.7, 'Paste1': 0.56, 'join1': 0.29}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'Convert characters1': 25.528665312695974, 'collapse_dataset': 37.9852280938911, 'Paste1': 92.32608425880143, 'join1': 105.03701179940524}

Usage weights: 

{'tp_easyjoin_tool': 489.22502467, 'Convert characters1': 48.71713307, 'collapse_dataset': 40.82700763, 'Paste1': 55.42288507, 'join1': 418.28191742}

Inverted weights: 

{'tp_easyjoin_tool': 202.51832460732984, 'Convert characters1': 13.377485734048072, 'collapse_dataset': 35.341251713111006, 'Paste1': 153.80119284294236, 'join1': 26.376406409819296}
Actual tools: join1,Convert characters1

Predicted tools: 

{'tp_easyjoin_tool': 33.02, 'join1': 1.62, 'Convert characters1': 0.4, 'collapse_dataset': 0.32, 'Grouping1': 0.31}

Class weights: 

{'tp_easyjoin_tool': 314.7650430911095, 'join1': 105.0370

Actual tools: 

Predicted tools: 

{'Cut1': 4.98, 'samtools_flagstat': 4.78, 'fraggenescan': 2.44, 'multiqc': 2.1, 'featurecounts': 2.08}

Class weights: 

{'Cut1': 122.80379255282038, 'samtools_flagstat': 496.6614491209681, 'fraggenescan': 373.5805744707452, 'multiqc': 607.7372072437983, 'featurecounts': 1389.358749404951}

Usage weights: 

{'Cut1': 2455.24051351, 'samtools_flagstat': 274.21528882, 'fraggenescan': 27.06027099, 'multiqc': 434.45555556, 'featurecounts': 1397.2983265}

Inverted weights: 

{'Cut1': 6.1422786820166735, 'samtools_flagstat': 899.5581395348837, 'fraggenescan': 5157.466666666666, 'multiqc': 850.1318681318681, 'featurecounts': 1381.4642857142858}
Actual tools: 



KeyError: 'ggplot_histogram'