In [None]:
import pandas as pd
import ipywidgets as ipw
import numpy as np
import networkx as nx

In [None]:
#simple helper functions
def gmean(data, axis=0):
    """Return the geometric mean of ``data`` along ``axis``.

    Computed as ``exp(mean(log(data)))``, so the inputs are expected to be
    strictly positive: a zero gives 0 (with a NumPy warning) and a negative
    value gives NaN.
    """
    return np.exp(np.mean(np.log(data), axis=axis))

def combine_ranks(data, hpo_terms):
    """Combine the ranks of several terms into one value per row.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose columns are looked up by the entries of ``hpo_terms``.
    hpo_terms : iterable
        Column labels to combine; labels missing from ``data.columns`` are
        ignored.

    Returns
    -------
    pandas.Series
        Row-wise geometric mean over the retained columns, indexed like
        ``data``. When none of the requested terms is a column, every value
        is NaN.
    """
    # remove terms not in dataset
    present = [term for term in hpo_terms if term in data.columns]
    # Use the gmean helper defined above: scipy is never imported in this
    # notebook, so the former scipy.stats.gmean call raised NameError.
    # Converting to a plain float array keeps NumPy (not pandas) semantics.
    values = np.asarray(data.loc[:, present], dtype=float)
    return pd.Series(gmean(values, axis=1), index=data.index)

def reduce_terms(data, hpo_tree, hpo_terms, root='HP:0000001'):
    """Map each HPO term to its closest ancestor that is a column of ``data``.

    For every term the shortest path from the term to ``root`` is walked and
    the first node on it (the term itself included) that appears in
    ``data.columns`` is kept.

    Parameters
    ----------
    data : pandas.DataFrame
        Dataset whose column labels define the usable terms.
    hpo_tree : networkx graph
        Graph in which paths are searched from each term towards ``root``.
    hpo_terms : iterable
        Terms to reduce.
    root : hashable, optional
        Node the paths are directed to; defaults to ``'HP:0000001'``.

    Returns
    -------
    set
        The selected column labels. Terms that are not in the graph, have no
        path to ``root``, or have no node of their path in the dataset
        contribute nothing.
    """
    columns = set(data.columns)
    term_list = set()
    if root not in hpo_tree:
        # Without the root node no term can have a path to it.
        return term_list
    for term in hpo_terms:
        if term not in hpo_tree:
            # Unknown term: skip it instead of letting networkx raise.
            continue
        try:
            # The path must be guarded *before* it is computed; a filter
            # clause inside a comprehension over shortest_path() runs too late.
            path = nx.shortest_path(hpo_tree, term, root)
        except nx.NetworkXNoPath:
            continue
        #first node on the path that actually is in the dataset
        closest = next((node for node in path if node in columns), None)
        if closest is not None:
            term_list.add(closest)
    return term_list

In [None]:
#load data
#github does not allow files larger than 100 MB, so the model is split into
#chunks 00..08; read them all and concatenate once instead of re-copying the
#growing frame on every loop iteration
#NOTE: all inputs below are pickles - only load files from a trusted source
model_ranks = pd.concat(
    [pd.read_pickle("data/global_ind_pheno_tumor_munge.best_pred.%02d.pickle" % chunk)
     for chunk in range(9)]
)

#scored terms with some annotation
hp_annot = pd.read_pickle("data/global_ind_pheno_tumor_munge.best.pickle")
#combined ranks for OMIM diseases
omim = pd.read_pickle("data/OMIM_annotation_data.pickle")
#the HPO obo tree (read_gpickle is exposed at the networkx top level)
hpo_net = nx.read_gpickle("data/hp.180127.obo.gpickle")

In [None]:
#build the "ID: name" labels offered in the phenotype picker, sorted by HPO id
#nodes(data=True) yields (id, attribute dict) pairs; the older Graph.node
#attribute used before was removed in networkx 2.4
hpo_attrs = dict(hpo_net.nodes(data=True))
hpo_ids = sorted(hpo_attrs)
hpo_names = [hpo_attrs[term_id]['name'] for term_id in hpo_ids]
hpo_terms = ["%s: %s" % (term_id, name) for term_id, name in zip(hpo_ids, hpo_names)]
selected_terms = set()

#define the widget for HPO search
#free-text area for the gene list
gene_list = ipw.Textarea(description='Genes:', placeholder='Enter your gene list', disabled=False)

#text box whose content filters the list of phenotypes
search_widget = ipw.Text(placeholder='Start typing phenotypes...')

#left column: all terms that can be selected, with the "add" button underneath
options_widget = ipw.SelectMultiple(options=hpo_terms)
add_button = ipw.Button(description='Add term')
left_area = ipw.VBox([options_widget, add_button])

#right column: the chosen terms, with the "remove" button underneath
selected_widget = ipw.SelectMultiple()
remove_button = ipw.Button(description='Remove term')
right_area = ipw.VBox([selected_widget, remove_button])

#bottom row: reset and submit
reset_button = ipw.Button(description='Reset')
submit_button = ipw.Button(description='Submit')
bottom_area = ipw.HBox([reset_button, submit_button])

#stitch together: search box on top of the two columns
selection_area = ipw.HBox([left_area, right_area])
multi_select = ipw.VBox([search_widget, selection_area])

#define actions
def on_text_change(change):
    """Refresh the selectable terms whenever the search text changes.

    ``change['new']`` is the current search text. An empty text restores the
    full ``hpo_terms`` list; otherwise only the labels containing the text
    (case-insensitively) are offered in ``options_widget``.
    """
    query = change['new']
    if query != '':
        # Filter by search field
        needle = query.lower()
        matches = [label for label in hpo_terms if needle in label.lower()]
    else:
        # Reset search field
        matches = hpo_terms
    options_widget.options = matches

def on_add(b):
    """Add the terms highlighted in ``options_widget`` to ``selected_widget``.

    The result is the sorted, duplicate-free union of the terms already
    selected and the newly highlighted ones. ``b`` (the clicked button) is
    not used.
    """
    already_selected = set(selected_widget.options)
    highlighted = set(options_widget.value)
    selected_widget.options = sorted(already_selected | highlighted)

def on_remove(b):
    """Drop the terms highlighted in ``selected_widget`` from its options.

    ``b`` (the clicked button) is not used.
    """
    highlighted = selected_widget.value
    remaining = []
    for term in selected_widget.options:
        if term not in highlighted:
            remaining.append(term)
    selected_widget.options = remaining

def on_submit(b):
    """Click handler for the submit button; currently a no-op placeholder."""
    return None

def on_reset(b):
    """Clear the search text, the gene list and the selected terms.

    ``b`` (the clicked button) is not used.
    """
    # blank both text inputs
    for text_input in (search_widget, gene_list):
        text_input.value = ''
    # empty the list of chosen terms
    selected_widget.options = tuple()

#link actions: each button gets its click handler
for button, handler in (
    (add_button, on_add),
    (remove_button, on_remove),
    (submit_button, on_submit),
    (reset_button, on_reset),
):
    button.on_click(handler)
#re-filter the term list whenever the search text changes
search_widget.observe(on_text_change, names='value')

#display
display(gene_list, multi_select, bottom_area)
