# eDISH Plots for Office of Women's Health 

This notebook creates interactive [eDISH](https://pubmed.ncbi.nlm.nih.gov/21332248/) (evaluation of Drug-Induced Serious Hepatotoxicity) plots.  

In [None]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import glob
import matplotlib.pylab as plt
from matplotlib.ticker import NullFormatter
from numpy.polynomial.polynomial import polyfit

import os, math
import numpy as np

from send import send_db

In [None]:
import re

def filter_text(x):
    """Pull the first number out of a (possibly textual) lab response.

    ``x`` is converted to a string and searched for an optionally signed
    integer or decimal (e.g. ``'<0.1'`` gives ``0.1``).  The matched text is
    returned as a float; when no digits are present ``np.nan`` is returned.
    """
    found = re.search(r'[-+]?([0-9]*\.[0-9]+|[0-9]+)', str(x))
    return np.nan if found is None else float(found.group(0))

def regrex_match(regrex, string: str):
    """Report whether ``regrex`` matches anywhere in ``string``, ignoring case.

    Parameters
    ----------
    regrex : str
        Regular expression to search for.
    string : str
        Text to search.  Non-string input (e.g. ``None`` or ``NaN`` standing in
        for a missing finding) cannot match and yields ``False`` instead of
        raising ``TypeError``.

    Returns
    -------
    bool
        ``True`` if the pattern is found anywhere in ``string``, else ``False``.
    """
    if not isinstance(string, str):
        return False
    return re.search(regrex, string, re.IGNORECASE) is not None


def is_cholestasis(finding: str):
    """Decide whether the text of a liver MI finding points to cholestasis.

    A finding is flagged when it contains, in any letter case, one of
    ``chole``, ``cholo``, ``chola``, ``bili`` or ``bile``.

    wikipedia link for Cholestasis: https://en.wikipedia.org/wiki/Cholestasis
    """
    cholestasis_terms = r'chol(e|o|a)|bil(i|e)'
    flagged = regrex_match(cholestasis_terms, finding)
    return flagged

def is_steatosis(finding: str):
    """Decide whether the text of a liver MI finding points to steatosis.

    A finding is flagged when it mentions one of the steatosis-related terms
    (``fat``, ``lipid``, ``vacuol``, ``acc``, ``steat``, ``congest``) and does
    not also describe a reduction (``decreas`` or ``lower``).

    wikipedia link for steatosis: https://en.wikipedia.org/wiki/Steatosis
    """
    # No steatosis-related term at all: nothing more to check.
    if not regrex_match(r'fat|lipid|vacuol|acc|steat|congest', finding):
        return False
    # A term was found; reject findings that describe a decrease.
    return not regrex_match(r'decreas|lower', finding)

def is_necrosis(finding: str):
    """Decide whether the text of a liver MI finding points to necrosis.

    A finding is flagged when it contains, in any letter case, one of
    ``necros``, ``fibros``, ``degen``, ``atroph``, ``apop`` or ``deplet``.
    """
    necrosis_terms = r'necros|fibros|degen|atroph|apop|deplet'
    flagged = regrex_match(necrosis_terms, finding)
    return flagged


def classify_helper(helper_fx, findings):
    """Return 1 if ``helper_fx`` is truthy for at least one finding, else 0.

    Findings are checked in order and evaluation stops at the first hit.
    """
    return int(any(helper_fx(text) for text in findings))

from functools import partial

# One classifier per finding class: each takes all findings of an animal and
# returns 1 when any of them matches the corresponding is_* predicate, else 0.
classify_steatosis, classify_cholestasis, classify_necrosis = (
    partial(classify_helper, predicate)
    for predicate in (is_steatosis, is_cholestasis, is_necrosis)
)

def get_classified_liver_results():
    """Pull all liver MI results and classify each animal's findings.

    Returns one row per (STUDYID, USUBJID) with:

    * ``STEATOSIS``, ``CHOLESTASIS``, ``NECROSIS`` - 1 if any of the animal's
      liver findings matches the corresponding classifier, else 0;
    * ``MISTRESC`` - all of the animal's liver findings joined with ``';'``.
    """
    mi = send_db.generic_query('SELECT STUDYID, USUBJID, MISTRESC FROM MI WHERE MISPEC="LIVER"')

    # A missing result can neither be searched with a regex nor joined as text;
    # treat it as an empty finding so it matches no class and joins cleanly.
    mi['MISTRESC'] = mi['MISTRESC'].fillna('')

    animal_keys = ['STUDYID', 'USUBJID']
    mi['STEATOSIS'] = mi.groupby(animal_keys)['MISTRESC'].transform(classify_steatosis)
    mi['CHOLESTASIS'] = mi.groupby(animal_keys)['MISTRESC'].transform(classify_cholestasis)
    mi['NECROSIS'] = mi.groupby(animal_keys)['MISTRESC'].transform(classify_necrosis)
    mi['MISTRESC'] = mi.groupby(animal_keys)['MISTRESC'].transform(lambda x: ';'.join(x))

    # Every row of an animal now carries identical values; keep a single one.
    return mi.drop_duplicates(animal_keys)

# Set Global Variables

There are two global variables that need to be set for the interactive eDISH plots that will be made.  

1) `SPECIES`: the species, in SEND controlled terminology to be used to create plots.  
2) `LBSPEC`: the specimen, in SEND controlled terminology to be used for the ALT and BILI responses.  This refers to the "tissue" or "component" the sample is taken from (i.e., plasma or serum).  

In [None]:
# Species to plot, in SEND controlled terminology; used to filter the animal table.
SPECIES = 'RAT'
# Specimen (SEND controlled terminology) that the ALT and BILI results must come from.
LBSPEC = 'SERUM'

### Select animals

In [None]:
# Load every animal, then keep only those of the configured species.
animals = send_db.get_all_animals().loc[lambda frame: frame.SPECIES == SPECIES]
print(animals.head())

Get all the ALT and BILI responses for the target animals. 

In [None]:
# ALT and BILI results measured in the configured specimen.
lb_query = f'SELECT USUBJID, LBTESTCD, LBSPEC, LBSTRESC, LBSTRESU FROM LB WHERE (LBTESTCD == "ALT" OR LBTESTCD == "BILI") AND (LBSPEC == "{LBSPEC}")'
clin_chem = send_db.generic_query(lb_query)

# Restrict the results to the animals selected above.
is_target_animal = clin_chem.USUBJID.isin(animals.USUBJID)
clin_chem = clin_chem[is_target_animal]

print(clin_chem.head())

### Check unit for ALT and BILI.

From previous tests, only a few distinct unit strings appear for each test.  

ALT -> UI/L or IU/L or ukat/L  
BILI -> mg/dL or umol/L

In [None]:
# LBSTRESU holds the unit of each result, so these are the distinct units
# reported for each test (not distinct tests).
print("Unique ALT units: ", clin_chem[clin_chem.LBTESTCD == 'ALT'].LBSTRESU.unique())
print("Unique BILI units: ", clin_chem[clin_chem.LBTESTCD == 'BILI'].LBSTRESU.unique())

### Conversion

1) Remove unitless responses  
2) Use `filter_text` to extract numeric values from discrete responses (e.g., <0.1)   
3a) ALT -> convert ukat/L to IU/L or U/L by multiplying by 1/0.0167  
3b) BILI -> convert umol/L  to mg/dL by multiplying by 1/17.104    

In [None]:

# 1) Remove unitless responses.  A missing (NULL) unit is just as unitless as an
#    empty string, so both are dropped.  .copy() gives an independent frame so the
#    column assignments below do not write into a slice of the previous one.
has_unit = clin_chem.LBSTRESU.notnull() & (clin_chem.LBSTRESU != '')
clin_chem = clin_chem[has_unit].copy()

# 2) Extract the numeric part of each response; drop rows without one.
clin_chem['LBSTRESC_T'] = clin_chem.LBSTRESC.apply(filter_text)
clin_chem = clin_chem[clin_chem.LBSTRESC_T.notnull()].copy()

# 3) Convert to common units.  Start from the extracted value and overwrite only
#    the rows reported in the alternative unit.
clin_chem['LBSTRESC_CONV'] = clin_chem.LBSTRESC_T

alt_in_ukat = (clin_chem.LBTESTCD == 'ALT') & (clin_chem.LBSTRESU == 'ukat/L')
bili_in_umol = (clin_chem.LBTESTCD == 'BILI') & (clin_chem.LBSTRESU == 'umol/L')

# 3a) ALT: ukat/L -> U/L
clin_chem.loc[alt_in_ukat, 'LBSTRESC_CONV'] = clin_chem.loc[alt_in_ukat, 'LBSTRESC_T'] * (1/0.0167)
# 3b) BILI: umol/L -> mg/dL
clin_chem.loc[bili_in_umol, 'LBSTRESC_CONV'] = clin_chem.loc[bili_in_umol, 'LBSTRESC_T'] * (1/17.104)
clin_chem.head()

### Transform data

Merge the converted data with the animal meta data, then take the max response for each animal/LBTESTCD.

Pivot the data from long to wide format.

In [None]:
# Attach animal metadata to every lab result.
animal_clin_chem = animals.merge(clin_chem)

# Peak converted response per animal and test, kept once per animal/test.
peak_keys = ['STUDYID', 'USUBJID', 'LBTESTCD']
animal_clin_chem['LBSTRESC_MAX'] = (
    animal_clin_chem.groupby(peak_keys)['LBSTRESC_CONV'].transform('max')
)
max_responses = animal_clin_chem.drop_duplicates(peak_keys)

# Long -> wide: one row per animal, one column per test.
data = max_responses.pivot_table(
    index=['STUDYID', 'USUBJID'],
    columns='LBTESTCD',
    values='LBSTRESC_MAX',
).reset_index()

# Only animals with both an ALT and a BILI value can be plotted.
data = data.dropna(subset=['ALT', 'BILI'])

animal_data = animals.merge(data)
print(animal_data.head())

### Merge histopath findings

In [None]:
# Per-animal liver histopathology classes (steatosis / cholestasis / necrosis).
liver_findings = get_classified_liver_results()

# Merge on the shared columns; animals without liver findings drop out.
animal_data = pd.merge(animal_data, liver_findings)
print(animal_data.head())

### Load Bokeh

Bokeh is nice for interactions, so load that for these plots.

In [None]:
from bokeh.layouts import gridplot
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import Span
# Configure Bokeh so that show() renders figures inline in the notebook.
output_notebook()

### Establishing upper limit of normal 

For this we'll need to identify control animals.  The `send_db` object has a useful function to get control animals.  

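After that we plot the log-normal distributions of the control animals' ALT and BILI responses, and take the 95th percentile of each fitted distribution as the upper limit of normal (ULN).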

In [None]:
# Control animals restricted to the selected species, with their peak ALT/BILI.
control_animals = pd.merge(send_db.get_control_animals(), animals)
control_animals_data = pd.merge(control_animals, data)
control_animals_data.head()

In [None]:
def make_histogram(data, title, n_bins=100):
    """Draw a density histogram of ``data`` with a fitted log-normal curve.

    A log-normal distribution is fitted with its location fixed at 0.  The
    fitted PDF is overlaid on the histogram and the 95th percentile of the
    fit is marked with a vertical dash-dotted line labelled "ULN".

    Returns a tuple ``(figure, percentile_95)``.
    """
    import numpy as np
    from scipy.stats import lognorm

    # Fit first; the figure only needs the shape and scale of the fit.
    shape, _, fitted_scale = lognorm.fit(data, floc=0)
    percentile_95 = lognorm.ppf(0.95, shape, scale=fitted_scale)

    counts, bin_edges = np.histogram(data, density=True, bins=n_bins)
    fig = figure(tools='box_zoom', background_fill_color="#fafafa", title=title)
    fig.quad(top=counts, bottom=0,
             left=bin_edges[:-1], right=bin_edges[1:],
             fill_color="navy", line_color="white", alpha=0.5)

    # Fitted PDF evaluated over the observed data range.
    grid = np.linspace(data.min(), data.max(), 100)
    fig.line(grid, lognorm.pdf(grid, shape, scale=fitted_scale),
             line_color="#ff8888", line_width=4, alpha=0.7, legend_label="PDF")

    threshold_style = dict(line_dash='dotdash', line_color="black", line_alpha=0.7, line_width=3)
    marker = Span(location=percentile_95, dimension='height', **threshold_style)
    # Empty line glyph: exists only to create a legend entry for the Span.
    fig.line([], [], legend_label='ULN', **threshold_style)
    fig.renderers.extend([marker])
    return fig, percentile_95

In [None]:
# Upper limits of normal, keyed as '<TEST> - <SEX>'.
ULNs = {}

male_controls = control_animals_data.SEX == 'M'
female_controls = control_animals_data.SEX == 'F'

alt_data_M = control_animals_data.loc[male_controls, 'ALT']
alt_data_F = control_animals_data.loc[female_controls, 'ALT']
# BILI is shifted by a small constant before the log-normal fit.
bili_data_M = control_animals_data.loc[male_controls, 'BILI'] + 0.001
bili_data_F = control_animals_data.loc[female_controls, 'BILI'] + 0.001

alt_m_f, ULNs['ALT - M'] = make_histogram(alt_data_M, 'ALT - Male')
alt_f_f, ULNs['ALT - F'] = make_histogram(alt_data_F, 'ALT - Female')
bili_m_f, ULNs['BILI - M'] = make_histogram(bili_data_M, 'BILI - Male', n_bins=50)
bili_f_f, ULNs['BILI - F'] = make_histogram(bili_data_F, 'BILI - Female', n_bins=50)

# 2 x 2 grid: ALT on the top row, BILI on the bottom row; males left, females right.
histogram_grid = [
    [alt_m_f, alt_f_f],
    [bili_m_f, bili_f_f],
]
show(gridplot(histogram_grid))

### Controls

Add a label if they're control or not

In [None]:
# True for animals whose USUBJID appears among the control animals.
animal_data['IS_CONTROL'] = animal_data.USUBJID.isin(control_animals.USUBJID)

In [None]:
from bokeh.models import CustomJS, Slider
from bokeh.layouts import column, row, Spacer, layout
from bokeh.models import ColumnDataSource, BooleanFilter, CDSView, Label
from bokeh.models.widgets import DataTable, TableColumn, NumberFormatter, Div, Tabs, Panel, Spinner, TextInput
from bokeh.transform import factor_cmap
from bokeh.plotting import output_file

import numpy as np
from scipy.stats import lognorm, norm



def make_edish_dashboard(data, disease, sex):
    """Build the interactive eDISH dashboard for one sex and one finding class.

    The returned Bokeh layout contains a text header, a one-row summary table
    (animals above both thresholds, positives among them, sensitivity, PPV),
    two threshold sliders, a scatter of log10(peak ALT) against
    log10(peak BILI) coloured by ``disease`` status, and a stacked marginal
    histogram for each axis.  Moving a slider moves the threshold line, updates
    the "x ULN" labels and recomputes the summary table in the browser.

    The initial thresholds are 3 x ULN for ALT and 2 x ULN for BILI.

    Relies on the notebook-level ``ULNs`` dict (keys ``'ALT - <sex>'`` and
    ``'BILI - <sex>'``) and on ``SPECIES`` being defined before it is called.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per animal with at least the columns ``SEX``, ``ALT``,
        ``BILI`` and the column named by ``disease``.  Not modified.
    disease : str
        Name of the 0/1 column used to colour animals and compute statistics.
    sex : str
        Value of ``SEX`` to keep (``'M'`` or ``'F'`` in this notebook).

    Returns
    -------
    The Bokeh ``gridplot`` layout holding the whole dashboard.
    """

    # plot aesthetic settings
    SIZE = 7.5
    BACKGROUND_FILL_COLOR = '#F7F7F7'
    POSITIVE_COLOR = 'red'
    NEGATIVE_COLOR = 'black'
    ALPHA = 0.4
    THRESHOLD_LINE_COLOR = 'black'
    THRESHOLD_LINE_ALPHA = 0.6
    HISTOGRAM_BIN_SIZE = 25
    LINE_COLOR = 'black'

    # Upper limits of normal on the original (non-log) scale.
    alt_uln_raw = ULNs['ALT - {}'.format(sex)]
    bili_uln_raw = ULNs['BILI - {}'.format(sex)]

    # Work on an independent copy so the transformations below neither touch
    # the caller's frame nor write into a slice of it.
    animal_data_sex = data[data.SEX == sex].copy()

    # NOTE(review): math.log10 raises ValueError for values <= 0; ALT is not
    # clamped or offset the way BILI is below - confirm ALT is always positive.
    animal_data_sex['ALT'] = animal_data_sex['ALT'].apply(math.log10)

    # Negative BILI values are clamped to 0, then a small offset keeps log10 defined.
    animal_data_sex.loc[animal_data_sex.BILI < 0, 'BILI'] = 0
    animal_data_sex['BILI'] = (animal_data_sex['BILI'] + 0.001).apply(math.log10)

    # Fixed column name so the JavaScript callbacks do not depend on `disease`.
    animal_data_sex['DISEASE'] = animal_data_sex[disease]

    # Initial thresholds on the log10 scale: 3 x ULN (ALT) and 2 x ULN (BILI).
    alt_uln = math.log10(alt_uln_raw*3)
    bili_uln = math.log10(bili_uln_raw*2)

    animal_source = ColumnDataSource(animal_data_sex)

    p = figure()

    # filter positive animals
    positives = [bool(d) for d in animal_source.data[disease]]
    negatives = [not bool(d) for d in animal_source.data[disease]]
    pos_view = CDSView(source=animal_source, filters=[BooleanFilter(positives)])
    neg_view = CDSView(source=animal_source, filters=[BooleanFilter(negatives)])

    vline = Span(location=alt_uln, dimension='height', line_color=THRESHOLD_LINE_COLOR, line_alpha=THRESHOLD_LINE_ALPHA, line_width=4)
    hline = Span(location=bili_uln, dimension='width', line_color=THRESHOLD_LINE_COLOR, line_alpha=THRESHOLD_LINE_ALPHA, line_width=4)

    # Current thresholds expressed as multiples of the ULN.
    xULN_bili = (10**bili_uln) / bili_uln_raw
    xULN_alt = (10**alt_uln) / alt_uln_raw

    # Negatives are drawn first so positives end up on top.
    p.scatter(x='ALT', y='BILI',
              size=SIZE,
              fill_color=NEGATIVE_COLOR,
              fill_alpha=ALPHA,
              line_color=LINE_COLOR,
              source=animal_source,
              view=neg_view)

    p.scatter(x='ALT', y='BILI',
              size=SIZE,
              fill_color=POSITIVE_COLOR,
              fill_alpha=ALPHA,
              line_color=LINE_COLOR,
              source=animal_source,
              view=pos_view)

    p.yaxis.axis_label = "log(PEAK BILI mg/dL)"
    p.xaxis.axis_label = "log(PEAK ALT U/L)"

    p.xaxis.axis_label_text_font_size = "14pt"
    p.yaxis.axis_label_text_font_size = "14pt"

    # Sliders work on the same log10 scale as the plotted data.
    bili_slider = Slider(start=animal_data_sex.loc[:, 'BILI'].min(), end=animal_data_sex.loc[:, 'BILI'].max(), value=bili_uln, step=0.01, title="BILI Threshold", name='biliSlider')
    alt_slider = Slider(start=animal_data_sex.loc[:, 'ALT'].min(), end=animal_data_sex.loc[:, 'ALT'].max(), value=alt_uln, step=0.01, title="ALT Threshold", name='altSlider')

    bili_slider.js_link('value', hline, 'location')
    alt_slider.js_link('value', vline, 'location')

    p.renderers.extend([vline, hline])

    # Summary statistics at the initial thresholds.
    in_upper_right = (animal_data_sex.BILI > bili_uln) & (animal_data_sex.ALT > alt_uln)
    above_hyslaw = sum(in_upper_right)
    positive_above_hyslaw = sum(in_upper_right & (animal_data_sex[disease] == 1))

    table_data = {
        'above_hl': [above_hyslaw],
        'pos_above_hl': [positive_above_hyslaw],
        'BILI_x_ULN': [xULN_bili],
        'ALT_x_ULN': [xULN_alt],
        'sensitivity': [positive_above_hyslaw / (animal_data_sex[disease] == 1).sum()],
        'ppv': [positive_above_hyslaw / above_hyslaw]
    }

    table_source = ColumnDataSource(pd.DataFrame(table_data))

    columns = [
            TableColumn(field="above_hl", title="#\nAbove\nHy's Law"),
            TableColumn(field="pos_above_hl", title="#\nPositive\nAbove\nHy's Law"),
            TableColumn(field="sensitivity", title="Sensitivity", formatter=NumberFormatter(format='0.00 %')),
            TableColumn(field="ppv", title="PPV", formatter=NumberFormatter(format='0.00 %')),
        ]

    data_table = DataTable(source=table_source, columns=columns, index_position=None)

    # top histogram plotting ALT responses
    # Bin edges come from all animals so both groups share the same bins.
    _, e = np.histogram(animal_data_sex.loc[:, 'ALT'], density=False, bins=HISTOGRAM_BIN_SIZE)
    hist_n, edges_n = np.histogram(animal_data_sex.loc[animal_data_sex.DISEASE == 0, 'ALT'], density=False, bins=e)
    hist_p, edges_p = np.histogram(animal_data_sex.loc[animal_data_sex.DISEASE == 1, 'ALT'], density=False, bins=e)

    h1 = figure(tools='box_zoom',
                x_axis_location=None,
                height=200,
                y_axis_type='linear',
                x_axis_type='linear', sizing_mode='fixed')

    # Positives are stacked on top of negatives.
    h1.quad(top=hist_n, bottom=0, left=edges_n[:-1], right=edges_n[1:], fill_color=NEGATIVE_COLOR, fill_alpha=ALPHA, line_color=LINE_COLOR)
    h1.quad(top=hist_n+hist_p, bottom=hist_n, left=edges_p[:-1], right=edges_p[1:], fill_color=POSITIVE_COLOR, fill_alpha=ALPHA, line_color=LINE_COLOR)

    # Share the x range with the scatter so panning/zooming stays aligned.
    h1.x_range = p.x_range

    # Put the label halfway up the tallest ALT bar.
    label_y = (hist_n+hist_p).max() * 0.5

    uln_alt_text = Label(text_color=LINE_COLOR, text=f"{xULN_alt:.2f} x ULN", x=alt_uln, y=label_y, x_offset=5)
    alt_slider.js_link('value', uln_alt_text, 'x')

    h1.renderers.extend([vline, uln_alt_text])
    h1.plot_width = p.plot_width

    # vertical histogram plotting BILI responses
    _, e = np.histogram(animal_data_sex.loc[:, 'BILI'], density=False, bins=HISTOGRAM_BIN_SIZE)
    hist_n, edges_n = np.histogram(animal_data_sex.loc[animal_data_sex.DISEASE == 0, 'BILI'], density=False, bins=e)
    hist_p, edges_p = np.histogram(animal_data_sex.loc[animal_data_sex.DISEASE == 1, 'BILI'], density=False, bins=e)

    h2 = figure(tools='box_zoom',
                y_axis_location=None,
                width=200,
                height=p.plot_height,
                y_axis_type='linear',
                x_axis_type='linear', sizing_mode='fixed')

    h2.quad(right=hist_n, left=0, top=edges_n[:-1], bottom=edges_n[1:], fill_color=NEGATIVE_COLOR, fill_alpha=ALPHA, line_color=LINE_COLOR)
    h2.quad(right=hist_n+hist_p, left=hist_n, top=edges_p[:-1], bottom=edges_p[1:], fill_color=POSITIVE_COLOR, fill_alpha=ALPHA, line_color=LINE_COLOR)

    h2.y_range = p.y_range

    h2.xaxis.major_label_orientation = np.pi/4

    # Halfway along the longest BILI bar.  This must come from the BILI counts:
    # the ALT-based label_y above is on a different count scale.
    label_x = (hist_n+hist_p).max() * 0.5

    uln_bili_text = Label(text_color=LINE_COLOR, text=f"{xULN_bili:.2f} x ULN", y=bili_uln, x=label_x, y_offset=5)
    bili_slider.js_link('value', uln_bili_text, 'y')

    h2.renderers.extend([hline, uln_bili_text])

    # Text inputs for typing a threshold as a multiple of the ULN.  They are
    # wired to the sliders here but are currently not placed in the layout.
    bili_spinner = TextInput(title="BILI x ULN ", value=str(round(xULN_bili, 2)), width=100)
    alt_spinner = TextInput(title="ALT x ULN ", value=str(round(xULN_alt, 2)), width=100)

    # The sliders hold log10 thresholds, so the multiple of ULN has to be
    # converted with Math.log10 (Math.log would be the natural logarithm).
    update_from_spinner = CustomJS(args=dict(
                                altSpinner=alt_spinner,
                                biliSpinner=bili_spinner,
                                biliSlider=bili_slider,
                                altSlider=alt_slider,
                                biliULN=bili_uln_raw,
                                altULN=alt_uln_raw
                                    ), code="""
            var BILI_x_ULN = parseFloat(biliSpinner.value);
            var ALT_x_ULN = parseFloat(altSpinner.value);

            biliSlider.value = Math.log10(BILI_x_ULN*biliULN);
            altSlider.value = Math.log10(ALT_x_ULN*altULN);
                """)

    bili_spinner.js_on_change('value', update_from_spinner)
    alt_spinner.js_on_change('value', update_from_spinner)

    # update threshold labels for
    # the histograms
    change_label = CustomJS(args=dict(
                                altText=uln_alt_text,
                                biliText=uln_bili_text,
                                biliSlider=bili_slider,
                                altSlider=alt_slider,
                                biliULN=bili_uln_raw,
                                altULN=alt_uln_raw
                                    ), code="""
            var biliThreshold = biliSlider.value;
            var altThreshold = altSlider.value;
            var BILI_x_ULN = ((10**biliThreshold) / biliULN).toFixed(2);
            var ALT_x_ULN = ((10**altThreshold) / altULN).toFixed(2);
            altText.text = String(ALT_x_ULN) + " x ULN"
            biliText.text = String(BILI_x_ULN) + " x ULN"
                """)

    bili_slider.js_on_change('value', change_label)
    alt_slider.js_on_change('value', change_label)

    # Recompute the summary table in the browser whenever a threshold moves.
    update = CustomJS(args=dict(table_source=table_source,
                                animal_source=animal_source,
                                biliSlider=bili_slider,
                                altSlider=alt_slider,
                                biliULN=bili_uln_raw,
                                altULN=alt_uln_raw,
                                totalDisease=(animal_data_sex[disease] == 1).sum(),
                                    ), code="""
            var biliThreshold = biliSlider.value;
            var altThreshold = altSlider.value;
            var aData = animal_source.data;
            var tData = table_source.data;
            var aboveHl = 0;
            var posAboveHl = 0;

            for (var i = 0; i < aData.ALT.length; i++) {
                if (aData.ALT[i] > altThreshold && aData.BILI[i] > biliThreshold) {
                  aboveHl++;
                }
                if (aData.ALT[i] > altThreshold && aData.BILI[i] > biliThreshold && aData.DISEASE[i] == 1) {
                  posAboveHl++;
                }
            }
            tData.above_hl = [aboveHl];
            tData.pos_above_hl = [posAboveHl];
            tData.sensitivity = [posAboveHl / totalDisease];
            tData.ppv = [posAboveHl / aboveHl];
            table_source.change.emit()
                """)

    bili_slider.js_on_change('value', update)
    alt_slider.js_on_change('value', update)

    # data table length and width
    data_table.width = p.plot_width
    data_table.height = 75

    # set background colors
    for plot in [p, h1, h2]:
        plot.background_fill_color = BACKGROUND_FILL_COLOR

    # The first ULN line reports the ALT value, so it is labelled ALT.
    header = Div(text=f"""Interactive eDISH for {animal_data_sex.shape[0]:,} {'male' if sex == 'M' else 'female'} {SPECIES}.<br>
                                {(animal_data_sex.DISEASE == 1).sum():,} are positive for {disease.lower()}.<br>
                                ULN for ALT was found to be {alt_uln_raw:.2f} U/L.<br>
                                ULN for BILI was found to be {bili_uln_raw:.2f} mg/dL.""",
                 width=p.plot_width, height=100, style={'font-size': '125%'})

    return gridplot([   [row(header)],
                        [row(data_table)],
                        [row(column(bili_slider, alt_slider),
#                               column(bili_spinner, alt_spinner)
                            ), None],
                        [h1, None],
                        [p, h2],
                     ])


In [None]:
# One tab per finding class, each showing the male and female dashboards side by side.
tabs = []

for disease in ['NECROSIS', 'STEATOSIS', 'CHOLESTASIS']:
    top_widget = make_edish_dashboard(animal_data, disease, 'M')
    bottom_widget = make_edish_dashboard(animal_data, disease, 'F')

    l = row(top_widget, Spacer(width=70), bottom_widget)
    tabs.append(Panel(child=l, title=disease.title()))

# The HTML file is written when show() runs; make sure the target directory
# exists so that writing does not fail on a fresh checkout.
os.makedirs("figures", exist_ok=True)
output_file(f"figures/interactive_edish_{SPECIES}.html")
show(Tabs(tabs=tabs))