NOTE: `DataProcessor().apply_pacmap()` (in `source/pacmap_functions.py`) may need to be re-run, since the number of samples dropped from 1500 to 900.

# Import Libraries

In [39]:
from bokeh.models import (DataTable, HoverTool, IntEditor, TableColumn,
                          NumberEditor, NumberFormatter, SelectEditor,
                          StringEditor, StringFormatter, Div, Slider, TabPanel,
                          Tabs, Legend, ColumnDataSource, LegendItem, Span,
                          BoxAnnotation, Label, Segment)
from bokeh.models import CDSView, GroupFilter
from bokeh.sampledata.autompg2 import autompg2 as mpg
from bokeh.io import curdoc, output_notebook
from bokeh.transform import factor_cmap
from bokeh.plotting import figure, show
from bokeh.embed import json_item
from bokeh.layouts import layout
from bokeh.themes import Theme
import pandas as pd
import numpy as np
import json
import sys

from bokeh.models import CategoricalColorMapper

from bokeh.layouts import column
from bokeh.models import Column

# Import Data

In [40]:
# Add the parent directory to the import search path and define the data folders
sys.path.append('../')
mount = '/mnt/d/'
input_path = mount + 'MethylScore/Intermediate_Files/'
output_path = mount + 'MethylScore/Processed_Data/'

# Discovery cohort clinical table, tagged with its cohort name and a
# constant 'PaCMAP Output' label
discovery_clinical_data = pd.read_csv(
    input_path + 'discovery_clinical_data.csv', low_memory=False, index_col=0,
).assign(**{
    'Train Test': 'Discovery (train) Samples',
    'PaCMAP Output': 'Patient Samples',
})

# Validation cohort clinical table, tagged the same way
validation_clinical_data = pd.read_csv(
    input_path + 'validation_clinical_data.csv', low_memory=False, index_col=0,
).assign(**{
    'Train Test': 'Validation (test) Samples',
    'PaCMAP Output': 'Patient Samples',
})

# Built-in Bokeh theme for the document (alternative: 'dark_minimal')
curdoc().theme = 'light_minimal'

# Stack both cohorts into a single clinical table
clinical_data = pd.concat([discovery_clinical_data, validation_clinical_data])

# Clinical columns to carry over to the plots
cols = ['WHO 2022 Diagnosis']

# Read the PaCMAP output and attach the selected clinical columns, matching
# its 'index' column against the clinical table's index.
# NOTE(review): assumes the CSV contains a column literally named 'index' - confirm.
df2 = pd.read_csv(output_path+'pacmap_output/pacmap_2d_model_dx_al.csv', index_col=0)
df2 = df2.join(clinical_data[cols], rsuffix='_copy', on='index')

# Per-sample MethylScore results
methylscore_df = pd.read_excel(output_path + 'multivariate_cox_lasso/ewas_cog_os_MethylScoreAML_Px.xlsx', index_col=0)

# Attach the score columns (names that already exist in df2 get a '_copy'
# suffix), keep only samples that have a score, and order them by ascending
# score with a fresh 0..n-1 index
score_cols = ['WHO 2022 Diagnosis', 'Vital Status', 'First Event', 'Clinical Trial', 'MethylScoreAML_Px']
scored = df2.join(methylscore_df[score_cols], rsuffix='_copy', on='index')
df3 = (
    scored
    .dropna(subset=['MethylScoreAML_Px'])
    .sort_values(by=['MethylScoreAML_Px'])
    .reset_index(drop=True)
)

# Rank position rescaled to the 0-1 interval: the lowest score maps to 0,
# the highest to 1
df3['Percentile'] = df3.index / (len(df3) - 1)

# Round the score to 2 decimal places
df3['MethylScoreAML_Px'] = df3['MethylScoreAML_Px'].round(decimals=2)

In [41]:
# Configure Bokeh to render subsequent show() calls inline in the notebook
output_notebook()

# Bokeh Plot

In [48]:
# Wrap the scored samples in a ColumnDataSource so the scatter plot and the
# data table share the same data and selection
source = ColumnDataSource(df3)

# Document-wide theme: black Arial text for axis tick labels, legends and titles
curdoc().theme = Theme(json={
    "attrs": {
        "Axis": {
            "major_label_text_color": 'black',
            "major_label_text_font": 'Arial',
        },
        "Legend": {
            "label_text_color": 'black',
            "label_text_font": 'Arial',
        },
        "Title": {
            "text_color": 'black',
            "text_font": 'Arial',
        },
    }
})

p1 = figure(width=450, height=450,
            tools="pan,wheel_zoom,box_zoom,xbox_select,reset",
            active_drag="xbox_select")
p1.toolbar.logo = None

# Score cut-off separating the "Low Risk" and "High Risk" groups
BINARY_THRESHOLD = 0.2208

# Initial view is x in (-3, 3) and y in (-0.05, 1.1); `bounds` keeps pan/zoom
# from leaving that window
p1.x_range.bounds = (-3, 3)
p1.x_range.start = -3
p1.x_range.end = 3
p1.y_range.bounds = (-0.05, 1.1)
p1.y_range.start = -0.05
p1.y_range.end = 1.1

# One marker per sample: score on x, rank percentile on y.
# scatter() is used instead of circle(): circle() with a screen-unit `size`
# is deprecated in recent Bokeh 3.x releases, and scatter() draws circle
# markers by default, so the rendering is unchanged.
p1.scatter(x='MethylScoreAML_Px', y='Percentile', source=source,
           size=5, color='#1f77b4', alpha=0.8,
           selection_color='#ff7f0e', nonselection_alpha=1.0,
           hover_color='#ff7f0e', hover_alpha=1.0)

# Axis labels and their text styling
p1.xaxis.axis_label = "MethylScore"
p1.yaxis.axis_label = "Probability of AML Progression"
for axis in (p1.xaxis, p1.yaxis):
    axis.axis_label_text_font_size = "10pt"
    axis.axis_label_text_font_style = "normal"
    axis.axis_label_text_font = "Arial"
    axis.axis_label_text_color = "black"

# Vertical line at the binary threshold
p1.add_layout(Span(location=BINARY_THRESHOLD, dimension='height', line_color='black'))

# Risk-category captions on either side of the threshold line; y is given in
# screen units so the text stays 20 px above the bottom of the plot area
labels = [
    Label(x=-BINARY_THRESHOLD, y=20, text="Low Risk", text_color='black', text_align='center', y_units='screen', text_font_size='10pt'),
    Label(x=BINARY_THRESHOLD + 0.45, y=20, text="High Risk", text_color='black', text_align='center', y_units='screen', text_font_size='10pt'),
]

for label in labels:
    p1.add_layout(label)

# Table of clinical fields backed by the same source as the plot
columns = [
    TableColumn(field="WHO 2022 Diagnosis", title="WHO 2022 Diagnosis"),
    TableColumn(field="Vital Status", title="Vital Status"),
    TableColumn(field="First Event", title="First Event"),
    TableColumn(field="Clinical Trial", title="Clinical Trial"),
    TableColumn(field="MethylScoreAML_Px", title="MethylScoreAML_Px")
]
data_table = DataTable(source=source, columns=columns, width=450, editable=True)

# Hover tooltip showing the score and percentile of the point under the cursor
tooltips = [
    ("MethylScore", "@MethylScoreAML_Px"),
    ("Percentile", "@Percentile"),
]
hover = HoverTool(tooltips=tooltips)
p1.add_tools(hover)

# NOTE: this rebinds `layout`, shadowing the `layout` function imported from
# bokeh.layouts; later cells read this variable, so the name is kept
layout = column(p1, data_table)

show(layout)

In [32]:

#### ------------------ PaCMAP Plot ------------------ ####

# Custom color palette
custom_color_palette = [
    '#1f77b4',  # Vivid blue
    '#ff7f0e',  # Vivid orange 
    '#2ca02c',  # Vivid green
    '#d62728',  # Vivid red
    '#9467bd',  # Vivid purple 
    '#7f7f7f',  # Medium gray
    '#e377c2',  # Pink
    '#e7ba52',  # Light orange
    '#bcbd22',  # Olive
    '#17becf',  # Light blue
    '#393b79',  # Dark blue
    '#8c564b',  # Brown
    '#f7b6d2',  # Light pink
    '#c49c94',  # Light brown
    '#a2769e',  # Soft purple
    '#dbdb8d',  # Pale yellow
    '#9edae5',  # Pale cyan
    '#c5b0d5',  # Pale purple
    '#c7c7c7',  # Light gray
    '#ff9896',  # Light red
    '#637939',  # Dark olive
    '#aec7e8',  # Light blue
    '#ffbb78',  # Light orange
    '#98df8a',  # Light green
    '#7c231e',  # Dark red
    '#3d6a3d',  # Dark green
    '#f96502',  # Deep orange
    '#6d3f7d',  # Deep purple
    '#6b4423',  # Dark brown
    '#d956a6'   # Hot pink
]

# Initial setup
title = ''
x_range = (-45, 45)
y_range = (-45, 45)
datapoint_size = 5
tooltip_dx_cols = 'WHO 2022 Diagnosis'
width = 1000
height = 1000

tabs = []
slider = Slider(title="Adjust datapoint size", start=0, end=10, step=1, value=datapoint_size)

# Build one tab per clinical column; within a tab, each category gets its own
# renderer so that it can have its own legend entry
for col in cols:
    factors = [str(val) for val in df3[col].unique() if pd.notnull(val)]
    color_mapper = CategoricalColorMapper(factors=factors, palette=custom_color_palette)

    p = figure(title=title, width=width, height=height, x_range=x_range, y_range=y_range, 
               tools="pan,wheel_zoom,reset,save,box_select", tooltips=[("Dx", "@{"+tooltip_dx_cols+"}")], 
               x_axis_label='Longitude (PaCMAP 1)', y_axis_label='Latitude (PaCMAP 2)')
    p.toolbar.logo = None
    p.toolbar_location = 'above'

    # Create a scatter renderer and a matching legend item for each factor
    legend_items = []
    for factor in factors:
        view = CDSView(filter=GroupFilter(column_name=col, group=factor))
        renderer = p.scatter(x="PaCMAP 1", y="PaCMAP 2", source=source, view=view,
                             size=datapoint_size,
                             color={'field': col, 'transform': color_mapper})

        # Drive the marker size from the slider. Previously the slider was
        # created but never connected, so moving it had no effect. The
        # non-selection glyph is linked too so points keep a consistent size
        # while a box selection is active.
        for glyph in (renderer.glyph, renderer.nonselection_glyph):
            if hasattr(glyph, 'size'):
                slider.js_link('value', glyph, 'size')

        # Pair the label with the renderer just created rather than relying on
        # the ordering of p.renderers
        legend_items.append(LegendItem(label=factor, renderers=[renderer]))

    # Create a legend
    legend = Legend(items=legend_items, location="center")

    # Add the legend to the plot
    p.add_layout(legend, 'below')

    tab = TabPanel(child=p, title=col)
    tabs.append(tab)

tabs_control = Tabs(tabs=tabs, tabs_location='left')

# NOTE: rebinds `layout` (also the name of a function imported from
# bokeh.layouts); later cells read this variable, so the name is kept
layout = column(tabs_control, slider)
show(layout)

In [34]:
# Rebuild and display the PaCMAP tabs stacked above the point-size slider.
# This rebinds the module-level `layout` variable that later cells read.
layout = column(tabs_control, slider)
show(layout)

In [None]:

# Combined view: risk scatter plot, data table and slider in a single column.
# The alternative that also includes the PaCMAP tabs is kept commented out below.
# layout = column(p1, data_table, tabs_control, slider)
layout = column(p1, data_table, slider)

show(layout)

In [31]:
# Display the list of TabPanel objects collected for the Tabs widget
tabs

[TabPanel(id='p13354', ...)]

In [18]:
# Export as BokehJS

# Build the embeddable JSON payload for the current `layout`, targeting the
# "myplot" element id, and write it to disk as a single JSON string
plot_item = json_item(layout, "myplot")
json_layout = json.dumps(plot_item)
with open('risk_pacmap_plot.json', 'w') as out_file:
    out_file.write(json_layout)

# Export as HTML (disabled; kept for reference)

# from bokeh.resources import CDN
# from bokeh.embed import file_html

# html = file_html(layout, CDN, "myplot")
# with open('risk_pacmap_plot.html', 'w') as f:
#     f.write(html)