# AML-only map with new interactive prognostic and race histograms

6/17/2024

The map now displays only samples from pediatric AML clinical trials. You may select clusters, and their prognosis and race distribution will appear below.

In [1]:
import pandas as pd
import sys
sys.path.append('../')
from source.bokeh_plots_temp import *
from source.data_visualization import *
output_notebook()

file_path = '../data/alma_main_results.xlsx'
model_name = 'AML Epigenomic Risk'

# Load the results table (first column as index, sorted by that index) and
# keep only the rows whose 'Clinical Trial' is one of the listed AML trials.
df = (
    pd.read_excel(file_path, index_col=0)
    .sort_index()
    .loc[lambda d: d['Clinical Trial'].isin(
        ['AAML0531', 'AAML1031', 'AAML03P1', 'CCG2961', 'Japanese AML05', 'AML02', 'AML08']
    )]
)

# Split into train and test samples using the 'Train-Test' column
df_train = df.loc[df['Train-Test'].eq('Train Sample')]
df_test = df.loc[df['Train-Test'].eq('Test Sample')]

# Prognosis subset: train samples that have a 'Vital Status' label,
# leaving out everything from the 'GDC_TARGET-ALL' batch.
df_px = (
    df_train
    .dropna(subset=['Vital Status'])
    .loc[lambda d: ~d['Batch'].isin(['GDC_TARGET-ALL'])]
)

# Diagnosis subset: train samples that have a 'WHO 2022 Diagnosis' label,
# minus the classes listed below (per the original note, these are the
# classes with fewer than 10 samples).
df_dx = (
    df_train
    .dropna(subset=['WHO 2022 Diagnosis'])
    .loc[lambda d: ~d['WHO 2022 Diagnosis'].isin([
        'MPAL with t(v;11q23.3)/KMT2A-r',
        'B-ALL with hypodiploidy',
        'AML with t(16;21); FUS::ERG',
        'AML with t(9;22); BCR::ABL1',
    ])]
)

# COG subset: samples from AAML0531, AAML1031 and AAML03P1 whose 'Sample Type'
# is one of the listed diagnostic types, keeping only the last row for each
# 'Patient_ID'.
df_cog = (
    df
    .loc[lambda d: d['Clinical Trial'].isin(['AAML0531', 'AAML1031', 'AAML03P1'])]
    .loc[lambda d: d['Sample Type'].isin([
        'Diagnosis',
        'Primary Blood Derived Cancer - Bone Marrow',
        'Primary Blood Derived Cancer - Peripheral Blood',
    ])]
    .drop_duplicates(subset='Patient_ID', keep='last')
)

In [2]:
# Render the linked plots for `df`, the AML-trial-filtered frame built in the first cell.
# NOTE(review): plot_linked_histograms5 arrives via a star import (presumably
# source.bokeh_plots_temp) — confirm where it is defined.
plot_linked_histograms5(df)

## Watermark

In [3]:
# Load the watermark IPython extension so the %watermark magic can be used in the next cell
%load_ext watermark

In [4]:
# Record the environment for reproducibility: Python/IPython versions (-v),
# the listed package versions (-p), author (-a), date (-d) and machine info (-m).
# NOTE(review): seaborn and matplotlib are not imported directly in the visible
# cells — presumably they are used by the star-imported source modules; confirm.
%watermark -v -p pandas,seaborn,matplotlib,bokeh -a Francisco_Marchi@Lamba_Lab_UF -d -m

Author: Francisco_Marchi@Lamba_Lab_UF

Python implementation: CPython
Python version       : 3.10.13
IPython version      : 8.20.0

pandas    : 2.2.0
seaborn   : 0.13.2
matplotlib: 3.8.2
bokeh     : 3.3.4

Compiler    : GCC 11.4.0
OS          : Linux
Release     : 5.15.133.1-microsoft-standard-WSL2
Machine     : x86_64
Processor   : x86_64
CPU cores   : 32
Architecture: 64bit

