# Reclassification to WHO and ELN 2022 criteria

```{contents}
```

## Guidelines

### WHO 2022

### ELN 2022

## Load and process clinical data

In [1]:
# Imports: `pd` is imported explicitly so this cell does not depend on the
# wildcard import below happening to re-export pandas.
import sys

import pandas as pd

# Make the parent directory importable so the project-local `source` package resolves
sys.path.insert(0, '..')
# Wildcard import is kept on purpose: it supplies the merge_index_*/clean_*/
# process_df_labels helpers used here, and later cells may rely on other names it provides.
from source.clinical_data_cleanup_functions import *

# Merge, index and clean the clinical data files, one cohort at a time
labels_0531 = clean_cog(merge_index_0531())
labels_1031 = clean_cog(merge_index_1031())
labels_aml05 = clean_aml05(merge_index_aml05())
labels_beataml = clean_beataml(merge_index_beataml())
labels_amltcga = clean_amltcga(merge_index_amltcga())
labels_nordic_all = clean_nordic_all(merge_index_nordic_all())
labels_mds_taml = clean_mds_taml(merge_index_mds_taml())
labels_all_graal = clean_all_graal(merge_index_all_graal())
labels_target_all = clean_target_all(merge_index_target_all())

# Stack all cohorts row-wise; join='outer' keeps the union of their columns
labels_combined = pd.concat(
    [
        labels_aml05,
        labels_beataml,
        labels_0531,
        labels_amltcga,
        labels_1031,
        labels_nordic_all,
        labels_mds_taml,
        labels_all_graal,
        labels_target_all,
    ],
    axis=0,
    join='outer',
)

# Redefine output path (for troubleshooting purposes in case only this cell is run).
# The same directory is also where the methylation pickle is read from.
output_path = '../../Data/Intermediate_Files/'

# Read the methylation dataframe.
# NOTE(review): unpickling can execute arbitrary code; only load files produced
# by this project's own pipeline.
df = pd.read_pickle(output_path + '3330samples-333351cpgs-withbatchcorrection-bvalues.pkl')

# Keep only the samples whose index is present in the methylation dataset, sorted by index
in_methyl_dataset = labels_combined.index.isin(df.index)
df_labels = labels_combined.loc[in_methyl_dataset].sort_index()

# Add age categorization and main disease classification to the clinical data
df_labels = process_df_labels(df_labels)

# Save the clinical data labels
df_labels.to_csv(output_path + 'discovery_clinical_data.csv')

# Adjacent string literals replace the in-string backslash continuation; output is unchanged
print('The clinical data has been indexed and cleaned.\n'
      'Exclusion of samples may be applied depending on the analysis.')

The clinical data has been indexed and cleaned.
Exclusion of samples may be applied depending on the analysis.


## Implement Reclassification Strategy

### WHO AML 2022 Diagnosis

In [2]:
# Number of samples per WHO 2022 diagnosis label (missing labels are not counted)
df_labels.loc[:, 'WHO AML 2022 Diagnosis'].value_counts()

WHO AML 2022 Diagnosis
AML with t(9;11)(p22;q23.3)/KMT2A-rearrangement                           313
MDS-related or secondary myeloid neoplasms                                230
AML with mutated NPM1                                                     179
AML with inv(16)(p13.1q22) or t(16;16)(p13.1;q22)/CBFB::MYH11             178
AML with t(8;21)(q22;q22.1)/RUNX1::RUNX1T1                                169
Otherwise-Normal Control                                                  162
AML with NUP98-fusion                                                      97
AML with bZIP mutated CEBPA                                                69
APL with t(15;17)(q24.1;q21.2)/PML::RARA                                   31
AML with CBFA2T3::GLIS2 (inv(16)(p13q24))                                  30
AML with t(6;9)(p23;q34.1)/DEK::NUP214                                     28
AML with ETV6 fusion                                                       16
AML with inv(3)(q21.3q26.2) or t(3;3)(q21

### ELN AML 2022 Diagnosis

In [3]:
# Number of samples per ELN 2022 diagnosis label (missing labels are not counted)
df_labels.loc[:, 'ELN AML 2022 Diagnosis'].value_counts()

ELN AML 2022 Diagnosis
AML with t(9;11)(p22;q23.3)/KMT2A-rearrangement                           313
MDS-related or secondary myeloid neoplasms                                228
AML with other rare recurring translocations                              185
AML with inv(16)(p13.1q22) or t(16;16)(p13.1;q22)/CBFB::MYH11             178
AML with mutated NPM1                                                     172
AML with t(8;21)(q22;q22.1)/RUNX1::RUNX1T1                                169
Otherwise-Normal Control                                                  162
AML with in-frame bZIP mutated CEBPA                                       69
APL with t(15;17)(q24.1;q21.2)/PML::RARA                                   31
AML with t(6;9)(p23;q34.1)/DEK::NUP214                                     28
AML with inv(3)(q21.3q26.2) or t(3;3)(q21.3;q26.2)/MECOM-rearrangement     10
AML with t(9;22)(q34.1;q11.2)/BCR::ABL1                                     3
Myeloid leukaemia associated with Down sy

### Evaluate final sample size by batch

In [17]:
# NOTE(review): this cell's execution count (In [17]) is out of sequence with the
# cells above, so `df` may have been rebound by cells not shown here. Confirm that
# `df` still carries a 'Batch' column after Restart Kernel -> Run All.
# Number of samples per batch; dropna=False reports missing batch labels as their own row
df.loc[:, 'Batch'].value_counts(dropna=False)

Batch
GSE49031          933
GSE190931         581
GSE124413         495
GSE159907         316
GDC_TARGET-AML    287
GDC_TCGA-AML      194
GSE152710         166
GSE147667         153
GDC_TARGET-ALL    141
GSE133986          64
Name: count, dtype: int64