In [1]:
from trainer import *
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


In [2]:
# Read the training and test tables from CSV.
# The "efs_time" column is removed from the training frame right after loading.
train_df = pd.read_csv('train.csv').drop(columns=["efs_time"])
test_df = pd.read_csv('test.csv')
train_df

Unnamed: 0,ID,dri_score,psych_disturb,cyto_score,diabetes,hla_match_c_high,hla_high_res_8,tbi_status,arrhythmia,hla_low_res_6,...,hepatic_mild,tce_div_match,donor_related,melphalan_dose,hla_low_res_8,cardiac,hla_match_drb1_high,pulm_moderate,hla_low_res_10,efs
0,0,N/A - non-malignant indication,No,,No,,,No TBI,No,6.0,...,No,,Unrelated,"N/A, Mel not given",8.0,No,2.0,No,10.0,0.0
1,1,Intermediate,No,Intermediate,No,2.0,8.0,"TBI +- Other, >cGy",No,6.0,...,No,Permissive mismatched,Related,"N/A, Mel not given",8.0,No,2.0,Yes,10.0,1.0
2,2,N/A - non-malignant indication,No,,No,2.0,8.0,No TBI,No,6.0,...,No,Permissive mismatched,Related,"N/A, Mel not given",8.0,No,2.0,No,10.0,0.0
3,3,High,No,Intermediate,No,2.0,8.0,No TBI,No,6.0,...,Yes,Permissive mismatched,Unrelated,"N/A, Mel not given",8.0,No,2.0,No,10.0,0.0
4,4,High,No,,No,2.0,8.0,No TBI,No,6.0,...,No,Permissive mismatched,Related,MEL,8.0,No,2.0,No,10.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28795,28795,Intermediate - TED AML case <missing cytogenetics,,Favorable,No,2.0,8.0,No TBI,No,6.0,...,,Bi-directional non-permissive,,"N/A, Mel not given",8.0,,2.0,No,10.0,0.0
28796,28796,High,No,Poor,Yes,1.0,4.0,No TBI,No,5.0,...,No,GvH non-permissive,Related,"N/A, Mel not given",6.0,Yes,1.0,Yes,8.0,1.0
28797,28797,TBD cytogenetics,,Poor,,2.0,8.0,No TBI,,6.0,...,,GvH non-permissive,Unrelated,"N/A, Mel not given",8.0,,2.0,No,10.0,0.0
28798,28798,N/A - non-malignant indication,No,Poor,No,1.0,4.0,No TBI,No,3.0,...,No,Permissive mismatched,Related,MEL,4.0,No,1.0,No,5.0,0.0


In [3]:
# Hold out 10% of the rows as an evaluation set; the fixed random_state keeps
# the split repeatable.
# NOTE: `train_df` is rebound to the remaining 90%, so running this cell a
# second time without reloading the data splits the already-reduced frame.
train_df, eval_df = train_test_split(
    train_df,
    test_size=0.1,
    random_state=42,
)
train_df

Unnamed: 0,ID,dri_score,psych_disturb,cyto_score,diabetes,hla_match_c_high,hla_high_res_8,tbi_status,arrhythmia,hla_low_res_6,...,hepatic_mild,tce_div_match,donor_related,melphalan_dose,hla_low_res_8,cardiac,hla_match_drb1_high,pulm_moderate,hla_low_res_10,efs
14856,14856,High,No,Favorable,No,2.0,7.0,TBI + Cy +- Other,,5.0,...,No,HvG non-permissive,Related,"N/A, Mel not given",7.0,No,2.0,No,7.0,1.0
15925,15925,N/A - pediatric,No,Favorable,,2.0,8.0,No TBI,No,6.0,...,,Permissive mismatched,Multiple donor (non-UCB),"N/A, Mel not given",8.0,No,2.0,No,10.0,1.0
8528,8528,Low,No,,No,,,TBI + Cy +- Other,No,3.0,...,No,,Related,MEL,5.0,No,,No,6.0,1.0
21845,21845,N/A - non-malignant indication,No,Poor,No,1.0,5.0,"TBI +- Other, <=cGy",No,4.0,...,No,HvG non-permissive,Related,MEL,5.0,No,1.0,No,6.0,1.0
23607,23607,High - TED AML case <missing cytogenetics,No,Intermediate,No,2.0,,No TBI,No,6.0,...,No,,Related,"N/A, Mel not given",8.0,No,2.0,Not done,10.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21575,21575,Intermediate,No,Poor,No,,,"TBI +- Other, >cGy",No,,...,Yes,,Related,"N/A, Mel not given",,,,No,,0.0
5390,5390,Low,No,Intermediate,No,2.0,6.0,No TBI,No,5.0,...,Yes,Permissive mismatched,Related,"N/A, Mel not given",7.0,No,1.0,Yes,9.0,0.0
860,860,Intermediate,No,Intermediate,No,2.0,8.0,No TBI,No,6.0,...,No,Permissive mismatched,Unrelated,MEL,8.0,No,2.0,No,10.0,0.0
15795,15795,TBD cytogenetics,No,Poor,No,,,No TBI,No,,...,No,,Related,"N/A, Mel not given",,No,,No,,0.0


In [None]:
# One preprocessing configuration, shared by all three datasets below.
preprocessor = PreprocessingTool(
    val_folds=True,
    n_folds=5,
    seed=42,
    prob_type="regression",
    drop_non_categorical_text=False,
)

# Wrap each frame in a TabularDataset. Only the train and eval splits are given
# the "efs" label; the test frame is wrapped with type="infer" and no label.
train_dataset = TabularDataset(
    train_df,
    label="efs",
    preprocessor=preprocessor,
    type="train",
)
eval_dataset = TabularDataset(
    eval_df,
    label="efs",
    preprocessor=preprocessor,
    type="eval",
)
test_dataset = TabularDataset(
    test_df,
    preprocessor=preprocessor,
    type="infer",
)

# Model identifiers handed to the trainer ("RF" is currently left out).
model_names = ["LGB", "XGB", "CAT", "SGD_LINEAR"]

# NOTE(review): the meaning and units of n_trials / timeout / meta_timeout are
# defined by the `trainer` package and are not visible here — confirm before tuning.
trainer = Trainer(
    train_dataset,
    eval_dataset=eval_dataset,
    eval_metric="rmse",
    models=model_names,
    early_stopping_rounds=0,
    of_mitigation_level=0.2,
    use_gpu=True,
    use_cuda=True,
    n_trials=1000,
    timeout=600,
    meta_timeout=200,
    seed=42,
    select_top=3,
    train_meta=True,
)

# NOTE(review): the saved output of this cell ends in KeyboardInterrupt at
# 0/1000, i.e. the recorded run was stopped before any trial completed.
trainer.train()

LABEL: efs
Problem type: regression. 
-----------------------------------------------------------------------------------------------------------------------------------------
IMPUTER:

Done. 
-----------------------------------------------------------------------------------------------------------------------------------------
DTYPES DETECTOR:

Found 3 unique raw np.dtype(s): [dtype('int64') dtype('O') dtype('float64')].
Dropped 1 column(s) with index ID / non-categorical text features.
Converted 0 column(s) to numeric types.
Remaining features: 1 int feat(s), 21 float feat(s), 35 categorical feat(s).
-----------------------------------------------------------------------------------------------------------------------------------------
5-FOLD STRATIFIEDKFOLDREG TRAIN-VAL SPLITTER:

Done.
-----------------------------------------------------------------------------------------------------------------------------------------
SCALER:

Done.
---------------------------------------------

  0%|          | 0/1000 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [7]:
# Display one entry of the trainer's processed data.
# NOTE(review): presumably index 0 selects the first fold and ["val"][0] its
# validation features — confirm against the layout of `processed_data`.
# The saved output contains a row whose numeric columns are all empty (NaN),
# so missing values may survive preprocessing — verify.
trainer.processed_data["data"][0]["val"][0]

Unnamed: 0,hla_match_c_high,hla_high_res_8,hla_low_res_6,hla_high_res_6,hla_high_res_10,hla_match_dqb1_high,hla_nmdp_6,hla_match_c_low,hla_match_drb1_low,hla_match_dqb1_low,...,"melphalan_dose_N/A,_Mel_not_given",melphalan_dose_nan,cardiac_No,cardiac_Not_done,cardiac_Yes,cardiac_nan,pulm_moderate_No,pulm_moderate_Not_done,pulm_moderate_Yes,pulm_moderate_nan
12100,0.5,0.333333,0.25,0.25,0.285714,0.5,0.25,0.5,0.0,0.5,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3062,1.0,1.000000,1.00,1.00,1.000000,1.0,1.00,1.0,1.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
7191,0.5,0.666667,0.25,0.75,0.571429,0.5,0.25,0.5,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
22682,1.0,1.000000,1.00,1.00,1.000000,1.0,1.00,1.0,1.0,1.0,...,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
21325,0.5,0.333333,0.25,0.25,0.285714,0.5,0.50,0.5,0.0,0.5,...,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16371,1.0,1.000000,1.00,1.00,1.000000,1.0,1.00,1.0,1.0,1.0,...,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3561,0.5,0.333333,0.25,0.25,0.285714,0.5,0.25,0.5,0.0,0.5,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3645,1.0,1.000000,1.00,1.00,1.000000,1.0,1.00,1.0,1.0,1.0,...,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
16907,,,,,,,,,,,...,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [7]:
# Show the dtype of every column in the raw test frame.
test_df.dtypes

ID                          int64
dri_score                  object
psych_disturb              object
cyto_score                 object
diabetes                   object
hla_match_c_high          float64
hla_high_res_8            float64
tbi_status                 object
arrhythmia                 object
hla_low_res_6             float64
graft_type                 object
vent_hist                  object
renal_issue                object
pulm_severe                object
prim_disease_hct           object
hla_high_res_6            float64
cmv_status                 object
hla_high_res_10           float64
hla_match_dqb1_high       float64
tce_imm_match              object
hla_nmdp_6                float64
hla_match_c_low           float64
rituximab                  object
hla_match_drb1_low        float64
hla_match_dqb1_low        float64
prod_type                  object
cyto_score_detail          object
conditioning_intensity     object
ethnicity                  object
year_hct      

In [6]:
# Debugging call into the preprocessor's private imputation step on the raw test frame.
# NOTE(review): the saved output is
#   AttributeError: 'NoneType' object has no attribute 'fit_transform'
# which suggests the imputer object was not initialised when this ran —
# confirm, and prefer the public `test_dataset.process()` path.
test_dataset.preprocessor._impute(test_df)

IMPUTER:



AttributeError: 'NoneType' object has no attribute 'fit_transform'

In [15]:
# Debugging call into the preprocessor's private encoding step with fit=False.
# NOTE(review): presumably fit=False reuses an encoder fitted earlier — confirm
# in the `trainer` package. The saved output shows one-hot style columns
# (e.g. cardiac_No / cardiac_Yes) alongside the untouched ID column.
test_dataset.preprocessor._encode(test_df, fit=False)

Unnamed: 0,ID,hla_match_c_high,hla_high_res_8,hla_low_res_6,hla_high_res_6,hla_high_res_10,hla_match_dqb1_high,hla_nmdp_6,hla_match_c_low,hla_match_drb1_low,...,donor_related_Related,donor_related_Unrelated,melphalan_dose_MEL,"melphalan_dose_N/A,_Mel_not_given",cardiac_No,cardiac_Not_done,cardiac_Yes,pulm_moderate_No,pulm_moderate_Not_done,pulm_moderate_Yes
0,28800,,,6.0,6.0,,2.0,6.0,2.0,2.0,...,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
1,28801,2.0,8.0,6.0,6.0,10.0,2.0,6.0,2.0,2.0,...,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0
2,28802,2.0,8.0,6.0,6.0,10.0,2.0,6.0,2.0,2.0,...,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0


In [6]:
# Display the raw test frame (three rows in the saved output).
test_df

Unnamed: 0,ID,dri_score,psych_disturb,cyto_score,diabetes,hla_match_c_high,hla_high_res_8,tbi_status,arrhythmia,hla_low_res_6,...,karnofsky_score,hepatic_mild,tce_div_match,donor_related,melphalan_dose,hla_low_res_8,cardiac,hla_match_drb1_high,pulm_moderate,hla_low_res_10
0,28800,N/A - non-malignant indication,No,,No,,,No TBI,No,6.0,...,90.0,No,,Unrelated,"N/A, Mel not given",8.0,No,2.0,No,10.0
1,28801,Intermediate,No,Intermediate,No,2.0,8.0,"TBI +- Other, >cGy",No,6.0,...,90.0,No,Permissive mismatched,Related,"N/A, Mel not given",8.0,No,2.0,Yes,10.0
2,28802,N/A - non-malignant indication,No,,No,2.0,8.0,No TBI,No,6.0,...,90.0,No,Permissive mismatched,Related,"N/A, Mel not given",8.0,No,2.0,No,10.0


In [5]:
# Run the test dataset through its processing pipeline and display the result.
# NOTE(review): the saved output ends with numeric columns only; together with
# the feature_names mismatch raised by the predict cell, this suggests the
# categorical features were not encoded for the "infer" dataset — confirm.
processed_test_data = test_dataset.process()
processed_test_data

Unnamed: 0,hla_match_c_high,hla_high_res_8,hla_low_res_6,hla_high_res_6,hla_high_res_10,hla_match_dqb1_high,hla_nmdp_6,hla_match_c_low,hla_match_drb1_low,hla_match_dqb1_low,...,donor_age,hla_match_b_low,age_at_hct,hla_match_a_low,hla_match_b_high,comorbidity_score,karnofsky_score,hla_low_res_8,hla_match_drb1_high,hla_low_res_10
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.812697,1.0,0.13435,1.0,1.0,0.0,0.833333,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.812697,1.0,0.592632,1.0,1.0,0.3,0.833333,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.812697,1.0,0.460861,1.0,1.0,0.0,0.833333,1.0,1.0,1.0


In [9]:
# Predict on the processed test features using mode="meta".
# NOTE(review): the saved output of this cell is a ValueError
# ("feature_names mismatch"): the model expects the one-hot encoded categorical
# columns in addition to the numeric ones, while the input carried only the
# 22 numeric columns. `preds` is therefore not assigned by the recorded run;
# make sure the test features match the training features before
# relying on the cells below.
preds = trainer.predict(processed_test_data, mode="meta")
preds

ValueError: feature_names mismatch: ['hla_match_c_high', 'hla_high_res_8', 'hla_low_res_6', 'hla_high_res_6', 'hla_high_res_10', 'hla_match_dqb1_high', 'hla_nmdp_6', 'hla_match_c_low', 'hla_match_drb1_low', 'hla_match_dqb1_low', 'year_hct', 'hla_match_a_high', 'donor_age', 'hla_match_b_low', 'age_at_hct', 'hla_match_a_low', 'hla_match_b_high', 'comorbidity_score', 'karnofsky_score', 'hla_low_res_8', 'hla_match_drb1_high', 'hla_low_res_10', 'dri_score_High', 'dri_score_High___TED_AML_case__missing_cytogenetics', 'dri_score_Intermediate', 'dri_score_Intermediate___TED_AML_case__missing_cytogenetics', 'dri_score_Low', 'dri_score_Missing_disease_status', 'dri_score_N_A___disease_not_classifiable', 'dri_score_N_A___non_malignant_indication', 'dri_score_N_A___pediatric', 'dri_score_TBD_cytogenetics', 'dri_score_Very_high', 'psych_disturb_No', 'psych_disturb_Not_done', 'psych_disturb_Yes', 'cyto_score_Favorable', 'cyto_score_Intermediate', 'cyto_score_Normal', 'cyto_score_Not_tested', 'cyto_score_Other', 'cyto_score_Poor', 'cyto_score_TBD', 'diabetes_No', 'diabetes_Not_done', 'diabetes_Yes', 'tbi_status_No_TBI', 'tbi_status_TBI___Cy____Other', 'tbi_status_TBI____Other___cGy__fractionated', 'tbi_status_TBI____Other___cGy__single', 'tbi_status_TBI____Other___cGy__unknown_dose', 'tbi_status_TBI____Other____cGy', 'tbi_status_TBI____Other___cGy', 'tbi_status_TBI____Other__unknown_dose', 'arrhythmia_No', 'arrhythmia_Not_done', 'arrhythmia_Yes', 'graft_type_Bone_marrow', 'graft_type_Peripheral_blood', 'vent_hist_No', 'vent_hist_Yes', 'renal_issue_No', 'renal_issue_Not_done', 'renal_issue_Yes', 'pulm_severe_No', 'pulm_severe_Not_done', 'pulm_severe_Yes', 'prim_disease_hct_AI', 'prim_disease_hct_ALL', 'prim_disease_hct_AML', 'prim_disease_hct_CML', 'prim_disease_hct_HD', 'prim_disease_hct_HIS', 'prim_disease_hct_IEA', 'prim_disease_hct_IIS', 'prim_disease_hct_IMD', 'prim_disease_hct_IPA', 'prim_disease_hct_MDS', 'prim_disease_hct_MPN', 'prim_disease_hct_NHL', 
'prim_disease_hct_Other_acute_leukemia', 'prim_disease_hct_Other_leukemia', 'prim_disease_hct_PCD', 'prim_disease_hct_SAA', 'prim_disease_hct_Solid_tumor', 'cmv_status____', 'cmv_status_____1', 'cmv_status_____2', 'cmv_status_____3', 'tce_imm_match_G_B', 'tce_imm_match_G_G', 'tce_imm_match_H_B', 'tce_imm_match_H_H', 'tce_imm_match_P_B', 'tce_imm_match_P_G', 'tce_imm_match_P_H', 'tce_imm_match_P_P', 'rituximab_No', 'rituximab_Yes', 'prod_type_BM', 'prod_type_PB', 'cyto_score_detail_Favorable', 'cyto_score_detail_Intermediate', 'cyto_score_detail_Not_tested', 'cyto_score_detail_Poor', 'cyto_score_detail_TBD', 'conditioning_intensity_MAC', 'conditioning_intensity_N_A__F_pre_TED__not_submitted', 'conditioning_intensity_NMA', 'conditioning_intensity_No_drugs_reported', 'conditioning_intensity_RIC', 'conditioning_intensity_TBD', 'ethnicity_Hispanic_or_Latino', 'ethnicity_Non_resident_of_the_U_S_', 'ethnicity_Not_Hispanic_or_Latino', 'obesity_No', 'obesity_Not_done', 'obesity_Yes', 'mrd_hct_Negative', 'mrd_hct_Positive', 'in_vivo_tcd_No', 'in_vivo_tcd_Yes', 'tce_match_Fully_matched', 'tce_match_GvH_non_permissive', 'tce_match_HvG_non_permissive', 'tce_match_Permissive', 'hepatic_severe_No', 'hepatic_severe_Not_done', 'hepatic_severe_Yes', 'prior_tumor_No', 'prior_tumor_Not_done', 'prior_tumor_Yes', 'peptic_ulcer_No', 'peptic_ulcer_Not_done', 'peptic_ulcer_Yes', 'gvhd_proph_CDselect____other', 'gvhd_proph_CDselect_alone', 'gvhd_proph_CSA___MMF____others_not_FK_', 'gvhd_proph_CSA___MTX____others_not_MMF_FK_', 'gvhd_proph_CSA____others_not_FK_MMF_MTX_', 'gvhd_proph_CSA_alone', 'gvhd_proph_Cyclophosphamide____others', 'gvhd_proph_Cyclophosphamide_alone', 'gvhd_proph_FK__MMF____others', 'gvhd_proph_FK__MTX____others_not_MMF_', 'gvhd_proph_FK___others_not_MMF_MTX_', 'gvhd_proph_FKalone', 'gvhd_proph_No_GvHD_Prophylaxis', 'gvhd_proph_Other_GVHD_Prophylaxis', 'gvhd_proph_Parent_Q___yes__but_no_agent', 'gvhd_proph_TDEPLETION____other', 'gvhd_proph_TDEPLETION_alone', 
'rheum_issue_No', 'rheum_issue_Not_done', 'rheum_issue_Yes', 'sex_match_F_F', 'sex_match_F_M', 'sex_match_M_F', 'sex_match_M_M', 'race_group_American_Indian_or_Alaska_Native', 'race_group_Asian', 'race_group_Black_or_African_American', 'race_group_More_than_one_race', 'race_group_Native_Hawaiian_or_other_Pacific_Islander', 'race_group_White', 'hepatic_mild_No', 'hepatic_mild_Not_done', 'hepatic_mild_Yes', 'tce_div_match_Bi_directional_non_permissive', 'tce_div_match_GvH_non_permissive', 'tce_div_match_HvG_non_permissive', 'tce_div_match_Permissive_mismatched', 'donor_related_Multiple_donor__non_UCB_', 'donor_related_Related', 'donor_related_Unrelated', 'melphalan_dose_MEL', 'melphalan_dose_N_A__Mel_not_given', 'cardiac_No', 'cardiac_Not_done', 'cardiac_Yes', 'pulm_moderate_No', 'pulm_moderate_Not_done', 'pulm_moderate_Yes'] ['hla_match_c_high', 'hla_high_res_8', 'hla_low_res_6', 'hla_high_res_6', 'hla_high_res_10', 'hla_match_dqb1_high', 'hla_nmdp_6', 'hla_match_c_low', 'hla_match_drb1_low', 'hla_match_dqb1_low', 'year_hct', 'hla_match_a_high', 'donor_age', 'hla_match_b_low', 'age_at_hct', 'hla_match_a_low', 'hla_match_b_high', 'comorbidity_score', 'karnofsky_score', 'hla_low_res_8', 'hla_match_drb1_high', 'hla_low_res_10']
expected peptic_ulcer_Yes, race_group_White, diabetes_Not_done, tce_match_Fully_matched, tce_imm_match_P_P, race_group_Asian, vent_hist_No, prim_disease_hct_AI, cyto_score_detail_TBD, cmv_status_____3, pulm_severe_Yes, prim_disease_hct_HD, melphalan_dose_N_A__Mel_not_given, gvhd_proph_CDselect_alone, conditioning_intensity_MAC, psych_disturb_Not_done, cardiac_Not_done, tce_imm_match_P_H, psych_disturb_Yes, obesity_Yes, cmv_status____, tce_match_HvG_non_permissive, tbi_status_TBI____Other___cGy__single, conditioning_intensity_N_A__F_pre_TED__not_submitted, tce_imm_match_G_G, renal_issue_No, gvhd_proph_CSA____others_not_FK_MMF_MTX_, cyto_score_Normal, cyto_score_detail_Intermediate, cyto_score_Favorable, tce_match_Permissive, tce_imm_match_P_B, melphalan_dose_MEL, prior_tumor_Yes, vent_hist_Yes, gvhd_proph_FKalone, prim_disease_hct_MDS, tbi_status_TBI____Other___cGy__unknown_dose, ethnicity_Not_Hispanic_or_Latino, conditioning_intensity_RIC, tce_div_match_HvG_non_permissive, prim_disease_hct_IPA, gvhd_proph_FK___others_not_MMF_MTX_, sex_match_M_M, tce_div_match_Permissive_mismatched, prod_type_PB, race_group_More_than_one_race, pulm_moderate_No, prim_disease_hct_NHL, tce_div_match_Bi_directional_non_permissive, dri_score_TBD_cytogenetics, tbi_status_TBI____Other__unknown_dose, diabetes_No, cyto_score_detail_Favorable, diabetes_Yes, gvhd_proph_FK__MMF____others, hepatic_mild_No, conditioning_intensity_TBD, prod_type_BM, prim_disease_hct_PCD, tce_imm_match_H_H, gvhd_proph_CSA___MTX____others_not_MMF_FK_, prim_disease_hct_IEA, renal_issue_Yes, in_vivo_tcd_No, dri_score_N_A___non_malignant_indication, rituximab_Yes, prim_disease_hct_IMD, prior_tumor_No, in_vivo_tcd_Yes, graft_type_Bone_marrow, ethnicity_Hispanic_or_Latino, conditioning_intensity_NMA, conditioning_intensity_No_drugs_reported, prim_disease_hct_CML, sex_match_M_F, cyto_score_Not_tested, tce_div_match_GvH_non_permissive, hepatic_severe_No, race_group_American_Indian_or_Alaska_Native, dri_score_High, 
prim_disease_hct_Solid_tumor, arrhythmia_Yes, prim_disease_hct_Other_acute_leukemia, renal_issue_Not_done, pulm_moderate_Not_done, tbi_status_TBI____Other____cGy, dri_score_Low, hepatic_mild_Not_done, donor_related_Related, cyto_score_Other, tbi_status_TBI____Other___cGy__fractionated, rheum_issue_Not_done, mrd_hct_Positive, prim_disease_hct_AML, tce_imm_match_H_B, arrhythmia_Not_done, ethnicity_Non_resident_of_the_U_S_, cyto_score_TBD, tce_imm_match_G_B, gvhd_proph_Parent_Q___yes__but_no_agent, hepatic_mild_Yes, psych_disturb_No, cmv_status_____1, dri_score_Intermediate___TED_AML_case__missing_cytogenetics, donor_related_Multiple_donor__non_UCB_, prim_disease_hct_HIS, cyto_score_Poor, tbi_status_TBI____Other___cGy, tce_imm_match_P_G, rheum_issue_Yes, gvhd_proph_CSA___MMF____others_not_FK_, sex_match_F_M, mrd_hct_Negative, tbi_status_No_TBI, dri_score_N_A___pediatric, cardiac_No, pulm_moderate_Yes, rheum_issue_No, gvhd_proph_TDEPLETION____other, arrhythmia_No, dri_score_High___TED_AML_case__missing_cytogenetics, prior_tumor_Not_done, tbi_status_TBI___Cy____Other, gvhd_proph_TDEPLETION_alone, race_group_Native_Hawaiian_or_other_Pacific_Islander, gvhd_proph_Other_GVHD_Prophylaxis, dri_score_N_A___disease_not_classifiable, prim_disease_hct_IIS, tce_match_GvH_non_permissive, gvhd_proph_Cyclophosphamide_alone, sex_match_F_F, prim_disease_hct_Other_leukemia, gvhd_proph_CDselect____other, dri_score_Very_high, race_group_Black_or_African_American, pulm_severe_Not_done, obesity_No, hepatic_severe_Not_done, prim_disease_hct_SAA, cmv_status_____2, cyto_score_detail_Poor, gvhd_proph_Cyclophosphamide____others, dri_score_Intermediate, cyto_score_detail_Not_tested, gvhd_proph_CSA_alone, hepatic_severe_Yes, gvhd_proph_FK__MTX____others_not_MMF_, prim_disease_hct_MPN, rituximab_No, obesity_Not_done, gvhd_proph_No_GvHD_Prophylaxis, donor_related_Unrelated, cyto_score_Intermediate, dri_score_Missing_disease_status, peptic_ulcer_No, cardiac_Yes, peptic_ulcer_Not_done, 
graft_type_Peripheral_blood, pulm_severe_No, prim_disease_hct_ALL in input data

In [12]:
# Display the raw test frame again (same content as the earlier `test_df` cell).
test_df

Unnamed: 0,ID,dri_score,psych_disturb,cyto_score,diabetes,hla_match_c_high,hla_high_res_8,tbi_status,arrhythmia,hla_low_res_6,...,karnofsky_score,hepatic_mild,tce_div_match,donor_related,melphalan_dose,hla_low_res_8,cardiac,hla_match_drb1_high,pulm_moderate,hla_low_res_10
0,28800,N/A - non-malignant indication,No,,No,,,No TBI,No,6.0,...,90.0,No,,Unrelated,"N/A, Mel not given",8.0,No,2.0,No,10.0
1,28801,Intermediate,No,Intermediate,No,2.0,8.0,"TBI +- Other, >cGy",No,6.0,...,90.0,No,Permissive mismatched,Related,"N/A, Mel not given",8.0,No,2.0,Yes,10.0
2,28802,N/A - non-malignant indication,No,,No,2.0,8.0,No TBI,No,6.0,...,90.0,No,Permissive mismatched,Related,"N/A, Mel not given",8.0,No,2.0,No,10.0


In [None]:
# Pair each test ID with its prediction in a two-column frame ("ID", "y").
# NOTE(review): the saved output below this cell lists IDs starting at 1, while
# the `test_df` shown earlier starts at ID 28800 — the output looks like it
# comes from a different run; re-execute before trusting it.
submission = pd.DataFrame(dict(ID=test_df["ID"], y=preds))
submission

Unnamed: 0,ID,y
0,1,78.723767
1,2,94.464950
2,3,78.881864
3,4,77.956778
4,5,112.631084
...,...,...
4204,8410,103.391248
4205,8411,93.705470
4206,8413,92.885264
4207,8414,111.101836


In [None]:
# Write the submission frame to disk without the index column.
submission.to_csv(path_or_buf="submission_meta.csv", index=False)

a
b
d


In [None]:
# NOTE(review): `lista` is not defined in any visible cell, so this cell will
# raise NameError on a fresh kernel unless it is defined elsewhere — it looks
# like leftover scratch work; confirm and remove.
lista

['a', 'b', 'c']