# scikit-FIBERS: HLA Notebook (First FIBERS Paper Data)


***
## Imports:

In [1]:
import os
import pickle
import pandas as pd
from sklearn.metrics import classification_report
from src.skfibers.fibers import FIBERS

# Show where the notebook is running from (relative paths below resolve against it).
current_working_directory = os.getcwd()
print(current_working_directory)

#Local Run Parameters
local_save = False
folder_path = 'C:/Users/ryanu/Desktop/ASHI_Output'

# Pick the destination first, then create exactly that folder. Previously only
# `folder_path` was created, so choosing `local_save = True` left 'local_output'
# missing and every later save into `output_folder` would fail.
output_folder = 'local_output' if local_save else folder_path
os.makedirs(output_folder, exist_ok=True)

c:\Users\ryanu\OneDrive\Documents\GitHub\scikit-FIBERS


***
## Load and Prepare HLA Data

In [2]:
# Load the full dataset from a hard-coded local CSV path.
# `data_name` is reused further down as the suffix of every saved output file.
# low_memory=False makes pandas read the file in one pass instead of in chunks.
data_path = 'C:/Users/ryanu/Desktop/ASHI_Abstract_Analysis/NewImp1_v2.csv'
data_name = 'NewImp1_v2'
all_data = pd.read_csv(data_path,low_memory=False)
# Display (rows, columns) of the raw table.
all_data.shape

(251776, 2101)

In [3]:
# Preview the first three rows of the raw table.
all_data.head(3)

Unnamed: 0.1,Unnamed: 0,TX_ID,graftyrs,grf_fail,MM_A_1,MM_A_2,MM_A_3,MM_A_4,MM_A_5,MM_A_6,...,MM_DPB1_177,MM_DPB1_178,MM_DPB1_179,MM_DPB1_180,MM_DPB1_181,MM_DPB1_182,MM_DPB1_183,MM_DPB1_184,MM_DPB1_185,MM_DPB1_186
0,0,1550580,2.992553,1,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,1554014,11.003724,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,1552092,11.003724,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# Candidate covariate column names, plus the outcome (duration) and censoring
# column labels used by the rest of the notebook.
covariates = [
    'shared', 'DCD', 'DON_AGE', 'donage_slope_ge18',
    'dcadcodanox', 'dcadcodcva', 'dcadcodcnst', 'dcadcodoth',
    'don_cmv_negative', 'don_htn_0c',
    'ln_don_wgt_kg_0c', 'ln_don_wgt_kg_0c_s55',
    'don_ecd', 'age_ecd', 'yearslice',
    'REC_AGE_AT_TX', 'rec_age_spline_35', 'rec_age_spline_50', 'rec_age_spline_65',
    'diab_noted', 'age_diab', 'dm_can_age_spline_50',
    'can_dgn_htn_ndm', 'can_dgn_pk_ndm', 'can_dgn_gd_ndm',
    'rec_prev_ki_tx', 'rec_prev_ki_tx_dm',
    'rbmi_0c', 'rbmi_miss', 'rbmi_gt_20', 'rbmi_DM', 'rbmi_gt_20_DM',
    'ln_c_hd_m', 'ln_c_hd_0c', 'ln_c_hd_m_ptx',
    'PKPRA_MS', 'PKPRA_1080', 'PKPRA_GE80',
    'hispanic', 'CAN_RACE_BLACK', 'CAN_RACE_asian', 'CAN_RACE_WHITE',
    'mm0',
    'mmDR0', 'mmDR1',
    'mmA0', 'mmA1',
    'mmB0', 'mmB1',
    'mmC0', 'mmC1',
    'mmDQ0', 'mmDQ1',
]
outcome_label = "graftyrs"
censor_label = "grf_fail"

# Sanity check: number of covariate names listed above.
print(len(covariates))

53


In [5]:
# Collect every column whose name does not carry the 'MM_' prefix,
# i.e. everything other than the per-position mismatch features.
filtered_columns = []
for column_name in all_data.columns:
    if column_name.startswith('MM_'):
        continue
    filtered_columns.append(column_name)

print(filtered_columns)

['Unnamed: 0', 'TX_ID', 'graftyrs', 'grf_fail']


In [6]:
# Check how many of the listed covariate names actually exist as columns
# in the loaded table.
available_columns = all_data.columns
found_strings = list(filter(lambda name: name in available_columns, covariates))
print(len(found_strings))

0


In [8]:
# Inclusive amino-acid position range used for each HLA locus.
# NOTE(review): the original notes marked DQB1 (6-95) and DPA1 (6-94 vs. 4-94)
# as uncertain, and recorded that ['MM_DPA1_4', 'MM_DPA1_5'] are not in the
# index -- confirm these two ranges against the data dictionary.
locus_ranges = {
    'A': [1, 182],
    'B': [1, 182],
    'C': [1, 182],
    'DRB1': [6, 94],
    'DRB345': [6, 94],
    'DQA1': [6, 94],
    'DQB1': [6, 95],
    'DPA1': [6, 94],
    'DPB1': [6, 94],
}

# Parallel lists consumed by the following cells (same order as above).
locus_list = list(locus_ranges.keys())
locus_range_list = list(locus_ranges.values())

In [9]:
# Expand each locus and its inclusive [first, last] position range into the
# matching 'MM_<locus>_<position>' column names, keeping locus order.
MM_feature_list = [
    'MM_' + str(locus_name) + '_' + str(position)
    for locus_name, (first_position, last_position) in zip(locus_list, locus_range_list)
    for position in range(first_position, last_position + 1)
]
print(MM_feature_list)
print(len(MM_feature_list))

['MM_A_1', 'MM_A_2', 'MM_A_3', 'MM_A_4', 'MM_A_5', 'MM_A_6', 'MM_A_7', 'MM_A_8', 'MM_A_9', 'MM_A_10', 'MM_A_11', 'MM_A_12', 'MM_A_13', 'MM_A_14', 'MM_A_15', 'MM_A_16', 'MM_A_17', 'MM_A_18', 'MM_A_19', 'MM_A_20', 'MM_A_21', 'MM_A_22', 'MM_A_23', 'MM_A_24', 'MM_A_25', 'MM_A_26', 'MM_A_27', 'MM_A_28', 'MM_A_29', 'MM_A_30', 'MM_A_31', 'MM_A_32', 'MM_A_33', 'MM_A_34', 'MM_A_35', 'MM_A_36', 'MM_A_37', 'MM_A_38', 'MM_A_39', 'MM_A_40', 'MM_A_41', 'MM_A_42', 'MM_A_43', 'MM_A_44', 'MM_A_45', 'MM_A_46', 'MM_A_47', 'MM_A_48', 'MM_A_49', 'MM_A_50', 'MM_A_51', 'MM_A_52', 'MM_A_53', 'MM_A_54', 'MM_A_55', 'MM_A_56', 'MM_A_57', 'MM_A_58', 'MM_A_59', 'MM_A_60', 'MM_A_61', 'MM_A_62', 'MM_A_63', 'MM_A_64', 'MM_A_65', 'MM_A_66', 'MM_A_67', 'MM_A_68', 'MM_A_69', 'MM_A_70', 'MM_A_71', 'MM_A_72', 'MM_A_73', 'MM_A_74', 'MM_A_75', 'MM_A_76', 'MM_A_77', 'MM_A_78', 'MM_A_79', 'MM_A_80', 'MM_A_81', 'MM_A_82', 'MM_A_83', 'MM_A_84', 'MM_A_85', 'MM_A_86', 'MM_A_87', 'MM_A_88', 'MM_A_89', 'MM_A_90', 'MM_A_91', 'MM_A_9

In [10]:
# Total number of missing cells in the raw table: count per column, then add up.
missing_per_column = all_data.isna().sum()
missing_sum = missing_per_column.sum()
print("Sum of raw data missing values:", missing_sum)

Sum of raw data missing values: 1669212


In [21]:
# Columns kept for modelling: the mismatch features plus the outcome and
# censoring columns. The covariate names are deliberately left out here
# (a variant that also appended `covariates` was previously commented out).
features = [*MM_feature_list, outcome_label, censor_label]
print(len(features), " Total Columns", sep="")
print(len(MM_feature_list), " AAs", sep="")

# Restrict the raw table to those columns and report its size and missingness.
data = all_data[features]
print("Shape of New DataFrame:", data.shape)
missing_per_column = data.isna().sum()
missing_sum = missing_per_column.sum()
print("Sum of data missing values:", missing_sum)

data.head(3)

1083 Total Columns
1081 AAs
Shape of New DataFrame: (251776, 1083)
Sum of data missing values: 860476


Unnamed: 0,MM_A_1,MM_A_2,MM_A_3,MM_A_4,MM_A_5,MM_A_6,MM_A_7,MM_A_8,MM_A_9,MM_A_10,...,MM_DPB1_87,MM_DPB1_88,MM_DPB1_89,MM_DPB1_90,MM_DPB1_91,MM_DPB1_92,MM_DPB1_93,MM_DPB1_94,graftyrs,grf_fail
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.992553,1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.003724,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.003724,0


In [22]:
# Feature filtering switches:
#   filter_all_rare=True  -> drop mismatch columns that are > 0 in fewer than
#                            `rare_freq` of the rows
#   filter_all_rare=False -> drop only columns that are never > 0 (invariant)
filter_all_rare = True
rare_freq = 0.01

# Only look at mismatch columns that are still present in `data`, so that
# re-running this cell after columns were already dropped does not raise a
# KeyError on the removed names.
remaining_mm_features = [col for col in MM_feature_list if col in data.columns]

# Fraction of rows with a value > 0 for each column. Missing values compare as
# False, so they count as "not > 0" while still contributing to the denominator.
percentages = data[remaining_mm_features].gt(0).mean()
print(percentages)

if filter_all_rare:
    # Filter out rare AAs (frequency below rare_freq)
    columns_to_remove = percentages[percentages < rare_freq].index.tolist()
else:
    # Filter out invariant AAs (never > 0)
    columns_to_remove = percentages[percentages == 0.0].index.tolist()

data = data.drop(columns=columns_to_remove)
data.shape

MM_A_1        0.000016
MM_A_2        0.000000
MM_A_3        0.000119
MM_A_4        0.000000
MM_A_5        0.000000
                ...   
MM_DPB1_90    0.000008
MM_DPB1_91    0.003094
MM_DPB1_92    0.000028
MM_DPB1_93    0.000528
MM_DPB1_94    0.000528
Length: 1081, dtype: float64


(251776, 260)

In [23]:
# Missing cells remaining after the rare/invariant feature filter. The label
# previously said "raw data" although it reports on the filtered `data`.
missing_sum = data.isna().sum().sum()
print("Sum of filtered data missing values:", missing_sum)

Sum of raw data missing values: 205368


In [24]:
# Preview the first three rows of the filtered table.
data.head(3)

Unnamed: 0,MM_A_9,MM_A_12,MM_A_17,MM_A_43,MM_A_44,MM_A_56,MM_A_62,MM_A_63,MM_A_65,MM_A_66,...,MM_DPB1_57,MM_DPB1_65,MM_DPB1_69,MM_DPB1_76,MM_DPB1_84,MM_DPB1_85,MM_DPB1_86,MM_DPB1_87,graftyrs,grf_fail
0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.992553,1
1,2.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,2.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.003724,0
2,1.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,...,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,11.003724,0


In [27]:
# Per-column count of missing cells in the filtered table.
missing_values_count = data.isnull().sum()

# How many columns contain at least one missing value.
has_missing = missing_values_count.gt(0)
columns_with_missing_values = has_missing.sum()
print(columns_with_missing_values)

258


In [25]:
# Report how many mismatch columns survived filtering for each locus.
for locus in locus_list:
    # Match the full 'MM_<locus>_' prefix rather than a bare substring, so one
    # locus can never be counted under another whose name it merely contains.
    locus_prefix = 'MM_' + str(locus) + '_'
    count = sum(col.startswith(locus_prefix) for col in data.columns)
    print('Remaining '+str(locus)+":"+str(count))

Remaining A:45
Remaining B:45
Remaining C:33
Remaining DRB1:30
Remaining DRB345:31
Remaining DQA1:24
Remaining DQB1:29
Remaining DPA1:5
Remaining DPB1:16


***
## Run FIBERS (Training)

In [None]:
# Load a manually specified bin population from a hard-coded local CSV path.
# It is passed to FIBERS as `manual_bin_init` to partially initialize the bin population.
pop_path = 'C:/Users/ryanu/Desktop/HLA/Imp1/HLA_Paper_Pop_Init.csv'
pop = pd.read_csv(pop_path,low_memory=False)

In [None]:

# Configure FIBERS, one setting per line.
# NOTE(review): `covariates` is passed here, but `data` was built from the
# mismatch features plus outcome/censoring columns only, and the earlier check
# found none of the covariate names in the CSV -- confirm that the
# 'log_rank_residuals' fitness metric can run without those columns.
# NOTE(review): random_seed=None presumably makes each run non-reproducible;
# consider fixing a seed once results need to be repeated.
fibers = FIBERS(
    outcome_label="graftyrs",
    outcome_type="survival",
    iterations=50,
    pop_size=50,
    tournament_prop=0.5,
    crossover_prob=0.5,
    min_mutation_prob=0.1,
    max_mutation_prob=0.5,
    merge_prob=0.1,
    new_gen=0.5,
    elitism=0.1,
    diversity_pressure=5,
    min_bin_size=1,
    max_bin_size=None,
    max_bin_init_size=10,
    fitness_metric="log_rank_residuals",
    log_rank_weighting=None,
    censor_label="grf_fail",
    group_strata_min=0.2,
    penalty=0.5,
    group_thresh=None,
    min_thresh=0,
    max_thresh=5,
    int_thresh=True,
    thresh_evolve_prob=0.5,
    manual_bin_init=pop,
    covariates=covariates,
    report=[0,10,20,30,40],
    random_seed=None,
    verbose=False,
)

# Train on the filtered table; fit() returns the fitted object.
fibers = fibers.fit(data)


### Report Run Time

In [None]:
# Print the `elapsed_time` attribute recorded on the fitted FIBERS object.
# (Units are not visible in this notebook -- presumably seconds; confirm.)
print(fibers.elapsed_time)

***
## Top (or Target) Bin Examination
### Specify Bin to Examine

In [None]:
# Index of the bin to examine in the cells below. The lowest index is the bin
# with the highest fitness; only the single top-ranked bin is reported here,
# even if several bins tie for the top fitness.
bin_index = 0

### Get Bin Details

In [None]:
# Show the details of the bin selected by `bin_index`.
fibers.get_bin_report(bin_index)

### Plot: Kaplan Meier Survival Curves For Each Group Defined by the Target Bin

In [None]:
# Kaplan-Meier survival curves for the groups defined by the selected bin;
# save=True requests that the figure be written under `output_folder`.
fibers.get_kaplan_meir(data,bin_index,save=True,output_folder=output_folder,data_name=data_name)

### Evaluate Target Bin Using Cox Proportional Hazard Model (adjusting for covariates)

In [None]:
# Cox proportional hazards summary for the selected bin.
summary = fibers.get_cox_prop_hazard(data, bin_index)

# Pull the statistics of the first summary row (read by position, so this
# assumes the bin's row comes first -- TODO confirm against the FIBERS output).
bin_hr, bin_low_CI, bin_upper_CI, bin_p_val = (
    summary[column_name].iloc[0]
    for column_name in ('exp(coef)', 'exp(coef) lower 95%', 'exp(coef) upper 95%', 'p')
)
print("Bin HR: ", bin_hr, " (", bin_low_CI, "-", bin_upper_CI, ")", sep="")
print("Bin HR p-value: ", bin_p_val, sep="")

# Persist the full summary table next to the other outputs, then display it.
cox_csv_path = output_folder+'/'+'Cox_PH_'+str(bin_index)+'_'+data_name+'.csv'
summary.to_csv(cox_csv_path, index=False)
summary

### Check and View Any Top Bin Ties

In [None]:
# Check for, and view, any bins tied for the top position.
fibers.report_ties()

***
## Bin Population Examination
### Plot: Basic Bin Population Heatmap

In [None]:
# Basic heatmap of the bin population; save=True requests that the figure be written under `output_folder`.
fibers.get_bin_population_heatmap_plot(save=True,output_folder=output_folder,data_name=data_name)

### Plot: Custom Bin Population Heatmap

In [None]:
# Group names and legend labels for the custom heatmap.
# NOTE(review): five group names are given here, but only four feature lists
# and four colours are defined below -- confirm what the plotting method expects.
group_names=["A", "B", "C", "DQB", "DRB"]
# Legend entries: the two default colours come first, followed by one description per additional colour.
legend_group_info = ['Not in Bin','HLA-A','HLA-B', 'HLA-C','HLA-DQB','HLA-DRB'] #2 default colors first followed by additional color descriptions in legend
# Feature names belonging to each coloured group, in lexicographic order:
#   'B1'..'B194', 'C1'..'C205', and 'DQB1' / 'DRB1' followed by positions 1..94.
# NOTE(review): these names use the 'B1' / 'DQB11' style, whereas the columns
# loaded in this notebook are named like 'MM_B_1' -- confirm they match the
# feature names FIBERS reports, otherwise no feature will receive a group colour.
color_features = [
    sorted(prefix + str(position) for position in range(1, last_position + 1))
    for prefix, last_position in (('B', 194), ('C', 205), ('DQB1', 94), ('DRB1', 94))
]
# One RGB colour per feature group: red, green, orange, pink.
colors = [(1, 0, 0),(0.3,1,0.2),(1, 0.5, 0),(1,0.5,1)]
# The two default colours: very light blue and blue.
default_colors = [(.95, .95, 1),(0, 0, 1)]
# Limits passed to the plotting call below.
max_bins = 100
max_features = 100

# Custom heatmap of the bin population using the groups and colours defined above.
fibers.get_custom_bin_population_heatmap_plot(group_names,legend_group_info,color_features,colors,default_colors,max_bins,max_features,save=True,output_folder=output_folder,data_name=data_name)

### Plot: Bin Population Pareto Front
In the plot, dot color indicates the 'group strata prop' of the given bin, and dot size is relative to the 'group threshold' of that bin.

In [None]:
# Pareto front plot of the bin population; save=True requests that the figure be written under `output_folder`.
fibers.get_pareto_plot(save=True,output_folder=output_folder,data_name=data_name)

### Plot: Estimated Feature Tracking Scores
These scores accumulate throughout the training process and do not necessarily reflect the feature importance of individual bins or of the final bin population.

In [None]:
# Feature tracking scores plot, called with max_features=40.
fibers.get_feature_tracking_plot(max_features=40,save=True,output_folder=output_folder,data_name=data_name)

### Plot: Dataset Covariate Residuals (if applicable)

In [None]:
# The residuals histogram is only drawn for the residual-based fitness metrics.
if fibers.fitness_metric in ('residuals', 'log_rank_residuals'):
    fibers.get_residuals_histogram(save=True,output_folder=output_folder,data_name=data_name)

### Plot: Bin Log-Rank Scores Vs. Residuals Scores (if applicable)
In the plot, dot color indicates the 'group strata prop' of the given bin, and dot size is relative to the 'group threshold' of that bin.

In [None]:
# Log-rank vs. residuals scores plot; only drawn when the combined 'log_rank_residuals' metric was used.
if fibers.fitness_metric == 'log_rank_residuals':
    fibers.get_log_rank_residuals_plot(save=True,output_folder=output_folder,data_name=data_name)

###  Evaluate All Bins in Population using Cox PH Model (Can be slow)

In [None]:
# Evaluate every bin in the population with the Cox PH model (can be slow).
# The adjusted-hazard-ratio plots in the following cells come after this step.
fibers.calculate_cox_prop_hazards(data)

### Plot: Bin Log-Rank Scores Vs. Adjusted Hazard Ratios (if applicable)
In the plot, dot color indicates the 'group strata prop' of the given bin, and dot size is relative to the 'group threshold' of that bin.

In [None]:
# Log-rank vs. adjusted hazard ratio plot; only drawn for the log-rank-based fitness metrics.
if fibers.fitness_metric in ('log_rank', 'log_rank_residuals'):
    fibers.get_log_rank_adj_HR_plot(save=True,output_folder=output_folder,data_name=data_name)

### Plot: Bin Adjusted Hazard Ratios Vs. Residuals Scores (if applicable)
In the plot, dot color indicates the 'group strata prop' of the given bin, and dot size is relative to the 'group threshold' of that bin.

In [None]:
# Adjusted hazard ratio vs. residuals plot; only drawn for the residual-based fitness metrics.
if fibers.fitness_metric in ('residuals', 'log_rank_residuals'):
    fibers.get_adj_HR_residuals_plot(save=True,output_folder=output_folder,data_name=data_name)

### Plot: Bin Adjusted Hazard Ratios Vs. Log Rank * Residuals Scores (if applicable)
In the plot, dot color indicates the 'group strata prop' of the given bin, and dot size is relative to the 'group threshold' of that bin.

In [None]:
# Adjusted hazard ratio vs. (log rank * residuals) plot; only drawn when the combined 'log_rank_residuals' metric was used.
if fibers.fitness_metric == 'log_rank_residuals':   
    fibers.get_adj_HR_metric_product_plot(save=True,output_folder=output_folder,data_name=data_name)

***
## History of Bin Evolution (Top Bin Each Generation)
### Plot: Fitness of top bin across training iterations

In [None]:
# Fitness of the top bin across training iterations.
# NOTE(review): unlike the other plot cells, no save/output_folder/data_name
# arguments are passed here -- confirm whether this figure should also be saved.
fibers.get_fitness_progress_plot()

### Plot: Scoring Metric and Pre-Fitness of top bin across training iterations

In [None]:
# Scoring metric and pre-fitness of the top bin across training iterations.
fibers.get_perform_progress_plot(save=True,output_folder=output_folder,data_name=data_name)

### Plot: Normalized Top-Bin Stats Across Training Iterations

In [None]:
# Normalized top-bin statistics across training iterations.
fibers.get_misc_progress_plot(save=True,output_folder=output_folder,data_name=data_name)

### View Top Bin Information Across all Iterations/Generations

In [None]:
# Display the tracking table of top-bin information for each iteration/generation.
fibers.perform_track_df

***
## Save Bin Population
### Save Bin Population Details to CSV

In [None]:
# Fetch the bin population from the fitted object and write it to
# '<output_folder>/Pop_<data_name>.csv' without the index column.
pop_df = fibers.get_pop()
pop_df.to_csv(output_folder+'/'+'Pop_'+data_name+'.csv', index=False)

### Pickle Trained FIBERS Object (For Future Use)

In [None]:
# Serialize the trained FIBERS object to '<output_folder>/<data_name>.pickle'
# so it can be reloaded later without retraining.
with open(output_folder+'/'+data_name+'.pickle', 'wb') as f:
    pickle.dump(fibers, f)

***
## Transforming Bins Into New Features (i.e. Feature Learning) and Save as New CSV Files
### Transform Bins Using Total Sums (i.e. Respective Bin Thresholds Not Applied)

In [None]:
# Transform the data into bin features using total sums (bin thresholds not
# applied), save the result as CSV, then display it.
tdf = fibers.transform(data,full_sums=True)
tdf.to_csv(output_folder+'/'+'Transformed_FullSums_'+data_name+'.csv', index=False)
tdf

### Transform Bins Using Respective Bin Threshold (i.e. 0 = At/Under Threshold Group and 1 = Over Threshold Group)

In [None]:
# Transform the data into bin features with each bin's threshold applied
# (0 = at/under threshold group, 1 = over threshold group), save the result
# as CSV, then display it.
tdf = fibers.transform(data,full_sums=False)
# The file name now carries the '_' separator before the dataset name, matching
# the other outputs ('Transformed_FullSums_<name>.csv', 'Pop_<name>.csv', ...).
tdf.to_csv(output_folder+'/'+'Transformed_Threshold_'+data_name+'.csv', index=False)
tdf

***
## Open Pickled FIBERS Object (Example)

In [None]:
# Reload the FIBERS object pickled earlier in this notebook.
# NOTE: unpickling can execute arbitrary code embedded in the file -- only load
# pickle files that you created yourself or otherwise fully trust.
with open(output_folder+'/'+data_name+'.pickle', 'rb') as f:
    fibers = pickle.load(f)

# Confirm the reloaded object works by showing the report for the selected bin.
fibers.get_bin_report(bin_index)