# scikit-FIBERS: Demonstration Notebook
This notebook is set up as a demonstration for running scikit-FIBERS.

***
## Installation:

In [None]:
# TODO: Add installation code for scikit-FIBERS.

***
## Imports:

In [None]:
import os
import pickle
from sklearn.metrics import classification_report
from src.skfibers.fibers import FIBERS
#from src.skfibers.experiments.datagen import create_data_simulation_bin
from src.skfibers.experiments.datagen_evolvable_threshold import create_data_simulation_bin_evolve

# Show the kernel's working directory; relative paths used in this notebook resolve against it.
current_working_directory = os.getcwd()
print(current_working_directory)

***
## Set Up Local Run Parameters


In [None]:
# Choose where every artifact written by this notebook (CSVs, pickle, saved plots) goes.
#   local_save=True  -> 'local_output', relative to the current working directory.
#   local_save=False -> a machine-specific absolute path; edit it before running on another machine.
local_save = False
if local_save:
    output_folder = 'local_output'
else:
    output_folder = 'C:/Users/ryanu/Desktop/FIBERS_test_output'

# Create the folder up front so the later file writes do not fail on a missing directory.
os.makedirs(output_folder, exist_ok=True)

***
## Survival Data Simulation

In [None]:
# Base name shared by every output file written by this notebook.
data_name = 'sampledata'

# Simulate a dataset with the settings below (fixed random_seed for the simulation).
data = create_data_simulation_bin_evolve(
    number_of_instances=10000,
    number_of_features=100,
    number_of_features_in_bin=10,
    no_fail_proportion=0.5,
    mm_frequency_range=(0.4, 0.5),
    noise_frequency=0.0,
    class0_time_to_event_range=(1.5, 0.2),
    class1_time_to_event_range=(1, 0.2),
    censoring_frequency=0.5,
    random_seed=42,
    negative=False,
    threshold=1,
)

# Save the full simulated dataset (still including the 'TrueRiskGroup' column).
data.to_csv(f'{output_folder}/{data_name}.csv', index=False)
# To reuse a previously saved dataset instead, load it back (requires `import pandas as pd`):
#data = pd.read_csv(output_folder+'/'+data_name+'.csv')

# Keep the 'TrueRiskGroup' column aside for evaluating predictions later,
# and remove it from the data handed to FIBERS.
true_risk_group = data[['TrueRiskGroup']]
data = data.drop(columns='TrueRiskGroup')

***
## Running FIBERS (Training)

In [None]:
# Configure FIBERS; one argument per line so individual settings are easy to find and tweak.
# NOTE(review): random_seed=None presumably leaves training unseeded, so repeated runs may differ;
# pass an integer if repeatable results are needed — confirm against the FIBERS documentation.
fibers = FIBERS(
    outcome_label="Duration",
    outcome_type="survival",
    iterations=50,
    pop_size=50,
    crossover_prob=0.5,
    mutation_prob=0.1,
    new_gen=1.0,
    elitism=0.1,
    min_bin_size=1,
    fitness_metric="log_rank",
    log_rank_weighting=None,
    censor_label="Censoring",
    group_strata_min=0.2,
    group_thresh=None,
    min_thresh=0,
    max_thresh=3,
    int_thresh=True,
    thresh_evolve_prob=0.5,
    manual_bin_init=None,
    covariates=None,
    report=None,
    random_seed=None,
    verbose=False,
)

# Train on the simulated data; fit() returns the object that the rest of the notebook uses.
fibers = fibers.fit(data)

***
## Save Bin Population
### Save Bin Population Details to CSV

In [None]:
# Fetch the bin population table and write it to '<output_folder>/Pop_<data_name>.csv'.
pop_df = fibers.get_pop()
pop_df.to_csv(f'{output_folder}/Pop_{data_name}.csv', index=False)


### Pickle Trained FIBERS Object (For Future Use)

In [None]:
# Serialize the trained FIBERS object to '<output_folder>/<data_name>.pickle' for later reuse.
with open(f'{output_folder}/{data_name}.pickle', 'wb') as pickle_file:
    pickle.dump(fibers, pickle_file)

***
## Bin Population Examination
### Bin Population Pareto Front

In [None]:
# Pareto-front plot for the bin population.
# save=True with output_folder/data_name presumably also writes the figure to disk — confirm in FIBERS docs.
fibers.get_pareto_plot(
    save=True,
    output_folder=output_folder,
    data_name=data_name,
)

### Examine Feature Tracking Scores

In [None]:
# Feature tracking scores plot; max_features=50 presumably caps how many features are shown — confirm.
fibers.get_feature_tracking_plot(
    max_features=50,
    save=True,
    output_folder=output_folder,
    data_name=data_name,
)

### Bin Population Details
The dataframe (`pop_df`, returned by `fibers.get_pop()`) containing the statistics/characteristics of the bins in the trained bin population.

In [None]:
# Display the bin population table obtained from fibers.get_pop() earlier in the notebook.
pop_df

***
## Top (or Individual) Bin Examination

In [None]:
# Index of the bin to examine in the cells below.
# The lowest index is the bin with the highest fitness; only the bin ranked at the top is
# reported, even when several bins are tied for the top fitness.
bin_index = 0

In [None]:
# Report on the bin selected by bin_index (set in the previous cell).
fibers.get_bin_report(bin_index)

### Kaplan-Meier Survival Plot (For Top Bin)

In [None]:
# Kaplan-Meier survival plot for the bin selected by bin_index, computed on the training data.
# (The method name is spelled `get_kaplan_meir` in the FIBERS API.)
fibers.get_kaplan_meir(
    data,
    bin_index,
    save=True,
    output_folder=output_folder,
    data_name=data_name,
)

### Check and View Top Bin Ties

In [None]:
# Check for (and view) bins tied for the top rank, which bin_index = 0 alone would not reveal.
fibers.report_ties()


***
## History of Bin Evolution (Top Bin Each Generation)
### Plot: Fitness of top bin each training iteration

In [None]:
# Plot the fitness of the top bin at each training iteration.
# NOTE(review): unlike the other plotting cells, no save/output_folder/data_name arguments are
# passed here, so this figure is presumably only displayed — confirm whether that is intended.
fibers.get_fitness_progress_plot()

### Plot: Scoring Metric and Pre-Fitness of top bin each training iteration

In [None]:
# Scoring metric and pre-fitness of the top bin across training iterations.
fibers.get_perform_progress_plot(
    save=True,
    output_folder=output_folder,
    data_name=data_name,
)

### Plot: Normalized Top-Bin Stats Across Training Iterations

In [None]:
# Normalized top-bin statistics across training iterations.
fibers.get_misc_progress_plot(
    save=True,
    output_folder=output_folder,
    data_name=data_name,
)

### Raw Top Bin / Generation Details

In [None]:
# Display the raw per-generation tracking table for the top bin (attribute of the trained object).
fibers.perform_track_df

***
## Transforming Bins Into New Features (Feature Learning)

In [None]:
# Transform the data with the trained bins, save the result as
# '<output_folder>/Transformed_<data_name>.csv', then display it.
tdf = fibers.transform(data)
tdf.to_csv(f'{output_folder}/Transformed_{data_name}.csv', index=False)
tdf

***
## Prediction (of Strata)
### Predict Strata (Low vs. High) Using Top Bin

In [None]:
# Predict each instance's strata using only the top-ranked bin (bin_number=0).
predictions = fibers.predict(data,bin_number=0)
# classification_report expects (y_true, y_pred): ground truth first, predictions second.
# The reverse order swaps precision with recall and reports support for the predicted classes.
print(classification_report(true_risk_group, predictions))



### Predict Strata (Low vs. High) Using Whole Bin Population (Weighted Voting Scheme)

In [None]:
# Predict each instance's strata using the whole bin population (no bin_number given).
predictions = fibers.predict(data)
# classification_report expects (y_true, y_pred): ground truth first, predictions second.
# The reverse order swaps precision with recall and reports support for the predicted classes.
print(classification_report(true_risk_group, predictions))

***
## Open Pickled FIBERS Object (Example)

In [None]:
# Reload the FIBERS object from '<output_folder>/<data_name>.pickle'.
# NOTE: only unpickle files you trust — pickle.load can execute arbitrary code.
with open(f'{output_folder}/{data_name}.pickle', 'rb') as pickle_file:
    fibers = pickle.load(pickle_file)

# Sanity check: the reloaded object can produce the same bin report as before.
fibers.get_bin_report(bin_index)