In [5]:
from data import load_data

# Load the clinical markers, gene expressions, treatment markers and
# clinical outcome tables. The second value returned by load_data()
# (the FISH markers) is discarded because the experiments below do not use it.
(
    clinical_markers,
    _,
    gene_expressions,
    treatment_markers,
    clinical_outcome,
) = load_data()

In [6]:
# Display the treatment markers table loaded above: in the rendered output it has
# one row per patient ID and one 0/1 indicator column per first-line therapy.
treatment_markers

Unnamed: 0_level_0,therapy_first_line_Bor,therapy_first_line_Bor-Cyc-Dex,therapy_first_line_Bor-Dex,therapy_first_line_Bor-Len-Dex,therapy_first_line_Len,therapy_first_line_Len-Dex
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MMRF1007,0,0,0,0,0,0
MMRF1011,0,0,1,0,0,0
MMRF1013,0,0,0,1,0,0
MMRF1014,0,0,1,0,0,0
MMRF1016,0,1,0,0,0,0
MMRF1017,0,0,0,0,0,1
MMRF1018,0,0,0,0,0,1
MMRF1020,0,0,0,1,0,0
MMRF1021,0,0,0,1,0,0
MMRF1024,0,0,0,0,0,0


In [2]:
import pipeline
from constants import RANDOM_STATE, N_FOLDS
from util import Analyser, combine_markers_for_stratification

# NOTE(review): the recorded run of this cell failed with
# "ImportError: cannot import name 'RANDOM_STATE' from 'constants'" -- confirm
# that the constants package actually exports RANDOM_STATE and N_FOLDS.
# NOTE(review): `StratifiedKFold` and `select_markers` are used below but are not
# imported in any cell visible here; confirm they are brought into scope
# elsewhere in the notebook before running this cell on a fresh kernel.
# NOTE(review): the original imported `analyser` from util but instantiated
# `Analyser()`; the import now matches the call -- confirm the exported name.

# Analyser collects classification metrics, grouped by experiment id and by
# experiment group ('train' / 'valid').
analyser = Analyser()

# Build a stratification flag by combining treatment and clinical outcome,
# to avoid biasing the models with unbalanced treatments or clinical outcomes.
stratification_flag = combine_markers_for_stratification(treatment_markers, clinical_outcome)

# Split the data into N_FOLDS folds stratified by the combined
# treatment / treatment-sensitivity flag.
kfold = StratifiedKFold(N_FOLDS, shuffle=True, random_state=RANDOM_STATE)

# enumerate() yields (fold_number, (train_index, valid_index)), so the index
# pair must be unpacked with a nested target.
# clinical_markers is passed as X instead of `_`: in a notebook `_` also names
# the last cell output, so relying on it is fragile. The folds are derived
# from the stratification flag; X only has to have the same number of rows,
# which the positional .iloc indexing below already assumes.
for experiment, (train_index, valid_index) in enumerate(
        kfold.split(clinical_markers, stratification_flag)):

    # Select clinical markers using the training rows only.
    # alpha is set to the probability of a marker being chosen at random.
    selected_clinical_markers = select_markers(
        clinical_markers.iloc[train_index],
        alpha=1./clinical_markers.shape[1], beta=0.75)

    # Select gene expressions using the training rows only.
    # alpha is set to the probability of a marker being chosen at random.
    selected_gene_expressions = select_markers(
        gene_expressions.iloc[train_index],
        alpha=1./gene_expressions.shape[1], beta=0.75)

    # Join the selected clinical markers and gene expressions into one table.
    x = clinical_markers[selected_clinical_markers].join(
        gene_expressions[selected_gene_expressions])

    # Split the data set into training and validation parts.
    x_train, y_train = x.iloc[train_index, :].values, clinical_outcome.iloc[train_index, :].values
    x_valid, y_valid = x.iloc[valid_index, :].values, clinical_outcome.iloc[valid_index, :].values

    # Create an independent predictor for each ML algorithm.
    # NOTE(review): all four models report under the same experiment_id, so
    # their metrics share one experiment entry -- confirm this is intended.
    for model in ['mlp', 'svm', 'lightgbm', 'lr']:

        snma = pipeline.SNMA(model=model)

        # Fit the model through the SNMA pipeline.
        snma.fit(x_train, y_train)

        # Predict on the training data, only for comparison.
        y_hat_train = snma.predict(x_train)

        # Predict on the validation data, used for the main results.
        y_hat_valid = snma.predict(x_valid)

        # Compute classification metrics for the training data.
        analyser.compute_classification_metrics(
            y_train, y_hat_train, experiment_id=experiment, experiment_group='train')

        # Compute classification metrics for the validation data.
        analyser.compute_classification_metrics(
            y_valid, y_hat_valid, experiment_id=experiment, experiment_group='valid')

# Print the summary of the results.
analyser.summarize()

ImportError: cannot import name 'RANDOM_STATE' from 'constants' (C:\Users\Venezian\git\multiple-myeloma\constants\__init__.py)