In [None]:
import warnings
# Silence every warning for the rest of the session: 'ignore' is given without a
# category or module restriction, so the filter applies to all warnings.
# NOTE(review): this also hides warnings that may matter — consider narrowing it.
warnings.filterwarnings('ignore')

# Libraries

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path

from sklearn.model_selection import LeaveOneGroupOut
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.base import clone
from sklearn.linear_model import Lasso

from stabl.stabl import Stabl, plot_stabl_path, plot_fdr_graph, save_stabl_results, export_stabl_to_csv
from stabl.preprocessing import LowInfoFilter, remove_low_info_samples

%config InlineBackend.figure_formats=['retina']

ModuleNotFoundError: No module named 'stabl'

In [None]:
from stabl.multi_omic_pipelines import multi_omic_stabl, multi_omic_stabl_cv, late_fusion_lasso_cv
from stabl.single_omic_pipelines import single_omic_stabl, single_omic_stabl_cv
from stabl.pipelines_utils import compute_features_table

# Data

In [None]:
# Feature tables, one CSV per data layer; the first CSV column becomes the row index.
X_Celldensities = pd.read_csv('./UOPfinal_celldensities.csv', index_col=0)
X_Function = pd.read_csv('./UOPfinal_functional.csv', index_col=0)
X_Metavariables = pd.read_csv('./UOPfinal_metavariables.csv', index_col=0)
X_Neighborhood = pd.read_csv('./UOPfinal_neighborhood.csv', index_col=0)

# Outcome: the `grade` column shifted down by one
# (presumably maps grades 1/2 onto 0/1 labels for the binary task — TODO confirm).
y = pd.read_csv('./UOPfinal_outcome.csv', index_col=0).grade - 1

In [None]:
# Data-layer name -> feature table. Insertion order is kept, so the layers are
# processed (and later concatenated) in exactly this order.
train_data_dict = dict(
    Celldensities=X_Celldensities,
    Function=X_Function,
    Neighborhood=X_Neighborhood,
    Metavariables=X_Metavariables,
)

# Define Leave-one-patient-out CV

In [5]:
# Patient label of every sample, flattened to a 1-D array. scikit-learn's
# LeaveOneGroupOut builds each test mask as `groups == g`; with a one-column
# DataFrame those masks are 2-D and cannot index the 1-D sample positions.
# NOTE(review): assumes the CSV holds a single data column whose rows are in the
# same order as the samples of X / y — confirm.
patient_groups = pd.read_csv('./UOPfinal_patient_groups.csv', index_col=0).to_numpy().ravel()
patient_groups

array([ 1,  1,  1,  2,  2,  3,  3,  3,  4,  4,  4,  5,  5,  5,  6,  6,  6,
        7,  7,  7,  8,  8,  8,  9,  9,  9, 10, 10, 10, 11, 11, 11, 12, 12,
       12, 13, 13, 13, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20, 21,
       21, 21, 22, 22, 22, 24, 24, 24, 25, 25, 25, 26, 26, 26, 27, 27, 27,
       28, 28, 28])

# Results folder

In [None]:
# Root directory for results: later cells pass it as `save_path` and read/write
# files under its "Training-Validation" sub-folder.
result_folder = "./Results UOPStabilitySelection"

# Main script

In [None]:
# Pass every data layer through `remove_low_info_samples` and store the result back
# under the same key; the dictionary object itself is updated in place.
train_data_dict.update({
    layer_name: remove_low_info_samples(layer_table)
    for layer_name, layer_table in train_data_dict.items()
})

In [None]:
# Stabl configuration used inside the cross-validation loop.
stabl = Stabl(
    lambda_name="C",
    lambda_grid=np.linspace(0.01, 5, 10),
    n_bootstraps=500,
    sample_fraction=0.5,
    replace=False,
    artificial_type="random_permutation",
    artificial_proportion=1.0,
    fdr_threshold_range=np.arange(0.2, 1, 0.01),
    random_state=11,
)

# Outer CV: LeaveOneGroupOut holds out one group label per fold.
outer_splitter = LeaveOneGroupOut()

# Stability-selection variant: a clone of `stabl` with `artificial_type=None`
# and `hard_threshold=0.3`.
stability_selection = clone(stabl).set_params(
    hard_threshold=0.3,
    artificial_type=None,
)

# Multi-omic Training-CV

In [None]:
# Seed NumPy's global random generator before the cross-validated run
# (presumably so anything drawing from the global RNG is repeatable).
np.random.seed(11)

In [None]:
# Cross-validated multi-omic run: the splitter and its group labels define the
# outer folds, and `save_path` points at the results folder.
predictions_dict = multi_omic_stabl_cv(
    data_dict=train_data_dict,
    y=y,
    task_type="binary",
    stabl=stabl,
    stability_selection=stability_selection,
    outer_splitter=outer_splitter,
    outer_groups=patient_groups,
    save_path=Path(result_folder),
)

# Multi-omic training to derive coefficients

In [None]:
# Re-seed NumPy's global random generator before the full-data training run
# (presumably so this section is repeatable on its own).
np.random.seed(11)

In [None]:
# Stabl configuration for the fit on the full training data: a 30-point lambda
# grid and 5000 bootstraps.
stabl_multi = Stabl(
    lambda_grid=np.linspace(0.01, 5, 30),
    n_bootstraps=5000,
    sample_fraction=0.5,
    replace=False,
    artificial_type="random_permutation",
    artificial_proportion=1.0,
    hard_threshold=None,
    fdr_threshold_range=np.arange(0.2, 1, 0.01),
    random_state=11,
)

# Rebinds `stability_selection` to a clone of `stabl_multi` with
# `artificial_type=None` and `hard_threshold=0.3`.
stability_selection = clone(stabl_multi).set_params(
    hard_threshold=0.3,
    artificial_type=None,
)

In [None]:
# Multi-omic run on the whole training set (no splitter is passed).
# Note: this rebinds `predictions_dict`, replacing the value returned by the
# cross-validated run above.
predictions_dict = multi_omic_stabl(
    data_dict=train_data_dict,
    y=y,
    task_type="binary",
    stabl=stabl_multi,
    stability_selection=stability_selection,
    save_path=Path(result_folder),
)

# Late fusion lasso

In [None]:
# Late-fusion Lasso, cross-validated with the same `outer_splitter` as above.
# That splitter is a LeaveOneGroupOut, whose split() raises ValueError when
# `groups` is None, so the patient labels have to be supplied here.
# np.asarray(...).ravel() yields a 1-D label array whether `patient_groups` is a
# one-column DataFrame or already an array.
# NOTE(review): assumes the labels are in the same row order as the samples — confirm.
late_fusion_lasso_cv(
    train_data_dict=train_data_dict,
    y=y,
    outer_splitter=outer_splitter,
    task_type="binary",
    save_path=result_folder,
    groups=np.asarray(patient_groups).ravel()
)

# Features Table

In [None]:
# For each model, read the row labels of its saved coefficients file; these are
# used as that model's list of selected features.
selected_features_dict = dict()
for model in ["STABL", "EF Lasso", "SS 03", "SS 05", "SS 08"]:
    path = Path(result_folder, "Training-Validation", f"{model} coefficients.csv")
    try:
        selected_features_dict[model] = list(pd.read_csv(path, index_col=0).index)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # A missing or empty file is treated as "no features for this model".
        # Any other failure (parse error, permissions, interrupt) is left to
        # surface instead of being silently turned into an empty list.
        selected_features_dict[model] = []

In [None]:
# Build the features table from the per-model selections; `X_train` is every
# data layer joined side by side (column-wise).
features_table = compute_features_table(
    selected_features_dict,
    X_train=pd.concat(list(train_data_dict.values()), axis="columns"),
    y_train=y,
    task_type="binary",
)

In [None]:
# Write the features table into the "Training-Validation" sub-folder of the results directory.
features_table.to_csv(Path(result_folder) / "Training-Validation" / "Table of features.csv")