# SurvivalLCS Experiment Runs

## Import and Setup

### Load packages

In [1]:
import os
import pandas as pd
import numpy as np
import random
import sys
import glob
from datetime import date
import argparse
from random import shuffle
from random import sample
import matplotlib.pyplot as plt
import sys
import shutil
import sksurv
import pickle
from sklearn.impute import KNNImputer
from sklearn.impute import SimpleImputer
from survival_LCS_coxchecks import survivalLCS_coxChecks as survivalLCS

In [2]:
# Put the local survival-lcs checkout on the import path.
# NOTE(review): presumably this is where survival_LCS_coxchecks lives; it is
# appended after that module's first import above - confirm the ordering works
# on a fresh kernel.
sys.path.append("/home/bandheyh/common/survival-lcs")

In [3]:
# Turn off matplotlib interactive mode once; a repeated call changes nothing.
plt.ioff()

<contextlib.ExitStack at 0x1555501cfeb0>

## Survival-LCS Parameters

### Set file names and necessary parameters

In [4]:
# ---- Execution mode ---------------------------------------------------------
# True: model objects are only queued here and run later on HPC resources.
HPC = True

# ---- Locations --------------------------------------------------------------
homedir = "/home/bandheyh/common/survival-lcs/pipeline"

# ---- Simulation grid --------------------------------------------------------
models = ['me', 'epi', 'het', 'add']
m0s = []

# Passed to the pipeline as ``c`` (presumably censoring proportions - confirm).
c = [0.1, 0.4, 0.8]
nfeat = ['f100', 'f1000', 'f10000']  # f10000 is included in the full grid
maf = ['maf0.2', 'maf0.4']

# ---- Training settings ------------------------------------------------------
iterations = 50000
cv_splits = 5

# ---- Reduced grid for quick debugging runs ----------------------------------
DEBUG = False
if DEBUG:
    models, c = ['me'], [0.1]
    nfeat, maf = ['f100', 'f1000'], ['maf0.2', 'maf0.4']
    iterations, cv_splits = 1000, 3

# ---- Empty Brier score frames handed to every pipeline object ---------------
brier_df = pd.DataFrame()
cox_brier_df = pd.DataFrame()

# ---- Other (non-parameter) state --------------------------------------------
simulated = True  # CHANGE THIS TO FALSE IF RUNNING REAL DATA

lcs_run = True
dtype_list = []

### Import the survival_LCS pipeline

In [5]:
from survival_LCS_coxchecks import survivalLCS_coxChecks as survivalLCS

### Run the survival_LCS pipeline

In [6]:
def get_parameters(models, nfeat, maf, i, j, k):
    """Build the paths and model descriptors for one simulated dataset.

    The dataset is selected by ``models[i]``, ``nfeat[j]`` and ``maf[k]``.
    All paths are rooted at the module-level ``homedir``.

    Side effects: appends the dataset label (``<model>_<nfeat>_<maf>``) to the
    module-level ``dtype_list`` and prints the dataset path and the label.

    Returns a 12-tuple
    ``(g, mtype, d, m, o, e, brier_df, cox_brier_df,
    m0_path, m0_type, m1_path, m1_type)`` where

    * ``g`` is the simulated dataset file path,
    * ``mtype`` is the model type name for ``models[i]``,
    * ``d``, ``m``, ``o`` are the ``cv_sim_data``, ``pickled_cv_models`` and
      ``sim_lcs_output`` locations for this dataset,
    * ``e`` is the fixed string ``"testallsims"``,
    * ``brier_df`` / ``cox_brier_df`` are the module-level frames,
    * ``m0_*`` / ``m1_*`` are the model file paths and types; the ``m1``
      values are ``None`` for the ``'me'`` and ``'epi'`` models.
    """
    model_key = models[i]
    model_name, feat_name, maf_name = str(model_key), str(nfeat[j]), str(maf[k])
    dtype = f"{model_name}_{feat_name}_{maf_name}"

    sim_root = f"{homedir}/simulated_datasets/EDM-1_one_of_each"
    g = f"{sim_root}/{dtype}_EDM-1_01.txt"
    dtype_list.append(dtype)
    print(g)

    d = f"{homedir}/cv_sim_data/cv_{model_name}/{dtype}"
    m = f"{homedir}/pickled_cv_models/{model_name}/{dtype}"
    o = f"{homedir}/sim_lcs_output/{model_name}/{dtype}"

    me_models_file = f"{sim_root}/model_files/me_h0.2_{maf_name}_Models.txt"
    epi_models_file = f"{sim_root}/model_files/epi_h0.2_{maf_name}_Models.txt"

    # First underlying model: main effect for everything except 'epi'.
    if model_key in ['me', 'add', 'het']:
        m0_path, m0_type = me_models_file, 'main_effect'
    else:
        m0_path, m0_type = epi_models_file, '2way_epistasis'

    # Second underlying model: only the non-'me'/'epi' models have one.
    if model_key in ['me', 'epi']:
        m1_path, m1_type = None, None
    else:
        m1_path, m1_type = epi_models_file, '2way_epistasis'

    # Anything that is not 'me', 'epi' or 'add' is treated as heterogeneous.
    mtype_by_model = {
        'me': 'main_effect',
        'epi': '2way_epistasis',
        'add': 'additive',
    }
    mtype = mtype_by_model.get(model_key, 'heterogeneous')

    e = "testallsims"
    print(dtype)

    return (g, mtype, d, m, o, e, brier_df, cox_brier_df,
            m0_path, m0_type, m1_path, m1_type)



In [7]:
def run_slcs(survivalLCS, lcs_run=True):
    """Run the Survival-LCS pipeline on one configured pipeline object.

    Parameters
    ----------
    survivalLCS : object
        A configured pipeline instance (note: the parameter name shadows the
        imported class). It must provide ``returnCVModelFiles()``,
        ``return_all_results()`` and a ``model_type`` attribute.
    lcs_run : bool, default True
        When True, the CV model files are produced and all results are
        collected. When False, nothing is run and only a notice is printed.

    Returns
    -------
    tuple
        ``(ibs, cox_ibs, perm)`` as returned by ``return_all_results()``, or
        ``(None, None, None)`` when ``lcs_run`` is False. Previously the
        non-run branch was unreachable and would have left these names
        unbound at ``return``.
    """
    if lcs_run:
        survivalLCS.returnCVModelFiles()
        ibs, cox_ibs, perm = survivalLCS.return_all_results()
    else:
        print("Datasets generated only")
        # Keep the 3-tuple shape so callers can still unpack the result.
        ibs = cox_ibs = perm = None

    print(survivalLCS.model_type)

    return ibs, cox_ibs, perm

In [8]:
def make_breir_output(brier_df_list, output_path, model_type, models, dtype_list, i, df_type):
    """Write the combined Brier score table and its plot for one model.

    Parameters
    ----------
    brier_df_list : list of pandas.DataFrame
        Score frames to concatenate column-wise.
    output_path : str
        Directory that receives the PNG plot.
    model_type : str
        Model type name; used in both the CSV and the PNG file names.
    models : list
        Model identifiers; ``models[i]`` selects the CSV sub-directory under
        the module-level ``homedir``.
    dtype_list : list of str
        Column labels to plot. Each label needs matching ``<label>_ci_lower``
        and ``<label>_ci_upper`` columns, and the table needs a ``times``
        column.
    i : int
        Index into ``models``.
    df_type : str
        File name prefix, e.g. ``'ibs'`` or ``'cox_ibs'``.
    """
    brier_df = pd.concat(brier_df_list, axis=1, sort=False).reset_index()

    # Use the model_type argument here; the original read a module-level
    # ``mtype`` that only existed as a leftover of the driver loop.
    csv_path = (homedir + '/' + 'sim_lcs_output/' + str(models[i]) + '/'
                + df_type + '_data_' + model_type + '.txt')
    brier_df.to_csv(csv_path, index=False)

    fig = plt.figure(figsize=(10, 10))
    plt.xlabel('Time')
    plt.ylabel('Brier score')
    plt.ylim(0, 1)

    # NOTE(review): the first entry of dtype_list is skipped, exactly as the
    # original range(1, len(dtype_list)) did - confirm this is intended.
    # A distinct loop name keeps the ``i`` parameter intact.
    for dtype in dtype_list[1:]:
        plt.plot(brier_df['times'], brier_df[dtype], label=brier_df[dtype].name)
        plt.fill_between(brier_df['times'],
                         brier_df[dtype + '_ci_lower'],
                         brier_df[dtype + '_ci_upper'],
                         color='b', alpha=.1)
    plt.savefig(output_path + '/' + df_type + '_scores_' + model_type + '.png')
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

In [9]:
from survival_LCS_coxchecks import survivalLCS_coxChecks as survivalLCS

# Build one pipeline object per (model, feature count, MAF) combination.
# With HPC enabled the objects are only queued in job_obj_list for later
# dispatch; otherwise each one is run immediately and the per-model Brier
# score outputs are written after every feature-count group.
job_obj_list = list()
for i in range(0, len(models)):
    for j in range(0, len(nfeat)):
        brier_df_list = list()
        cox_brier_df_list = list()
        for k in range(0, len(maf)):
            (g, mtype, d, m, o, e, brier_df, cox_brier_df,
             m0_path, m0_type, m1_path, m1_type) = get_parameters(models, nfeat, maf, i, j, k)
            slcs = survivalLCS(g, mtype, d, m, o, e, brier_df, cox_brier_df,
                               m0_path, m0_type, m1_path, m1_type,
                               c=c, iterations=iterations, cv=cv_splits)
            if not HPC:
                ibs, cox_ibs, perm = run_slcs(slcs)
                brier_df_list.append(ibs)
                cox_brier_df_list.append(cox_ibs)
            else:
                job_obj_list.append(slcs)
        if not HPC:
            if lcs_run:
                # Read output_path/model_type from the instance that was just
                # run, not from the class object.
                # NOTE(review): assumes instances expose output_path the same
                # way they expose model_type - confirm against the class.
                make_breir_output(brier_df_list, slcs.output_path, slcs.model_type, models, dtype_list, i, 'ibs')
                # The Cox output must be built from the Cox score frames; the
                # original passed brier_df_list here a second time.
                make_breir_output(cox_brier_df_list, slcs.output_path, slcs.model_type, models, dtype_list, i, 'cox_ibs')
            else:
                print('LCS not run, no brier scores available')

/home/bandheyh/common/survival-lcs/pipeline_copy/simulated_datasets/EDM-1_one_of_each/me_f100_maf0.2_EDM-1_01.txt
me_f100_maf0.2
None
/home/bandheyh/common/survival-lcs/pipeline_copy/simulated_datasets/EDM-1_one_of_each/me_f100_maf0.4_EDM-1_01.txt
me_f100_maf0.4
None
/home/bandheyh/common/survival-lcs/pipeline_copy/simulated_datasets/EDM-1_one_of_each/me_f1000_maf0.2_EDM-1_01.txt
me_f1000_maf0.2
None
/home/bandheyh/common/survival-lcs/pipeline_copy/simulated_datasets/EDM-1_one_of_each/me_f1000_maf0.4_EDM-1_01.txt
me_f1000_maf0.4
None
/home/bandheyh/common/survival-lcs/pipeline_copy/simulated_datasets/EDM-1_one_of_each/me_f10000_maf0.2_EDM-1_01.txt
me_f10000_maf0.2
None
/home/bandheyh/common/survival-lcs/pipeline_copy/simulated_datasets/EDM-1_one_of_each/me_f10000_maf0.4_EDM-1_01.txt
me_f10000_maf0.4
None
/home/bandheyh/common/survival-lcs/pipeline_copy/simulated_datasets/EDM-1_one_of_each/epi_f100_maf0.2_EDM-1_01.txt
epi_f100_maf0.2
None
/home/bandheyh/common/survival-lcs/pipeline_copy

## HPC Code

In [10]:
import dask
from dask.distributed import Client
from dask_jobqueue import SLURMCluster, LSFCluster, SGECluster

In [11]:
def get_cluster(cluster_type='SLURM', output_path=".", queue='defq', memory=4):
    """Start a dask cluster of the requested kind and return a connected Client.

    Parameters
    ----------
    cluster_type : str
        One of ``'SLURM'``, ``'LSF'``, ``'UGE'`` or ``'Local'``.
    output_path : str
        Base directory; job-queue worker logs go to ``<output_path>/dask_logs/``.
    queue : str
        Scheduler queue name for the job-queue cluster types.
    memory : int
        Per-worker memory in gigabytes.

    Returns
    -------
    dask.distributed.Client
        Client connected to the new cluster. Job-queue clusters are put in
        adaptive mode with at most 400 jobs.

    Raises
    ------
    Exception
        If ``cluster_type`` is not supported, or if the cluster or client
        could not be created. In the latter case the original error is
        chained as the cause and included in the message, rather than being
        hidden behind a generic "Unknown Exception".
    """
    supported_types = ('SLURM', 'LSF', 'UGE', 'Local')
    if cluster_type not in supported_types:
        # Fail before touching any scheduler, and say which value was wrong.
        raise Exception("Unknown or Unsupported Cluster Type: " + repr(cluster_type))

    log_directory = output_path + "/dask_logs/"
    memory_str = str(memory) + "G"

    try:
        if cluster_type == 'Local':
            # A bare Client() starts its own local cluster; wrapping that
            # cluster in a second Client would leave the first one dangling.
            client = Client()
        else:
            if cluster_type == 'SLURM':
                cluster = SLURMCluster(queue=queue,
                                       cores=1,
                                       memory=memory_str,
                                       walltime="24:00:00",
                                       log_directory=log_directory)
            elif cluster_type == 'LSF':
                cluster = LSFCluster(queue=queue,
                                     cores=1,
                                     mem=memory * 1000000000,
                                     memory=memory_str,
                                     walltime="24:00",
                                     log_directory=log_directory)
            else:  # 'UGE'
                cluster = SGECluster(queue=queue,
                                     cores=1,
                                     memory=memory_str,
                                     resource_spec="mem_free=" + memory_str,
                                     walltime="24:00:00",
                                     log_directory=log_directory)
            cluster.adapt(maximum_jobs=400)
            client = Client(cluster)
    except Exception as e:
        print(e)
        raise Exception("Failed to start " + cluster_type + " dask cluster: " + str(e)) from e

    print("Running dask-cluster")
    print(client.scheduler_info())
    return client

In [12]:
# NOTE: despite the name, this holds the dask Client returned by get_cluster
# (default cluster_type is 'SLURM'); worker logs go to homedir/dask_logs/.
cluster = get_cluster(output_path=homedir)

Running dask-cluster
{'type': 'Scheduler', 'id': 'Scheduler-dee33231-73f9-46bd-807e-13c5839221a6', 'address': 'tcp://10.17.134.112:45343', 'services': {'dashboard': 43363}, 'started': 1712617294.8960447, 'workers': {}}


Perhaps you already have a cluster running?
Hosting the HTTP server on port 43363 instead


In [13]:
def run_parallel(model):
    """Run the Survival-LCS pipeline for one queued model object.

    This is the function wrapped by ``dask.delayed`` for each job. It returns
    the ``(ibs, cox_ibs, perm)`` tuple produced by ``run_slcs``. Any exception
    raised by the pipeline propagates unchanged, as before: the previous
    ``try/except`` only re-raised and was followed by an unreachable
    assignment, so it has been removed.
    """
    return run_slcs(model)

In [14]:
# Display the queued pipeline objects (one per model/nfeat/maf combination).
job_obj_list

[<survival_LCS_coxchecks.survivalLCS_coxChecks at 0x1555501cfc40>,
 <survival_LCS_coxchecks.survivalLCS_coxChecks at 0x1555247a0130>,
 <survival_LCS_coxchecks.survivalLCS_coxChecks at 0x1555501c6e50>,
 <survival_LCS_coxchecks.survivalLCS_coxChecks at 0x1555501cfd60>,
 <survival_LCS_coxchecks.survivalLCS_coxChecks at 0x1555501cffa0>,
 <survival_LCS_coxchecks.survivalLCS_coxChecks at 0x1555247a52b0>,
 <survival_LCS_coxchecks.survivalLCS_coxChecks at 0x1555501cfe80>,
 <survival_LCS_coxchecks.survivalLCS_coxChecks at 0x155524751970>,
 <survival_LCS_coxchecks.survivalLCS_coxChecks at 0x1555247a1c70>,
 <survival_LCS_coxchecks.survivalLCS_coxChecks at 0x1555247569d0>,
 <survival_LCS_coxchecks.survivalLCS_coxChecks at 0x155524751eb0>,
 <survival_LCS_coxchecks.survivalLCS_coxChecks at 0x15552475ba30>,
 <survival_LCS_coxchecks.survivalLCS_coxChecks at 0x1555247a5670>,
 <survival_LCS_coxchecks.survivalLCS_coxChecks at 0x15552475eca0>,
 <survival_LCS_coxchecks.survivalLCS_coxChecks at 0x15552475bd

In [15]:
if HPC:
    # One lazy dask task per queued pipeline object. The list is built in a
    # comprehension so the module-level brier_df (handed to every pipeline
    # object when the driver cell runs) is no longer rebound to a Delayed.
    delayed_results = [dask.delayed(run_parallel)(model) for model in job_obj_list]
    # Blocks until every task finishes; results keeps job_obj_list order.
    results = dask.compute(*delayed_results)

In [16]:
# if HPC:
#     results = dask.compute([dask.delayed(run_parallel)(model) for model in job_obj_list])

In [17]:
# Persist the raw per-job results so the tables below can be rebuilt later.
with open(homedir + '/results_cox.pkl', mode='wb') as file:
    pickle.dump(results, file, protocol=pickle.HIGHEST_PROTOCOL)

### Error Checking

In [18]:
# Record the positions of jobs whose result is an exception object instead of
# a result tuple. isinstance catches every exception type (and subclasses),
# not only results whose exact type is ValueError.
error_idxs = list()
for i, result in enumerate(results):
    if isinstance(result, Exception):
        print(i, result)
        error_idxs.append(i)


In [19]:
# Index grid with one cell per job, laid out as (model, nfeat, maf) in the
# same nesting order the jobs were created; only its shape is used below.
# The reshape raises if len(results) does not match the grid size.
arr = np.arange(len(results)).reshape(len(models), len(nfeat), len(maf))

# Convert a 1D index to a 3D index
# and print which (model, nfeat, maf) combination each failed job belongs to.
for x in error_idxs:
    i, j, k = np.unravel_index(x, arr.shape)
    print(models[i], nfeat[j], maf[k])

### IBS Tables

In [20]:
# Combine the first element of every job result (the ``ibs`` frame returned
# by run_slcs) into one wide table, with columns labelled by dataset.
brier_df_list = []
arr = np.arange(len(results)).reshape(len(models), len(nfeat), len(maf))
for x, job_result in enumerate(results):
    # Map the flat job position back to its (model, nfeat, maf) combination.
    i, j, k = np.unravel_index(x, arr.shape)
    print(models[i], nfeat[j], maf[k])
    label = '_'.join([str(models[i]), str(nfeat[j]), str(maf[k])])
    # NOTE(review): if a frame holds several 'mean'/'ci_*' columns they all
    # receive the same new name - confirm the frame layout.
    renamed_columns = {
        "mean": label,
        "ci_lower": label + '_ci_lower',
        "ci_upper": label + '_ci_upper',
    }
    current_ibs = job_result[0].rename(columns=renamed_columns)
    brier_df_list.append(current_ibs)
brier_df = pd.concat(brier_df_list, axis=1, sort=False).reset_index()
brier_df.to_csv(homedir + '/ibs_data_all.csv', index=False)
brier_df

me f100 maf0.2
me f100 maf0.4
me f1000 maf0.2
me f1000 maf0.4
me f10000 maf0.2
me f10000 maf0.4
epi f100 maf0.2
epi f100 maf0.4
epi f1000 maf0.2
epi f1000 maf0.4
epi f10000 maf0.2
epi f10000 maf0.4
het f100 maf0.2
het f100 maf0.4
het f1000 maf0.2
het f1000 maf0.4
het f10000 maf0.2
het f10000 maf0.4
add f100 maf0.2
add f100 maf0.4
add f1000 maf0.2
add f1000 maf0.4
add f10000 maf0.2
add f10000 maf0.4


Unnamed: 0,times,me_f100_maf0.2_cens0.1,me_f100_maf0.2_cens0.1_ci_lower,me_f100_maf0.2_cens0.1_ci_upper,me_f100_maf0.2_cens0.4,me_f100_maf0.2_cens0.4_ci_lower,me_f100_maf0.2_cens0.4_ci_upper,me_f100_maf0.2_cens0.8,me_f100_maf0.2_cens0.8_ci_lower,me_f100_maf0.2_cens0.8_ci_upper,...,add_f10000_maf0.2_cens0.8_ci_upper,add_f10000_maf0.4_cens0.1,add_f10000_maf0.4_cens0.1_ci_lower,add_f10000_maf0.4_cens0.1_ci_upper,add_f10000_maf0.4_cens0.4,add_f10000_maf0.4_cens0.4_ci_lower,add_f10000_maf0.4_cens0.4_ci_upper,add_f10000_maf0.4_cens0.8,add_f10000_maf0.4_cens0.8_ci_lower,add_f10000_maf0.4_cens0.8_ci_upper
0,0.0,,,,,,,,,,...,,,,,,,,,,
1,1.0,0.213422,0.150764,0.287686,0.188425,0.130804,0.256719,0.198263,0.153941,0.250794,...,0.0,0.010006,-0.002644,0.024999,0.005000,0.005000,0.005000,0.002018,-0.006053,0.011583
2,2.0,0.208439,0.158641,0.267461,0.189553,0.152714,0.233215,0.220543,0.206041,0.237732,...,0.0,0.010005,-0.002645,0.024998,0.005000,0.005000,0.005000,0.005063,-0.006011,0.018189
3,3.0,0.216471,0.186533,0.251953,0.206836,0.184525,0.233280,0.257546,0.221498,0.300272,...,0.0,0.010004,-0.002647,0.024998,0.005000,0.005000,0.005000,0.006087,-0.003810,0.017817
4,4.0,0.226400,0.207318,0.249016,0.226758,0.217877,0.237285,0.292960,0.227421,0.370638,...,0.0,0.010003,-0.002648,0.024998,0.005000,0.005000,0.005000,0.006087,-0.003810,0.017817
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,96.0,,,,,,,,,,...,,0.020680,0.009290,0.034180,0.065028,-0.011193,0.157337,0.534299,-0.140229,1.333767
97,97.0,,,,,,,,,,...,,0.018811,0.005251,0.035232,0.043842,0.022488,0.070428,0.585393,-0.262527,1.612274
98,98.0,,,,,,,,,,...,,0.024115,-0.002458,0.058421,,,,1.264741,-0.114296,3.045070
99,99.0,,,,,,,,,,...,,,,,,,,,,


In [30]:
# results[x][1]

In [21]:
# Combine the second element of every job result (the ``cox_ibs`` frame
# returned by run_slcs) into one wide table, with columns labelled by dataset.
brier_df_list = []
arr = np.arange(len(results)).reshape(len(models), len(nfeat), len(maf))
for x, job_result in enumerate(results):
    # Map the flat job position back to its (model, nfeat, maf) combination.
    i, j, k = np.unravel_index(x, arr.shape)
    print(models[i], nfeat[j], maf[k])
    label = '_'.join([str(models[i]), str(nfeat[j]), str(maf[k])])
    # NOTE(review): if a frame holds several 'mean'/'ci_*' columns they all
    # receive the same new name - confirm the frame layout.
    renamed_columns = {
        "mean": label,
        "ci_lower": label + '_ci_lower',
        "ci_upper": label + '_ci_upper',
    }
    current_ibs = job_result[1].rename(columns=renamed_columns)
    brier_df_list.append(current_ibs)
brier_df = pd.concat(brier_df_list, axis=1, sort=False).reset_index()
brier_df.to_csv(homedir + '/cox_ibs_data_all.csv', index=False)
brier_df

me f100 maf0.2
me f100 maf0.4
me f1000 maf0.2
me f1000 maf0.4
me f10000 maf0.2
me f10000 maf0.4
epi f100 maf0.2
epi f100 maf0.4
epi f1000 maf0.2
epi f1000 maf0.4
epi f10000 maf0.2
epi f10000 maf0.4
het f100 maf0.2
het f100 maf0.4
het f1000 maf0.2
het f1000 maf0.4
het f10000 maf0.2
het f10000 maf0.4
add f100 maf0.2
add f100 maf0.4
add f1000 maf0.2
add f1000 maf0.4
add f10000 maf0.2
add f10000 maf0.4


Unnamed: 0,times,me_f100_maf0.2,me_f100_maf0.2_ci_lower,me_f100_maf0.2_ci_upper,me_f100_maf0.2.1,me_f100_maf0.2_ci_lower.1,me_f100_maf0.2_ci_upper.1,me_f100_maf0.2.2,me_f100_maf0.2_ci_lower.2,me_f100_maf0.2_ci_upper.2,...,add_f10000_maf0.2_ci_upper,add_f10000_maf0.4,add_f10000_maf0.4_ci_lower,add_f10000_maf0.4_ci_upper,add_f10000_maf0.4.1,add_f10000_maf0.4_ci_lower.1,add_f10000_maf0.4_ci_upper.1,add_f10000_maf0.4.2,add_f10000_maf0.4_ci_lower.2,add_f10000_maf0.4_ci_upper.2
0,0,,,,,,,,,,...,,,,,,,,,,
1,1,0.214721,0.173951,0.263041,0.188084,0.131211,0.255492,0.212159,0.131769,0.307438,...,,,,,,,,,,
2,2,0.235617,0.198902,0.279133,0.205034,0.155867,0.263309,0.240294,0.146737,0.351180,...,,,,,,,,,,
3,3,0.256035,0.227579,0.289761,0.229189,0.182878,0.284079,0.255152,0.156785,0.371740,...,,,,,,,,,,
4,4,0.268694,0.244105,0.297838,0.244617,0.202983,0.293962,0.254905,0.156702,0.371298,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,96,,,,,,,,,,...,,,,,,,,,,
97,97,,,,,,,,,,...,,,,,,,,,,
98,98,,,,,,,,,,...,,,,,,,,,,
99,99,,,,,,,,,,...,,,,,,,,,,


In [28]:
# Third element (``perm``) of the first job's result: take the per-column
# mean, sort descending, and show it as a single-row table.
# NOTE(review): presumably per-feature permutation importances - confirm.
pd.DataFrame(results[0][2].mean().sort_values(ascending=False)).T

Unnamed: 0,M0P1,N15,N18,N5,N26,N25,N80,N91,N65,N4,...,N46,N93,N23,N81,N68,N61,N87,N39,N16,N53
0,0.073887,0.004361,0.003242,0.003121,0.002799,0.00274,0.00273,0.002716,0.002543,0.002508,...,0.000352,0.000341,0.00027,0.000265,0.000261,0.000206,0.000195,0.000192,0.000184,0.000138
