# SurvivalLCS Experiment Runs

## Import and Setup

### Load packages

In [22]:
import os
import pandas as pd
import numpy as np
import random
import sys
import glob
from datetime import date
import argparse
from random import shuffle
from random import sample
import matplotlib.pyplot as plt
import sys
import shutil
import sksurv
import pickle
from sklearn.impute import KNNImputer
from sklearn.impute import SimpleImputer
from survival_LCS_permutations import survivalLCS_permutations as survivalLCS

In [23]:
# Add the project checkout to the module search path so its modules can be imported.
# NOTE(review): the `survival_LCS_permutations` import in the package-loading cell
# runs before this append, so on a fresh kernel that import presumably only works
# if the module is already reachable some other way (e.g. the working directory)
# - confirm, or move this line above the import.
sys.path.append("/home/bandheyh/common/survival-lcs")

In [24]:
# Switch matplotlib's interactive mode off. The setting is global, so a single
# call is sufficient.
plt.ioff()

<contextlib.ExitStack at 0x15550c87d490>

## Survival-LCS Parameters

### Set file names and necessary parameters

In [25]:
# --- Execution mode ---------------------------------------------------------
# True  -> experiment objects are queued and later evaluated through dask.
# False -> each experiment is run as soon as it has been constructed.
HPC = True

# Flip to True for a reduced configuration (fewer iterations / CV splits).
DEBUG = False

# --- Locations --------------------------------------------------------------
# Root of the pipeline; dataset, model and output paths are built from it.
homedir = "/home/bandheyh/common/survival-lcs/pipeline"

# --- Experiment grid --------------------------------------------------------
# models / nfeat / maf are tokens used to build dataset file names;
# c, iterations and cv_splits are forwarded to the survivalLCS constructor
# (as `c`, `iterations` and `cv`).
if not DEBUG:
    models = ['me', 'epi', 'het', 'add']
    c = [0.1, 0.4, 0.8]
    nfeat = ['f100']  # add f10000 when on cluster
    maf = ['maf0.2', 'maf0.4']
    iterations, cv_splits = 50000, 5
else:
    models = ['me']
    c = [0.1]
    nfeat = ['f100', 'f1000']
    maf = ['maf0.2', 'maf0.4']
    iterations, cv_splits = 1000, 3

# Not referenced by any of the visible cells.
m0s = []

# --- Result containers ------------------------------------------------------
# Two independent, empty brier-score frames handed to every experiment.
brier_df, cox_brier_df = pd.DataFrame(), pd.DataFrame()

# --- Other (non-parameter) state --------------------------------------------
simulated = True  # CHANGE THIS TO FALSE IF RUNNING REAL DATA

# NOTE: run_slcs does not read this module-level flag.
lcs_run = True

# Filled by get_parameters with one "<model>_<nfeat>_<maf>" tag per call.
dtype_list = []

### Import the survival_LCS pipeline

In [26]:
from survival_LCS_permutations import survivalLCS_permutations as survivalLCS

### Run the survival_LCS pipeline

In [27]:
def get_parameters(models, nfeat, maf, i, j, k):
    """Assemble the survivalLCS constructor arguments for one experiment.

    The experiment is selected by ``models[i]``, ``nfeat[j]`` and ``maf[k]``.
    Every path is rooted at the module-level ``homedir``.

    Side effects: appends the ``"<model>_<nfeat>_<maf>"`` tag to the
    module-level ``dtype_list`` and prints the dataset path and the tag.

    Returns the 12-tuple ``(g, mtype, d, m, o, e, brier_df, cox_brier_df,
    m0_path, m0_type, m1_path, m1_type)`` where

    * ``g`` is the simulated dataset file path,
    * ``mtype`` is the model-type label for ``models[i]``,
    * ``d``, ``m``, ``o`` are paths under ``cv_sim_data``,
      ``pickled_cv_models`` and ``sim_lcs_output`` respectively,
    * ``e`` is the fixed string ``"testallsims"``,
    * ``brier_df`` / ``cox_brier_df`` are the module-level frames,
    * ``m0_*`` / ``m1_*`` give the model file path and type of the first and
      (optional, else ``None``) second component.
    """
    model = models[i]
    model_name = str(model)

    # Shared prefixes for everything that lives next to the simulated datasets.
    sim_root = homedir + '/simulated_datasets/EDM-1_one_of_each/'
    model_files = sim_root + 'model_files/'

    # Tag identifying this (model, feature count, MAF) combination.
    dtype = '_'.join(map(str, (model, nfeat[j], maf[k])))
    g = sim_root + dtype + '_EDM-1_01.txt'
    dtype_list.append(dtype)
    print(g)

    d = homedir + '/cv_sim_data/cv_' + model_name + '/' + dtype
    m = homedir + '/pickled_cv_models/' + model_name + '/' + dtype
    o = homedir + '/sim_lcs_output/' + model_name + '/' + dtype

    me_models_file = model_files + 'me_h0.2_' + str(maf[k]) + '_Models.txt'
    epi_models_file = model_files + 'epi_h0.2_' + str(maf[k]) + '_Models.txt'

    # First component: main effect for 'me', 'add' and 'het'; epistasis otherwise.
    if model in ('me', 'add', 'het'):
        m0_path, m0_type = me_models_file, 'main_effect'
    else:
        m0_path, m0_type = epi_models_file, '2way_epistasis'

    # Second component: absent for 'me' and 'epi'; epistasis otherwise.
    if model in ('me', 'epi'):
        m1_path = m1_type = None
    else:
        m1_path, m1_type = epi_models_file, '2way_epistasis'

    # Overall label; anything that is not 'me', 'epi' or 'add' is heterogeneous.
    mtype = 'heterogeneous'
    for code, label in (('me', 'main_effect'),
                        ('epi', '2way_epistasis'),
                        ('add', 'additive')):
        if model == code:
            mtype = label
            break

    e = "testallsims"
    print(dtype)

    return (g, mtype, d, m, o, e, brier_df, cox_brier_df,
            m0_path, m0_type, m1_path, m1_type)



In [28]:
def run_slcs(survivalLCS, lcs_run=True):
    """Run one configured experiment object and return its IBS results.

    Parameters
    ----------
    survivalLCS
        Experiment object exposing ``returnCVModelFiles()`` and
        ``returnIBSresults()``. (Inside this function the parameter name
        shadows the imported class of the same name.)
    lcs_run : bool, default True
        When true, ``returnCVModelFiles()`` is called and the value of
        ``returnIBSresults()`` is returned. When false, neither method is
        called and a notice is printed instead.

    Returns
    -------
    The value returned by ``returnIBSresults()``, or ``None`` when
    ``lcs_run`` is false.
    """
    if not lcs_run:
        print("Datasets generated only")
        return None

    survivalLCS.returnCVModelFiles()
    return survivalLCS.returnIBSresults()

In [29]:
from survival_LCS_permutations import survivalLCS_permutations as survivalLCS
# Build one survivalLCS experiment per (model, feature count, MAF) combination.
#   HPC mode    : the configured objects are queued in job_obj_list for dask.
#   serial mode : each object is run immediately and its IBS result collected.
job_obj_list = list()
# Collected across ALL combinations (previously re-created per model/nfeat
# pair, which discarded earlier results).
brier_df_list = list()
for i in range(0, len(models)):
    for j in range(0, len(nfeat)):
        for k in range(0, len(maf)):
            g, mtype, d, m, o, e, brier_df, cox_brier_df, m0_path, m0_type, m1_path, m1_type = get_parameters(models, nfeat, maf, i, j, k)
            slcs = survivalLCS(g, mtype, d, m, o, e, brier_df, cox_brier_df, m0_path, m0_type, m1_path, m1_type,
                               c = c, iterations = iterations, cv = cv_splits)
            if HPC:
                job_obj_list.append(slcs)
            else:
                # NOTE(review): this rebinds the module-level brier_df that
                # get_parameters returns, so the next constructor receives the
                # previous run's result rather than the empty frame - confirm
                # that this is intended.
                brier_df = run_slcs(slcs)
                brier_df_list.append(brier_df)

if not HPC:
    # Expose the serial results under the name the later cells read, in the
    # same (model, nfeat, maf) order and container type as the dask path.
    results = tuple(brier_df_list)

/home/bandheyh/common/survival-lcs/pipeline/simulated_datasets/EDM-1_one_of_each/me_f100_maf0.2_EDM-1_01.txt
me_f100_maf0.2
None
/home/bandheyh/common/survival-lcs/pipeline/simulated_datasets/EDM-1_one_of_each/me_f100_maf0.4_EDM-1_01.txt
me_f100_maf0.4
None
/home/bandheyh/common/survival-lcs/pipeline/simulated_datasets/EDM-1_one_of_each/epi_f100_maf0.2_EDM-1_01.txt
epi_f100_maf0.2
None
/home/bandheyh/common/survival-lcs/pipeline/simulated_datasets/EDM-1_one_of_each/epi_f100_maf0.4_EDM-1_01.txt
epi_f100_maf0.4
None
/home/bandheyh/common/survival-lcs/pipeline/simulated_datasets/EDM-1_one_of_each/het_f100_maf0.2_EDM-1_01.txt
het_f100_maf0.2
None
/home/bandheyh/common/survival-lcs/pipeline/simulated_datasets/EDM-1_one_of_each/het_f100_maf0.4_EDM-1_01.txt
het_f100_maf0.4
None
/home/bandheyh/common/survival-lcs/pipeline/simulated_datasets/EDM-1_one_of_each/add_f100_maf0.2_EDM-1_01.txt
add_f100_maf0.2
None
/home/bandheyh/common/survival-lcs/pipeline/simulated_datasets/EDM-1_one_of_each/add_f1

## HPC Code

In [30]:
import dask
from dask.distributed import Client
from dask_jobqueue import SLURMCluster, LSFCluster, SGECluster

In [31]:
def get_cluster(cluster_type='SLURM', output_path=".", queue='defq', memory=4, max_jobs=400):
    """Create a dask cluster of the requested type and return a connected Client.

    Parameters
    ----------
    cluster_type : str
        One of ``'SLURM'``, ``'LSF'``, ``'UGE'`` or ``'Local'``.
    output_path : str
        Directory under which ``dask_logs/`` is used as the job log directory
        (not used for ``'Local'``).
    queue : str
        Scheduler queue name passed to the job-queue cluster.
    memory : int
        Memory per job in gigabytes.
    max_jobs : int, default 400
        Upper bound passed to ``cluster.adapt(maximum_jobs=...)`` for the
        job-queue cluster types.

    Returns
    -------
    The ``Client`` connected to the new cluster.

    Raises
    ------
    Exception
        For an unsupported ``cluster_type`` or any failure while creating the
        cluster or client. The original error is printed and attached as
        ``__cause__``.
    """
    try:
        if cluster_type == 'Local':
            # Client() starts its own local cluster; reuse that client instead
            # of opening a second one against the same cluster.
            client = Client()
        else:
            mem_spec = str(memory) + "G"
            log_dir = output_path + "/dask_logs/"
            if cluster_type == 'SLURM':
                cluster = SLURMCluster(queue=queue,
                                       cores=1,
                                       memory=mem_spec,
                                       walltime="24:00:00",
                                       log_directory=log_dir)
            elif cluster_type == "LSF":
                cluster = LSFCluster(queue=queue,
                                     cores=1,
                                     mem=memory * 1000000000,
                                     memory=mem_spec,
                                     walltime="24:00",
                                     log_directory=log_dir)
            elif cluster_type == 'UGE':
                cluster = SGECluster(queue=queue,
                                     cores=1,
                                     memory=mem_spec,
                                     resource_spec="mem_free=" + mem_spec,
                                     walltime="24:00:00",
                                     log_directory=log_dir)
            else:
                raise Exception("Unknown or Unsupported Cluster Type")
            cluster.adapt(maximum_jobs=max_jobs)
            client = Client(cluster)
    except Exception as e:
        print(e)
        # Chain the cause so the real failure stays visible in the traceback.
        raise Exception("Exception: Unknown Exception") from e
    print("Running dask-cluster")
    print(client.scheduler_info())
    return client

In [2]:
# cluster = get_cluster(output_path=homedir)

In [None]:
def run_parallel(model, raise_errors=True):
    """Run one experiment through ``run_slcs``; intended as the dask task body.

    Parameters
    ----------
    model
        Experiment object accepted by ``run_slcs``.
    raise_errors : bool, default True
        When true, any exception raised by the run propagates to the caller.
        When false, the exception object is returned in place of the result,
        so it can be inspected afterwards.

    Returns
    -------
    The value returned by ``run_slcs(model)``, or the caught exception when
    ``raise_errors`` is false.
    """
    try:
        return run_slcs(model)
    except Exception as e:
        if raise_errors:
            raise
        return e

In [None]:
# Display the queued experiment objects (entries are only appended when HPC is True).
job_obj_list

[<survival_LCS_permutations.survivalLCS_permutations at 0x15550db25b80>,
 <survival_LCS_permutations.survivalLCS_permutations at 0x15550db254f0>,
 <survival_LCS_permutations.survivalLCS_permutations at 0x15550db251f0>,
 <survival_LCS_permutations.survivalLCS_permutations at 0x1555126c2910>,
 <survival_LCS_permutations.survivalLCS_permutations at 0x15550db25100>,
 <survival_LCS_permutations.survivalLCS_permutations at 0x15550db16970>,
 <survival_LCS_permutations.survivalLCS_permutations at 0x155524d47310>,
 <survival_LCS_permutations.survivalLCS_permutations at 0x15550db41ac0>]

In [None]:
# Wrap every queued experiment in a lazy dask task and evaluate them together.
# `results` receives one entry per task, in the order of job_obj_list.
# The tasks are built with a comprehension so the module-level `brier_df`
# is not overwritten with a Delayed object.
# NOTE(review): the get_cluster(...) call in the earlier cell is commented out;
# unless a distributed Client is created elsewhere, this presumably runs on
# dask's default scheduler rather than on a job-queue cluster - confirm.
if HPC:
    delayed_results = [dask.delayed(run_parallel)(model) for model in job_obj_list]
    results = dask.compute(*delayed_results)

In [None]:
# if HPC:
#     results = dask.compute([dask.delayed(run_parallel)(model) for model in job_obj_list])

In [None]:
# Serialize `results` to results_perm_final.pkl under homedir, using the
# highest pickle protocol available.
results_pickle_path = homedir + '/results_perm_final.pkl'
with open(results_pickle_path, mode='wb') as file:
    pickle.dump(results, file, protocol=pickle.HIGHEST_PROTOCOL)

### Error Checking

In [None]:
# Record the positions in `results` that hold an exception object instead of a
# result. Any Exception subclass counts, not only an exact ValueError.
error_idxs = list()
for i, outcome in enumerate(results):
    if isinstance(outcome, Exception):
        print(i, outcome)
        error_idxs.append(i)

In [None]:
# Lay the flat result positions out on the (model, nfeat, maf) grid. The
# reshape raises if len(results) does not match the grid size.
grid_shape = (len(models), len(nfeat), len(maf))
arr = np.arange(len(results)).reshape(grid_shape)

# Translate each failing flat position back into its grid coordinates and
# print the corresponding model / feature-count / MAF labels.
for x in error_idxs:
    i, j, k = np.unravel_index(x, grid_shape)
    print(models[i], nfeat[j], maf[k])

### IBS Tables

In [1]:
# Gather the per-experiment IBS tables, join them column-wise and write the
# combined table to perm_ibs_data_all.csv under homedir.
brier_df_list = list()
arr = np.arange(len(results)).reshape(len(models), len(nfeat), len(maf))
grid_shape = arr.shape
for x, current_ibs in enumerate(results):
    # Recover which (model, nfeat, maf) combination this flat position belongs to.
    i, j, k = np.unravel_index(x, grid_shape)
    print(models[i], nfeat[j], maf[k])
    # Disabled: prefix the columns with the combination tag.
    # current_ibs = current_ibs.rename(columns={"mean": str(models[i])+'_'+str(nfeat[j])+'_'+str(maf[k]),
    #                                         "ci_lower": str(models[i])+'_'+str(nfeat[j])+'_'+str(maf[k])+'_ci_lower',
    #                                         "ci_upper": str(models[i])+'_'+str(nfeat[j])+'_'+str(maf[k])+'_ci_upper'})
    brier_df_list.append(current_ibs)

combined = pd.concat(brier_df_list, axis=1, sort=False)
brier_df = combined.reset_index()
#print('brier_df:', brier_df)
brier_df.to_csv(homedir + '/perm_ibs_data_all.csv', index=False)
brier_df