In [1]:
import os
import sys

import knpackage.toolbox as kn

# Set NOTEBOOK_CLOUD to True when the pipelines are installed under /pipelines;
# otherwise they are looked up relative to the current working directory.
NOTEBOOK_CLOUD = False
if NOTEBOOK_CLOUD:
    pipelines_directory = os.path.abspath('/pipelines')
else:
    pipelines_directory = '../../../'

# Make the notebook setup utilities importable. The membership guard keeps
# sys.path free of duplicate entries when this cell is re-run.
notebooks_src_path = os.path.join(pipelines_directory, 'notebooks_KnowEnG/src')
if notebooks_src_path not in sys.path:
    sys.path.insert(1, notebooks_src_path)
# The star import is kept deliberately: later cells call helpers unqualified
# (setup_test_dir, copy_yaml_with_path_edit, display_run_parameters,
# view_spreadsheet_file_head), presumably all provided by this module.
from notebooks_KnowEnG import *
setup_test_dir(notebook_location=os.getcwd())

# Path locations for the Feature_Prioritization_Pipeline
# (clone the repository in a terminal if it is not installed).
feature_prioritization_path = os.path.join(pipelines_directory, 'Feature_Prioritization_Pipeline')
src_files_path = os.path.join(feature_prioritization_path, 'src')
if src_files_path not in sys.path:
    sys.path.insert(1, src_files_path)
import feature_prioritization_toolbox as feature_tbx

spreadsheet_files_path = os.path.join(feature_prioritization_path, 'data/spreadsheets')
network_files_path = os.path.join(feature_prioritization_path, 'data/networks')
run_files_path = os.path.join(feature_prioritization_path, 'data/run_files')

# Path locations for running and for results.
# WARNING: the contents of these directories will be overwritten without notice.
# NOTE(review): os.getcwd() is read after setup_test_dir(); if that helper changes
# the working directory, these paths end up nested beneath it - confirm intended.
run_directory = os.path.join(os.getcwd(), 'test/run_dir')
results_directory = os.path.join(run_directory, 'results')

# Copy the yaml run files into run_directory, with their paths edited to match.
copy_yaml_with_path_edit(pipeline_directory_full_path=feature_prioritization_path,
                         run_directory=run_directory,
                         results_directory=results_directory)

In [2]:
# List the run-parameter (.yml) files found in run_directory.
print('\tparameter sets available:\n')
# os.listdir() returns names in arbitrary order; sort so the listing is stable.
files_available = sorted(os.listdir(run_directory))
# Match on the file extension rather than a substring, so that names such as
# 'params.yml.bak' are not offered as selectable parameter sets.
yaml_files_available = [f_name for f_name in files_available if f_name.endswith('.yml')]
for f_name in yaml_files_available:
    print(f_name)

	parameter sets available:

BENCHMARK_1_FP_pearson.yml
BENCHMARK_2_FP_bootstrap_pearson.yml
BENCHMARK_3_FP_t_test.yml
BENCHMARK_4_FP_bootstrap_t_test.yml
TEST_1_FP_single_drug_pearson.yml
TEST_2_FP_many_drugs_pearson.yml
TEST_3_FP_single_drug_t_test.yml
TEST_4_FP_many_drugs_t_test.yml
zPhenotype_Expander.yml
zTEMPLATE_FP_BENCHMARKS.yml


## Feature Prioritization Pipeline

In [3]:
# Select and view the parameter set for running the toolbox
# yaml_file_name is looked up in run_directory; presumably it should be one of
# the .yml names printed by the 'parameter sets available' listing.
yaml_file_name = 'TEST_1_FP_single_drug_pearson.yml'
# run_parameters is indexed by string key in later cells
# (e.g. 'method', 'spreadsheet_name_full_path').
run_parameters = kn.get_run_parameters(run_directory, yaml_file_name)
display_run_parameters(run_parameters)

                 run_directory : /Users/mojo/git_clone/dlanier/notebooks_KnowEnG/test/run_dir/test/run_dir
             results_directory : /Users/mojo/git_clone/dlanier/notebooks_KnowEnG/test/run_dir/test/run_dir/results
    spreadsheet_name_full_path : ../../../Feature_Prioritization_Pipeline/data/spreadsheets/TEST_1_feature_sample.tsv
              top_beta_of_sort : 2
                      run_file : TEST_1_FP_single_drug_pearson.yml
      phenotype_name_full_path : ../../../Feature_Prioritization_Pipeline/data/spreadsheets/TEST_1_response_pearson.tsv
           correlation_measure : pearson
             top_gamma_of_sort : 100
                        method : correlation


In [4]:
# View the input spreadsheet and phenotype files
# This cell shows the file named by the 'spreadsheet_name_full_path' run parameter.
view_spreadsheet_file_head(run_parameters['spreadsheet_name_full_path'])

TEST_1_feature_sample.tsv  size: (7, 4)


Unnamed: 0,s1,s2,s3,s4
g1,0,-1,1,1
g2,1,0,-1,1
g3,1,-1,0,-1
g4,-1,1,-1,0
g5,1,-1,0,-1
g6,1,0,-1,1
g7,0,-1,1,1


In [5]:
# View the phenotype file named by the 'phenotype_name_full_path' run parameter.
view_spreadsheet_file_head(run_parameters['phenotype_name_full_path'])

TEST_1_response_pearson.tsv  size: (4, 1)


Unnamed: 0,drug_name
s1,0.714286
s2,0.357143
s3,0.178571
s4,0.089286


In [6]:
"""                         Run the method                                                          """
# Dispatch on the 'method' run parameter to the matching toolbox entry point.
method_name = run_parameters['method']
if method_name == 'correlation':
    feature_tbx.run_correlation(run_parameters)
elif method_name == 'bootstrap_correlation':
    feature_tbx.run_bootstrap_correlation(run_parameters)
else:
    # Fail loudly: otherwise nothing runs and the result-viewing cells fail
    # later with an unrelated-looking error.
    raise ValueError(
        "Unsupported method %r; expected 'correlation' or 'bootstrap_correlation'"
        % (method_name,))

In [15]:
# Locate the result files in results_directory, keyed by file-name prefix.
print('Output files in:', results_directory)
file_name_prefix_list = ['drug_name_correlation',
                         'ranked_features_per_response_correlation',
                         'top_features_per_response_correlation']

# output_dict maps each prefix to the newest matching file name. Selecting by
# modification time makes the choice well-defined when files from more than
# one run are present (os.listdir() returns names in arbitrary order).
output_dict = {}
newest_mtime = {}
for f_name in sorted(os.listdir(results_directory)):
    full_f_name = os.path.join(results_directory, f_name)
    if not os.path.isfile(full_f_name):
        continue
    f_mtime = os.path.getmtime(full_f_name)
    for prefix in file_name_prefix_list:
        if prefix in f_name and f_mtime >= newest_mtime.get(prefix, float('-inf')):
            output_dict[prefix] = f_name
            newest_mtime[prefix] = f_mtime

# Report in the fixed order of file_name_prefix_list, and flag any prefix with
# no match so a later output_dict[...] lookup failure is not a surprise.
for prefix in file_name_prefix_list:
    if prefix in output_dict:
        print('\n%s:\n\t%s'%(prefix, output_dict[prefix]))
    else:
        print('\n%s:\n\t(no matching file found)' % prefix)

Output files in: /Users/mojo/git_clone/dlanier/notebooks_KnowEnG/test/run_dir/test/run_dir/results

ranked_features_per_response_correlation:
	ranked_features_per_response_correlation_pearson_Mon_26_Mar_2018_08_35_56.177772045_download.tsv

drug_name_correlation:
	drug_name_correlation_pearson_Mon_26_Mar_2018_08_35_56.080070018_viz.tsv

top_features_per_response_correlation:
	top_features_per_response_correlation_pearson_Mon_26_Mar_2018_08_35_56.179318904_download.tsv


In [10]:
# View the result file recorded in output_dict under the 'drug_name_correlation' prefix.
view_spreadsheet_file_head(os.path.join(
    results_directory, output_dict['drug_name_correlation']))

drug_name_correlation_pearson_Mon_26_Mar_2018_08_35_56.080070018_viz.tsv  size: (7, 4)


Unnamed: 0_level_0,Feature_ID,quantitative_sorting_score,visualization_score,baseline_score
Response,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
drug_name,g3,0.759134,1.0,0.759134
drug_name,g5,0.759134,1.0,0.759134
drug_name,g1,0.534205,0.555556,-0.534205
drug_name,g7,0.534205,0.555556,-0.534205
drug_name,g2,0.365509,0.222222,0.365509
drug_name,g6,0.365509,0.222222,0.365509
drug_name,g4,0.253045,0.0,-0.253045


In [11]:
# View the result file recorded in output_dict under the
# 'ranked_features_per_response_correlation' prefix.
view_spreadsheet_file_head(os.path.join(
    results_directory, output_dict['ranked_features_per_response_correlation']))

ranked_features_per_response_correlation_pearson_Mon_26_Mar_2018_08_35_56.177772045_download.tsv  size: (7, 1)


Unnamed: 0,drug_name
1,g3
2,g5
3,g1
4,g7
5,g2
6,g6
7,g4


In [12]:
# View the result file recorded in output_dict under the
# 'top_features_per_response_correlation' prefix.
view_spreadsheet_file_head(os.path.join(
    results_directory, output_dict['top_features_per_response_correlation']))

top_features_per_response_correlation_pearson_Mon_26_Mar_2018_08_35_56.179318904_download.tsv  size: (7, 1)


Unnamed: 0,drug_name
g1,1
g2,1
g3,1
g4,1
g5,1
g6,1
g7,1
