### Run Samples Clustering Pipeline in full
* Clone the repositories for Data_Cleanup_Pipeline and Samples_Clustering_Pipeline if necessary.
* Run the cleanup and clustering code in accordance with /data/run_files/samples_clustering_4_notebook.yml
* Display the run timing and results (or outcome).

#### Load IPython and Python libraries, change to this repository's root directory and check for the pipelines.

In [None]:
from IPython.display import display
#         load python library code
import os
import sys
import time

import knpackage.toolbox as kn

sys.path.insert(1, '../src')
import KnowEnG_graphics as gu
import run_file_utility as rfu

#         Clone the pipeline code into the parent directory (../../) if it is not
#         already installed there. The absolute path is passed so the helper does
#         not depend on the notebook's current working directory.
#         NOTE(review): presumably this also covers Data_Cleanup_Pipeline - confirm in run_file_utility.
rfu.git_clone_Samples_Clustering(os.path.abspath('../../'))

#### If either pipeline is not installed then clone it, and switch back to the starting directory.

##### Set the path and name of the edited run_file (all parameters needed to run data cleanup and samples clustering)

In [None]:
# Open the notebook's YAML run file: a copy is made for run_dir with its paths
# set to absolute so the pipelines can be called from a distant directory.
run_dir = '../test/run_dir'
base_path = os.path.abspath('../../')
notebook_run_file = os.path.join(
    base_path,
    'knoweng_dev_tools/data/run_files/samples_clustering_4_notebook.yml',
)
run_file_name = rfu.set_run_file_path_to_abs(notebook_run_file, run_dir)

# Later cells pass only the file-name part (yaml_file) together with run_dir;
# the directory part of the split is not used again.
not_needed_name, yaml_file = os.path.split(run_file_name)
print('Run File Name Is:\n', yaml_file, '\nIn\n', run_file_name)

#### Run the data cleanup pipeline to ensure that the gene names and data values are usable by the samples clustering pipeline.

In [None]:
#                   Run data cleanup on the run file prepared above (run_dir + yaml_file).
#                   The call returns three values: a success flag, a log dictionary and the
#                   cleanup run time; only the flag and the run time are printed here.
SUCCESS, log_dict, data_cleanup_run_time = rfu.run_data_cleanup(run_dir, yaml_file)
print('SUCCESS = ', SUCCESS, 'data_cleanup_run_time', data_cleanup_run_time)

#### If cleaning was successful, this cell enters the names of the cleaned data files into the run file and calls samples clustering.

In [None]:
#                   Update the run file with the names of the cleaned data files - or view the error log
Cleanup_Completed = rfu.update_run_file_post_clean(run_file_name)

#                   Run Samples Clustering only when the run file update reported completion.
#                   The explicit comparison with True is kept on purpose: the helper's return
#                   type is not visible here, so a merely "truthy" value must not start the run.
if Cleanup_Completed == True:
    samples_clustering_run_time = rfu.run_samples_clustering(run_dir, yaml_file)
    print(' Samples Clustering running time: ',samples_clustering_run_time)
else:
    #               Cleanup did not complete - report the returned status instead of clustering.
    print('Cleanup_Completed = ', Cleanup_Completed)

#### Read the output files and display the overview of the clustering.

In [None]:
# Display the (most recent) Samples Clustering results.

# Read the results directory and the number of clusters from the run file,
# and load the consensus result found in that results directory.
results_directory, status_null = rfu.get_run_file_key_data(
    run_file_name, 'results_directory')
consensus_matrix, cc_file_name = rfu.read_consensus_result(results_directory)
number_of_clusters, status_null = rfu.get_run_file_key_data(
    run_file_name, 'number_of_clusters')

# Build the consensus-matrix graphic and show it inline.
I0 = rfu.form_consensus_matrix_graphic(consensus_matrix, number_of_clusters)
display(gu.mat_to_blue(I0))

# The cluster evaluation table is displayed only when one was returned.
cluster_eval_df, ce_file_name = rfu.read_cluster_evaluation_result(results_directory)
if cluster_eval_df is not None:
    display(cluster_eval_df)

#### Show the results directory name and the list of files therein.

In [None]:
#                   Show the contents of the results directory (results_directory is set by the results-display cell above)

rfu.show_result_directory(results_directory)

#### Show the run file contents.

In [None]:
#                   Display the run file: print its path, then show its contents

print(run_file_name, "\nContents:\n")
rfu.display_run_file(run_file_name)

In [None]:
#                   Display the workspace variables (IPython %whos magic - works only inside IPython/Jupyter)
%whos