In [8]:
from pyNBS import data_import_tools as dit
from pyNBS import network_propagation as prop
from pyNBS import pyNBS_core as core
from pyNBS import pyNBS_single
from pyNBS import consensus_clustering as cc
from pyNBS import pyNBS_plotting as plot
import time
import pandas as pd
import numpy as np

### Running Parameters

|Param|Help|
|:------|:------|
|sm_data_file|Path to binary mutation matrix file. May be a csv matrix or a 2-column list in which each line holds a sample and one of its mutated genes, separated by a common delimiter.|
|network_path|Path to molecular network file. File must be a table where each line is a gene interaction, columns are separated by a common delimiter, and the first 2 columns represent the interacting proteins.|
|mut_filedelim|Delimiter used in binary mutation file. Default is a tab character.|
|net_filedelim|Delimiter used in network file between columns. Default is a tab character.|
|degree_preserved_shuffle|Determination of whether or not to shuffle the network edges (while preserving node degree) when loading network.|
|node_label_shuffle|Determination of whether or not to shuffle the network node labels (while preserving network topology) when loading network.|
|regularize_network|Determination of whether or not to calculate influence matrix regularization network for regularized NMF step.|
|reg_net_gamma|Adjustment value applied to the graph Laplacian of the propagation network when calculating the influence matrix (via Vandin 2011).|
|k_nearest_neighbors|Number of nearest neighbors to add to the regularization network during construction.|



In [None]:
# Settings for loading the mutation data and the molecular network, and for
# building the optional regularization network.
run_pyNBS_params = {
    # Input files and the delimiters used inside them. The 'path' values are
    # placeholders -- presumably to be replaced with real file locations.
    'sm_data_file': 'path',
    'network_path': 'path',
    'mut_filedelim': '\t',
    'net_filedelim': '\t',
    # Network randomization switches applied when the network is loaded.
    'degree_preserved_shuffle': False,
    'node_label_shuffle': False,
    # Regularization network for the regularized NMF step.
    'regularize_network': False,
    'reg_net_gamma': 0.01,
    'k_nearest_neighbors': 11,
}

### NBS Options

|Option|Type|Default|Help|
|:------|:------|:------|:------|
|pats_subsample_p|float|0.8|Proportion of samples to sub-sample|
|gene_subsample_p|float|0.8|Proportion of mutated genes to sub-sample|
|min_muts|positive_int|10|Minimum number of mutations for a sample to contain after sub-sampling to be considered for further analysis|
|prop_data|bool|True|Determination of whether or not to propagate sub-sampled binary mutation data over given molecular network |
|prop_alpha  |restricted_float|0.7|Propagation constant to use in the propagation of mutations over molecular network. Range is 0.0-1.0 exclusive. |
|prop_symmetric_norm|bool|False|Network degree normalization method for random walk-propagation. |
|qnorm_data  |bool  |True |Determination of whether or not to quantile normalize mutation profiles. |
|netNMF_niter|positive_int|1000|Number of iterations to perform sub-sampling and network-regularized NMF before consensus clustering.|
|netNMF_k  |positive_int  |4 |Number of components to decompose patient mutation data into. Same as the number of clusters of patients to separate data into. |
|netNMF_gamma  |positive_int  |200 |Regularization constant to scale network regularization term in netNMF. |
|netNMF_update_gamma |bool |False |Determination of whether or not to constantly update regularization constant based on balance between reconstruction error and regularization term.|
|netNMF_gamma_factor |positive_int |1 |Scaling factor for regularization constant updates if 'netNMF_update_gamma' is True. |
|netNMF_niter  |positive_int  |250 |Maximum number of multiplicative updates to perform within network-regularized NMF if result does not converge. |
|netNMF_eps  |float  |1e-15  |Epsilon error value to adjust 0 values during multiplicative matrix updates in netNMF |
|netNMF_err_tol  |float  |1e-4  |Minimum error tolerance for matrix reconstruction of original data for convergence. |
|netNMF_err_delta_tol  |float  |1e-4  |Minimum error tolerance for l2 norm of difference in matrix reconstructions between iterations of netNMF for convergence. |

In [9]:
# Options for the NBS run: sub-sampling, network propagation, and
# network-regularized NMF (netNMF).
NBS_options = {
    # Sub-sampling of samples and mutated genes.
    'pats_subsample_p': 0.8,
    'gene_subsample_p': 0.8,
    'min_muts': 10,
    # Propagation of the sub-sampled mutation data over the network.
    'prop_data': True,
    'prop_alpha': 0.7,
    'prop_symmetric_norm': False,
    'qnorm_data': True,
    # Number of sub-sampling + netNMF rounds before consensus clustering.
    # This value used to be written under 'netNMF_niter' as well; because a
    # dict literal keeps only the last value for a repeated key, the 1000 was
    # silently replaced by the 250 below. It now lives under its own key.
    # NOTE(review): the key name 'niter' is an assumption -- confirm it
    # against the code that reads the outer iteration count.
    'niter': 1000,
    # netNMF decomposition settings.
    'netNMF_k': 4,
    'netNMF_gamma': 200,
    'netNMF_update_gamma': False,
    'netNMF_gamma_factor': 1,
    # Maximum number of multiplicative updates within one netNMF run
    # (unchanged: this is the value the original dict ended up with).
    'netNMF_niter': 250,
    'netNMF_eps': 1e-15,
    'netNMF_err_tol': 1e-4,
    'netNMF_err_delta_tol': 1e-4,
}


### Load data

### knnGlap

### Network propagation kernel

### Sub-sampling and netNMF decomposition

### Consensus Clustering