# hyperparameterTunningResults5
Andrew E. Davidson  
aedavids@ucsc.edu  
4/23/2024

see extraCellularRNA/deconvolutionAnalysis/doc/bestCurratedNotes.md for a discussion about tunning 

**<span style="color:red;background-color:yellow">AEDWIP</span>**  


In [1]:
import ipynbname

# use display() to print an html version of a data frame
# useful if dataFrame output is not generated by last like of cell
from IPython.display import display

import numpy as np
import pandas as pd
# display all columns
pd.set_option('display.max_columns', None)

import pathlib as pl
import pprint as pp
import os
import sys

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# setting the python path allows us to run python scripts from using
# the CLI. 
ORIG_PYTHONPATH = os.environ['PYTHONPATH']

notebookPath = ipynbname.path()
deconvolutionModules = notebookPath.parent.joinpath("../../python")
print("deconvolutionModules: {}\n".format(deconvolutionModules))

PYTHONPATH = ORIG_PYTHONPATH + f':{deconvolutionModules}'
print("PYTHONPATH: {}\n".format(PYTHONPATH))

os.environ["PYTHONPATH"] = PYTHONPATH
PYTHONPATH = os.environ["PYTHONPATH"]
print("PYTHONPATH: {}\n".format(PYTHONPATH))

# to be able to import our local python files we need to set the sys.path
# https://stackoverflow.com/a/50155834
sys.path.append( str(deconvolutionModules) )
print("\nsys.path:\n{}\n".format(sys.path))

deconvolutionModules: /private/home/aedavids/extraCellularRNA/deconvolutionAnalysis/jupyterNotebooks/hyperParameterTunning/../../python

PYTHONPATH: :/private/home/aedavids/extraCellularRNA/src:/private/home/aedavids/extraCellularRNA/deconvolutionAnalysis/jupyterNotebooks/hyperParameterTunning/../../python

PYTHONPATH: :/private/home/aedavids/extraCellularRNA/src:/private/home/aedavids/extraCellularRNA/deconvolutionAnalysis/jupyterNotebooks/hyperParameterTunning/../../python


sys.path:
['/private/home/aedavids/extraCellularRNA/deconvolutionAnalysis/jupyterNotebooks/hyperParameterTunning', '/private/home/aedavids/extraCellularRNA/deconvolutionAnalysis/jupyterNotebooks/hyperParameterTunning', '/private/home/aedavids/extraCellularRNA/src', '/private/home/aedavids/miniconda3/envs/extraCellularRNA/lib/python311.zip', '/private/home/aedavids/miniconda3/envs/extraCellularRNA/lib/python3.11', '/private/home/aedavids/miniconda3/envs/extraCellularRNA/lib/python3.11/lib-dynload', '', '/private/h

In [3]:
from analysis.hyperParameterTunningMetrics import metricsRunner, elifeCols, lungCols
from analysis.hyperParameterTunningMetrics import findSummaryMetricsCols
from analysis.utilities import findAllCategories, findAllGenes
from analysis.utilities import findIntersectionsWithDegree
from analysis.utilities import loadDictionary

In [4]:
root = "/private/groups/kimlab/aedavids/deconvolution/1vsAll-~gender_category"
notebookName = ipynbname.name()
outDir = f'{root}/hyperParameter/{notebookName}.out'
print( f'output dir: \n{outDir}' )
os.makedirs(outDir, exist_ok=True)

output dir: 
/private/groups/kimlab/aedavids/deconvolution/1vsAll-~gender_category/hyperParameter/hyperparameterTunningResults5.out


In [5]:
def evaluate(
    root: str,
    outDir: str,    
    resultsDirs : list[str],
    outFilePrefix : str,
    metric : str,
    stageName : str,
    threshold : float,
    verbose : bool = False,
    ) -> tuple[pd.DataFrame, pd.DataFrame] :
    '''
    returns:
        df : aedwip

        bellowThresholdDF:
            example:
                    stage	        category	               value
                id			
                0	best10GTEx_TCGA	ACC	                        True
                2	best10GTEx_TCGA	Adipose_Visceral_Omentum	True
                5	best10GTEx_TCGA	Artery_Coronary	            True
                7	best10GTEx_TCGA	BLCA	                    True
                8	best10GTEx_TCGA	BRCA	                    True
            
    '''

    retDF, retBellowThresholdDF = metricsRunner(root, outDir, outFilePrefix, resultsDirs, 
                           metric=metric, threshold=threshold, verbose=verbose)

    display( retDF.loc[:, findSummaryMetricsCols(metric) + elifeCols  ] )

    print(f'\n {stageName} classes < {threshold} {metric}')
    selectRowsBellow = retBellowThresholdDF.loc[:, "stage"] == stageName
    
    display( retBellowThresholdDF.loc[selectRowsBellow, 'category'] )

    return (retDF, retBellowThresholdDF)

In [6]:
def evaluateBest10LCFResults(
        threshold : float = 0.7,
        metric : str = 'sensitivity',
        verbose : bool = False):
    bestBes10LCFResultsDirs = [    
        "best10GTEx_TCGA",
        "bestLfc10GTEx_TCGA", 
        "best10CuratedDegree1_ce467ff",
    ]
    
    print(f'metric : {metric} threshold: {threshold}')
    stageName =  "best20FindAllDegree1_wl5" # stageName is part of the outputfile prefix
    best10LFCDF, bestLFCBellowThresholdDF = evaluate(
                                            root=root,
                                            outDir=outDir,
                                            resultsDirs=bestBes10LCFResultsDirs, 
                                            outFilePrefix=stageName,
                                            metric=metric,
                                            stageName=stageName,
                                            threshold=threshold,
                                            verbose=verbose,
                                            )
    
    return(best10LFCDF, bestLFCBellowThresholdDF)

evalBest10LFCSpecificityDF, evalBestLFCSpecificityBellowThresholdDF = \
    evaluateBest10LCFResults(threshold=0.96, metric='specificity')

# if we do not bind the results, jupyter lab will print them.
evalBest10LFCSensitivityDF, evalBestLFCSensitivityBellowThresholdDF = \
    evaluateBest10LCFResults(threshold=0.70, metric='sensitivity', verbose=False)

metric : specificity threshold: 0.96

saving : /private/groups/kimlab/aedavids/deconvolution/1vsAll-~gender_category/hyperParameter/hyperparameterTunningResults5.out/best20FindAllDegree1_wl5.specificity.0.96.csv

saving : /private/groups/kimlab/aedavids/deconvolution/1vsAll-~gender_category/hyperParameter/hyperparameterTunningResults5.out/best20FindAllDegree1_wl5.specificity.bellow.0.96.csv


id,mean_specificity,std_specificity,median_specificity,numGenes,numTypes,numDegree1,numAboveThreshold,LUAD,LUSC,COAD,READ,ESCA,LIHC,STAD,Whole_Blood
best10GTEx_TCGA,0.996361,0.00421,0.997,136,83,30,83,0.999,0.996,0.995,0.994,0.999,1.0,0.996,1.0
bestLfc10GTEx_TCGA,0.996229,0.006842,0.999,702,83,83,82,0.999,0.992,0.994,0.997,1.0,0.999,0.998,0.999
best10CuratedDegree1_ce467ff,0.997855,0.002922,0.999,716,83,83,83,0.998,0.995,0.995,0.996,0.999,1.0,0.999,0.999



 best20FindAllDegree1_wl5 classes < 0.96 specificity


Series([], Name: category, dtype: object)

metric : sensitivity threshold: 0.7

saving : /private/groups/kimlab/aedavids/deconvolution/1vsAll-~gender_category/hyperParameter/hyperparameterTunningResults5.out/best20FindAllDegree1_wl5.sensitivity.0.7.csv

saving : /private/groups/kimlab/aedavids/deconvolution/1vsAll-~gender_category/hyperParameter/hyperparameterTunningResults5.out/best20FindAllDegree1_wl5.sensitivity.bellow.0.7.csv


id,mean_sensitivity,std_sensitivity,median_sensitivity,numGenes,numTypes,numDegree1,numAboveThreshold,LUAD,LUSC,COAD,READ,ESCA,LIHC,STAD,Whole_Blood
best10GTEx_TCGA,0.675398,0.230238,0.703,136,83,30,42,0.453,0.455,0.5,0.536,0.27,0.789,0.347,0.987
bestLfc10GTEx_TCGA,0.673639,0.235245,0.701,702,83,83,42,0.379,0.462,0.684,0.482,0.027,0.659,0.209,0.996
best10CuratedDegree1_ce467ff,0.808831,0.202235,0.871,716,83,83,63,0.841,0.691,0.652,0.607,0.369,0.874,0.409,1.0



 best20FindAllDegree1_wl5 classes < 0.7 sensitivity


Series([], Name: category, dtype: object)

## which classes in bestLfc10GTEx_TCGA under performed?
<span style="color:red;background-color:yellow">aedwip: look at output of foo() looks like we could manually add genes from best10CuratedDegree1_ce467ff" </span>


In [19]:
def foo(
    df : pd.DataFrame,
    stage : str = "bestLfc10GTEx_TCGA",
    ) -> pd.Series:
    '''
    TODO
    '''
    
    selectRows = df.loc[:,'stage'] == stage
    retSeries = df.loc[selectRows, "category"]
    return retSeries



stage = "bestLfc10GTEx_TCGA"
belowSeries = foo(evalBestLFCSensitivityBellowThresholdDF, stage)
print(f'belowSeries.shape : {belowSeries.shape}')

print('\n\n^^^^^^^^^^^^^^^')
display( evalBest10LFCSensitivityDF.loc[:, belowSeries.values] )

belowSeries.shape : (41,)


^^^^^^^^^^^^^^^


id,ACC,Adipose_Visceral_Omentum,Artery_Coronary,BLCA,BRCA,Bladder,Brain_Amygdala,Brain_Anterior_cingulate_cortex_BA24,Brain_Caudate_basal_ganglia,Brain_Frontal_Cortex_BA9,Brain_Hippocampus,Brain_Hypothalamus,Brain_Nucleus_accumbens_basal_ganglia,Brain_Putamen_basal_ganglia,Breast_Mammary_Tissue,CESC,COAD,Colon_Sigmoid,Colon_Transverse,ESCA,Esophagus_Gastroesophageal_Junction,Esophagus_Mucosa,Esophagus_Muscularis,GBM,HNSC,KICH,LIHC,LUAD,LUSC,Lung,MESO,PAAD,READ,SARC,SKCM,STAD,Small_Intestine_Terminal_Ileum,THCA,UCEC,UCS,Vagina
best10GTEx_TCGA,0.604,0.495,0.444,0.332,0.665,0.444,0.56,0.245,0.459,0.563,0.314,0.661,0.503,0.455,0.525,0.401,0.5,0.924,0.523,0.27,0.511,0.955,0.524,0.846,0.625,0.615,0.789,0.453,0.455,0.965,0.736,0.589,0.536,0.123,0.677,0.347,0.393,0.306,0.462,0.647,0.245
bestLfc10GTEx_TCGA,0.521,0.477,0.646,0.48,0.368,0.444,0.56,0.245,0.351,0.627,0.585,0.256,0.653,0.496,0.424,0.22,0.684,0.656,0.564,0.027,0.396,0.538,0.605,0.549,0.663,0.667,0.659,0.379,0.462,0.64,0.491,0.411,0.482,0.058,0.5,0.209,0.661,0.578,0.642,0.235,0.67
best10CuratedDegree1_ce467ff,0.958,0.763,0.681,0.75,0.618,0.778,0.286,0.377,0.514,0.762,0.534,0.785,0.81,0.423,0.562,0.775,0.652,0.942,0.584,0.369,0.636,0.991,0.741,0.945,0.683,0.897,0.874,0.841,0.691,0.994,0.811,0.86,0.607,0.006,0.855,0.409,0.661,0.957,0.811,0.735,0.649


In [18]:
selectBellowSenitivityRows = evalBest10LFCSensitivityDF.index == stage
display( evalBest10LFCSensitivityDF.loc[selectBellowSenitivityRows, belowSeries.values] )
aedwipDF = evalBest10LFCSensitivityDF.loc[selectBellowSenitivityRows, belowSeries.values]
aedwipDF.transpose().sort_values(by=stage)

id,ACC,Adipose_Visceral_Omentum,Artery_Coronary,BLCA,BRCA,Bladder,Brain_Amygdala,Brain_Anterior_cingulate_cortex_BA24,Brain_Caudate_basal_ganglia,Brain_Frontal_Cortex_BA9,Brain_Hippocampus,Brain_Hypothalamus,Brain_Nucleus_accumbens_basal_ganglia,Brain_Putamen_basal_ganglia,Breast_Mammary_Tissue,CESC,COAD,Colon_Sigmoid,Colon_Transverse,ESCA,Esophagus_Gastroesophageal_Junction,Esophagus_Mucosa,Esophagus_Muscularis,GBM,HNSC,KICH,LIHC,LUAD,LUSC,Lung,MESO,PAAD,READ,SARC,SKCM,STAD,Small_Intestine_Terminal_Ileum,THCA,UCEC,UCS,Vagina
bestLfc10GTEx_TCGA,0.521,0.477,0.646,0.48,0.368,0.444,0.56,0.245,0.351,0.627,0.585,0.256,0.653,0.496,0.424,0.22,0.684,0.656,0.564,0.027,0.396,0.538,0.605,0.549,0.663,0.667,0.659,0.379,0.462,0.64,0.491,0.411,0.482,0.058,0.5,0.209,0.661,0.578,0.642,0.235,0.67


Unnamed: 0_level_0,bestLfc10GTEx_TCGA
id,Unnamed: 1_level_1
ESCA,0.027
SARC,0.058
STAD,0.209
CESC,0.22
UCS,0.235
Brain_Anterior_cingulate_cortex_BA24,0.245
Brain_Hypothalamus,0.256
Brain_Caudate_basal_ganglia,0.351
BRCA,0.368
LUAD,0.379


# <span style="color:red;background-color:yellow">AEDWIP </span>
