In [None]:
# Move the kernel's working directory up one level (to the parent of the current directory).
# NOTE(review): this is a relative move, so re-running the cell climbs one more level each
# time; restart the kernel before re-running. Presumably the parent is the project root — confirm.
%cd ..

: 

In [None]:
import pandas as pd
from readii_2_roqc.utils.loaders import load_dataset_config
from readii.io.writers.correlation_writer import CorrelationWriter
from readii.io.loaders import loadFileToDataFrame
from readii.process.subset import getOnlyPyradiomicsFeatures
from damply import dirs

: 

In [7]:
# Display the damply `dirs` object to inspect the project root and directory layout it resolved.
dirs

DamplyDirs<Structure: NESTED>
Project Root: /home/bhkuser/bhklab/katy/readii_2_roqc
CONFIG       : ├── config
LOGS         : ├── logs
METADATA     : ├── metadata
NOTEBOOKS    : ├── workflow/notebooks
PROCDATA     : ├── data/procdata
RAWDATA      : ├── data/rawdata
RESULTS      : ├── data/results
SCRIPTS      : └── workflow/scripts

In [43]:
def self_correlate(dataset: str,
                   correlation_method:str,
                   extract_method:str,
                   extract_settings:str,
                   readii_permutation:str = "original",
                   readii_region:str = "full",
                   overwrite:bool = False
                   ):
    dataset_config, dataset_name, full_dataset_name = load_dataset_config(dataset)

    # Set up CorrelationWriter from readii
    corr_matrix_writer = CorrelationWriter(root_directory = dirs.RESULTS / full_dataset_name / "correlation" / "self" / extract_method / extract_settings,
                                           filename_format = "{Permutation}_{Region}_{CorrelationMethod}.csv",
                                           overwrite = overwrite,
                                           create_dirs = True
    )

    corr_matrix_outpath = corr_matrix_writer.resolve_path(Permutation=readii_permutation, Region=readii_region, CorrelationMethod=correlation_method)
    if corr_matrix_outpath.exists() and not overwrite:
        print(f"Correlation matrix already exists at {corr_matrix_outpath}. Skipping computation, loading existing matrix.")
        correlations = pd.read_csv(corr_matrix_outpath, index_col=0)
    
    else:
        print(f"Computing correlation matrix for {dataset_name} with method {correlation_method}...")

        features_path = dirs.RESULTS / full_dataset_name / "features" / extract_method / extract_settings / f"{readii_permutation}_{readii_region}_features.csv"
        labelled_features = pd.read_csv(features_path, index_col=0)
        
        match extract_method:
            case "pyradiomics":
                features_only = getOnlyPyradiomicsFeatures(labelled_features)
            case _:
                raise ValueError(f"Unsupported extract_method: {extract_method}")

        # correlations = features.corr(method=correlation_method)
        # corr_matrix_writer.save(correlations,
        #                         Permutation=readii_permutation,
        #                         Region=readii_region,
        #                         CorrelationMethod=correlation_method)

    return features_only

In [44]:
# Run self_correlate on the NSCLC-Radiomics pyradiomics features (original permutation,
# full region, Pearson method); the returned DataFrame is displayed as the cell output.
self_correlate(
    dataset="NSCLC-Radiomics",
    correlation_method="pearson",
    extract_method="pyradiomics",
    extract_settings="pyradiomics_h4h_all_images_features",
    readii_permutation="original",
    readii_region="full",
)

Computing correlation matrix for NSCLC-Radiomics with method pearson...


Unnamed: 0_level_0,original_shape_VoxelVolume,original_shape_MeshVolume,original_shape_SurfaceArea,original_shape_SurfaceVolumeRatio,original_shape_Compactness1,original_shape_Sphericity,original_shape_Maximum3DDiameter,original_shape_Maximum2DDiameterSlice,original_shape_Maximum2DDiameterColumn,original_shape_Maximum2DDiameterRow,...,gradient_gldm_LargeDependenceLowGrayLevelEmphasis,gradient_gldm_LowGrayLevelEmphasis,gradient_gldm_SmallDependenceEmphasis,gradient_gldm_SmallDependenceHighGrayLevelEmphasis,gradient_gldm_SmallDependenceLowGrayLevelEmphasis,gradient_ngtdm_Busyness,gradient_ngtdm_Coarseness,gradient_ngtdm_Complexity,gradient_ngtdm_Contrast,gradient_ngtdm_Strength
SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
LUNG1-001_0000,160563.0,160447.333333,24701.027694,0.153951,0.023318,0.578084,96.803926,95.425364,96.332757,85.158675,...,164.337525,0.462141,0.069586,4.688905,0.005493,253.355908,0.000058,225.501500,0.044098,0.040108
LUNG1-002_0001,359124.0,358974.000000,37293.405301,0.103889,0.028122,0.654979,125.255738,103.942292,117.290238,101.833197,...,287.290876,0.606060,0.045207,3.016503,0.005340,363.586051,0.000028,350.456768,0.012500,0.062913
LUNG1-003_0002,34917.0,34829.125000,9833.771445,0.282343,0.020151,0.524476,84.593144,62.265560,57.384667,83.528438,...,48.008212,0.260010,0.110625,8.065392,0.008842,45.947192,0.000272,269.258417,0.080780,0.090946
LUNG1-004_0003,89010.0,88871.125000,20623.301847,0.232059,0.016930,0.466982,117.732748,70.491134,112.641023,110.675200,...,122.023552,0.406656,0.065306,3.916552,0.007463,133.124355,0.000103,242.047407,0.031529,0.072080
LUNG1-005_0004,81963.0,81828.166667,20402.615234,0.249335,0.015842,0.446752,104.737768,84.646323,77.620873,87.664132,...,123.982765,0.448279,0.078031,8.088570,0.007763,43.827078,0.000092,1894.732040,0.010984,1.097342
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LUNG1-418_0417,98490.0,98383.666667,18842.079292,0.191516,0.021461,0.546979,147.739636,147.231111,104.048066,120.813906,...,324.705421,0.621806,0.052527,5.571872,0.004637,36.026038,0.000120,1251.230761,0.005574,1.565026
LUNG1-419_0418,136794.0,136652.291667,23249.441111,0.170136,0.021748,0.551844,111.049538,98.310732,70.880181,109.128365,...,314.000037,0.592533,0.050871,2.891671,0.004695,200.661082,0.000093,128.417559,0.026284,0.052608
LUNG1-420_0419,20301.0,20256.333333,5000.087673,0.246841,0.032324,0.718699,44.966654,43.011626,43.600459,42.011903,...,240.941069,0.462763,0.068355,3.559006,0.004650,30.251580,0.000701,77.866283,0.064059,0.127918
LUNG1-421_0420,43722.0,43657.125000,9340.691647,0.213956,0.027284,0.641912,69.519781,64.899923,59.665736,59.665736,...,255.084275,0.492037,0.093544,7.274437,0.005265,71.787234,0.000255,163.713274,0.081503,0.094527
