In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

import sys; sys.path.insert(0, '..')
from scripts.python.readii_analysis.data.helpers import loadFeatureFilesFromImageTypes, loadImageDatasetConfig, savePlotFigure, makeProcessedDataFolders
from scripts.python.readii_analysis.analyze.correlation_functions import getFeatureCorrelations, plotCorrelationHeatmap, plotCorrelationDistribution

# Load config file

In [2]:
# Name of the dataset analyzed throughout this notebook
dataset_name = "RADCURE"

# Load the configuration for this dataset; config files are looked up under ../config
config = loadImageDatasetConfig(dataset_name, config_dir_path="../config")

# Set image feature extraction method

In [None]:
# Feature extraction method to analyze. The value must be one of the
# directories in the procdata/{dataset_name} directory (not clinical).
# To analyze the other available method, set this to "deep_learning" instead.
extraction_method = "radiomics"

print(f"Analyzing {extraction_method} features.")

# Setup Results directory

In [3]:
# Root of the results directory, relative to this notebook. This uses the same
# "../../results" root that the savePlotFigure calls in this notebook write to,
# so the folders created from this path are the ones the figures are saved into.
RESULTS_DATA_PATH = "../../results/"

In [None]:
# Create the results folders for the selected dataset and extraction method,
# using the train/test split setting from the dataset config
makeProcessedDataFolders(
    dataset_name=dataset_name,
    proc_data_path=RESULTS_DATA_PATH,
    data_sources=[extraction_method],
    data_types=[""],
    train_test_split=config["train_test_split"]["split"],
)

# Load all feature sets

In [None]:
# Directory with the extracted training-split feature files for this dataset and method
extracted_image_feature_dir = f"../../procdata/{dataset_name}/{extraction_method}/train_test_split/train_features"

# Load the feature files for the image types listed in the config.
# The result is indexed by feature set name in the cells below.
image_feature_sets = loadFeatureFilesFromImageTypes(
    extracted_feature_dir=extracted_image_feature_dir,
    image_types=config["image_types"],
    drop_labels=True,
)

# List the names that can be chosen for the correlation analysis
print("Feature sets available for analysis:")
for feature_set in image_feature_sets.keys():
    print("  ->", feature_set)

# Correlation Calculations

In [None]:
# Choose the two feature sets to correlate against each other.
# Both names are used as keys into image_feature_sets.
feature_set_vertical = "original"         # shown on the vertical axis (y-axis)
feature_set_horizontal = "shuffled_full"  # shown on the horizontal axis (x-axis)

In [None]:
# Correlation method passed to getFeatureCorrelations
correlation_method = "pearson"

# Look up the two feature sets selected for the comparison
vertical_features = image_feature_sets[feature_set_vertical]
horizontal_features = image_feature_sets[feature_set_horizontal]

# Compute the correlation matrix for the two feature sets with the selected method
feature_correlation_matrix = getFeatureCorrelations(
    vertical_features=vertical_features,
    horizontal_features=horizontal_features,
    method=correlation_method,
    vertical_feature_name=feature_set_vertical,
    horizontal_feature_name=feature_set_horizontal,
)

# Get different correlation matrix combinations

In [None]:
# Split the combined correlation matrix into its quadrants. The slicing below
# relies on the vertical features occupying the first num_vertical_features
# rows/columns of the matrix, followed by the horizontal features.
num_vertical_features = len(vertical_features.columns)
num_horizontal_features = len(horizontal_features.columns)

# Vertical vs. vertical: top left corner of the matrix
vertical_self_correlations = feature_correlation_matrix.iloc[0:num_vertical_features, 0:num_vertical_features]

# Horizontal vs. horizontal: bottom right corner of the matrix
horizontal_self_correlations = feature_correlation_matrix.iloc[num_vertical_features:, num_vertical_features:]

# Vertical vs. horizontal: top right quadrant of the full correlation matrix, so
# rows are vertical features and columns are horizontal features. The horizontal
# block starts at column num_vertical_features (not num_horizontal_features);
# the two offsets only coincide when both sets have the same number of features.
img_type_comparison_correlations = feature_correlation_matrix.iloc[0:num_vertical_features, num_vertical_features:]

# Plotting Correlations

### Vertical Feature Self-Correlation

In [None]:
# Heatmap of the vertical feature set correlated with itself
vertical_self_plot = plotCorrelationHeatmap(
    vertical_self_correlations,
    diagonal=True,
    triangle="lower",
    cmap="nipy_spectral",
    xlabel=feature_set_vertical,
    ylabel=feature_set_vertical,
    title=f"{correlation_method.capitalize()} Correlations for {dataset_name} {extraction_method.capitalize()} Features",
    subtitle=f"{feature_set_vertical} vs. {feature_set_vertical}",
)

# Save the heatmap into the correlation_heatmap_plots results folder
savePlotFigure(
    vertical_self_plot,
    plot_name=f"{dataset_name}_{correlation_method.lower()}_corr_{feature_set_vertical}_v_{feature_set_vertical}_{extraction_method.lower()}_plot.png",
    output_dir_path=f"../../results/{dataset_name}/{extraction_method.lower()}/correlation_heatmap_plots",
)

### Horizontal Feature Self-Correlation

In [None]:
# Heatmap of the horizontal feature set correlated with itself
horizontal_self_plot = plotCorrelationHeatmap(
    correlation_matrix_df=horizontal_self_correlations,
    diagonal=True,
    triangle="lower",
    cmap="nipy_spectral",
    xlabel=feature_set_horizontal,
    ylabel=feature_set_horizontal,
    title=f"{correlation_method.capitalize()} Correlations for {dataset_name} {extraction_method.capitalize()} Features",
    subtitle=f"{feature_set_horizontal} vs. {feature_set_horizontal}",
)

# Save the heatmap into the correlation_heatmap_plots results folder
savePlotFigure(
    horizontal_self_plot,
    plot_name=f"{dataset_name}_{correlation_method.lower()}_corr_{feature_set_horizontal}_v_{feature_set_horizontal}_{extraction_method.lower()}_plot.png",
    output_dir_path=f"../../results/{dataset_name}/{extraction_method.lower()}/correlation_heatmap_plots",
)

### Vertical vs. Horizontal Feature Correlations

In [None]:
# Heatmap of the vertical feature set against the horizontal feature set.
# No triangle argument is passed here, unlike the self-correlation heatmaps.
img_type_comparison_plot = plotCorrelationHeatmap(
    img_type_comparison_correlations,
    diagonal=False,
    cmap="nipy_spectral",
    xlabel=f"{feature_set_horizontal}",
    ylabel=f"{feature_set_vertical}",
    title=f"{correlation_method.capitalize()} Correlations for {dataset_name} {extraction_method.capitalize()} Features",
    subtitle=f"{feature_set_vertical} vs. {feature_set_horizontal}",
)

# Save the heatmap into the correlation_heatmap_plots results folder
savePlotFigure(
    img_type_comparison_plot,
    plot_name=f"{dataset_name}_{correlation_method.lower()}_corr_{feature_set_vertical}_v_{feature_set_horizontal}_{extraction_method.lower()}_plot.png",
    output_dir_path=f"../../results/{dataset_name}/{extraction_method.lower()}/correlation_heatmap_plots",
)

# Distribution of correlations between features

### Vertical Self-correlation

In [None]:
# Histogram of the vertical self-correlation values. The three returned values
# are unpacked as figure, bin values and bin edges (per the names used here).
vertical_corr_dist_fig, vertical_corr_bin_vals, vertical_bin_edges = plotCorrelationDistribution(
    vertical_self_correlations,
    num_bins=450,
    xlabel=f"{correlation_method.capitalize()} Correlation",
    ylabel="Frequency",
    y_upper_bound=None,
    title=f"Distribution of {correlation_method.capitalize()} Correlations for {extraction_method.capitalize()} Features",
    subtitle=f"{feature_set_vertical} vs. {feature_set_vertical}",
)

# NOTE(review): saving is disabled for this figure only, while the horizontal and
# vertical-vs-horizontal distribution figures are saved — confirm this is intended.
# savePlotFigure(vertical_corr_dist_fig,
#                plot_name = f"{dataset_name}_{extraction_method}_{correlation_method}_correlation_distribution_{feature_set_vertical}_vs_{feature_set_vertical}.png",
#                output_dir_path=f"../../results/{dataset_name}/{extraction_method.lower()}/correlation_distribution_plots")

### Horizontal Self-Correlation

In [None]:
# Histogram of the horizontal self-correlation values. The three returned values
# are unpacked as figure, bin values and bin edges (per the names used here).
# The bin edges get a real name instead of `_`, which would otherwise shadow
# IPython's last-output variable for the rest of the session.
horizontal_corr_dist_fig, horizontal_corr_bin_vals, horizontal_bin_edges = plotCorrelationDistribution(
    horizontal_self_correlations,
    num_bins=450,
    xlabel=f"{correlation_method.capitalize()} Correlation",
    ylabel="Frequency",
    y_upper_bound=None,
    title=f"Distribution of {correlation_method.capitalize()} Correlations for {extraction_method.capitalize()} Features",
    subtitle=f"{feature_set_horizontal} vs. {feature_set_horizontal}",
)

# The file name includes the correlation method so that figures produced with
# different methods do not overwrite each other (the heatmap file names in this
# notebook include the method as well).
savePlotFigure(
    horizontal_corr_dist_fig,
    plot_name=f"{dataset_name}_{extraction_method}_{correlation_method}_correlation_distribution_{feature_set_horizontal}_vs_{feature_set_horizontal}.png",
    output_dir_path=f"../../results/{dataset_name}/{extraction_method.lower()}/correlation_distribution_plots",
)

### Vertical vs Horizontal Feature Correlations

In [None]:
# Histogram of the vertical-vs-horizontal correlation values. The correlations
# are rounded to 3 decimal places before being passed to the plotting function.
# The three returned values are unpacked as figure, bin values and bin edges
# (per the names used here).
img_type_comparison_dist_fig, img_type_comparison_corr_bin_vals, img_compare_bin_edges = plotCorrelationDistribution(
    img_type_comparison_correlations.round(3),
    num_bins=450,
    xlabel=f"{correlation_method.capitalize()} Correlation",
    ylabel="Frequency",
    y_upper_bound=None,
    title=f"Distribution of {correlation_method.capitalize()} Correlations for {extraction_method.capitalize()} Features",
    subtitle=f"{feature_set_vertical} vs. {feature_set_horizontal}",
)

# The file name includes the correlation method so that figures produced with
# different methods do not overwrite each other (the heatmap file names in this
# notebook include the method as well).
savePlotFigure(
    img_type_comparison_dist_fig,
    plot_name=f"{dataset_name}_{extraction_method}_{correlation_method}_correlation_distribution_{feature_set_vertical}_vs_{feature_set_horizontal}.png",
    output_dir_path=f"../../results/{dataset_name}/{extraction_method.lower()}/correlation_distribution_plots",
)