# Remove layers using Spearman correlation

## Import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import files  # For downloading images and files

## Functions

In [2]:
def Get_Variables_With_Highest_Correlation(correlation_matrix):
    """
    Find, for every column of a correlation matrix, the row label(s) that hold
    that column's maximum value.

    Each label is reported only once, in the order it is first encountered,
    even when it is the strongest partner of several columns.

    Parameters:
        - correlation_matrix (pd.DataFrame): Correlation matrix. Cells may be
          NaN (e.g. when only the upper triangle was kept); NaN cells are
          ignored when looking for the maximum.

    Returns:
        - variables (list): Unique row labels holding a column maximum. When
          several rows tie for a column's maximum, all of them are included.
    """
    variables = []
    already_listed = set()

    # Iterate through each variable
    for variable in correlation_matrix.columns:
        column = correlation_matrix[variable]
        max_corr = column.max()

        # A column made only of NaN has no maximum, so nothing to report
        if pd.isna(max_corr):
            continue

        # Keep every row that reaches the maximum (ties included), but never
        # list the same variable twice
        for name in column.index[column == max_corr]:
            if name not in already_listed:
                already_listed.add(name)
                variables.append(name)

    return variables

In [3]:
def Show_Correlation_Heatmap(correlation_matrix):
    """
    Draw a correlation matrix as an annotated heatmap and display it.

    Parameters:
        - correlation_matrix (pd.DataFrame): Matrix of values to plot

    Returns:
        - None; the figure is shown with matplotlib
    """
    # Large canvas so that the per-cell annotations have room
    plt.figure(figsize=(30, 24))

    # Each cell is coloured on the 'coolwarm' gradient and labelled with its value
    sns.heatmap(data=correlation_matrix, annot=True, cmap='coolwarm')

    plt.title('Spearman Correlation Heatmap', size=30)
    plt.show()

## Import the data

In [5]:
# Read the prepared data from its CSV file
file_name = 'Duck backward elimination with support vector classification (10 featurs selected) - prepared data' + '.csv'
data = pd.read_csv(file_name)

# The first column of the file is a saved index rather than a layer, so drop it
data = data.drop(columns=data.columns[0])

# The segmented image layer is not part of the correlation analysis
data = data.drop(columns='segmented_image')

## Spearman correlation

In [6]:
# Convert to absolute values
# Pairwise Spearman correlation, keeping only its magnitude (absolute value)
correlation_matrix = data.corr(method='spearman').abs()

# Keep the strict upper triangle only (k=1 excludes the diagonal); every
# other cell becomes NaN so each pair of layers appears once
correlation_matrix = correlation_matrix.where(
    np.triu(np.ones(correlation_matrix.shape, dtype=bool), k=1)
)

In [None]:
# Plot the upper-triangle absolute Spearman correlations as an annotated heatmap
Show_Correlation_Heatmap(correlation_matrix)

In [None]:
# Save the correlation matrix as a CSV file, then hand that file to
# google.colab's files.download (presumably a browser download from the
# Colab runtime -- confirm against the Colab docs)
file_name = 'Duck spearman correlation' + '.csv'

correlation_matrix.to_csv(file_name)
files.download(file_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Remove the layers with the highest correlation

In [None]:
# For every column of the correlation matrix, pick the layer(s) it correlates
# with most strongly, then drop all of those layers from the data
layers_to_remove = Get_Variables_With_Highest_Correlation(correlation_matrix)
data_after_layers_removed = data.drop(columns=layers_to_remove)

In [None]:
# Display the data that remains after the layers were removed (cell output)
data_after_layers_removed

In [None]:
# Recompute and show the Spearman correlation for the remaining layers

# Magnitude of the pairwise Spearman correlation (sign discarded)
correlation_matrix = data_after_layers_removed.corr(method='spearman').abs()

# Blank out the diagonal and the lower triangle so that each pair of layers
# is shown once
correlation_matrix = correlation_matrix.where(
    np.triu(np.ones(correlation_matrix.shape, dtype=bool), k=1)
)

Show_Correlation_Heatmap(correlation_matrix)

## Save data after removing high correlation layers

In [None]:
# Save the data that remains after removing the layers (not a correlation
# matrix) as a CSV file, then hand that file to google.colab's files.download
file_name = 'Duck data after layers removed' + '.csv'

data_after_layers_removed.to_csv(file_name)
files.download(file_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>