In [34]:
import numpy as np
import pandas as pd

import seaborn as sns

from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import PatternFill
from openpyxl.drawing.image import Image


In [35]:

def find_redundant_pairs(correlation_matrix, threshold):
    """Return the column-label pairs whose absolute correlation reaches ``threshold``.

    Only positions strictly above the main diagonal are inspected, so each
    pair is reported once and a column is never paired with itself. Cells are
    addressed by position, with the row position taken from the column order.

    Parameters
    ----------
    correlation_matrix : pandas.DataFrame
        Matrix of correlation values. Positional row ``i`` is treated as
        belonging to ``columns[i]``.
    threshold : float
        Minimum absolute value (inclusive) for a pair to be reported.

    Returns
    -------
    list of tuple
        ``(columns[i], columns[j])`` with ``i < j``, in row-major scan order.
    """
    labels = correlation_matrix.columns
    n_labels = len(labels)
    return [
        (labels[row_pos], labels[col_pos])
        for row_pos in range(n_labels)
        for col_pos in range(row_pos + 1, n_labels)
        if abs(correlation_matrix.iloc[row_pos, col_pos]) >= threshold
    ]

In [36]:
# Read the correlation matrix from an Excel workbook; the first column supplies the row labels
correlation_matrix_df = pd.read_excel('filt_CorrMatrix-Biweekly-AllFractures-wrt30sec_heatmap.xlsx', index_col=0)
# Preview the first rows
correlation_matrix_df.head()

Unnamed: 0,Type of fracture,position-count,position-duration,position-maximum-distance,Time spent outside (mins),distance travelled from home,motion-sum,motion-ratio,motion-mean,motion-max,...,oks-07,oks-08,oks-09,oks-10,oks-11,oks-12,oks,tug,chairstand,sex
Type of fracture,1.0,0.0,0.0,0.0,0.0,-0.473861,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.778652,0.0,-0.492086
position-count,0.0,1.0,0.953821,0.0,0.948281,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
position-duration,0.0,0.953821,1.0,0.0,0.971872,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
position-maximum-distance,0.0,0.0,0.0,1.0,0.0,0.564417,0.504375,0.0,0.0,0.0,...,0.0,0.0,0.0,0.630652,0.0,0.0,0.0,0.0,0.0,0.0
Time spent outside (mins),0.0,0.948281,0.971872,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
# Find redundant pairs: column pairs whose absolute correlation is at least 0.9
redundant_pairs = find_redundant_pairs(correlation_matrix_df, threshold=0.9)

In [38]:
# Tabulate the pairs: 'Column 1' holds the earlier column of each pair, 'Column 2' the later one
redundant_pairs_df = pd.DataFrame(redundant_pairs, columns=['Column 1', 'Column 2'])
redundant_pairs_df

Unnamed: 0,Column 1,Column 2
0,position-count,position-duration
1,position-count,Time spent outside (mins)
2,position-duration,Time spent outside (mins)
3,motion-sum,motion-mean
4,l2_mean,l2_sum
5,l2_mean,l2_energy
6,l2_mean,l2_skewness
7,l2_mean,l2_rms
8,l2_mean,l2_n_above_mean
9,l2_mean,l2_n_below_mean


In [39]:
# Drop candidates: the second member of every redundant pair, de-duplicated in
# first-seen order. The labels are scalars, so no explode() step is needed
# (explode() would split list-like labels such as tuples into their elements).
columns_to_drop = redundant_pairs_df['Column 2'].unique()

In [40]:
# Inspect the labels selected for removal
columns_to_drop

array(['position-duration', 'Time spent outside (mins)', 'motion-mean',
       'l2_sum', 'l2_energy', 'l2_skewness', 'l2_rms', 'l2_n_above_mean',
       'l2_n_below_mean', 'l2_entropy', 'l2_kurtosis', 'l2_ptp',
       'l2_lineintegral', 'l2_perm_entropy'], dtype=object)

In [41]:
# Remove the redundant features from both the columns and the rows.
# errors='ignore' keeps this cell re-runnable: correlation_matrix_df is
# reassigned here, so on a second run the labels are already gone and a strict
# drop would raise KeyError.
correlation_matrix_df = correlation_matrix_df.drop(
    columns=columns_to_drop, index=columns_to_drop, errors='ignore'
)
# Persist the reduced matrix (values only, no cell colouring).
correlation_matrix_df.to_excel('CorrMatrix-Biweekly-AllFractures-wrt30sec-thres-0.9.xlsx')

In [42]:
# Display the reduced correlation matrix
correlation_matrix_df

Unnamed: 0,Type of fracture,position-count,position-maximum-distance,distance travelled from home,motion-sum,motion-ratio,motion-max,motion-max-timestamp,step-sum,step-ratio,...,oks-07,oks-08,oks-09,oks-10,oks-11,oks-12,oks,tug,chairstand,sex
Type of fracture,1.000000,0.0,0.000000,-0.473861,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.778652,0.0,-0.492086
position-count,0.000000,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
position-maximum-distance,0.000000,0.0,1.000000,0.564417,0.504375,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.000000,0.630652,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
distance travelled from home,-0.473861,0.0,0.564417,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.850522,...,0.0,0.000000,0.000000,0.662698,0.000000,0.526492,0.455753,-0.461513,0.0,0.000000
motion-sum,0.000000,0.0,0.504375,0.000000,1.000000,0.000000,0.806597,0.536856,0.000000,0.000000,...,0.0,0.000000,0.000000,0.639676,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
oks-12,0.000000,0.0,0.000000,0.526492,0.000000,0.000000,0.524933,0.000000,0.646769,0.000000,...,0.0,0.000000,0.576292,0.452222,0.663477,1.000000,0.703311,0.000000,0.0,0.000000
oks,0.000000,0.0,0.000000,0.455753,0.000000,0.000000,0.574827,0.000000,0.563858,0.000000,...,0.0,0.556703,0.834979,0.000000,0.880634,0.703311,1.000000,0.000000,0.0,0.000000
tug,0.778652,0.0,0.000000,-0.461513,0.000000,0.000000,0.000000,0.000000,-0.503741,-0.506976,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.0,-0.550116
chairstand,0.000000,0.0,0.000000,0.000000,0.000000,-0.520324,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.0,0.000000


In [43]:
# Save the correlation matrix to an Excel file with a heatmap-style cell fill
excel_filename = 'CorrMatrix-Biweekly-AllFractures-wrt30sec-thres-0.9-heatmap.xlsx'
sheet_name = 'Correlation Matrix'

# Create a color map for seaborn heatmap
cmap = sns.color_palette("coolwarm", as_cmap=True)

# Write the values and colour the cells through one openpyxl-backed writer.
# Building a fresh Workbook() and saving it under the same filename would
# replace the sheet written by to_excel with an empty, colour-only workbook.
with pd.ExcelWriter(excel_filename, engine='openpyxl') as writer:
    correlation_matrix_df.to_excel(writer, sheet_name=sheet_name)
    ws = writer.sheets[sheet_name]

    # Positional access avoids label lookups and is safe with duplicate labels.
    values = correlation_matrix_df.to_numpy()
    n_rows, n_cols = values.shape

    for row_pos in range(n_rows):
        for col_pos in range(n_cols):
            value = values[row_pos, col_pos]
            # Leave missing correlations unfilled rather than colouring them.
            if pd.isna(value):
                continue
            # Normalize the correlation value from [-1, 1] to the range [0, 1]
            normalized_value = (value + 1) / 2
            # Map the normalized value to RGB color using the seaborn colormap
            rgb_color = cmap(normalized_value)[:3]
            # Convert RGB to aRGB by adding a fully opaque alpha channel (255)
            argb_color = (255,) + tuple(int(255 * x) for x in rgb_color)
            # Convert aRGB to HEX
            hex_color = '%02x%02x%02x%02x' % argb_color
            # Row 1 and column 1 hold the labels, so data cells start at (2, 2)
            cell = ws.cell(row=row_pos + 2, column=col_pos + 2)
            cell.fill = PatternFill(start_color=hex_color, end_color=hex_color, fill_type="solid")
# Leaving the with-block saves and closes the workbook.