In [8]:
import numpy as np
from scipy.signal import correlate
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.stats import zscore
from matplotlib import pyplot as plt
import pandas as pd

In [3]:
def preprocess_signals(signal1, signal2):
    """Center both signals by removing each one's own mean.

    Returns a 2-tuple ``(signal1_centered, signal2_centered)`` in the same
    order as the arguments.
    """
    # Each signal is shifted by its own mean, independently of the other.
    return tuple(sig - np.mean(sig) for sig in (signal1, signal2))

In [4]:

def calculate_cross_correlation(signal1, signal2, num_lags):
    """Cross-correlate two 1-D signals and keep a window of lags around zero.

    Parameters
    ----------
    signal1, signal2 : sequence or array
        One-dimensional signals passed to ``scipy.signal.correlate`` in
        ``'full'`` mode.
    num_lags : int
        Number of lags to keep on each side of zero lag. If it exceeds the
        lags available on a side, the window is clipped to what exists.

    Returns
    -------
    relevant_corrs : ndarray
        Cross-correlation values inside the requested lag window.
    lags : ndarray
        Integer lag for each entry of ``relevant_corrs`` (same length), so
        the two can be plotted against each other directly.
    """
    cross_corr = correlate(signal1, signal2, mode='full')

    # In 'full' mode the first output sample is lag -(len(signal2) - 1), so
    # zero lag sits at this index. For equal-length inputs this equals
    # len(cross_corr) // 2.
    zero_lag_index = len(signal2) - 1

    # Clip the window to the valid index range; a negative start would
    # otherwise wrap around and yield a wrong (usually empty) slice.
    start = max(zero_lag_index - num_lags, 0)
    stop = min(zero_lag_index + num_lags + 1, len(cross_corr))

    relevant_corrs = cross_corr[start:stop]
    # Build the lags from the same bounds as the slice so both line up.
    lags = np.arange(start, stop) - zero_lag_index
    return relevant_corrs, lags


In [5]:
from scipy.stats import zscore

def standardize_cross_correlation(cross_correlations):
    """Return the Z-scores of the given cross-correlation values.

    Delegates to ``scipy.stats.zscore`` with its default arguments.
    """
    return zscore(cross_correlations)

In [6]:
import matplotlib.pyplot as plt

def plot_cross_correlation(lags, cross_correlations):
    """Draw a stem plot of cross-correlation values against their lags.

    Parameters
    ----------
    lags : array-like
        Lag values for the x axis.
    cross_correlations : array-like
        Cross-correlation values for the y axis; must have the same length
        as ``lags``.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    # `use_line_collection` is no longer passed: it was deprecated in
    # matplotlib 3.6 and removed in 3.8, where it raises a TypeError.
    ax.stem(lags, cross_correlations)
    ax.set_xlabel('Lags')
    ax.set_ylabel('Cross-Correlation')
    ax.set_title('Cross-Correlation between Two Signals')
    plt.show()

In [14]:
# Read the fish 02 CSV into a DataFrame and show it as the cell output.
fish2_csv_path = "data/DatasetClusters/fishes/fish02/fish_02_mts.csv"
fish2_df = pd.read_csv(fish2_csv_path)
fish2_df


Unnamed: 0,cluster_1_red_s1,cluster_2_red_s1,cluster_3_red_s1,cluster_4_red_s1,cluster_5_red_s1,cluster_6_red_s1,cluster_7_red_s1,cluster_8_red_s1,cluster_1_green_s1,cluster_2_green_s1,...,cluster_3_green_s2,cluster_4_green_s2,cluster_5_green_s2,cluster_6_green_s2,cluster_7_green_s2,cluster_8_green_s2,cluster_9_green_s2,cluster_10_green_s2,cluster_11_green_s2,cluster_12_green_s2
0,-0.465304,-0.039182,-0.145111,-0.820533,-0.043201,0.065635,-0.133898,-1.221447,0.030856,0.054824,...,-1.189544,-0.075210,-0.147581,0.185621,-0.060962,-0.473671,0.185250,0.030095,-0.209130,-0.144704
1,-0.360442,0.565558,-0.029222,-0.730112,0.269952,0.325601,-0.065495,-0.490533,-0.013391,0.069814,...,-0.395168,-0.056063,-0.006011,0.487558,-0.075057,-0.169431,0.217182,0.116141,-0.176625,0.107027
2,-0.204723,0.364503,0.119252,-0.529757,-0.244755,0.430598,0.164129,-0.567888,0.055926,0.025640,...,-0.048424,-0.048751,0.055521,0.295386,-0.042303,-0.181962,-0.048547,0.121210,-0.249625,0.125422
3,0.062240,0.274655,-0.139845,-0.408598,-0.201634,0.225573,-0.259828,-0.356369,-0.027593,0.141087,...,-0.474683,-0.127960,0.069146,0.442235,-0.217588,-0.262486,0.504183,0.332504,-0.223825,-0.148232
4,-0.331344,0.372020,0.076066,-0.736249,-0.149454,0.482838,-0.297057,-0.240255,0.002930,0.113149,...,-0.634815,-0.091399,0.041809,-0.073000,-0.019247,-0.518799,0.294697,0.039957,-0.372642,0.124027
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1026,-0.776604,-0.709191,0.198898,-0.416232,0.183228,-0.129994,-0.675661,-0.102535,0.145693,0.066204,...,-1.851342,0.175049,-0.142010,-0.974908,-0.414522,-0.456138,0.050827,-0.193651,-0.424621,0.389169
1027,-0.740787,-0.690548,0.037362,-0.570908,0.100399,-0.310062,-0.334169,-0.667639,0.211966,0.029136,...,-1.479970,0.049355,-0.030024,-0.930895,-0.387339,-0.516276,-0.024889,-0.308469,-0.394260,0.269652
1028,-1.049494,-0.800837,0.240324,-0.657034,-0.188345,-0.117036,-0.609144,-0.770036,0.018852,0.047636,...,-1.947750,-0.099697,-0.141195,-0.890876,-0.282241,-0.895772,-0.009157,-0.551247,-0.579031,0.126761
1029,-0.939454,-0.660230,0.058730,-0.215216,0.011012,-0.073275,-0.749173,-0.605966,0.160901,0.074371,...,-1.799051,0.255089,-0.004100,-1.003566,-0.337831,-0.705154,-0.023417,-0.165208,-0.427929,0.236471


In [23]:

# Pick out the column labels whose name contains 'red' or 'green'.
red_columns = list(filter(lambda label: 'red' in label, fish2_df.columns))
green_columns = list(filter(lambda label: 'green' in label, fish2_df.columns))

# One DataFrame per colour, holding only that colour's columns.
df_red = fish2_df.loc[:, red_columns]
df_green = fish2_df.loc[:, green_columns]
df_red

Unnamed: 0,cluster_1_red_s1,cluster_2_red_s1,cluster_3_red_s1,cluster_4_red_s1,cluster_5_red_s1,cluster_6_red_s1,cluster_7_red_s1,cluster_8_red_s1,cluster_1_red_s2,cluster_2_red_s2,cluster_3_red_s2,cluster_4_red_s2,cluster_5_red_s2,cluster_6_red_s2,cluster_7_red_s2,cluster_8_red_s2
0,-0.465304,-0.039182,-0.145111,-0.820533,-0.043201,0.065635,-0.133898,-1.221447,-0.406197,-0.169901,-0.549074,0.067861,0.258649,-0.290043,0.153131,-0.284629
1,-0.360442,0.565558,-0.029222,-0.730112,0.269952,0.325601,-0.065495,-0.490533,-0.037651,0.008507,-0.584747,-0.041764,0.110168,-0.295480,-0.046057,-0.023037
2,-0.204723,0.364503,0.119252,-0.529757,-0.244755,0.430598,0.164129,-0.567888,0.255091,-0.185669,-0.517427,0.069241,-0.081216,-0.110184,0.236548,-0.387574
3,0.062240,0.274655,-0.139845,-0.408598,-0.201634,0.225573,-0.259828,-0.356369,-0.198299,0.117125,-0.377562,0.018571,-0.062791,-0.277613,-0.186753,-0.239590
4,-0.331344,0.372020,0.076066,-0.736249,-0.149454,0.482838,-0.297057,-0.240255,0.064082,0.035815,-0.724730,0.083564,-0.458581,0.185382,-0.178813,0.414888
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1026,-0.776604,-0.709191,0.198898,-0.416232,0.183228,-0.129994,-0.675661,-0.102535,-0.832115,-1.197272,-0.325981,-0.129523,-0.653169,0.398716,0.324269,-0.110567
1027,-0.740787,-0.690548,0.037362,-0.570908,0.100399,-0.310062,-0.334169,-0.667639,-1.238036,-1.186098,-0.907297,-0.028007,-0.327920,0.298482,0.117556,-0.055410
1028,-1.049494,-0.800837,0.240324,-0.657034,-0.188345,-0.117036,-0.609144,-0.770036,-0.973764,-0.829883,-0.547810,-0.035794,-0.499269,0.140116,-0.008294,-0.057421
1029,-0.939454,-0.660230,0.058730,-0.215216,0.011012,-0.073275,-0.749173,-0.605966,-1.030073,-0.851305,-0.767435,-0.072588,-0.581328,0.130048,-0.221229,-0.120232


In [22]:
import re

# Assuming df_red and df_green are your DataFrames

# Function to extract cluster numbers and sort columns based on that
def sorted_columns_by_cluster(df, color):
    """Return the column labels of ``df`` ordered by their cluster number.

    Each label is searched for ``cluster_<number>_<color>`` and the integer
    ``<number>`` is used as the sort key. The sort is stable, so labels that
    share a cluster number keep their original relative order.
    """
    cluster_regex = re.compile(rf"cluster_(\d+)_({color})")

    def cluster_number(label):
        # Group 1 holds the digits that follow "cluster_".
        return int(cluster_regex.search(label).group(1))

    ordered = list(df.columns)
    ordered.sort(key=cluster_number)
    return ordered

# Extract and sort columns for red and green based on cluster number
red_columns_sorted = sorted_columns_by_cluster(df_red, 'red')
green_columns_sorted = sorted_columns_by_cluster(df_green, 'green')

# End the cell on an expression so the sorted green column list is shown
# as the cell output when the notebook is re-run from a fresh kernel.
green_columns_sorted



['cluster_1_green_s1',
 'cluster_1_green_s2',
 'cluster_2_green_s1',
 'cluster_2_green_s2',
 'cluster_3_green_s1',
 'cluster_3_green_s2',
 'cluster_4_green_s1',
 'cluster_4_green_s2',
 'cluster_5_green_s1',
 'cluster_5_green_s2',
 'cluster_6_green_s1',
 'cluster_6_green_s2',
 'cluster_7_green_s1',
 'cluster_7_green_s2',
 'cluster_8_green_s1',
 'cluster_8_green_s2',
 'cluster_9_green_s1',
 'cluster_9_green_s2',
 'cluster_10_green_s1',
 'cluster_10_green_s2',
 'cluster_11_green_s1',
 'cluster_11_green_s2',
 'cluster_12_green_s1',
 'cluster_12_green_s2']

In [25]:
import itertools
# Column labels of the red and green frames.
red_columns = df_red.columns
green_columns = df_green.columns

# Every (red, green) label pairing: the red label varies slowest, the green
# label fastest, which is the same order itertools.product yields.
all_pairs = [
    (red_label, green_label)
    for red_label in red_columns
    for green_label in green_columns
]

all_pairs


[('cluster_1_red_s1', 'cluster_1_green_s1'),
 ('cluster_1_red_s1', 'cluster_2_green_s1'),
 ('cluster_1_red_s1', 'cluster_3_green_s1'),
 ('cluster_1_red_s1', 'cluster_4_green_s1'),
 ('cluster_1_red_s1', 'cluster_5_green_s1'),
 ('cluster_1_red_s1', 'cluster_6_green_s1'),
 ('cluster_1_red_s1', 'cluster_7_green_s1'),
 ('cluster_1_red_s1', 'cluster_8_green_s1'),
 ('cluster_1_red_s1', 'cluster_9_green_s1'),
 ('cluster_1_red_s1', 'cluster_10_green_s1'),
 ('cluster_1_red_s1', 'cluster_11_green_s1'),
 ('cluster_1_red_s1', 'cluster_12_green_s1'),
 ('cluster_1_red_s1', 'cluster_1_green_s2'),
 ('cluster_1_red_s1', 'cluster_2_green_s2'),
 ('cluster_1_red_s1', 'cluster_3_green_s2'),
 ('cluster_1_red_s1', 'cluster_4_green_s2'),
 ('cluster_1_red_s1', 'cluster_5_green_s2'),
 ('cluster_1_red_s1', 'cluster_6_green_s2'),
 ('cluster_1_red_s1', 'cluster_7_green_s2'),
 ('cluster_1_red_s1', 'cluster_8_green_s2'),
 ('cluster_1_red_s1', 'cluster_9_green_s2'),
 ('cluster_1_red_s1', 'cluster_10_green_s2'),
 ('clu