In [1]:
import numpy as np
from scipy.signal import correlate
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.stats import zscore
from matplotlib import pyplot as plt
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import zscore


In [2]:
paired_df = pd.read_csv("data/DatasetClusters/fishes/fish02/fish_02_pairs.csv")

## Filters

In [3]:
import pywt
import numpy as np

def filter_wavelets(dataframe, wavelet, threshold=0.2):
    filtered_data = pd.DataFrame(index=dataframe.index)
    
    for column in dataframe.columns:
        # Wavelet decomposition
        coeffs = pywt.wavedec(dataframe[column], wavelet, mode='per')
        
        # Thresholding
        coeffs_filtered = [pywt.threshold(c, threshold * np.max(c), mode='soft') for c in coeffs]
        
        # Reconstruct the series from modified coefficients
        reconstructed = pywt.waverec(coeffs_filtered, wavelet, mode='per')
        
        # Handle the case where the length might differ
        if len(reconstructed) > len(dataframe):
            reconstructed = reconstructed[:len(dataframe)]
        
        filtered_data[column] = reconstructed
    
    return filtered_data

In [4]:
import numpy as np
import pandas as pd
from scipy.signal import butter, filtfilt

def apply_butterworth_bandpass_filter(df, column_names, lowcut, highcut, fs, order=5):
    # Design the Butterworth bandpass filter
    nyq = 0.5 * fs  # Nyquist Frequency
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')

    # Apply the filter to each specified column in the DataFrame
    for column in column_names:
        df[column] = filtfilt(b, a, df[column].values)
    
    return df

In [5]:
import numpy as np
from scipy.ndimage import gaussian_filter1d

def apply_gaussian_filter(data, sigma):
  
    filtered_data = np.zeros_like(data)
    for i in range(data.shape[0]):
        filtered_data[i] = gaussian_filter1d(data[i], sigma=sigma)
    return filtered_data


# Testing

In [6]:
def get_column_pair(df, pair_number):
    # Calculate indices for the desired pair directly from pair_number using zero-based indexing
    index1 = pair_number * 2
    index2 = index1 + 1
    
    # Check if indices are within the column range
    if index2 < len(df.columns):
        # Return the selected columns
        return df.iloc[:, index1:index2 + 1]
    else:
        # Return an empty DataFrame or raise an error if the pair number is invalid
        return pd.DataFrame()

In [7]:
import plotly.graph_objects as go

def plot_pair(pair):
    # Create a Plotly figure
    fig = go.Figure()

    # Add traces for each column in the DataFrame
    fig.add_trace(go.Scatter(x=pair.index, y=pair.iloc[:, 0], mode='lines', name=pair.columns[0], line=dict(color='red')))
    fig.add_trace(go.Scatter(x=pair.index, y=pair.iloc[:, 1], mode='lines', name=pair.columns[1], line=dict(color='green')))

    # Update layout for a better visualization
    fig.update_layout(
        title='Interactive Plot of the First Two Columns of paired_df',
        xaxis_title='Index',
        yaxis_title='Values',
        legend_title='Variable',
        dragmode='zoom'  # This enables dragging behavior
    )

    # Show the figure
    fig.show()

In [8]:
test = get_column_pair(paired_df,2)
plot_pair(test)

test

Unnamed: 0,cluster_1_red_s1_paired.2,cluster_3_green_s1_paired
0,-0.210637,0.005784
1,0.005327,0.600644
2,0.326035,0.520806
3,0.875854,0.552595
4,0.065255,0.699797
...,...,...
1026,-0.851769,-0.902760
1027,-0.778003,-0.769632
1028,-1.413795,-1.421847
1029,-1.187163,-0.585294


In [9]:
fs = 200  # Sampling frequency in Hz
lowcut = 0.1  # Low cutoff frequency in Hz
highcut = 22 # High cutoff frequency in Hz

#df_filtered = apply_butterworth_bandpass_filter(test, list(test.columns), lowcut, highcut, fs)
df = apply_gaussian_filter(test.values,10)
df_filtered2 = pd.DataFrame(df)
df_filtered2
filtered_df = filter_wavelets(test, 'db8', threshold=0.7)


In [10]:
def analyze_time_series(df, window_size=100, stride=100, threshold=1.0):
    # Initialize a dictionary to hold the intermediate results
    results_dict = {}

    # Iterate through each column in the DataFrame
    for column in df.columns:
        # Lists to store results for the current column
        window_avg = []
        window_changes = []
        interesting_windows = []

        # Calculate the averages for each window
        for start in range(0, len(df), stride):
            end = start + window_size
            if end <= len(df):
                current_window = df[column][start:end]
                current_avg = current_window.mean()
                window_avg.append(current_avg)
                
                # Calculate absolute change if there's a previous window
                if len(window_avg) > 1:
                    change = abs(window_avg[-1] - window_avg[-2])
                    window_changes.append(change)
                    interesting_windows.append(change > threshold)
                else:
                    window_changes.append(None)
                    interesting_windows.append(False)
            else:
                break  # Break if the next window exceeds the data length

        # Store results for this column in the dictionary
        results_dict[f'{column}_Window_Avg'] = window_avg
        results_dict[f'{column}_Changes'] = window_changes
        results_dict[f'{column}_Interesting'] = interesting_windows

    # Convert dictionary to DataFrame for easier manipulation and viewing
    results_df = pd.DataFrame(results_dict)
    
    return results_df

In [11]:
import numpy as np
import pandas as pd

def analyze_time_series_frq(df, window_size=200, stride=100, threshold=1.0):
    # Initialize a dictionary to hold the intermediate results
    results_dict = {}

    # Iterate through each column in the DataFrame
    for column in df.columns:
        # Lists to store results for the current column
        dominant_frequencies = []
        frequency_changes = []
        interesting_windows = []

        # Calculate the FFT and dominant frequency for each window
        for start in range(0, len(df), stride):
            end = start + window_size
            if end <= len(df):
                current_window = df[column][start:end]
                fft_result = np.fft.fft(current_window)
                magnitude = np.abs(fft_result)
                # Compute frequency bins
                frequencies = np.fft.fftfreq(len(current_window), d=1)  # Assuming unit sampling rate

                # Find the index of the peak frequency
                peak_index = np.argmax(magnitude)
                dominant_frequency = frequencies[peak_index]
                dominant_frequencies.append(dominant_frequency)

                # Calculate absolute frequency change if there's a previous window
                if len(dominant_frequencies) > 1:
                    change = abs(dominant_frequencies[-1] - dominant_frequencies[-2])
                    frequency_changes.append(change)
                    interesting_windows.append(change > threshold)
                else:
                    frequency_changes.append(None)
                    interesting_windows.append(False)
            else:
                break  # Break if the next window exceeds the data length

        # Store results for this column in the dictionary
        results_dict[f'{column}_Dom_Freq'] = dominant_frequencies
        results_dict[f'{column}_Freq_Changes'] = frequency_changes
        results_dict[f'{column}_Interesting'] = interesting_windows

    # Convert dictionary to DataFrame for easier manipulation and viewing
    results_df = pd.DataFrame(results_dict)
    
    return results_df


In [12]:
import numpy as np
import pandas as pd

def analyze_time_series_cross_correlation(df, window_size=100, stride=25, max_lag=50):
    # Initialize a dictionary to hold the intermediate results
    results_dict = {}

    # Check if DataFrame has at least two columns
    if len(df.columns) < 2:
        raise ValueError("DataFrame must contain at least two columns for cross-correlation analysis.")

    # Use the first two columns for cross-correlation analysis
    signal1 = df.columns[0]
    signal2 = df.columns[1]

    # Lists to store results
    correlation_uniformity = []

    # Calculate the cross-correlation for each window
    for start in range(0, len(df) - window_size + 1, stride):
        end = start + window_size
        signal1_window = df[signal1][start:end]
        signal2_window = df[signal2][start:end]

        # Compute cross-correlation with lags ranging from -max_lag to max_lag
        cross_corr = [np.corrcoef(signal1_window, np.roll(signal2_window, lag))[0, 1] for lag in range(-max_lag, max_lag + 1)]
        print(cross_corr)
        # Check if all cross-correlation values are the same
        if np.allclose(cross_corr, cross_corr[0], atol=1e-8):
            correlation_uniformity.append(False)  # All values are the same
        else:
            correlation_uniformity.append(True)  # Values differ

    # Store results in the dictionary
    results_dict[f'Correlation_Variability'] = correlation_uniformity

    # Convert dictionary to DataFrame for easier manipulation and viewing
    results_df = pd.DataFrame(results_dict)
    
    return results_df


In [13]:
import numpy as np
import pandas as pd

def analyze_time_series_low_correlation(df, window_size=100,stride=25, max_lag=50, threshold=0.3):
    # Initialize a dictionary to hold the intermediate results
    results_dict = {}

    # Check if DataFrame has at least two columns
    if len(df.columns) < 2:
        raise ValueError("DataFrame must contain at least two columns for cross-correlation analysis.")

    # Use the first two columns for cross-correlation analysis
    signal1 = df.columns[0]
    signal2 = df.columns[1]

    # Lists to store results
    correlation_results = []
    start_indices = []
    end_indices = []

    # Calculate the cross-correlation for each window
    for start in range(0, len(df) - window_size + 1, stride):
        end = start + window_size
        signal1_window = df[signal1][start:end]
        signal2_window = df[signal2][start:end]

        # Compute cross-correlation with lags ranging from -max_lag to max_lag
        cross_corr = [np.corrcoef(signal1_window, np.roll(signal2_window, lag))[0, 1] for lag in range(-max_lag, max_lag + 1)]

        # Check if all cross-correlation values are below the threshold
        if all(corr < threshold for corr in cross_corr):
            correlation_results.append(False)  # Mark window as false if all correlations are low
        else:
            correlation_results.append(True)  # Mark window as true if any correlation exceeds the threshold

        # Record the start and end indices of the current window
        start_indices.append(start)
        end_indices.append(end)

    # Store results in the dictionary
    results_dict[f'Start_Index'] = start_indices
    results_dict[f'End_Index'] = end_indices
    results_dict[f'All_Low_Correlation'] = correlation_results

    # Convert dictionary to DataFrame for easier manipulation and viewing
    results_df = pd.DataFrame(results_dict)
    
    return results_df


In [14]:
test = get_column_pair(paired_df,13)
plot_pair(test)
test = apply_butterworth_bandpass_filter(test,test.columns,0.1,5,80)
#test = filter_wavelets(test)
#results = analyze_time_series_low_correlation(test)
plot_pair(test)
#results



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [15]:
import torch
def generate_sparse_mask( num_patches, num_heads, sparsity=2):
    """Generate a sparse attention mask where each patch attends to exactly `sparsity` number of patches."""
    mask = torch.full((num_heads, num_patches, num_patches), float('-inf'))

    # Assuming a simple scenario where each patch attends to its corresponding patch and the next one in series B
    for h in range(num_heads):
        for i in range(num_patches):
            allowed_patches = [(i -j) % num_patches for j in range(0, sparsity + 1)]
            for j in allowed_patches:
                mask[h, i, j] = 0
     
    
    mask[0,0,num_patches-1] = float('-inf')
    
    mask[0,0,num_patches-2] = float('-inf')
    mask[0,0,1] = 0
    mask[0,0,2] = 0
    mask[0,num_patches-2,0] = float('-inf')
    return mask

In [16]:
import torch

def generate_circular_sparse_mask(num_patches, num_heads, sparsity=1):
    """
    Generate a sparse attention mask where each patch attends to itself and two additional neighbors,
    which could be to the left or right, with wrapping at boundaries.
    """
    # Initialize the mask with negative infinity (blocking attention)
    mask = torch.full((num_heads, num_patches, num_patches), float('-inf'))

    for h in range(num_heads):
        for i in range(num_patches):
            # Allowed patches include the current index
            allowed_patches = [i]

            # Add neighbors with potential wrapping
            # Add sparsity neighbors to the left
            allowed_patches += [(i - j - 1) % num_patches for j in range(sparsity)]
            # Add sparsity neighbors to the right
            allowed_patches += [(i + j + 1) % num_patches for j in range(sparsity)]

            # Set the mask to zero for allowed indices
            for j in allowed_patches:
                mask[h, i, j] = 0

    return mask


In [17]:
import torch

def generate_non_circular_sparse_mask(num_patches, num_heads, sparsity):
    """
    Generate a sparse attention mask where each patch attends to itself and two additional neighbors,
    which could be to the left or right, without wrapping at boundaries.
    """
    # Initialize the mask with negative infinity (blocking attention)
    mask = torch.full((num_heads, num_patches, num_patches), float('-inf'))

    for h in range(num_heads):
        for i in range(num_patches):
            # Allowed patches include the current index
            allowed_patches = [i]

            # Add neighbors to the left, checking boundaries
            for j in range(1, sparsity + 1):
                if i - j >= 0:
                    allowed_patches.append(i - j)

            # Add neighbors to the right, checking boundaries
            for j in range(1, sparsity + 1):
                if i + j < num_patches:
                    allowed_patches.append(i + j)

            # Set the mask to zero for allowed indices
            for j in allowed_patches:
                mask[h, i, j] = 0

    return mask


In [18]:
import torch

def generate_non_circular_sparse_mask2(num_patches, num_heads, sparsity):
    """
    Generate a sparse attention mask where each patch does not attend to itself but only to its neighbors,
    which could be to the left or right, without wrapping at boundaries. Each side of a diagonal will have 
    half the elements set to zero based on the specified sparsity.
    """
    # Initialize the mask with negative infinity (blocking attention)
    mask = torch.full((num_heads, num_patches, num_patches), float('-inf'))

    for h in range(num_heads):
        for i in range(num_patches):
            # Allowed patches initially exclude the current index (i) for zero diagonal
            allowed_patches = []

            # Determine the number of neighbors to include from each side
            num_neighbors = sparsity // 2

            # Add neighbors to the left, checking boundaries
            for j in range(1, num_neighbors + 1):
                if i - j >= 0:
                    allowed_patches.append(i - j)

            # Add neighbors to the right, checking boundaries
            for j in range(1, num_neighbors + 1):
                if i + j < num_patches:
                    allowed_patches.append(i + j)

            # Set the mask to zero for allowed indices
            for j in allowed_patches:
                mask[h, i, j] = 0

    return mask


In [19]:
import torch

def generate_non_circular_sparse_mask3(num_patches, num_heads, sparsity):
    """
    Generate a sparse attention mask where each patch attends to itself and a specified number of its neighbors,
    which could be to the left or right, without wrapping at boundaries. The mask will not include
    circular references (no wrapping around the array edges).
    """
    # Initialize the mask with negative infinity (blocking attention)
    mask = torch.full((num_heads, num_patches, num_patches), float('-inf'))

    for h in range(num_heads):
        for i in range(num_patches):
            # Allowed patches include the current index (self-attention allowed)
            allowed_patches = [i]

            # Determine the number of neighbors to include from each side
            num_neighbors = sparsity // 2

            # Add neighbors to the left, checking boundaries
            for j in range(1, num_neighbors + 1):
                if i - j >= 0:
                    allowed_patches.append(i - j)

            # Add neighbors to the right, checking boundaries
            for j in range(1, num_neighbors + 1):
                if i + j < num_patches:
                    allowed_patches.append(i + j)

            # Set the mask to zero for allowed indices
            for j in allowed_patches:
                mask[h, i, j] = 0

    return mask



In [28]:
a = generate_non_circular_sparse_mask3(12,1,6)
a

tensor([[[0., 0., 0., 0., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
         [0., 0., 0., 0., 0., -inf, -inf, -inf, -inf, -inf, -inf, -inf],
         [0., 0., 0., 0., 0., 0., -inf, -inf, -inf, -inf, -inf, -inf],
         [0., 0., 0., 0., 0., 0., 0., -inf, -inf, -inf, -inf, -inf],
         [-inf, 0., 0., 0., 0., 0., 0., 0., -inf, -inf, -inf, -inf],
         [-inf, -inf, 0., 0., 0., 0., 0., 0., 0., -inf, -inf, -inf],
         [-inf, -inf, -inf, 0., 0., 0., 0., 0., 0., 0., -inf, -inf],
         [-inf, -inf, -inf, -inf, 0., 0., 0., 0., 0., 0., 0., -inf],
         [-inf, -inf, -inf, -inf, -inf, 0., 0., 0., 0., 0., 0., 0.],
         [-inf, -inf, -inf, -inf, -inf, -inf, 0., 0., 0., 0., 0., 0.],
         [-inf, -inf, -inf, -inf, -inf, -inf, -inf, 0., 0., 0., 0., 0.],
         [-inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, 0., 0., 0., 0.]]])

In [21]:
def generate_ts_d_specific_mask(num_time_series, num_segments_per_series, num_heads):
    """Generate an attention mask where each time series only attends to its own segments."""
    # Create a mask filled with negative infinity (blocks attention)
    mask = torch.full((num_heads, num_time_series * num_segments_per_series, num_time_series * num_segments_per_series,256), float('-inf'))
    
    for ts_d in range(num_time_series):
        start_index = ts_d * num_segments_per_series
        end_index = start_index + num_segments_per_series
        mask[:, start_index:end_index, start_index:end_index] = 0
    
    return mask

In [22]:
f = generate_ts_d_specific_mask(2,1,1)
f

tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
          [-inf, -inf, -inf,  ..., -inf, -inf, -inf]],

         [[-inf, -inf, -inf,  ..., -inf, -inf, -inf],
          [0., 0., 0.,  ..., 0., 0., 0.]]]])

In [23]:
a = torch.from_numpy(np.zeros((1,4,3,4)))
x = a.view(1,2, 2, 3, 4)

x

tensor([[[[[0., 0., 0., 0.],
           [0., 0., 0., 0.],
           [0., 0., 0., 0.]],

          [[0., 0., 0., 0.],
           [0., 0., 0., 0.],
           [0., 0., 0., 0.]]],


         [[[0., 0., 0., 0.],
           [0., 0., 0., 0.],
           [0., 0., 0., 0.]],

          [[0., 0., 0., 0.],
           [0., 0., 0., 0.],
           [0., 0., 0., 0.]]]]], dtype=torch.float64)

In [24]:
m = None

if m is not None:
    print(m)

In [25]:
class AttentionLayerCrossSegments(nn.Module):
    def __init__(self, d_model, n_heads, sparsity, d_keys=None, d_values=None, cross_flag=False, dropout=0.1, return_attention=True):
        super(AttentionLayerCrossSegments, self).__init__()

        d_keys = d_keys or (d_model // n_heads)
        d_values = d_values or (d_model // n_heads)
        
        self.return_attention = return_attention
        self.inner_attention_linear = FullAttention(scale=None, attention_dropout=dropout, return_attention=self.return_attention)
        self.inner_attention_nonlinear = FullAttention(scale=None, attention_dropout=dropout, return_attention=self.return_attention)

        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads * 2, d_model)

        self.n_heads = n_heads
        self.sparsity = sparsity
        self.mask = None
        self.cross_flag = cross_flag
        
        if cross_flag:
            self.cross_attention = FullAttention(scale=None, attention_dropout=dropout, return_attention=self.return_attention)
        
    def forward(self, queries, keys, values, num_patches):
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads
        
        queries_projected = self.query_projection(queries).view(B, L, H, -1)
        keys_projected = self.key_projection(keys).view(B, S, H, -1)
        values_projected = self.value_projection(values).view(B, S, H, -1)
    
        if self.cross_flag:
            outC, attention_weightsC = self.cross_attention(
                queries_projected,
                keys_projected,
                values_projected,
                self.mask,
                head_type="linear"
            )
            outC = outC.view(B, L, -1)
            
            return self.out_projection(outC)

        self.mask = self.generate_non_circular_sparse_mask(num_patches, H)
        q = queries_projected[:, :num_patches, :, :]
        k = keys_projected[:, -num_patches:, :, :]
        v = values_projected[:, -num_patches:, :, :]
        L = q.shape[1]
        
        if self.return_attention:
            out_linear, attention_weights_linear = self.inner_attention_linear(
                q[:, :, :H//2, :],
                k[:, :, :H//2, :],
                v[:, :, :H//2, :],
                self.mask[:H//2, :, :],
                head_type="linear"
            )
            
            out_nonlinear, attention_weights_nonlinear = self.inner_attention_nonlinear(
                q[:, :, H//2:, :],
                k[:, :, H//2:, :],
                v[:, :, H//2:, :],
                self.mask[H//2:, :, :],
                head_type="nonlinear"
            )
            
            out_linear = out_linear.view(B, L, -1)
            out_nonlinear = out_nonlinear.view(B, L, -1)
            concatenated = torch.cat([out_linear, out_nonlinear], dim=-1)
            
            return self.out_projection(concatenated), attention_weights_linear, attention_weights_nonlinear
        
        else:
            out_linear = self.inner_attention_linear(
                q[:, :, :H//2, :],
                k[:, :, :H//2, :],
                v[:, :, :H//2, :],
                self.mask[:H//2, :, :],
                head_type="linear"
            )
            
            out_nonlinear = self.inner_attention_nonlinear(
                q[:, :, H//2:, :],
                k[:, :, H//2:, :],
                v[:, :, H//2:, :],
                self.mask[H//2:, :, :],
                head_type="nonlinear"
            )
            
            out_linear = out_linear.view(B, L, -1)
            out_nonlinear = out_nonlinear.view(B, L, -1)
            concatenated = torch.cat([out_linear, out_nonlinear], dim=-1)
            
            return self.out_projection(concatenated)

NameError: name 'nn' is not defined

In [None]:
l = [1,3,4,5]
l[:-3:-1]

In [None]:
for i in range(4):
    for j in range(4):
        print(i)
        break