In [None]:
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow.keras as tfk
import tensorflow.keras.layers as tfkl

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from scipy.stats import pearsonr
from collections import defaultdict
from tslearn.metrics import dtw
import itertools

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the three arrays shipped with the dataset: the series themselves, one
# category label per series, and the slice bounds used later to trim each series.
data = np.load("../input/time-series-anndl/training_data.npy")
categories = np.load("../input/time-series-anndl/categories.npy")
valid_periods = np.load("../input/time-series-anndl/valid_periods.npy")

# Number of distinct category labels found in `categories`.
n_categories = np.unique(categories).size

# Shapes of the loaded arrays, rendered as the cell output.
data.shape, categories.shape

In [None]:
# Trim every series to its own window: valid_periods[idx] holds the
# (start, end) slice bounds that apply to data[idx].
signals_reshaped = [
    series[valid_periods[idx][0]:valid_periods[idx][1]]
    for idx, series in enumerate(data)
]

In [None]:
# Bucket the trimmed signals by label: the i-th signal goes into the list
# stored under categories[i].
signals_by_category = defaultdict(list)
for cat, sig in zip(categories, signals_reshaped):
    signals_by_category[cat].append(sig)

In [None]:
# Length of the longest trimmed signal.
max_length = max(map(len, signals_reshaped))

In [None]:
# Calculate and plot the correlation matrix for a single category.
# NOTE(review): `category_dfs` is not defined in any cell visible above; it is
# presumably a dict mapping category label -> DataFrame holding that
# category's signals. Confirm the cell that builds it runs before this one.
category = 'F'
category_df = category_dfs[category]

# Pairwise correlation between the columns of the selected category's frame.
correlation_matrix = category_df.corr()

# Plotting the heatmap
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(correlation_matrix, annot=False, cmap='coolwarm', ax=ax)
ax.set_title(f'Correlation Matrix of {category} Class Time Series Signals')
plt.show()

# Deliberately no `del category_dfs[category]` here: the 'F' frame is looked
# up again further down when classes A and F are compared, and deleting it
# made that lookup raise KeyError on a top-to-bottom run.

In [None]:
def split_and_combine_dataframes(df1, df2):
    """Pair the smaller frame with consecutive row-chunks of the larger one.

    The frame with fewer rows (``df1`` on a tie) is kept whole. The other
    frame is cut into consecutive row slices of that many rows (the last
    slice may be shorter), and each slice is stacked on top of the smaller
    frame with ``pd.concat(..., axis=0)``.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        The two frames to combine. Argument order only matters for breaking
        a tie on row count.

    Returns
    -------
    list of pandas.DataFrame
        One combined frame per slice of the larger input, each laid out as
        ``[slice rows..., smaller-frame rows...]``. Index labels are kept as
        they are, so labels shared by both inputs appear twice.

    Raises
    ------
    ValueError
        If the smaller frame has no rows. A zero chunk size cannot be used
        to step through the larger frame.
    """
    # Decide which frame is kept whole and which one is chunked.
    if len(df1) <= len(df2):
        smaller_df, larger_df = df1, df2
    else:
        smaller_df, larger_df = df2, df1

    chunk_rows = len(smaller_df)
    if chunk_rows == 0:
        # range() with a zero step would fail with an opaque message; say why.
        raise ValueError(
            "split_and_combine_dataframes: cannot batch against an empty dataframe"
        )

    # Stack each chunk of the larger frame on top of the full smaller frame.
    return [
        pd.concat([larger_df[start:start + chunk_rows], smaller_df], axis=0)
        for start in range(0, len(larger_df), chunk_rows)
    ]

In [None]:
# Choose the two classes to compare, transpose each class frame, and pair the
# smaller transposed frame with row-chunks of the larger one.
class_a, class_b = 'A', 'F'
a = category_dfs[class_a].transpose()
b = category_dfs[class_b].transpose()
dfs = split_and_combine_dataframes(a, b)

In [None]:
# Draw one correlation heatmap per combined batch in `dfs`.
for batch_df in dfs:
    # The batches were stacked row-wise on transposed frames; transpose back so
    # DataFrame.corr() correlates the stacked entries as columns.
    batch_signals = batch_df.T

    # Calculate the correlation matrix for the current batch
    print("Correlating")
    correlation_matrix = batch_signals.corr()

    # Plotting the heatmap
    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(correlation_matrix, annot=False, cmap='coolwarm', ax=ax)
    # Title follows the classes actually selected instead of a hard-coded "A-F".
    ax.set_title(f'Correlation Matrix of {class_a}-{class_b} intersection Class Time Series Signals')
    plt.show()
    # Release the figure so repeated iterations do not accumulate open figures.
    plt.close(fig)
    print("Done")