In [6]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Generate multivariate time series
# Random values drawn uniformly from [0, 1): one row per time step and one
# column per feature, i.e. an array of shape (N_TIMESTEPS, N_FEATURES).
N_TIMESTEPS = 100  # sequence length
N_FEATURES = 10  # number of variables observed at every time step
np.random.seed(0)  # For reproducibility
time_series_data = np.random.rand(N_TIMESTEPS, N_FEATURES)

# Step 2: Build the feature-to-feature adjacency matrix from cosine similarity
# The similarity is computed between rows, so flip the (100, 10) array first:
# each of the 10 features then sits on its own row of 100 observations.
features = np.transpose(time_series_data)
adjacency_matrix = cosine_similarity(features)

In [7]:
# Sparsify the graph in place: for every feature keep only its TOP_K most
# similar *other* features and zero every remaining entry of the row,
# the self-connection on the diagonal included.
TOP_K = 6  # neighbours kept per feature

# A feature cannot have more neighbours than there are other features.
neighbours_to_keep = min(TOP_K, adjacency_matrix.shape[1] - 1)

for i in range(adjacency_matrix.shape[0]):
    # Rank on a float copy of the row with the self-similarity forced to -inf.
    # This guarantees the diagonal never takes a neighbour slot, instead of
    # relying on it happening to be among the largest values of the row.
    neighbour_scores = adjacency_matrix[i].astype(float)
    neighbour_scores[i] = -np.inf
    # Descending order; slicing from the front stays correct even when
    # neighbours_to_keep is 0 (a [-0:] slice would select everything).
    top_indices = np.argsort(neighbour_scores)[::-1][:neighbours_to_keep]
    # Set all other values to zero
    mask = np.ones(adjacency_matrix.shape[1], dtype=bool)
    mask[top_indices] = False
    adjacency_matrix[i, mask] = 0

In [27]:
# Snapshot the current adjacency matrix under a separate name. `.copy()` is
# used instead of plain assignment so that the snapshot is a separate object
# from `adjacency_matrix` rather than a second name for the same one.
full_adjacency_matrix = adjacency_matrix.copy()

In [29]:
# Split the graph into one neighbourhood ("group") per feature and tally how
# often each feature is used across all groups.
#   datasets[i]             - rows of `features` for feature i and its neighbours
#   partial_adj_matrices[i] - matching square sub-block of the adjacency matrix
#   feature_counts[j]       - number of groups that contain feature j
datasets = []
partial_adj_matrices = []

# Size the tally from the matrix itself instead of a hard-coded feature count,
# so it can neither miss features nor report features that do not exist.
num_features = full_adjacency_matrix.shape[0]
feature_counts = {feature_index: 0 for feature_index in range(num_features)}

for i in range(num_features):
    # Neighbours of feature i are the non-zero entries of row i. Feature i is
    # added explicitly so every group contains its own centre; np.union1d
    # returns the indices sorted and without duplicates.
    connected_features_indices = np.union1d(
        np.flatnonzero(full_adjacency_matrix[i]), [i]
    )

    partial_dataset = features[connected_features_indices]
    # np.ix_ picks the same indices on both axes, giving the group's sub-matrix.
    partial_adj_matrix = full_adjacency_matrix[
        np.ix_(connected_features_indices, connected_features_indices)
    ]

    datasets.append(partial_dataset)
    partial_adj_matrices.append(partial_adj_matrix)

    # Count the appearances of each feature (same indices as the group above,
    # so the neighbourhood is computed only once per feature).
    for feature_index in connected_features_indices:
        feature_counts[feature_index] += 1

# Display results
print("Feature appearances across different groups:")
for feature, count in feature_counts.items():
    print(f"Feature {feature + 1}: {count} times")

Feature appearances across different groups:
Feature 1: 8 times
Feature 2: 6 times
Feature 3: 6 times
Feature 4: 5 times
Feature 5: 7 times
Feature 6: 6 times
Feature 7: 9 times
Feature 8: 6 times
Feature 9: 10 times
Feature 10: 7 times
