In [1]:
import numpy as np
import pandas as pd
from scipy.signal import correlate
import matplotlib.pyplot as plt
from scipy.sparse import lil_matrix
from itertools import product
from itertools import combinations
from datetime import datetime, timedelta
from multiprocessing import Pool


# Read the time-series table from disk. The 'date' column is removed so that
# `data` holds only the series columns used by the functions below.
data_with_date = pd.read_csv('temporal_series_1.csv')
data = data_with_date.drop(columns='date')

# Analysis parameters.
# max_tau is the lag bound handed to time_delayed_cross_correlation by the
# matrix-building functions below.
max_tau = 15
# Same value as the default of the `threshold` argument of
# time_delayed_cross_correlation.
threshold = 0.05

# Maximum of the time-lagged cross-correlation between two columns of a DataFrame
def time_delayed_cross_correlation(df, col1, col2, max_tau, threshold=0.05):
    """
    Return the largest lagged cross-correlation between two time series.

    For every integer lag ``tau`` in ``[-max_tau, max_tau]`` the sample
    ``df[col1][t]`` is paired with ``df[col2][t + tau]`` over the samples
    where both exist. Each series is centred with its full-series mean and
    the summed products are divided by ``std1 * std2 * overlap_length``,
    where the standard deviations are those of the full series.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    col1 (str): Name of the first time series column.
    col2 (str): Name of the second time series column.
    max_tau (int): Maximum absolute lag, in samples. Lags whose magnitude is
        greater than or equal to the series length have no overlapping
        samples; they are not evaluated and count as a correlation of 0.
    threshold (float): Minimum value the maximum correlation must reach.

    Returns:
    float: The maximum cross-correlation if it is >= threshold, otherwise 0.
        0 is also returned when either series is empty or constant.
    """
    series_i = df[col1].values
    series_j = df[col2].values
    n = len(series_i)

    # Nothing to correlate on an empty series
    if n == 0:
        return 0

    std_i = series_i.std()
    std_j = series_j.std()

    # A constant series has zero spread; the normalisation would divide by zero
    if std_i == 0 or std_j == 0:
        return 0

    # Centre once instead of re-subtracting the means for every lag
    centered_i = series_i - series_i.mean()
    centered_j = series_j - series_j.mean()
    scale = std_i * std_j

    # One slot per requested lag; slots for lags that are never evaluated stay 0
    cross_corrs = np.zeros(2 * max_tau + 1)

    # Only lags with at least one overlapping sample can be evaluated.
    # Without this clamp, |tau| > n produces slices of different lengths
    # and NumPy raises a broadcasting error.
    usable_tau = min(max_tau, n - 1)

    for tau in range(-usable_tau, usable_tau + 1):
        overlap = n - abs(tau)
        if tau < 0:
            # series_i[t] is paired with the earlier sample series_j[t + tau]
            numerator = np.sum(centered_i[-tau:] * centered_j[:overlap])
        else:
            # series_i[t] is paired with the later (or same) sample series_j[t + tau]
            numerator = np.sum(centered_i[:overlap] * centered_j[tau:])

        denominator = scale * overlap
        # The product of two tiny standard deviations can underflow to zero
        cross_corrs[tau + max_tau] = numerator / denominator if denominator != 0 else 0

    # Keep the best lag only if it reaches the threshold
    max_corr = np.max(cross_corrs)
    return max_corr if max_corr >= threshold else 0


# Full pairwise matrix over every column of df (the original author notes that
# this is not computable in practice for the complete dataset)
def ccr_matrix(df):
    """
    Build the square matrix of maximum lagged cross-correlations for all columns.

    Args:
    df (pandas.DataFrame): Dataframe whose columns are the time series.

    Returns:
    pandas.DataFrame: Frame indexed and labelled by ``df.columns``; entry
        (row, col) is ``time_delayed_cross_correlation(df, row, col, max_tau)``
        using the module-level ``max_tau``.
    """
    labels = df.columns

    # Start from an all-NaN square frame; every cell is overwritten below
    adjacency_matrix = pd.DataFrame(np.nan, index=labels, columns=labels)

    # Visit every ordered pair of columns, row by row
    for row_label, col_label in product(labels, repeat=2):
        adjacency_matrix.loc[row_label, col_label] = time_delayed_cross_correlation(
            df, row_label, col_label, max_tau
        )

    return adjacency_matrix

# Compute one square block of the matrix (a block on the diagonal) and save it
def compute_submatrix(df, columns, filename):
    """
    Compute the pairwise correlation block for one group of columns and save it.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    columns: Column labels used for both the rows and the columns of the block.
    filename (str): Path of the CSV file the block is written to.

    Returns:
    pandas.DataFrame: The block; entry (row, col) is
        ``time_delayed_cross_correlation(df, row, col, max_tau)`` using the
        module-level ``max_tau``.
    """
    block = pd.DataFrame(index=columns, columns=columns)

    # Fill every ordered pair, row by row
    for row_label in columns:
        for col_label in columns:
            block.loc[row_label, col_label] = time_delayed_cross_correlation(
                df, row_label, col_label, max_tau
            )

    block.to_csv(filename)
    print(f"submatrix {filename} has been saved")
    return block

# Compute a block of the matrix that is not on the diagonal and save it
def compute_cross_group_matrix(df, group_a, group_b, filename):
    """
    Compute the correlation block between two groups of columns and save it.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    group_a: Column labels used as the rows of the block.
    group_b: Column labels used as the columns of the block.
    filename (str): Path of the CSV file the block is written to.

    Returns:
    pandas.DataFrame: The block; entry (a, b) is
        ``time_delayed_cross_correlation(df, a, b, max_tau)`` using the
        module-level ``max_tau``.
    """
    cross_matrix = pd.DataFrame(index=group_a, columns=group_b)

    # Every column of group_a against every column of group_b
    for col1, col2 in product(group_a, group_b):
        cross_matrix.loc[col1, col2] = time_delayed_cross_correlation(df, col1, col2, max_tau)

    cross_matrix.to_csv(filename)
    # Message now has the same "submatrix <file>" spacing as compute_submatrix
    print(f"submatrix {filename} has been saved")
    return cross_matrix

# To compute the submatrices, the columns are first split into consecutive groups
def divide_into_groups(df, group_sizes):
    """
    Split the column labels of df into consecutive groups of the given sizes.

    Args:
    df (pandas.DataFrame): Dataframe whose column labels are split.
    group_sizes: Iterable of group lengths, taken in order from the first column.

    Returns:
    list: One list of column labels per entry of ``group_sizes``. A group is
        shorter (possibly empty) when the sizes run past the last column;
        columns beyond the sum of the sizes are not included.
    """
    all_columns = df.columns.tolist()

    # Running boundaries: group k spans all_columns[bounds[k]:bounds[k + 1]]
    bounds = [0]
    for width in group_sizes:
        bounds.append(bounds[-1] + width)

    return [all_columns[lo:hi] for lo, hi in zip(bounds, bounds[1:])]

# Change of plan: the pieces of the matrix are computed one at a time.
# The columns are split by position into three groups.
group_1 = data.columns[:1000]
group_2 = data.columns[1000:2000]
group_3 = data.columns[2000:3104]

# Diagonal block for the first group
filename_sub = "1_sub_mat_1.csv"
compute_submatrix(data, group_1, filename_sub)

# Off-diagonal block between groups 1 and 3 (left disabled):
#filename_cross = "cross_matrix_1_3.csv"
#compute_cross_group_matrix(data, group_1, group_3, filename_cross)


submatrix 1_sub_mat_1.csv has been saved


Unnamed: 0,1001.0,1003.0,1005.0,1007.0,1009.0,1011.0,1013.0,1015.0,1017.0,1019.0,...,21063.0,21065.0,21067.0,21069.0,21071.0,21073.0,21075.0,21077.0,21079.0,21081.0
1001.0,1.0,0.529642,0.534112,0.554562,0.627929,0.369869,0.259625,0.480949,0.258392,0.535817,...,0.328997,0.272051,0.531967,0.452233,0.276971,0.487837,0.457578,0.246939,0.457867,0.145796
1003.0,0.529642,1.0,0.503745,0.607159,0.798915,0.181106,0,0.835932,0.32273,0.622007,...,0.3136,0.222267,0.587643,0.495421,0.374177,0.289242,0.413487,0.078278,0.511405,0.173159
1005.0,0.534112,0.503745,1.0,0.565358,0.599503,0.382276,0.376406,0.521075,0.176476,0.533845,...,0.313507,0.338253,0.553554,0.327193,0.278934,0.348219,0.478909,0.355847,0.466281,0.260364
1007.0,0.554562,0.607159,0.565358,1.0,0.59114,0.265299,0.161823,0.555181,0.315312,0.55514,...,0.351542,0.255377,0.494281,0.58044,0.343274,0.349165,0.384711,0.268746,0.468538,0.196867
1009.0,0.627929,0.798915,0.599503,0.59114,1.0,0.187616,0.053187,0.757073,0.354104,0.571054,...,0.45252,0.364955,0.579857,0.473188,0.36576,0.337289,0.413025,0.236412,0.618561,0.234229
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21073.0,0.487837,0.289242,0.348219,0.349165,0.337289,0.455812,0.266825,0.213562,0.297029,0.368022,...,0.280799,0.511762,0.394396,0.481714,0.25181,1.0,0.368647,0.559194,0.432696,0.223124
21075.0,0.457578,0.413487,0.478909,0.384711,0.413025,0.4186,0.331682,0.370991,0.235755,0.352487,...,0.254593,0.359932,0.501039,0.452233,0.250161,0.368647,1.0,0.298697,0.469301,0.146976
21077.0,0.246939,0.078278,0.355847,0.268746,0.236412,0.54452,0.393887,0.083554,0.200021,0.10855,...,0.251502,0.675541,0.326261,0.312501,0.209495,0.559194,0.298697,1.0,0.334856,0.223392
21079.0,0.457867,0.511405,0.466281,0.468538,0.618561,0.262719,0.143241,0.599254,0.267511,0.48763,...,0.484301,0.306355,0.448851,0.485048,0.457385,0.432696,0.469301,0.334856,1.0,0.188356
