In [36]:
import numpy as np
import pandas as pd
from scipy.signal import correlate
import matplotlib.pyplot as plt
from scipy.sparse import lil_matrix
from itertools import product
from itertools import combinations
from datetime import datetime, timedelta
from multiprocessing import Pool


# Read the temporal series and keep every column except the date column
data_with_date = pd.read_csv("temporal_series_6.csv")
data = data_with_date.drop(["date"], axis="columns")

# Initialize parameters: max_tau is the lag bound handed to the correlation
# function by the matrix builders; threshold is the correlation cut-off value
max_tau, threshold = 15, 0.05

#Function to compute maximum of time correlation function between two temporal series
def time_delayed_cross_correlation(df, col1, col2, max_tau, threshold=0.05):
    """
    Optimized custom time-delayed cross-correlation function for two time series.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    col1 (str): Name of the first time series column.
    col2 (str): Name of the second time series column.
    max_tau (int): Maximum time delay (positive or negative).
    threshold (float): Threshold for maximum correlation.

    Returns:
    float: Maximum cross-correlation above the threshold, or 0 if none.
    """
    # Extract the series and compute means and std deviations
    series_i = df[col1].values
    series_j = df[col2].values
    mean_i, std_i = series_i.mean(), series_i.std()
    mean_j, std_j = series_j.mean(), series_j.std()
    
    # Check if standard deviations are zero to avoid division by zero
    if std_i == 0 or std_j == 0:
        return 0
    
    # Initialize array to store cross-correlations for each lag
    cross_corrs = np.zeros(2 * max_tau + 1)

    # Compute cross-correlation for each lag
    for tau in range(-max_tau, max_tau + 1):
        if tau < 0:
            # Positive lag: shift series_j forward by -tau (series_i aligns with delayed series_j)
            numerator = np.sum((series_i[-tau:] - mean_i) * (series_j[:len(series_j) + tau] - mean_j))
            denominator = (std_i * std_j * (len(series_i) + tau))
        else:
            # Negative or zero lag: shift series_i forward by tau (series_j aligns with delayed series_i)
            numerator = np.sum((series_i[:len(series_i) - tau] - mean_i) * (series_j[tau:] - mean_j))
            denominator = (std_i * std_j * (len(series_i) - tau))

        # Calculate cross-correlation, check for zero denominator
        cross_corrs[tau + max_tau] = numerator / denominator if denominator != 0 else 0

    # Find the maximum correlation and compare with threshold
    max_corr = np.max(cross_corrs)
    return max_corr if max_corr >= threshold else 0


# Function to compute the whole adjacency matrix (too expensive to run on the full data set)
def ccr_matrix(df):
    """
    Build the full adjacency matrix of maximum time-delayed cross-correlations.

    The entry for (col1, col2) equals the entry for (col2, col1): swapping the
    two columns turns lag tau into lag -tau, and time_delayed_cross_correlation
    scans a lag range that is symmetric around zero. Each unordered pair is
    therefore computed once and written to both positions.

    Uses the module-level max_tau and threshold.

    Args:
    df (pandas.DataFrame): Dataframe whose columns are the time series.

    Returns:
    pandas.DataFrame: Square matrix indexed and labelled by df.columns.
    """
    # Build the adjacency matrix, NaN-filled until every pair has been visited
    adjacency_matrix = pd.DataFrame(np.nan, index=df.columns, columns=df.columns)

    # Visit the upper triangle (diagonal included) and mirror each value
    column_names = list(df.columns)
    for position, col1 in enumerate(column_names):
        for col2 in column_names[position:]:
            value = time_delayed_cross_correlation(df, col1, col2, max_tau, threshold=threshold)
            adjacency_matrix.loc[col1, col2] = value
            adjacency_matrix.loc[col2, col1] = value

    return adjacency_matrix

# Function to compute just a submatrix of the adjacency matrix (submatrices on the diagonal)
def compute_submatrix(df, columns, filename):
    """
    Compute the adjacency submatrix for one group of columns and save it as CSV.

    The block is symmetric: swapping the two columns turns lag tau into lag -tau,
    and time_delayed_cross_correlation scans a lag range that is symmetric
    around zero. Each unordered pair is therefore computed once and written to
    both positions.

    Uses the module-level max_tau and threshold.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    columns: Column labels of the group; used as both index and columns.
    filename (str): Path of the CSV file to write.

    Returns:
    pandas.DataFrame: The computed square submatrix.
    """
    submatrix = pd.DataFrame(index=columns, columns=columns)

    # Visit the upper triangle (diagonal included) and mirror each value
    column_names = list(columns)
    for position, col1 in enumerate(column_names):
        for col2 in column_names[position:]:
            value = time_delayed_cross_correlation(df, col1, col2, max_tau, threshold=threshold)
            submatrix.loc[col1, col2] = value
            submatrix.loc[col2, col1] = value

    submatrix.to_csv(filename)
    print(f"submatrix {filename} has been saved")
    return submatrix

# Function to compute a submatrix which is not on the diagonal
def compute_cross_group_matrix(df, group_a, group_b, filename):
    """
    Compute the adjacency block between two groups of columns and save it as CSV.

    Every column of group_a is correlated with every column of group_b through
    time_delayed_cross_correlation, using the module-level max_tau and threshold.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    group_a: Column labels used as the rows of the block.
    group_b: Column labels used as the columns of the block.
    filename (str): Path of the CSV file to write.

    Returns:
    pandas.DataFrame: The computed block, indexed by group_a with columns group_b.
    """
    cross_matrix = pd.DataFrame(index=group_a, columns=group_b)
    for col1, col2 in product(group_a, group_b):
        cross_matrix.loc[col1, col2] = time_delayed_cross_correlation(
            df, col1, col2, max_tau, threshold=threshold
        )
    cross_matrix.to_csv(filename)
    # Same wording as compute_submatrix; a space separates the word from the file name
    print(f"submatrix {filename} has been saved")
    return cross_matrix

# To compute the submatrices, the 3104 columns have to be divided into groups
def divide_into_groups(df, group_sizes):
    """
    Split the column labels of df into consecutive groups of the given sizes.

    Args:
    df (pandas.DataFrame): Dataframe whose column labels are partitioned.
    group_sizes: Iterable with the number of columns of each group, in order.

    Returns:
    list: One list of column labels per requested size. Slicing past the last
    column yields shorter or empty groups; columns beyond the sum of the sizes
    are left out.
    """
    all_columns = df.columns.tolist()
    groups = []
    offset = 0
    for size in group_sizes:
        end = offset + size
        groups.append(all_columns[offset:end])
        # The next group starts where this one stopped
        offset = end
    return groups

# Change of plan: the single pieces of the matrix are computed manually,
# one group of columns (or pair of groups) at a time
group_1, group_2, group_3 = (
    data.columns[start:stop]
    for start, stop in ((0, 1000), (1000, 2000), (2000, 3104))
)


# Diagonal block of group 3 (disabled for this run)
#filename_sub = "6_sub_mat_3.csv"
#compute_submatrix(data, group_3, filename_sub)

# Off-diagonal block between group 2 (rows) and group 3 (columns)
filename_cross = "6_cross_matrix_2_3.csv"
compute_cross_group_matrix(data, group_2, group_3, filename_cross)


submatrix6_cross_matrix_2_3.csv has been saved


Unnamed: 0,38099.0,38101.0,38103.0,38105.0,39001.0,39003.0,39005.0,39007.0,39009.0,39011.0,...,56027.0,56029.0,56031.0,56033.0,56035.0,56037.0,56039.0,56041.0,56043.0,56045.0
21083.0,0.886362,0.83989,0.387208,0.483039,0.729428,0.729252,0.742632,0.800409,0.661428,0.656833,...,0.671665,0.556453,0.553732,0.692044,0.711775,0.718669,0.775182,0.691995,0.739468,0.450373
21085.0,0.649351,0.654218,0.365962,0.418098,0.630999,0.648816,0.561372,0.707381,0.551022,0.634872,...,0.521464,0.519761,0.460953,0.583745,0.595057,0.624321,0.546927,0.654454,0.560976,0.405422
21087.0,0.496756,0.442531,0.237415,0.254228,0.373129,0.38446,0.312569,0.403221,0.225097,0.346286,...,0.338417,0.361569,0.344168,0.389473,0.301493,0.334785,0.340618,0.412874,0.3972,0.348344
21089.0,0.772826,0.771193,0.259843,0.374806,0.736486,0.762802,0.668832,0.800191,0.640308,0.690763,...,0.746945,0.456819,0.524138,0.743389,0.680306,0.702313,0.712632,0.772177,0.709982,0.406567
21091.0,0.645874,0.607905,0.356168,0.267671,0.582735,0.61743,0.460942,0.594872,0.496638,0.584231,...,0.535153,0.566219,0.573586,0.570864,0.52775,0.555434,0.561657,0.645824,0.604223,0.541891
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38089.0,0.285191,0.337314,0.177019,0.196987,0.232854,0.300759,0.224504,0.244492,0.404211,0.267399,...,0.268715,0.315359,0.239682,0.234422,0.44488,0.401036,0.337403,0.282293,0.408033,0.502369
38091.0,0.528896,0.549457,0.398663,0.27144,0.461771,0.587377,0.443004,0.548132,0.42088,0.568176,...,0.483587,0.459878,0.435398,0.516956,0.515681,0.550082,0.561579,0.551023,0.500769,0.477401
38093.0,0.714554,0.704965,0.446801,0.354851,0.645216,0.686424,0.476194,0.710548,0.460059,0.59366,...,0.685723,0.588617,0.611118,0.637585,0.64529,0.689229,0.665326,0.692541,0.70816,0.550665
38095.0,0.489627,0.474664,0.324797,0.221304,0.425045,0.435628,0.256591,0.471451,0.302941,0.487317,...,0.413382,0.511773,0.508921,0.475139,0.390965,0.465798,0.493936,0.509813,0.378011,0.484647
