In [18]:
import numpy as np
import pandas as pd
from scipy.signal import correlate
import matplotlib.pyplot as plt
from scipy.sparse import lil_matrix
from itertools import product
from itertools import combinations
from datetime import datetime, timedelta
from multiprocessing import Pool


# Load the time-series table. The 'date' column is dropped so that every
# remaining column of `data` is handled as one series by the functions below.
data_with_date = pd.read_csv('temporal_series_5.csv')
data = data_with_date.drop(columns=['date'])

# Initialize parameters
# max_tau: largest lag (in rows) scanned in both directions; it is read as a
# global by ccr_matrix and compute_cross_group_matrix.
max_tau = 15 
# threshold: NOTE(review): this global is not read by any code visible in this
# cell; time_delayed_cross_correlation uses its own default of 0.05 instead.
threshold = 0.05

# Function to compute the lag-maximised, direction-signed cross-correlation between two time series
def time_delayed_cross_correlation(df, col1, col2, max_tau, threshold=0.05):
    """
    Peak time-delayed cross-correlation of two columns, signed by lag direction.

    For every integer lag ``tau`` in ``[-max_tau, max_tau]`` the value of
    ``df[col1]`` at row ``t`` is paired with the value of ``df[col2]`` at row
    ``t + tau``. The products of the deviations from the full-series means are
    summed, then divided by the number of overlapping rows and by the product
    of the full-series (population) standard deviations. Because full-series
    statistics are used on a partial overlap, the result is not clipped to
    ``[-1, 1]``.

    Lags that leave no overlapping row (``|tau| >= len(series)``) contribute a
    correlation of 0 instead of being evaluated on mismatched slices.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    col1 (str): Name of the first time series column.
    col2 (str): Name of the second time series column.
    max_tau (int): Maximum time delay scanned in each direction (non-negative).
    threshold (float): Minimum value the largest correlation must reach.

    Returns:
    float: 0 if either series is empty or constant, or if the largest
    correlation over all lags is below ``threshold``. Otherwise the largest
    correlation, returned unchanged when it is found at ``tau >= 0`` and
    negated when it is found at ``tau < 0``. Only the largest (most positive)
    correlation is considered; strong negative correlations are not selected.
    """
    series_i = df[col1].values
    series_j = df[col2].values
    n_samples = len(series_i)

    # Nothing to correlate; also avoids NaN statistics on empty input
    if n_samples == 0:
        return 0

    mean_i, std_i = series_i.mean(), series_i.std()
    mean_j, std_j = series_j.mean(), series_j.std()

    # A constant series has zero variance: the correlation is undefined, report 0
    if std_i == 0 or std_j == 0:
        return 0

    # Loop-invariant pieces: centred series and the normalisation factor
    centred_i = series_i - mean_i
    centred_j = series_j - mean_j
    scale = std_i * std_j

    # One slot per lag; index = tau + max_tau. Lags without overlap stay at 0.
    cross_corrs = np.zeros(2 * max_tau + 1)

    # Restrict the scan to lags that keep at least one overlapping row, so the
    # two slices below always have the same, non-zero length.
    usable_tau = min(max_tau, n_samples - 1)
    for tau in range(-usable_tau, usable_tau + 1):
        overlap = n_samples - abs(tau)
        if tau < 0:
            # Negative lag: series_i at row t is paired with series_j at the earlier row t + tau
            numerator = np.sum(centred_i[-tau:] * centred_j[:overlap])
        else:
            # Zero or positive lag: series_i at row t is paired with series_j at the later row t + tau
            numerator = np.sum(centred_i[:overlap] * centred_j[tau:])

        denominator = scale * overlap
        # Guard against a product of tiny standard deviations underflowing to 0
        if denominator != 0:
            cross_corrs[tau + max_tau] = numerator / denominator

    # Largest correlation and the lag at which it occurs (first one on ties)
    max_index = np.argmax(cross_corrs)
    max_corr = cross_corrs[max_index]
    best_tau = max_index - max_tau  # Convert array index back to tau value

    # Below the threshold the pair is treated as uncorrelated
    if max_corr < threshold:
        return 0

    # The sign of the result encodes the direction of the best lag
    return max_corr if best_tau >= 0 else -max_corr


# Function to compute the whole adjacency matrix (noted by the author as
# impossible to compute in practice on the full data set)
def ccr_matrix(df):
    """
    Build the full column-by-column matrix of time-delayed cross-correlations.

    Every ordered pair of columns (including a column with itself) is passed
    to time_delayed_cross_correlation with the module-level ``max_tau``.

    Args:
    df (pandas.DataFrame): Dataframe whose columns are the time series.

    Returns:
    pandas.DataFrame: Square matrix indexed and labelled by ``df.columns``;
    entry ``[row, col]`` is the result for ``(row, col)`` in that order.
    """
    labels = df.columns

    # Start from an all-NaN square frame and fill it cell by cell
    result = pd.DataFrame(np.nan, index=labels, columns=labels)

    # Row-major walk over every ordered pair of column labels
    for row_label, col_label in product(labels, repeat=2):
        value = time_delayed_cross_correlation(df, row_label, col_label, max_tau)
        result.loc[row_label, col_label] = value

    return result

# Function to compute just a submatrix of the adjacency matrix (submatrices on the diagonal)
def compute_submatrix(df, columns, filename, max_tau):
    """
    Compute a diagonal block of the adjacency matrix and save it as CSV.

    Only pairs with ``col1 <= col2`` are evaluated; the value is then written
    to both ``[col1, col2]`` and ``[col2, col1]``.

    NOTE(review): the mirrored fill assumes the measure is symmetric, but
    time_delayed_cross_correlation signs its result by lag direction, so the
    mirrored cell may differ from what a direct call with swapped columns
    would return. Confirm that this is intended.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    columns: Column labels that form both the rows and columns of the block.
    filename (str): Path of the CSV file to write.
    max_tau (int): Maximum time delay passed to the pairwise function.

    Returns:
    pandas.DataFrame: The filled block, as written to ``filename``.
    """
    block = pd.DataFrame(index=columns, columns=columns)

    for first, second in product(columns, repeat=2):
        # Skip the half of the pairs that the mirrored write already covers
        if not (first <= second):
            continue
        value = time_delayed_cross_correlation(df, first, second, max_tau)
        block.loc[first, second] = value
        block.loc[second, first] = value  # Mirrored position

    block.to_csv(filename)
    print(f"Submatrix {filename} has been saved.")
    return block

# Function to compute a submatrix which is not on the diagonal
def compute_cross_group_matrix(df, group_a, group_b, filename):
    """
    Compute an off-diagonal block of the adjacency matrix and save it as CSV.

    Each label in ``group_a`` is paired with each label in ``group_b`` and the
    pair is evaluated with time_delayed_cross_correlation, using the
    module-level ``max_tau``.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    group_a: Column labels used as the rows of the block.
    group_b: Column labels used as the columns of the block.
    filename (str): Path of the CSV file to write.

    Returns:
    pandas.DataFrame: The filled block, as written to ``filename``.
    """
    block = pd.DataFrame(index=group_a, columns=group_b)

    # Rows come from group_a, columns from group_b; argument order is (row, column)
    for row_label in group_a:
        for col_label in group_b:
            value = time_delayed_cross_correlation(df, row_label, col_label, max_tau)
            block.loc[row_label, col_label] = value

    block.to_csv(filename)
    print(f"submatrix{filename} has been saved")
    return block

# To compute the submatrices, the 3104 columns need to be divided into groups
def divide_into_groups(df, group_sizes):
    """
    Split the column labels of ``df`` into consecutive groups.

    Args:
    df (pandas.DataFrame): Dataframe whose column labels are partitioned.
    group_sizes: Iterable with the number of columns for each group, in order.

    Returns:
    list: One list of column labels per entry of ``group_sizes``. Columns
    beyond the sum of the sizes are not included in any group; a size that
    runs past the last column yields a shorter (possibly empty) group.
    """
    labels = df.columns.tolist()

    # Running offsets: bounds[k] is where group k starts, bounds[k + 1] where it ends
    bounds = [0]
    for width in group_sizes:
        bounds.append(bounds[-1] + width)

    return [labels[lo:hi] for lo, hi in zip(bounds, bounds[1:])]

# Change of plan: the matrix is computed manually, one piece per run.
# The slice bounds below are hard-coded column positions (three groups ending
# at position 3104).
group_1 = data.columns[0:1000]
group_2 = data.columns[1000:2000]
group_3 = data.columns[2000:3104]


# Disabled run: diagonal block for group_3.
#filename_sub = "5_sub_mat_3.csv"
#compute_submatrix(data, group_3, filename_sub, max_tau)

# Active run: off-diagonal block with group_1 as rows and group_2 as columns,
# written to the CSV file named below.
filename_cross = "5_cross_matrix_1_2.csv"
compute_cross_group_matrix(data, group_1, group_2, filename_cross)


submatrix5_cross_matrix_1_2.csv has been saved


Unnamed: 0,21083.0,21085.0,21087.0,21089.0,21091.0,21093.0,21095.0,21097.0,21099.0,21101.0,...,38079.0,38081.0,38083.0,38085.0,38087.0,38089.0,38091.0,38093.0,38095.0,38097.0
1001.0,-0.298291,-0.182935,0.308928,0.128951,-0.177144,-0.245117,-0.233528,0.126723,-0.189931,0.120448,...,0.254731,-0.164965,-0.224971,-0.290139,0.177203,0.11573,-0.392072,-0.225117,-0.176814,-0.230528
1003.0,0.153825,-0.182547,0,-0.090535,-0.116488,-0.137202,0.361859,0.302298,-0.123476,0.255652,...,-0.056715,-0.235752,-0.10175,0.141574,0.413942,-0.133305,-0.093528,-0.078017,0.193264,-0.102917
1005.0,0.163907,0.113757,0,-0.111707,-0.111937,0.166262,0.236737,0.220572,-0.1831,0.184165,...,0.203356,0.10209,-0.191679,0.252461,-0.189214,0.168151,0.168772,0.121709,0.127899,0.108325
1007.0,-0.2498,0.169225,0.379,0.177962,-0.245626,0.213519,-0.363685,-0.249307,0.285243,-0.246862,...,0.20519,-0.142952,-0.223718,0.254536,-0.136164,0.158028,0.228497,0.187661,-0.224899,-0.239669
1009.0,0.510421,0.402211,0.247,0.281531,-0.346556,0.469229,0.23216,-0.282428,-0.524933,-0.127138,...,0.48412,0.239509,-0.373674,0.325078,-0.151458,0.079199,-0.446731,0.393267,-0.326607,-0.402661
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21073.0,0.453127,0.391455,0.335573,0.392389,-0.401944,0.495013,0.212463,0.287735,-0.426606,0.324213,...,0.467358,-0.339359,-0.279367,0.392613,-0.252943,0.30006,-0.346928,0.39139,-0.307578,-0.355762
21075.0,-0.113271,0.11581,0.181684,0.069345,-0.144406,0.172606,-0.276179,0.258541,0.280431,-0.262401,...,0.083904,-0.220433,0.292878,0.269464,0.251354,-0.182976,0.237727,0.063636,0.238964,0.197069
21077.0,0.308029,0.397636,0.197226,0.296252,0.422341,0.439272,-0.167741,0.32765,0.372514,-0.352322,...,0.40079,-0.326292,-0.425839,0.398014,-0.297402,-0.178751,-0.341612,-0.429429,-0.456172,-0.411218
21079.0,0.762108,0.640047,0.29848,0.636023,0.586624,0.661057,0.290638,0.338861,0.578769,-0.520913,...,0.776123,0.460886,-0.53379,0.496386,-0.297717,-0.232989,0.523196,0.768109,-0.553755,0.714618
