In [19]:
import numpy as np
import pandas as pd
from scipy.signal import correlate
import matplotlib.pyplot as plt
from scipy.sparse import lil_matrix
from itertools import product
from itertools import combinations
from datetime import datetime, timedelta
from multiprocessing import Pool


# Read the time-series table and discard the 'date' column, so that the
# remaining columns are the series handed to the correlation routines.
data_with_date = pd.read_csv('temporal_series_10.csv')
data = data_with_date.drop('date', axis=1)

# Initialize parameters:
#   max_tau   - largest time delay (in samples) scanned in either direction
#   threshold - correlation cut-off
max_tau, threshold = 15, 0.05

#Function to compute maximum of time correlation function between two temporal series
def time_delayed_cross_correlation(df, col1, col2, max_tau, threshold=0.05):
    """
    Maximum time-delayed cross-correlation between two columns of a dataframe.

    For every integer lag tau in [-max_tau, max_tau] the overlapping parts of
    the two series are multiplied after subtracting each series' global mean,
    summed, and divided by (std_i * std_j * number_of_overlapping_samples).
    The largest of these values is returned if it reaches the threshold.

    Note: the global means and (population) standard deviations are used for
    every lag, so individual lagged values are not guaranteed to lie in
    [-1, 1] when the overlap is short.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    col1 (str): Name of the first time series column.
    col2 (str): Name of the second time series column.
    max_tau (int): Maximum time delay (positive or negative). Lags whose
        magnitude is at least the series length have no overlapping samples
        and contribute a correlation of 0.
    threshold (float): Threshold for maximum correlation.

    Returns:
    float: Maximum cross-correlation if it is >= threshold, otherwise 0.
        0 is also returned when either series is constant.
    """
    # Extract the series and compute means and std deviations
    series_i = df[col1].values
    series_j = df[col2].values
    n_samples = len(series_i)
    mean_i, std_i = series_i.mean(), series_i.std()
    mean_j, std_j = series_j.mean(), series_j.std()

    # A constant series has no defined correlation; avoid dividing by zero
    if std_i == 0 or std_j == 0:
        return 0

    # Loop-invariant pieces: centred series and the std product
    dev_i = series_i - mean_i
    dev_j = series_j - mean_j
    norm = std_i * std_j

    # One slot per requested lag; slots for lags without any overlap stay 0
    cross_corrs = np.zeros(2 * max_tau + 1)

    # Only lags with at least one overlapping sample can be evaluated.
    # Without this clamp, a max_tau larger than the series produces slices of
    # different lengths and NumPy raises a broadcasting ValueError.
    usable_tau = min(max_tau, n_samples - 1)

    for tau in range(-usable_tau, usable_tau + 1):
        overlap = n_samples - abs(tau)
        if tau < 0:
            # Negative lag: series_i[t] is paired with series_j[t + tau],
            # i.e. with the earlier sample of series_j
            numerator = np.sum(dev_i[-tau:] * dev_j[:overlap])
        else:
            # Zero or positive lag: series_i[t] is paired with series_j[t + tau],
            # i.e. with the later sample of series_j
            numerator = np.sum(dev_i[:overlap] * dev_j[tau:])

        cross_corrs[tau + max_tau] = numerator / (norm * overlap)

    # Find the maximum correlation and compare with threshold
    max_corr = np.max(cross_corrs)
    return max_corr if max_corr >= threshold else 0


#Function to compute the whole adjacency matrix (the author notes it is not feasible to compute in full)
def ccr_matrix(df):
    """
    Build the full adjacency matrix of maximum time-delayed cross-correlations.

    Every ordered pair of columns of df (including a column with itself) is
    evaluated with time_delayed_cross_correlation, using the module-level
    max_tau and threshold parameters.

    Args:
    df (pandas.DataFrame): Dataframe whose columns are the time series.

    Returns:
    pandas.DataFrame: Square matrix indexed and labelled by df.columns.
    """
    # Start from an all-NaN float matrix so unfilled cells are recognisable
    adjacency_matrix = pd.DataFrame(np.nan, index=df.columns, columns=df.columns)

    # Visit each ordered pair of columns; forward the configured threshold so
    # that changing the module-level value actually takes effect
    for col1, col2 in product(df.columns, repeat=2):
        adjacency_matrix.loc[col1, col2] = time_delayed_cross_correlation(
            df, col1, col2, max_tau, threshold
        )

    return adjacency_matrix

#function to compute just a submatrix of the adjacency matrix (submatrices on the diagonal)
def compute_submatrix(df, columns, filename):
    """
    Compute a diagonal block of the adjacency matrix and save it as CSV.

    All ordered pairs within `columns` are evaluated with
    time_delayed_cross_correlation, using the module-level max_tau and
    threshold parameters.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    columns: Column labels that form both the rows and columns of the block.
    filename (str): Path of the CSV file the block is written to.

    Returns:
    pandas.DataFrame: The computed block (also written to `filename`).
    """
    submatrix = pd.DataFrame(index=columns, columns=columns)
    for col1, col2 in product(columns, repeat=2):
        # Forward the configured threshold instead of silently relying on the
        # callee's default value
        submatrix.loc[col1, col2] = time_delayed_cross_correlation(
            df, col1, col2, max_tau, threshold
        )
    submatrix.to_csv(filename)
    print(f"submatrix {filename} has been saved")
    return submatrix

#function to compute a submatrix which is not on the diagonal
def compute_cross_group_matrix(df, group_a, group_b, filename):
    """
    Compute an off-diagonal block of the adjacency matrix and save it as CSV.

    Every pair (col1 from group_a, col2 from group_b) is evaluated with
    time_delayed_cross_correlation, using the module-level max_tau and
    threshold parameters.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    group_a: Column labels used as the rows of the block.
    group_b: Column labels used as the columns of the block.
    filename (str): Path of the CSV file the block is written to.

    Returns:
    pandas.DataFrame: The computed block (also written to `filename`).
    """
    cross_matrix = pd.DataFrame(index=group_a, columns=group_b)
    for col1, col2 in product(group_a, group_b):
        # Forward the configured threshold instead of silently relying on the
        # callee's default value
        cross_matrix.loc[col1, col2] = time_delayed_cross_correlation(
            df, col1, col2, max_tau, threshold
        )
    cross_matrix.to_csv(filename)
    # Space after "submatrix" added so the message matches compute_submatrix
    print(f"submatrix {filename} has been saved")
    return cross_matrix

#To compute the submatrices, the columns (3104 in the dataset) are split into groups
def divide_into_groups(df, group_sizes):
    """
    Split the column labels of df into consecutive groups.

    Args:
    df (pandas.DataFrame): Dataframe whose column labels are partitioned.
    group_sizes: Iterable of group lengths, consumed in order.

    Returns:
    list: One list of column labels per entry of group_sizes. Columns beyond
        the sum of the sizes are left out; a group that runs past the last
        column is simply shorter (or empty).
    """
    labels = df.columns.tolist()
    chunks = []
    offset = 0
    for width in group_sizes:
        stop = offset + width
        chunks.append(labels[offset:stop])
        offset = stop
    return chunks

#Change of plan: the individual pieces are now computed manually, one block per run
group_1, group_2, group_3 = (
    data.columns[lo:hi]
    for lo, hi in ((0, 1000), (1000, 2000), (2000, 3104))
)

# Diagonal block of group 3 (disabled for this run):
#filename_sub = "10_sub_mat_3.csv"
#compute_submatrix(data, group_3, filename_sub)

# Off-diagonal block between group 2 (rows) and group 3 (columns)
filename_cross = "10_cross_matrix_2_3.csv"
compute_cross_group_matrix(data, group_2, group_3, filename_cross)


submatrix10_cross_matrix_1_3.csv has been saved


Unnamed: 0,38099.0,38101.0,38103.0,38105.0,39001.0,39003.0,39005.0,39007.0,39009.0,39011.0,...,56027.0,56029.0,56031.0,56033.0,56035.0,56037.0,56039.0,56041.0,56043.0,56045.0
1001.0,0.473516,0.503077,0.315085,0.517715,0.483521,0.649973,0.574929,0.288108,0.534418,0.618965,...,0.319959,0.638535,0.462955,0.507632,0.40112,0.459767,0.461739,0.619636,0.466665,0.66253
1003.0,0.541692,0.653,0.207951,0.572093,0.642649,0.746463,0.681899,0.356479,0.675119,0.758573,...,0.251235,0.647055,0.496353,0.6304,0.443766,0.504503,0.498215,0.652932,0.533512,0.270517
1005.0,0.457309,0.578467,0.254385,0.53854,0.562711,0.534107,0.563096,0.343082,0.504288,0.599697,...,0.336843,0.552253,0.452616,0.483725,0.434161,0.492878,0.473628,0.614756,0.521223,0.525936
1007.0,0.523801,0.56953,0.315599,0.522887,0.584188,0.71596,0.658031,0.297495,0.601258,0.750206,...,0.3204,0.616199,0.490352,0.558985,0.401932,0.623124,0.490062,0.6472,0.508271,0.323773
1009.0,0.459797,0.543497,0.312847,0.603598,0.528475,0.611138,0.54634,0.287335,0.523181,0.638072,...,0.216095,0.468228,0.494121,0.45453,0.375676,0.445596,0.380963,0.56225,0.413652,0.254478
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21073.0,0.640057,0.479671,0.184459,0.528733,0.54715,0.490909,0.427359,0.49785,0.505918,0.527633,...,0.346825,0.594768,0.478291,0.522936,0.234851,0.311915,0.515524,0.444787,0.33859,0.275618
21075.0,0.428301,0.396212,0.344243,0.324445,0.408018,0.480353,0.445653,0.250231,0.466427,0.426473,...,0.211761,0.466595,0.429163,0.472906,0.380048,0.364996,0.372921,0.46931,0.390196,0.447567
21077.0,0.327878,0.358408,0.192037,0.380479,0.431056,0.423328,0.425653,0.321748,0.495729,0.446207,...,0.206785,0.392284,0.384862,0.459115,0.317046,0.41328,0.389939,0.361432,0.329097,0.202536
21079.0,0.559089,0.482828,0.318987,0.403998,0.522501,0.650694,0.588685,0.338016,0.536151,0.599413,...,0.301363,0.510887,0.506097,0.511051,0.488498,0.367894,0.538217,0.579441,0.522492,0.45036
