In [2]:
import numpy as np
import pandas as pd
from scipy.signal import correlate
import matplotlib.pyplot as plt
from scipy.sparse import lil_matrix
from itertools import product
from itertools import combinations
from datetime import datetime, timedelta
from multiprocessing import Pool


# Load the temporal series data; the CSV must contain a 'date' column
data_with_date = pd.read_csv('temporal_series_5.csv')
# Keep only the series columns: 'date' is dropped so every remaining column
# can be used as an input to the correlation functions below
data = data_with_date.drop(columns=['date'])

# Initialize parameters
max_tau = 15  # largest lag (in rows) scanned in both directions
threshold = 0.05  # NOTE(review): never passed to any call in this cell; the correlation function falls back to its own default of 0.05

#Function to compute the maximum of the time-delayed cross-correlation function between two time series
def time_delayed_cross_correlation(df, col1, col2, max_tau, threshold=0.05):
    """
    Signed peak of the time-delayed cross-correlation between two columns.

    For every integer lag tau in [-max_tau, max_tau] the function evaluates

        C(tau) = sum_t (x_i[t] - mean_i) * (x_j[t + tau] - mean_j)
                 / (std_i * std_j * overlap)

    where x_i is df[col1], x_j is df[col2] and overlap = n - |tau| is the
    number of samples the two shifted series share. Means and population
    standard deviations are taken over the full series, not over the
    overlapping window, so values outside [-1, 1] are possible.

    Only the largest (most positive) C(tau) is considered; if several lags
    tie for the maximum, the most negative lag wins (np.argmax picks the
    first occurrence).

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    col1 (str): Name of the first time series column (x_i).
    col2 (str): Name of the second time series column (x_j).
    max_tau (int): Largest lag magnitude to scan. Lags with
        |tau| >= len(series) have no overlapping samples and count as 0.
    threshold (float): Minimum peak value for the pair to be reported.

    Returns:
    float: 0 if either series is constant or the peak is below the threshold.
        Otherwise the peak value, whose SIGN ENCODES THE LAG DIRECTION and not
        the sign of the correlation: +peak when the best lag is >= 0,
        -peak when the best lag is negative.
    """
    # Extract the series and compute means and std deviations
    series_i = df[col1].values
    series_j = df[col2].values
    mean_i, std_i = series_i.mean(), series_i.std()
    mean_j, std_j = series_j.mean(), series_j.std()

    # A constant series has no defined correlation; report "no link"
    if std_i == 0 or std_j == 0:
        return 0

    n_samples = len(series_i)

    # Centering does not depend on the lag, so do it once outside the loop
    centered_i = series_i - mean_i
    centered_j = series_j - mean_j
    scale = std_i * std_j

    # One slot per lag; slot index = tau + max_tau. Slots that are never
    # written (lags without any overlap) keep the value 0.
    cross_corrs = np.zeros(2 * max_tau + 1)

    # Lags with |tau| >= n_samples would produce slices of different lengths
    # (and a broadcasting error), so only lags with at least one shared
    # sample are evaluated.
    usable_tau = min(max_tau, n_samples - 1)
    for tau in range(-usable_tau, usable_tau + 1):
        overlap = n_samples - abs(tau)
        if tau < 0:
            # Negative lag: x_i[t] is paired with the earlier sample x_j[t + tau]
            numerator = np.sum(centered_i[-tau:] * centered_j[:overlap])
        else:
            # Zero or positive lag: x_i[t] is paired with the later sample x_j[t + tau]
            numerator = np.sum(centered_i[:overlap] * centered_j[tau:])
        denominator = scale * overlap

        # Guard against a product of tiny std deviations underflowing to 0
        cross_corrs[tau + max_tau] = numerator / denominator if denominator != 0 else 0

    # Locate the peak and convert its array index back to a lag value
    max_index = np.argmax(cross_corrs)
    max_corr = cross_corrs[max_index]
    best_tau = max_index - max_tau

    # Below-threshold peaks are reported as "no link"
    if max_corr >= threshold:
        return max_corr if best_tau >= 0 else -max_corr
    return 0


#Function to compute the whole adjacency matrix in one pass (not feasible to run in practice)
def ccr_matrix(df):
    """
    Build the signed cross-correlation matrix over every ordered column pair.

    The cell at (row=col1, column=col2) holds
    time_delayed_cross_correlation(df, col1, col2, max_tau), evaluated with
    the module-level max_tau and the correlation function's default threshold.

    Args:
    df (pandas.DataFrame): Dataframe whose columns are the time series.

    Returns:
    pandas.DataFrame: Square frame indexed and labelled by df.columns.
    """
    names = df.columns

    # Start from an all-NaN frame; the loop below writes every cell
    result = pd.DataFrame(np.nan, index=names, columns=names)

    # Visit all ordered pairs, rows outermost
    for row_name, col_name in product(names, repeat=2):
        result.loc[row_name, col_name] = time_delayed_cross_correlation(
            df, row_name, col_name, max_tau
        )

    return result

#Function to compute just one submatrix of the adjacency matrix (a block on the diagonal)
def compute_submatrix(df, columns, filename, max_tau):
    """
    Compute one diagonal block of the adjacency matrix and save it as CSV.

    The cell at (row=col1, column=col2) holds
    time_delayed_cross_correlation(df, col1, col2, max_tau).

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    columns: Column labels of the group; used as both rows and columns.
    filename (str): Path of the CSV file the block is written to.
    max_tau (int): Maximum time delay passed to the correlation function.

    Returns:
    pandas.DataFrame: The computed block (also written to filename).
    """
    submatrix = pd.DataFrame(index=columns, columns=columns)

    # The value returned by time_delayed_cross_correlation carries the lag
    # direction in its sign, so swapping the two columns can flip that sign.
    # The block is therefore NOT symmetric: every ordered pair is evaluated on
    # its own instead of mirroring one triangle onto the other.
    for col1, col2 in product(columns, repeat=2):
        submatrix.loc[col1, col2] = time_delayed_cross_correlation(df, col1, col2, max_tau)

    submatrix.to_csv(filename)
    print(f"Submatrix {filename} has been saved.")
    return submatrix

#Function to compute a submatrix that is not on the diagonal
def compute_cross_group_matrix(df, group_a, group_b, filename):
    """
    Compute an off-diagonal block of the adjacency matrix and save it as CSV.

    Rows are labelled by group_a and columns by group_b; each cell holds
    time_delayed_cross_correlation(df, row, column, max_tau), where max_tau
    is read from module scope (it is not a parameter of this function).

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    group_a: Column labels used as rows of the block.
    group_b: Column labels used as columns of the block.
    filename (str): Path of the CSV file the block is written to.

    Returns:
    pandas.DataFrame: The computed block (also written to filename).
    """
    block = pd.DataFrame(index=group_a, columns=group_b)

    # Fill the block row by row
    for row_name in group_a:
        for col_name in group_b:
            value = time_delayed_cross_correlation(df, row_name, col_name, max_tau)
            block.loc[row_name, col_name] = value

    block.to_csv(filename)
    print(f"submatrix{filename} has been saved")
    return block

#Now to compute the submatrices we need to divide the 3104 columns into groups
def divide_into_groups(df, group_sizes):
    """
    Split the column labels of df into consecutive groups of the given sizes.

    Groups are taken in order from the first column onwards. Columns beyond
    the sum of group_sizes are left out, and a size that runs past the last
    column yields a shorter (possibly empty) group.

    Args:
    df (pandas.DataFrame): Dataframe whose column labels are partitioned.
    group_sizes: Iterable of integers, one size per group.

    Returns:
    list: One list of column labels per entry of group_sizes.
    """
    names = df.columns.tolist()

    # Cumulative boundaries: group k spans names[bounds[k]:bounds[k + 1]]
    bounds = [0]
    for width in group_sizes:
        bounds.append(bounds[-1] + width)

    return [names[lo:hi] for lo, hi in zip(bounds, bounds[1:])]

#Change of plan: the individual pieces are now computed manually
# Hard-coded positional ranges; any column at position 3104 or later is not
# assigned to a group
group_1 = data.columns[0:1000]
group_2 = data.columns[1000:2000]
group_3 = data.columns[2000:3104]


# Diagonal block for group 3 (currently disabled):
#filename_sub = "5_sub_mat_3.csv"
#compute_submatrix(data, group_3, filename_sub, max_tau)

# Off-diagonal block: rows from group_2, columns from group_3
filename_cross = "5_cross_matrix_2_3.csv"
compute_cross_group_matrix(data, group_2, group_3, filename_cross)


submatrix5_cross_matrix_2_3.csv has been saved


Unnamed: 0,38099.0,38101.0,38103.0,38105.0,39001.0,39003.0,39005.0,39007.0,39009.0,39011.0,...,56027.0,56029.0,56031.0,56033.0,56035.0,56037.0,56039.0,56041.0,56043.0,56045.0
21083.0,-0.662688,-0.662762,-0.668939,0.394267,0.620211,0.672421,-0.689289,0.834389,-0.328881,-0.587287,...,-0.681283,-0.667044,-0.606166,-0.595714,-0.42044,-0.776685,-0.586111,-0.705038,-0.378958,-0.678502
21085.0,-0.603065,0.626766,-0.575481,0.471923,0.535119,0.612262,0.587196,0.669954,0.304953,-0.618672,...,0.528313,0.585972,-0.553761,-0.601733,-0.385406,-0.63023,0.460724,-0.612152,-0.374006,-0.515916
21087.0,-0.240511,-0.230682,-0.230562,0.238576,-0.242805,0.290969,0.156603,0.268679,0.091366,-0.227805,...,-0.268063,-0.254158,-0.189967,-0.246887,0.131063,-0.280801,0.170103,-0.30873,-0.368162,-0.268824
21089.0,0.619623,-0.682084,-0.550064,-0.508951,-0.59842,0.649243,0.646186,0.615189,-0.401545,-0.629996,...,0.588762,-0.583522,-0.511727,0.566894,0.362391,-0.582126,0.496104,-0.604768,-0.313152,-0.554312
21091.0,-0.572709,0.667748,-0.536369,-0.466179,-0.601768,0.610198,0.713944,0.602914,-0.476043,0.662493,...,0.488004,-0.670056,-0.572981,0.615038,0.592761,0.680713,0.515902,0.634052,0.286941,-0.6406
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38089.0,-0.169075,0.183546,-0.191645,0.437353,0.25056,0.182553,0.296622,0.155238,0.492194,0.225042,...,-0.160796,0.180983,-0.259677,0.242228,0.379611,0.190329,0.164756,0.215716,-0.058634,-0.201592
38091.0,0.523548,0.515638,-0.518323,0.293566,0.577677,0.50026,0.563369,0.58554,-0.380945,0.561942,...,0.485341,-0.572189,-0.475055,0.458474,0.380643,0.566298,0.536328,0.63784,0.360296,0.582815
38093.0,0.78779,0.765604,-0.67095,-0.500107,0.654446,0.777954,0.72055,0.806373,-0.393507,0.727038,...,0.681857,-0.669532,0.591562,0.624115,0.468198,0.670826,0.590192,0.610286,-0.33743,-0.63973
38095.0,0.594598,0.60735,0.540065,-0.508228,0.507006,0.580289,0.609271,0.595714,-0.515223,0.569291,...,0.492553,0.655715,0.57817,0.620943,0.3904,0.588212,0.559273,0.52281,0.285353,0.542212
