In [34]:
import numpy as np
import pandas as pd
from scipy.signal import correlate
import matplotlib.pyplot as plt
from scipy.sparse import lil_matrix
from itertools import product
from itertools import combinations
from datetime import datetime, timedelta
from multiprocessing import Pool



# Load the temporal series data.
# The 'date' column is dropped so that `data` keeps only the remaining columns,
# which the functions below treat as the individual time series.
data_with_date = pd.read_csv('temporal_series_20.csv')
data = data_with_date.drop(columns=['date'])

# Initialize parameters
# max_tau is read as a module-level global by the matrix-building functions below.
max_tau = 15 
# NOTE(review): this global `threshold` is not read by any code visible in this
# cell (the correlation function below has its own default) - confirm it is needed.
threshold = 0.05

#Function to compute maximum of time correlation function between two temporal series

# Optimized time-delayed cross-correlation
import numpy as np
import pandas as pd

def optimized_time_delayed_cross_correlation(df, col1, col2, max_tau, threshold=0.5):
    """
    Maximum time-delayed cross-correlation between two time series N_i and N_j.

    For every integer lag tau in [-max_tau, max_tau] the Pearson correlation
    between N_i(t + tau) and N_j(t) is computed over the samples where both
    series are defined (the overlapping window of length n - |tau|). The
    largest coefficient over all lags is returned if it reaches the threshold.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    col1 (str): Name of the first time series column (N_i).
    col2 (str): Name of the second time series column (N_j).
    max_tau (int): Maximum time delay (lags from -max_tau to +max_tau are scanned).
    threshold (float): Minimum value the maximum correlation must reach.

    Returns:
    max_corr (float): Maximum cross-correlation if it is >= threshold, else 0.
        0 is also returned when either series is constant or has fewer than
        two samples.
    """
    # Extract the raw arrays once instead of re-reading the dataframe per lag.
    series_i = df[col1].to_numpy(dtype=float)
    series_j = df[col2].to_numpy(dtype=float)
    n = len(series_i)

    # A correlation needs at least two samples and non-constant series.
    if n < 2 or np.std(series_i) == 0 or np.std(series_j) == 0:
        return 0

    # One slot per lag; lags that cannot be evaluated keep the value 0.
    cross_corrs = np.zeros(2 * max_tau + 1)

    for idx, tau in enumerate(range(-max_tau, max_tau + 1)):
        overlap = n - abs(tau)
        if overlap < 2:
            # Lag is longer than the series: nothing to correlate.
            continue

        # Pair N_i(t + tau) with N_j(t) using plain slices, so no wrapped-around
        # samples (as np.roll would produce) can leak into the window.
        if tau < 0:
            window_i = series_i[:overlap]
            window_j = series_j[-tau:]
        else:
            window_i = series_i[tau:]
            window_j = series_j[:overlap]

        # Use the statistics of the window itself (population std, ddof=0, on
        # both sides) so the coefficient is a proper Pearson correlation
        # bounded by [-1, 1].
        denominator = np.std(window_i) * np.std(window_j)
        if denominator == 0:
            # One of the windows is constant; correlation is undefined -> 0.
            continue

        covariance = np.mean(
            (window_i - np.mean(window_i)) * (window_j - np.mean(window_j))
        )
        cross_corrs[idx] = covariance / denominator

    # Find the maximum correlation and compare with threshold
    max_corr = np.max(cross_corrs)
    return max_corr if max_corr >= threshold else 0


#Function to compute the whole adjacency matrix (impossible to compute)
def ccr_matrix(df):
    """
    Build the full adjacency matrix over every ordered pair of columns of df.

    Each cell (row, column) is filled with the value returned by
    time_delayed_cross_correlation(df, row, column, max_tau), where max_tau is
    the module-level parameter.
    NOTE(review): time_delayed_cross_correlation is not defined in this cell;
    it is presumably provided elsewhere in the notebook - confirm.

    Args:
    df (pandas.DataFrame): Dataframe whose columns are the time series.

    Returns:
    pandas.DataFrame: Square matrix indexed by df.columns on both axes.
    """
    labels = df.columns
    # Start from an all-NaN square frame and fill it cell by cell.
    result = pd.DataFrame(np.nan, index=labels, columns=labels)

    # product(..., repeat=2) walks the pairs row by row, like two nested loops.
    for row_label, col_label in product(labels, repeat=2):
        result.loc[row_label, col_label] = time_delayed_cross_correlation(
            df, row_label, col_label, max_tau
        )

    return result

#function to compute just a submatrix of the adjacency matrix (submatrices on the diagonal)
def compute_submatrix(df, columns, filename):
    """
    Compute the square block of the adjacency matrix for one group of columns.

    Every ordered pair taken from `columns` is evaluated with
    time_delayed_cross_correlation(df, a, b, max_tau) (max_tau is the
    module-level parameter), the block is written to `filename` as CSV, and a
    confirmation line is printed.
    NOTE(review): time_delayed_cross_correlation is not defined in this cell;
    it is presumably provided elsewhere in the notebook - confirm.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series.
    columns: Column labels used for both the rows and the columns of the block.
    filename (str): Path of the CSV file to write.

    Returns:
    pandas.DataFrame: The computed block.
    """
    block = pd.DataFrame(index=columns, columns=columns)
    for row_label in columns:
        for col_label in columns:
            block.loc[row_label, col_label] = time_delayed_cross_correlation(
                df, row_label, col_label, max_tau
            )
    # Persist the block so it does not have to be recomputed.
    block.to_csv(filename)
    print(f"submatrix {filename} has been saved")
    return block

#function to compute a submatrix which is not on the diagonal
def compute_cross_group_matrix(df, group_a, group_b, filename):
    """
    Compute an off-diagonal block of the adjacency matrix between two groups.

    Rows are labelled by group_a and columns by group_b. Each cell is the value
    of time_delayed_cross_correlation(df, a, b, max_tau), with max_tau taken
    from the module-level parameter. The block is written to `filename` as CSV
    and a confirmation line is printed.
    NOTE(review): time_delayed_cross_correlation is not defined in this cell;
    it is presumably provided elsewhere in the notebook - confirm.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series.
    group_a: Column labels used as the rows of the block.
    group_b: Column labels used as the columns of the block.
    filename (str): Path of the CSV file to write.

    Returns:
    pandas.DataFrame: The computed block.
    """
    cross_matrix = pd.DataFrame(index=group_a, columns=group_b)
    for col1, col2 in product(group_a, group_b):
        cross_matrix.loc[col1, col2] = time_delayed_cross_correlation(df, col1, col2, max_tau)
    cross_matrix.to_csv(filename)
    # A space separates the label from the file name, matching the message
    # printed by compute_submatrix.
    print(f"submatrix {filename} has been saved")
    return cross_matrix

#Now to compute the submatrices we need to divide the 3104 columns into groups
def divide_into_groups(df, group_sizes):
    """
    Split the columns of df into consecutive groups of the requested sizes.

    Args:
    df (pandas.DataFrame): Dataframe whose column labels are partitioned.
    group_sizes: Iterable of group lengths, consumed in order.

    Returns:
    list: One list of column labels per requested size. Columns beyond the
        sum of the sizes are left out; a group that runs past the last column
        is simply shorter (or empty).
    """
    labels = df.columns.tolist()
    result = []
    offset = 0
    for width in group_sizes:
        stop = offset + width
        result.append(labels[offset:stop])
        # The next group starts where this one ended.
        offset = stop
    return result

# Change of plan: compute the individual pieces of the matrix manually.
# The columns are split positionally into three groups (the upper bound 3104
# matches the number of columns mentioned above).
group_1 = data.columns[:1000]
group_2 = data.columns[1000:2000]
group_3 = data.columns[2000:3104]


# Diagonal block for group 3 - left disabled for this run.
#filename_sub = "sub_mat_3.csv"
#compute_submatrix(data, group_3, filename_sub)

# Off-diagonal block: rows from group 1, columns from group 3, saved as CSV.
filename_cross = "cross_matrix_1_3.csv"
compute_cross_group_matrix(data, group_1, group_3, filename_cross)


submatrixcross_matrix_1_3.csv has been saved


Unnamed: 0,38099.0,38101.0,38103.0,38105.0,39001.0,39003.0,39005.0,39007.0,39009.0,39011.0,...,56027.0,56029.0,56031.0,56033.0,56035.0,56037.0,56039.0,56041.0,56043.0,56045.0
1001.0,0.442999,0.491342,0.430427,0.476892,0.628851,0.741279,0.74474,0.509933,0.669002,0.551948,...,0.568907,0.415352,0.383577,0.537846,0.499291,0.478711,0.674495,0.569089,0.345749,0.303099
1003.0,0.527716,0.548231,0.424045,0.504528,0.631279,0.613397,0.80341,0.616063,0.574951,0.557714,...,0.362961,0.439159,0.440404,0.603836,0.53635,0.571243,0.635333,0.611297,0.333807,0.320218
1005.0,0.56045,0.589356,0.466588,0.548656,0.639836,0.629173,0.777128,0.594192,0.586607,0.597868,...,0.277279,0.485369,0.484559,0.66068,0.5486,0.618563,0.65188,0.669093,0.351144,0.339741
1007.0,0.495908,0.545594,0.409611,0.501881,0.694588,0.605065,0.810375,0.667463,0.544327,0.567397,...,0.37784,0.451029,0.443421,0.619878,0.528716,0.590033,0.655857,0.616596,0.331233,0.320196
1009.0,0.460446,0.520787,0.411644,0.483971,0.637765,0.649646,0.804734,0.609535,0.531398,0.565326,...,0.408387,0.439572,0.437207,0.613473,0.553127,0.59126,0.623526,0.598221,0.345596,0.349142
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21073.0,0.645208,0.707057,0.540335,0.750629,0.613657,0.582796,0.617493,0.71882,0.625418,0.683066,...,0.459939,0.711071,0.624469,0.860469,0.704583,0.800158,0.814038,0.877013,0.526433,0.549766
21075.0,0.56358,0.612646,0.514288,0.660261,0.527433,0.509805,0.503627,0.521423,0.568805,0.529331,...,0.568748,0.697779,0.66824,0.831029,0.645215,0.751218,0.659115,0.772947,0.509114,0.636437
21077.0,0.661325,0.678285,0.563459,0.721907,0.637364,0.550398,0.574345,0.639502,0.616605,0.643412,...,0.521117,0.669913,0.59051,0.812961,0.687627,0.745951,0.854579,0.817088,0.528301,0.517036
21079.0,0.638711,0.704193,0.48326,0.745703,0.682834,0.580391,0.598449,0.626253,0.552159,0.635748,...,0.296142,0.69931,0.63289,0.835176,0.727,0.837714,0.67632,0.829137,0.564235,0.538193
