# In [None]:
import numpy as np
import pandas as pd
from scipy.signal import correlate
import matplotlib.pyplot as plt
from scipy.sparse import lil_matrix


# Read the time-series table and set aside the 'date' column so that only the
# series columns remain in `data`.
data_with_date = pd.read_csv('temporal_series_20.csv')
data = data_with_date.drop(columns='date')

# Initialize parameters
max_tau = 15      # largest time delay scanned, in either direction
threshold = 0.05  # minimum peak cross-correlation that is kept

def time_delayed_cross_correlation(df, col1, col2, max_tau, threshold=0.05):
    """
    Return the peak time-delayed cross-correlation between two columns of df.

    With N_i = df[col1] and N_j = df[col2], the Pearson correlation between
    N_i(t + tau) and N_j(t) is computed for every integer lag tau in
    [-max_tau, max_tau], using only the samples where both series overlap
    (no wrap-around). The largest value over all lags is returned when it
    reaches `threshold`; otherwise 0 is returned.

    Args:
    df (pandas DataFrame): Table holding one numeric time series per column.
    col1 (label): Column of the first series (N_i).
    col2 (label): Column of the second series (N_j).
    max_tau (int): Maximum absolute time delay, in rows. Must be >= 0.
    threshold (float): Minimum peak correlation to report. Defaults to 0.05.

    Returns:
    float: Maximum cross-correlation over all lags, or 0.0 if it is below
    `threshold`.

    Raises:
    ValueError: If max_tau is negative.
    """
    if max_tau < 0:
        raise ValueError("max_tau must be non-negative")

    # Work on plain arrays so that slicing is positional and does not depend
    # on the DataFrame's index labels.
    series_i = df[col1].to_numpy(dtype=float)
    series_j = df[col2].to_numpy(dtype=float)
    n = len(series_i)

    cross_corrs = []
    for tau in range(-max_tau, max_tau + 1):
        overlap = n - abs(tau)
        if overlap < 2:
            # Fewer than two paired samples: correlation is undefined.
            cross_corrs.append(0.0)
            continue

        # Pair N_i(t + tau) with N_j(t) on the overlapping window only.
        if tau >= 0:
            window_i = series_i[tau:]
            window_j = series_j[:overlap]
        else:
            window_i = series_i[:overlap]
            window_j = series_j[-tau:]

        # Means and standard deviations come from the same window, so the
        # result is a proper Pearson coefficient in [-1, 1].
        denominator = window_i.std() * window_j.std()
        if denominator != 0:
            centered = (window_i - window_i.mean()) * (window_j - window_j.mean())
            corr = centered.mean() / denominator
        else:
            # A constant window has no defined correlation; treat it as 0.
            corr = 0.0

        cross_corrs.append(corr)

    max_corr = np.max(cross_corrs)
    if max_corr >= threshold:
        return float(max_corr)
    return 0.0

# Compute the whole adjacency matrix (not feasible on the full data set)
def ccr_matrix(df):
    """
    Fill a square table with time_delayed_cross_correlation for every ordered
    pair of columns of df, including each column paired with itself.

    The lag range is taken from the module-level max_tau.

    Args:
    df (pandas DataFrame): Table holding one time series per column.

    Returns:
    pandas DataFrame: Square frame whose index and columns are df.columns.
    """
    labels = df.columns
    # Start from an all-NaN frame; every cell is overwritten below.
    result = pd.DataFrame(np.nan, index=labels, columns=labels)

    for row_label in labels:
        for col_label in labels:
            value = time_delayed_cross_correlation(df, row_label, col_label, max_tau)
            result.loc[row_label, col_label] = value

    return result

# Function to compute just a submatrix of the adjacency matrix
def compute_submatrix(df, columns, filename):
    """
    Compute the cross-correlation adjacency matrix restricted to `columns`.

    Every ordered pair (col1, col2) drawn from `columns` is evaluated with
    time_delayed_cross_correlation, using the module-level max_tau as the
    lag range.

    Args:
    df (pandas DataFrame): Table holding one time series per column.
    columns (sequence of labels): Columns of df to include.
    filename (str or None): Where to save the result as CSV; nothing is
        written when this is empty or None.

    Returns:
    pandas DataFrame: Square frame indexed and labelled by `columns`.
    """
    # Local import: `product` is not imported at file level.
    from itertools import product

    submatrix = pd.DataFrame(np.nan, index=columns, columns=columns)
    for col1, col2 in product(columns, repeat=2):
        submatrix.loc[col1, col2] = time_delayed_cross_correlation(df, col1, col2, max_tau)

    # NOTE(review): the original stub accepted `filename` but never used it;
    # saving the submatrix as CSV is the assumed intent - confirm.
    if filename:
        submatrix.to_csv(filename)

    return submatrix
# Around 3105 columns is too many for an all-pairs computation, so first see
# what happens with a small subset: the first 10 columns.
time_series_subset = data.iloc[:, :10]


# Compute the cross-correlation for each ordered pair of distinct columns in
# the subset.
subset_columns = time_series_subset.columns
# To store results; the diagonal stays 0 because self-pairs are skipped.
adjacency_matrix = np.zeros((len(subset_columns), len(subset_columns)))

for i, col_i in enumerate(subset_columns):
    for j, col_j in enumerate(subset_columns):
        if i != j:
            # The function takes (df, col1, col2, max_tau) and already returns
            # the thresholded peak correlation (0 when below the threshold),
            # so its result is stored directly.
            adjacency_matrix[i, j] = time_delayed_cross_correlation(
                time_series_subset, col_i, col_j, max_tau
            )


adj_data = pd.DataFrame(adjacency_matrix, index=subset_columns, columns=subset_columns)
print(adj_data)

# OK, now we try another approach: batch processing. We split the computation into several parts and then merge the results.

