In [7]:
import numpy as np
import pandas as pd
from scipy.signal import correlate
import matplotlib.pyplot as plt
from scipy.sparse import lil_matrix
from itertools import product
from itertools import combinations
from datetime import datetime, timedelta
from multiprocessing import Pool


# Load the temporal series data from CSV.
# NOTE(review): the path is absolute and machine-specific; the notebook only runs where this file exists.
data_with_date = pd.read_csv('/Users/gianmarcoferrara/ComplexNetworksProject/temporal_series/temporal_series_3.csv')
# Drop the 'date' column so that only the series columns remain.
data = data_with_date.drop(columns=['date'])

# Initialize parameters.
# max_tau is the largest lag (in rows, scanned in both directions) used by the
# cross-correlation functions below.
max_tau = 15 

# Maximum of the time-delayed cross-correlation between two temporal series
def time_delayed_cross_correlation(df, col1, col2, max_tau):
    """
    Signed peak of the time-delayed cross-correlation between two columns.

    For every integer lag tau in [-max_tau, max_tau] the overlapping samples
    of the two series are correlated as

        C(tau) = sum_t (x_i[t] - mean_i) * (x_j[t + tau] - mean_j)
                 / (std_i * std_j * (n - |tau|))

    Means and standard deviations (numpy default, ddof=0) are taken over the
    full series rather than over the overlap, so C(tau) is not strictly
    bounded by 1 in absolute value.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    col1 (str): Name of the first time series column (x_i).
    col2 (str): Name of the second time series column (x_j).
    max_tau (int): Maximum lag, in rows, scanned in both directions.

    Returns:
    float: The largest C(tau) found. It is returned unchanged when the best
        lag is >= 0 and negated when the best lag is < 0, so the sign of the
        result carries the lag direction. Returns 0 when either series is
        constant (zero standard deviation).

    Lags whose magnitude is >= the series length have no overlapping samples
    and contribute 0 instead of raising a shape-mismatch error.
    """
    series_i = df[col1].values
    series_j = df[col2].values
    n_samples = len(series_i)
    mean_i, std_i = series_i.mean(), series_i.std()
    mean_j, std_j = series_j.mean(), series_j.std()

    # A constant series would force a division by zero below.
    if std_i == 0 or std_j == 0:
        return 0

    # Deviations from the full-series means do not depend on the lag,
    # so compute them once instead of once per lag.
    dev_i = series_i - mean_i
    dev_j = series_j - mean_j
    norm = std_i * std_j

    # Slot k holds C(tau) for tau = k - max_tau; untouched slots stay at 0.
    cross_corrs = np.zeros(2 * max_tau + 1)

    for tau in range(-max_tau, max_tau + 1):
        overlap = n_samples - abs(tau)
        if overlap <= 0:
            # No common samples at this lag: keep the 0 already stored.
            continue

        if tau < 0:
            # Negative lag: x_i[t] is paired with the earlier sample x_j[t - |tau|].
            numerator = np.sum(dev_i[-tau:] * dev_j[:overlap])
        else:
            # Zero or positive lag: x_i[t] is paired with the later sample x_j[t + tau].
            numerator = np.sum(dev_i[:overlap] * dev_j[tau:])

        denominator = norm * overlap
        cross_corrs[tau + max_tau] = numerator / denominator if denominator != 0 else 0

    # np.argmax returns the first maximum, i.e. the most negative lag on ties.
    max_index = np.argmax(cross_corrs)
    max_corr = cross_corrs[max_index]
    best_tau = max_index - max_tau  # Convert array index back to tau value

    # Encode the lag direction in the sign of the returned value.
    if best_tau >= 0:
        return max_corr
    return -max_corr


# Full adjacency matrix over every column of df (noted by the author as impossible to compute on the complete dataset)
def ccr_matrix(df):
    """
    Build the complete matrix of signed peak cross-correlations.

    Every ordered pair of columns of `df` is evaluated with
    time_delayed_cross_correlation, using the module-level `max_tau`.

    Returns:
    pandas.DataFrame: Square frame indexed and labelled by df.columns,
        created filled with NaN and overwritten entry by entry.
    """
    labels = df.columns
    result = pd.DataFrame(np.nan, index=labels, columns=labels)

    # product() walks the pairs in the same row-major order as two nested loops
    for row_label, col_label in product(labels, labels):
        result.loc[row_label, col_label] = time_delayed_cross_correlation(
            df, row_label, col_label, max_tau
        )

    return result

# Compute one diagonal block of the adjacency matrix (rows and columns taken from the same group)
def compute_submatrix(df, columns, filename, max_tau):
    """
    Compute, save and return the cross-correlation block of one column group.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    columns: Column labels forming both the rows and the columns of the block.
    filename (str): Path of the CSV file the block is written to.
    max_tau (int): Maximum lag passed to time_delayed_cross_correlation.

    Returns:
    pandas.DataFrame: The block, indexed and labelled by `columns`.

    Each ordered pair is evaluated separately. time_delayed_cross_correlation
    negates its result when the best lag is negative, and swapping the two
    series flips the lag, so entry (b, a) is in general not equal to entry
    (a, b) and must not be filled by mirroring. This costs twice as many
    evaluations as a triangular sweep, but keeps the diagonal blocks
    consistent with ccr_matrix and compute_cross_group_matrix.
    """
    submatrix = pd.DataFrame(index=columns, columns=columns)

    for col1, col2 in product(columns, repeat=2):
        submatrix.loc[col1, col2] = time_delayed_cross_correlation(df, col1, col2, max_tau)

    submatrix.to_csv(filename)
    print(f"Submatrix {filename} has been saved.")
    return submatrix

# Compute an off-diagonal block of the adjacency matrix (rows from one group, columns from another)
def compute_cross_group_matrix(df, group_a, group_b, filename, max_tau=None):
    """
    Compute, save and return the cross-correlation block between two groups.

    Args:
    df (pandas.DataFrame): Dataframe containing the time series data.
    group_a: Column labels used as the rows of the block.
    group_b: Column labels used as the columns of the block.
    filename (str): Path of the CSV file the block is written to.
    max_tau (int, optional): Maximum lag passed to
        time_delayed_cross_correlation. When omitted, the module-level
        `max_tau` is read at call time, which is what this function did
        before the parameter existed.

    Returns:
    pandas.DataFrame: The block, indexed by `group_a` with columns `group_b`.
    """
    if max_tau is None:
        # The parameter shadows the module-level name, so fetch the global
        # explicitly; this keeps existing four-argument calls working.
        max_tau = globals()["max_tau"]

    cross_matrix = pd.DataFrame(index=group_a, columns=group_b)
    for col1, col2 in product(group_a, group_b):
        cross_matrix.loc[col1, col2] = time_delayed_cross_correlation(df, col1, col2, max_tau)
    cross_matrix.to_csv(filename)
    print(f"submatrix {filename} has been saved")
    return cross_matrix

# To compute the submatrices, the 3104 columns have to be divided into groups
def divide_into_groups(df, group_sizes):
    """
    Split the column labels of `df` into consecutive groups.

    Args:
    df (pandas.DataFrame): Dataframe whose column labels are partitioned.
    group_sizes: Iterable of group lengths, consumed in order.

    Returns:
    list: One list of column labels per entry of `group_sizes`. Columns past
        the sum of the sizes are left out, and a group reaching beyond the
        last column is simply shorter.
    """
    names = df.columns.tolist()

    # Running offsets: group k spans names[boundaries[k]:boundaries[k + 1]]
    boundaries = [0]
    for width in group_sizes:
        boundaries.append(boundaries[-1] + width)

    return [names[lo:hi] for lo, hi in zip(boundaries, boundaries[1:])]

# Change of plan: the pieces of the adjacency matrix are computed manually, one at a time.
# The columns are split by position into three groups.
# NOTE(review): the bounds are hard-coded; 3104 is presumably the total number
# of columns in `data` — confirm before reusing with another dataset.
group_1 = data.columns[0:1000]
group_2 = data.columns[1000:2000]
group_3 = data.columns[2000:3104]


# Diagonal blocks (each group against itself), currently disabled.
#filename_sub_1 = "2_3_sub_mat_1.csv"
#compute_submatrix(data, group_1, filename_sub_1, max_tau)

#filename_sub_2 = "2_3_sub_mat_2.csv"
#compute_submatrix(data, group_2, filename_sub_2, max_tau)

#filename_sub_3 = "2_3_sub_mat_3.csv"
#compute_submatrix(data, group_3, filename_sub_3, max_tau)

# Off-diagonal blocks: rows from the first group, columns from the second.
# Each call writes its CSV and prints a confirmation line.
filename_cross_1 = "2_3_cross_matrix_1_2.csv"
compute_cross_group_matrix(data, group_1, group_2, filename_cross_1)

filename_cross_2 = "2_3_cross_matrix_1_3.csv"
compute_cross_group_matrix(data, group_1, group_3, filename_cross_2)

# NOTE(review): this is the last expression of the cell, so its returned
# DataFrame is presumably what the notebook displays as the cell output.
filename_cross_3 = "2_3_cross_matrix_2_3.csv"
compute_cross_group_matrix(data, group_2, group_3, filename_cross_3)


submatrix 2_3_cross_matrix_1_2.csv has been saved
submatrix 2_3_cross_matrix_1_3.csv has been saved
submatrix 2_3_cross_matrix_2_3.csv has been saved


Unnamed: 0,38099.0,38101.0,38103.0,38105.0,39001.0,39003.0,39005.0,39007.0,39009.0,39011.0,...,56027.0,56029.0,56031.0,56033.0,56035.0,56037.0,56039.0,56041.0,56043.0,56045.0
21083.0,-0.41201,0.226866,0.230842,0.298978,0.39356,0.51524,0.424936,0.161822,0.473718,0.520441,...,0.123296,-0.483642,-0.188368,0.217071,0.433703,-0.479962,-0.478908,-0.186835,0.31429,0.276642
21085.0,-0.14055,0.601619,0.194585,-0.544925,-0.355572,-0.650276,-0.21485,0.114301,0.249648,-0.599927,...,-0.440234,-0.169305,0.343662,-0.484828,-0.172328,-0.061408,-0.340537,0.071778,0.151025,-0.47824
21087.0,-0.228554,0.637273,-0.370618,0.679417,-0.442337,-0.621052,0.141038,-0.020075,0.292062,0.549815,...,0.551497,-0.222479,0.274161,0.548176,-0.174993,-0.090562,-0.142784,0.027199,-0.530859,0.447422
21089.0,-0.226225,0.670362,0.274656,0.5177,-0.379916,-0.568089,0.181417,0.002622,0.264645,-0.573418,...,-0.36944,0.183389,0.278369,-0.516644,-0.300538,-0.127458,-0.260118,0.035365,-0.213926,-0.393764
21091.0,0.295038,0.340719,0.179656,0.30857,0.221409,0.214362,0.330148,0.129832,0.357117,0.212326,...,-0.382961,-0.399412,0.263884,0.169966,0.255763,-0.308653,0.500819,-0.146781,0.232837,0.236882
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38089.0,-0.223828,0.608535,-0.386597,-0.583474,-0.596216,-0.666634,0.195721,-0.013577,0.192876,-0.611692,...,-0.384368,-0.188035,0.245517,0.613667,-0.188271,-0.154858,-0.231529,-0.103461,-0.335942,-0.520032
38091.0,-0.413897,0.404848,0.286675,-0.33054,0.355656,-0.389054,-0.31887,0.126655,0.29666,-0.328223,...,-0.315175,-0.192067,0.334559,-0.348071,-0.371089,-0.305547,-0.311542,-0.104091,0.263164,-0.310944
38093.0,-0.144927,-0.566308,-0.16299,-0.616856,-0.248157,-0.569722,0.18648,0.035856,0.32299,-0.574585,...,-0.576448,0.09169,0.333339,-0.59134,-0.090948,0.027192,0.097365,-0.03227,-0.087416,-0.501845
38095.0,-0.31683,0.305916,0.170683,-0.354649,0.217376,-0.24739,0.224439,0.134657,0.30881,-0.28704,...,-0.281702,-0.261246,0.319219,0.261456,0.238498,0.212331,0.295779,-0.052365,0.127899,0.415548
