In [3]:
import pandas as pd
import numpy as np
import os

In [27]:
def calculate_and_save_adjacency_matrices(end_dates, data_dir = './data/processed/correlation matrix', thresholds = (0.5, 0.4, 0.6), is_srd = False, output_dir = './data/processed/adjacency_matrices'):
    """Threshold saved correlation matrices into 0/1 adjacency matrices and save them.

    For every threshold and every end date, reads ``{data_dir}/{end_date}.csv``
    (first column used as the index), coerces every column to numeric, replaces
    missing / non-numeric entries with 0, and sets each entry to ``1`` when it is
    ``>= threshold`` and ``0`` otherwise. Row and column labels are kept. Each
    result is written as both CSV and pickle to
    ``{output_dir}/threshold-{threshold}/{end_date}.csv`` / ``.pkl``.

    Parameters
    ----------
    end_dates : iterable
        Values whose string form names the correlation CSV files (without the
        ``.csv`` extension). The same names are reused for the output files.
    data_dir : str
        Directory holding the input correlation matrices.
    thresholds : iterable of float
        Cut-off values; one output sub-folder is produced per threshold.
    is_srd : bool
        Currently unused; accepted so existing calls keep working.
        NOTE(review): nothing in this function distinguishes SRD from non-SRD
        input, so both kinds of run read and write the same default locations
        unless ``data_dir`` / ``output_dir`` are passed explicitly.
    output_dir : str
        Root directory for the adjacency matrices. Defaults to the location the
        files were always written to.

    Raises
    ------
    FileNotFoundError
        If a correlation CSV for one of ``end_dates`` does not exist.
    """
    # Materialise once so a generator is not exhausted after the first threshold.
    end_dates = list(end_dates)

    for threshold in thresholds:
        adj_mat_path = os.path.join(output_dir, f'threshold-{threshold}')

        # Create the directory that is actually written to below. Previously a
        # different directory (under data_dir) was created, so saving failed
        # whenever the real output directory did not already exist.
        os.makedirs(adj_mat_path, exist_ok=True)

        for end_date in end_dates:
            corr_df = pd.read_csv(os.path.join(data_dir, f'{end_date}.csv'), index_col=0)

            # Non-numeric cells become NaN, then NaN becomes 0 so the comparison
            # below is well defined for every cell.
            corr_df = corr_df.apply(pd.to_numeric, errors='coerce')
            corr_df = corr_df.fillna(0)

            adjacency_matrix = np.where(corr_df.values >= threshold, 1, 0)

            # Write back into the frame so the index / column labels are preserved.
            corr_df.iloc[:, :] = adjacency_matrix

            corr_df.to_csv(os.path.join(adj_mat_path, f'{end_date}.csv'))
            corr_df.to_pickle(os.path.join(adj_mat_path, f'{end_date}.pkl'))


# For untreated network

In [24]:
# Read the table only to extract its calendar dates, then release it.
gt_df = pd.read_csv('./data/processed/gt.csv', parse_dates=['date'])
dates = gt_df['date'].dt.date
del gt_df

window_width = 30

# Every position from `window_width` up to the last date closes one window;
# each window is stored as a (start, stop) index pair.
window_stops = range(window_width, len(dates))
selection_index_array = [(stop - window_width, stop) for stop in window_stops]

# The date found at each stop index is collected as that window's end date.
end_dates = [dates[stop] for stop in window_stops]
len(end_dates)

336

In [28]:
# Build and save adjacency matrices for the untreated end dates, using the
# function's default data_dir and thresholds.
calculate_and_save_adjacency_matrices(end_dates=end_dates)

# For treated (SRD) network

In [21]:
# Load the pickled SRD output only to extract its 'date' values, then release it.
srd_df = pd.read_pickle('./data/processed/[03]srd_out/srd_out.pkl')
dates = srd_df['date'].values
del srd_df

window_width = 30

# Every position from `window_width` up to the last date closes one window;
# each window is stored as a (start, stop) index pair.
window_stops = range(window_width, len(dates))
selection_index_array = [(stop - window_width, stop) for stop in window_stops]

# The date found at each stop index is collected as that window's end date.
end_dates = [dates[stop] for stop in window_stops]
len(end_dates)

335

In [23]:
# NOTE(review): is_srd=True is passed here, but the function defined in this
# notebook never reads is_srd, so this call uses the same default data_dir and
# the same output location as the untreated run above — confirm that is intended.
calculate_and_save_adjacency_matrices(end_dates=end_dates, is_srd = True)