In [1]:
import pandas as pd
import numpy as np

# Function to convert a string like "[-12 -11 -10 ...]" into a NumPy array of floats
def parse_array(x):
    """Turn a bracketed, whitespace-separated number string into a float array.

    Parameters
    ----------
    x : str or missing value
        Text such as "[-12 -11 -10]". Leading/trailing square brackets are
        stripped and the remainder is split on whitespace.

    Returns
    -------
    numpy.ndarray or None
        A 1-D array of floats, or None when `x` is missing (per `pd.isna`)
        or when any step of the parsing raises. In the latter case the
        offending value and the exception are printed.
    """
    if not pd.isna(x):
        try:
            tokens = x.strip("[]").split()
            return np.array(list(map(float, tokens)))
        except Exception as e:
            # Best-effort parsing: report the bad cell and fall through to None.
            print("Error parsing:", x, e)
    return None

# Read the CSV file
df = pd.read_csv('../output/correlation_results_exact_match.csv')

# Create new columns with parsed numpy arrays from the string representations
df['lags_arr'] = df['lags'].apply(parse_array)
df['ccf_values_arr'] = df['ccf_values'].apply(parse_array)

# Drop rows where EITHER array is missing (None or NaN). A row whose
# ccf_values parsed but whose lags did not cannot be attributed to any lag and
# would otherwise fail in the length computation / lookups below.
df_valid = df.dropna(subset=['lags_arr', 'ccf_values_arr'])

if df_valid.empty:
    raise ValueError("No rows with valid ccf_values found!")

# Number of lags in the first valid row. Kept as a module-level name for
# reference; the aggregation loop below does not depend on it, so rows with a
# different number of lags are handled correctly.
array_length = len(df_valid.iloc[0]['lags_arr'])

# Set up a dictionary to collect ccf values for each lag (using the lag value as key).
# It is seeded from the first valid row so that lags which only ever carry NaN
# values still appear in the result (with a NaN median).
lag_values = {lag: [] for lag in df_valid.iloc[0]['lags_arr']}

# Loop over the valid rows
for idx, row in df_valid.iterrows():
    lags = row['lags_arr']
    ccf_values = row['ccf_values_arr']
    # The two arrays are matched position by position; if their lengths differ
    # the pairing is ambiguous, so report the row and leave it out.
    if len(lags) != len(ccf_values):
        print(f"Skipping row {idx}: {len(lags)} lags but {len(ccf_values)} ccf_values")
        continue
    # Keep only positions whose ccf value is not NaN
    not_nan = ~np.isnan(ccf_values)
    for lag, value in zip(lags[not_nan], ccf_values[not_nan]):
        # setdefault tolerates lags that were not present in the first row
        lag_values.setdefault(lag, []).append(value)

# Now, calculate the median for each lag (NaN when a lag collected no values)
median_ccf_per_lag = {
    lag: (np.median(values) if values else np.nan)
    for lag, values in lag_values.items()
}

# Print the results
print("Median ccf_values for each lag:")
for lag in sorted(median_ccf_per_lag.keys()):
    print(f"Lag {lag}: {median_ccf_per_lag[lag]}")


Median ccf_values for each lag:
Lag -12.0: -0.069444625
Lag -11.0: -0.06771856500000001
Lag -10.0: -0.036306000000000005
Lag -9.0: 0.048933725
Lag -8.0: 0.07530649
Lag -7.0: 0.06277991999999999
Lag -6.0: 0.042549915
Lag -5.0: 0.02647176
Lag -4.0: 0.01064043
Lag -3.0: -0.013144335
Lag -2.0: -0.04511925
Lag -1.0: -0.066889615
Lag 0.0: -0.09944414045
Lag 1.0: -0.103077805
Lag 2.0: -0.0614593016
Lag 3.0: 0.043142269999999996
Lag 4.0: 0.07060177000000001
Lag 5.0: 0.06272379
Lag 6.0: 0.040648365
Lag 7.0: 0.024329879999999998
Lag 8.0: 0.0043476999999999995
Lag 9.0: -0.015950505
Lag 10.0: -0.03728097
Lag 11.0: -0.0555434543
Lag 12.0: -0.073700715
