In [1]:
import sys

# Absolute path to the project checkout. This location is site-specific and
# has to be edited when the notebook is run on a different machine.
MAIN_DICT = "/gws/nopw/j04/ai4er/users/pn341/earthquake-predictability"

# Put the project root on the import path so project-local modules can be
# imported. The membership check keeps the cell idempotent: re-running it no
# longer appends the same entry to sys.path again.
if MAIN_DICT not in sys.path:
    sys.path.append(MAIN_DICT)

In [2]:
import os
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from utils.load import add_noise, load_data
from utils.params import set_param

# Notebook-wide settings, applied once before any analysis cell runs.
RANDOM_SEED = 42

# Silence every warning category for the rest of the session.
warnings.filterwarnings(action="ignore")

# Use seaborn's "ticks" theme for all subsequent figures.
sns.set_theme(style="ticks")

# Seed NumPy's legacy global RNG so random draws are repeatable.
np.random.seed(RANDOM_SEED)

In [3]:
# Select the experiment and fetch its parameter set.
exp = "p4581"
params = set_param(exp)

# Directory lookup handed to the loader: the project root plus the data
# sub-path named by params["dir_data"].
data_dir = MAIN_DICT + "/data/" + params["dir_data"]
dirs = {"main": MAIN_DICT, "data": data_dir}

# Load the experiment, then keep only the first two columns of Y.
X, Y, t, dt, vl = load_data(exp, dirs, params)
Y = Y[:, :2]

In [4]:
# Column labels, in the same order as the arrays are stacked below.
column_names = [
    "det_shear_stress",
    "obs_shear_stress",
    "obs_normal_stress",
    "time",
]

# Place X, Y and the time values (reshaped to a single column) side by side,
# then wrap the resulting matrix in a labelled frame.
stacked = np.hstack((X, Y, t.reshape(-1, 1)))
df = pd.DataFrame(stacked, columns=column_names)
df.head()

Unnamed: 0,det_shear_stress,obs_shear_stress,obs_normal_stress,time
0,0.002811,1.29911,2.99288,0.0
1,0.002731,1.29903,2.99008,0.001
2,0.002231,1.29853,2.98854,0.002
3,0.002411,1.29871,2.98849,0.003
4,0.002241,1.29854,2.99069,0.004


In [5]:
# Window length and stride are the same, so each retained row is the mean of
# one block of consecutive samples.
window_size = int(1e3)

# Rolling mean evaluated every `window_size` rows; rows containing NaN (such
# as the first, incomplete window) are then dropped.
windowed_means = df.rolling(window=window_size, step=window_size).mean()
sampled_df = windowed_means.dropna()
sampled_df.head()

Unnamed: 0,det_shear_stress,obs_shear_stress,obs_normal_stress,time
1000,0.010441,1.306805,2.989703,0.500503
2000,0.023812,1.320308,2.989567,1.500508
3000,0.033829,1.330455,2.989649,2.500513
4000,0.040971,1.337728,2.989704,3.500518
5000,0.045985,1.342874,2.989764,4.500523


In [6]:
from scipy.stats import ttest_rel

def calculate_variance_loss(original, downsampled):
    """Return the variance of ``original`` minus the variance of ``downsampled``.

    Both arguments only need to expose a ``.var()`` method (for example a
    pandas Series). A positive result means the downsampled data has the
    smaller variance.
    """
    variance_before = original.var()
    variance_after = downsampled.var()
    return variance_before - variance_after

def test_statistical_significance(original, downsampled):
    downsampled_aligned = downsampled.reindex(original.index, method='nearest')
    t_stat, p_value = ttest_rel(original, downsampled_aligned)
    return t_stat, p_value

In [7]:
# Pull the same column from the full-resolution and the downsampled frame.
column = "det_shear_stress"
original_series = df[column]
downsampled_series = sampled_df[column]

# Run both comparisons on that pair of series.
variance_loss = calculate_variance_loss(original_series, downsampled_series)
t_stat, p_value = test_statistical_significance(
    original_series, downsampled_series
)

print(f"[1] Variance Loss: {variance_loss}")
print(f"[2] Statistical Significance (p-value): {p_value}")

# Report the verdict at the 5% significance level.
print(
    "[3] Null hypothesis (identical averages) IS rejected."
    if p_value < 0.05
    else "[3] Null hypothesis (identical averages) CANNOT be rejected."
)

[1] Variance Loss: 0.0014469620926355767
[2] Statistical Significance (p-value): 0.4095960486436466
[3] Null hypothesis (identical averages) CANNOT be rejected.
