<a href="https://colab.research.google.com/github/hanggao811/AnomalyDetectionLIGO/blob/main/CSDChannel_Autoencoder_tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# Google Colab mount and working directory setup.
# `os` is imported unconditionally: the seeding code later in this cell uses
# os.environ, so it must also be available when the Colab import fails.
import os

try:
    from google.colab import drive
except ImportError:
    pass  # Not running in Colab, skip mounting
else:
    # Only reached in Colab: mount Drive and work from the project folder.
    drive.mount('/content/drive')
    os.chdir('/content/drive/My Drive/hanggao')

import random
import numpy as np
import tensorflow as tf
# Select matplotlib's "notebook" backend for this kernel.
# NOTE(review): the recorded outputs of this cell show only
# "<IPython.core.display.Javascript object>" placeholders where figures are
# produced — confirm this backend actually renders in the target frontend.
%matplotlib notebook
import matplotlib.pyplot as plt
# Close every figure that is currently open before this run creates new ones
# (presumably so save_history only picks up this run's figures — TODO confirm).
plt.close('all')

from Tensorflow.new_dataset.autoencoder_utils import (
    load_bg_data,
    load_signals_by_snr,
    FlexibleAutoencoderSubclass,
    Evaluator,
    SaveModelsCallback,
    compute_csd_features_batch,
    config_already_used
)

# Reproducibility: one shared seed for Python's `random`, NumPy and TensorFlow.
# NOTE(review): the two environment variables are written after the
# interpreter has started and after TensorFlow was imported above — confirm
# they still take effect when set this late.
seed = 42
os.environ.update({
    'PYTHONHASHSEED': str(seed),
    'TF_DETERMINISTIC_OPS': '1',
})
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed)

# Experiment configuration for the CSD autoencoder.
# Key order is kept stable because the whole dict is later handed to
# config_already_used / save_history.
config = dict(
    batch_size=64,
    learning_rate=1e-4,
    n_epochs=60,
    save_interval=2,
    snr_threshold=0,
    input_channels=None,  # placeholder; filled in once the CSD features are loaded
    encoder_layers=[
        dict(out_channels=40, kernel_size=1, stride=1, activation='relu'),
    ],
    bottleneck_layer=dict(
        filters=20,
        kernel_size=1,
        strides=1,
        padding='same',
        activation='relu',
    ),
    decoder_layers=[
        dict(out_channels=50, kernel_size=1, stride=1, activation='relu'),
        # out_channels of the last layer is overwritten with the loaded
        # input_channels further down in this cell.
        dict(out_channels=2, kernel_size=1, stride=1, activation=None),
    ],
    use_clr=True,
    clr_params=dict(
        initial_lr=1e-5,
        max_lr=5e-4,
        step_size=2000,
    ),
    csd_fs=4096,
    csd_nperseg=100,
    csd_noverlap=80,  # 80 of the 100 samples per segment overlap (80%)
)

# Report whether this configuration already appears in the history file.
# Either way the cell carries on; nothing here stops the experiment.
# NOTE(review): the check sees the config before input_length /
# input_channels / the last decoder out_channels are filled in below, while
# save_history receives the finalised dict — confirm config_already_used
# accounts for that difference.
if not config_already_used(config, "csd_autoencoder_history.jsonl"):
    print("This is a new config, running experiment...")
else:
    print("This config has already been used!")

# Background train/test sets; compute_csd_features_batch is passed as the
# preprocessing function together with the csd_* settings from the config.
BG_train, BG_test = load_bg_data(
    preprocess_fn=compute_csd_features_batch,
    preprocess_kwargs={
        name: config[f'csd_{name}'] for name in ('fs', 'nperseg', 'noverlap')
    },
)
print("BG_train shape:", BG_train.shape)

# Copy the data-dependent sizes into the config (axis 1 -> input_length,
# axis 2 -> input_channels) and make the last decoder layer emit as many
# channels as the input has.
config.update(
    input_length=BG_train.shape[1],
    input_channels=BG_train.shape[2],
)
config['decoder_layers'][-1]['out_channels'] = config['input_channels']

# Instantiate the autoencoder from the finalised config, then compile it with
# Adam at the configured learning rate and a mean-squared-error loss.
model = FlexibleAutoencoderSubclass(config)
optimizer = tf.keras.optimizers.Adam(learning_rate=config['learning_rate'])
model.compile(optimizer=optimizer, loss='mse')

# Model-saving callback, configured with the save interval from the config;
# its `saved_models` attribute is read back after training.
save_callback = SaveModelsCallback(save_interval=config['save_interval'])
callbacks = [save_callback]

# The CLR scheduler (presumably "cyclical learning rate" — TODO confirm) is
# opt-in through config['use_clr'].
if config.get('use_clr', False):
    from Tensorflow.new_dataset.autoencoder_utils import get_clr_scheduler
    clr_callback = get_clr_scheduler(
        **{
            key: config['clr_params'][key]
            for key in ('initial_lr', 'max_lr', 'step_size')
        }
    )
    callbacks += [clr_callback]

# Fit the autoencoder on the background set; the input doubles as the target.
history = model.fit(
    BG_train,
    BG_train,
    epochs=config['n_epochs'],
    batch_size=config['batch_size'],
    callbacks=callbacks,
)

# Models gathered by the save callback during training, handed to the
# evaluator below.
models = save_callback.saved_models

# Signal sets, preprocessed with the same CSD settings as the background data.
# `combined_signals` is not used again in the visible part of this cell.
test_signals, combined_signals, snr_dict = load_signals_by_snr(
    snr_thresh=config['snr_threshold'],
    data_type='combined',
    preprocess_fn=compute_csd_features_batch,
    preprocess_kwargs={
        name: config[f'csd_{name}'] for name in ('fs', 'nperseg', 'noverlap')
    },
)

# Score every saved model against the background test set and the signals.
print("Evaluating models...")
evaluator = Evaluator(BG_test, test_signals, snr_dict)
evaluator.evaluate(models)

# Summary plots provided by the evaluator.
for draw_plot in (evaluator.plot_auc_vs_epoch, evaluator.plot_roc_curves):
    draw_plot()

# TPR-vs-SNR at a false-positive rate of k/56000
# (56000 is presumably the number of background test samples — TODO confirm).
for k in (1,):
    fpr_target = k / 56000
    print(f"Plotting TPR vs SNR at FPR={k}/56000")
    evaluator.plot_tpr_vs_snr_at_fpr(fpr_target=fpr_target)

# Report the best epoch (if the evaluator found one) and plot the
# reconstruction-error distributions recorded for that epoch.
best_epoch = evaluator.best_epoch
aucs_combined = evaluator.aucs_combined
if best_epoch is not None and aucs_combined is not None and best_epoch in aucs_combined:
    print(f"Best epoch: {best_epoch}, Combined AUC: {aucs_combined[best_epoch]:.4f}")
else:
    print("No best epoch or AUC found.")

# Plot MSE distribution.
# The lookup in all_errors is keyed by the best epoch, so it is skipped when
# no best epoch exists instead of failing right after the message above.
if best_epoch is None:
    print("Skipping MSE distribution plot: no best epoch available.")
else:
    errors = evaluator.all_errors[best_epoch]
    bg_error = errors['BG']

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.hist(bg_error, bins=100, alpha=0.5, label='Background', density=True)
    for label, sig_error in errors.items():
        if label == 'BG':
            continue  # background is already drawn above
        ax.hist(sig_error, bins=100, alpha=0.5, label=label, density=True)
    ax.set_xlabel('MSE (Reconstruction Error)')
    ax.set_ylabel('Density')
    ax.set_title('MSE Distribution: Background vs. Signals')
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    plt.show()

# Persist this run. Being the last expression of the cell, the return value is
# echoed as output; in the recorded run it is a tuple of the saved figure
# paths and the history file name, and the call itself logs
# "[save_history] Saved 4 figures ..." / "Saved experiment record ...".
# The config passed here is the finalised one (input_length / input_channels
# filled in from the loaded data).
evaluator.save_history(config, model_name="csd_autoencoder")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
This is a new config, running experiment...
BG_train shape: (200000, 2, 51)
Epoch 1/60
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 4ms/step - loss: 4.6958e-08
Epoch 2/60
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 4ms/step - loss: 4.5421e-08
Epoch 3/60
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 4ms/step - loss: 4.5288e-08
Epoch 4/60
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 4ms/step - loss: 4.5559e-08
Epoch 5/60
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 4ms/step - loss: 4.4324e-08
Epoch 6/60
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 4ms/step - loss: 4.2497e-08
Epoch 7/60
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 4ms/step - loss: 4.1709e-08
Epoch 8/60
[1m3125/3125[0m [32m━━━━━

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Plotting TPR vs SNR at FPR=1/56000


<IPython.core.display.Javascript object>

WNB SNR bin 0: 4000 samples
    WNB SNR bin 0: 150 signals above threshold (threshold=0.0000)
WNB SNR bin 1: 2670 samples
    WNB SNR bin 1: 150 signals above threshold (threshold=0.0000)
WNB SNR bin 2: 2252 samples
    WNB SNR bin 2: 150 signals above threshold (threshold=0.0000)
WNB SNR bin 3: 1882 samples
    WNB SNR bin 3: 149 signals above threshold (threshold=0.0000)
WNB SNR bin 4: 1482 samples
    WNB SNR bin 4: 146 signals above threshold (threshold=0.0000)
WNB SNR bin 5: 1045 samples
    WNB SNR bin 5: 120 signals above threshold (threshold=0.0000)
WNB SNR bin 6: 647 samples
    WNB SNR bin 6: 94 signals above threshold (threshold=0.0000)
WNB SNR bin 7: 370 samples
    WNB SNR bin 7: 63 signals above threshold (threshold=0.0000)
WNB SNR bin 8: 175 samples
    WNB SNR bin 8: 33 signals above threshold (threshold=0.0000)
KinkKink SNR bin 0: 4000 samples
    KinkKink SNR bin 0: 56 signals above threshold (threshold=0.0000)
KinkKink SNR bin 1: 2660 samples
    KinkKink SNR bin 1: 

<IPython.core.display.Javascript object>

[save_history] Saved 4 figures to csd_autoencoder_figures/
[save_history] Saved experiment record to csd_autoencoder_history.jsonl


(['csd_autoencoder_figures/csd_autoencoder_fig_1_20250729_110149.png',
  'csd_autoencoder_figures/csd_autoencoder_fig_2_20250729_110149.png',
  'csd_autoencoder_figures/csd_autoencoder_fig_3_20250729_110149.png',
  'csd_autoencoder_figures/csd_autoencoder_fig_4_20250729_110149.png'],
 'csd_autoencoder_history.jsonl')