# Model Simulation

In [3]:
## Standard imports
import pandas as pd
import numpy as np
## Script imports
import simuFlares
from STL_IF import STLIF
import detectFlare
from sigma_clip import sigma_clip

## Setup
# Load the light curve: keep only the PDCSAP flux column (indexed by time)
# and drop rows where the flux is missing.
pdcsap = pd.read_csv("../0.Data/031381302.csv", index_col="time")
pdcsap = pdcsap[["pdcsap_flux"]].dropna()
# Restrict to the calm interval: index values from 1442 to 1449, inclusive
pdcsap = pdcsap.query("1442 <= index <= 1449")
# Sample positions 0..N-1; handed to the flare simulator as its time array
inds = np.arange(len(pdcsap))

## Flare parameters
# Number of flares requested from the simulator per run
num_flares = 5
# Base half-peak timescale: larger values => all flares last longer
# (relative to their amplitudes).
# NOTE(review): presumably expressed in steps of `inds` (cadences). An earlier
# note here read "e.g. 10 minutes (2-min cadence)", which does not obviously
# match a value of 2.5 -- confirm the intended units/duration.
t_half = 2.5
# Flare amplitude (Pareto) parameters
# Scale (~ x_min): baseline amplitude (values will rarely be smaller than this)
xm = 10
# Shape: smaller => heavier tail = more large flares
alpha = 1
# Offset amplitudes (shift)
offset = 30
# Amplitude cap
upper = 100

## Isolation Forest parameters
# Expected proportion of anomalies
contamination = 0.001
# Number of trees
n_estimators = 100
# Number of samples used to train each tree
sample_size = 256

## Simulate
# Number of simulations (100 is noted as an alternative value)
n = 3
# One (precision, recall, f1) tuple is appended to each list per simulation
stlif_metrics = []
sigma_metrics = []

# Run `n` independent simulations. Each one injects a fresh set of synthetic
# flares into the calm baseline, runs both detectors on the result, and records
# event-level (precision, recall, f1) for each detector.
for i in range(n):
    ## Simulate flares
    flare_lightcurve, flare_times = simuFlares.kepler_flare(
        inds,                           # time array
        t_half,                         # base half-peak width
        num_flares,                     # number of flares
        flux_dist=simuFlares.rpareto,   # amplitude distribution
        xm=xm, alpha=alpha, offset=offset, upper=upper
    )
    # Inject flares into a copy of the baseline, never into `pdcsap` itself.
    # Adding in place would leave every earlier run's flares in the data while
    # scoring only against this run's `flare_times`, so the runs would not be
    # independent and the averaged metrics would be skewed.
    injected = pdcsap.copy()
    injected["pdcsap_flux"] += flare_lightcurve

    ## Run model: STLIF
    data = STLIF(
        injected,
        contamination=contamination,
        n_estimators=n_estimators,
        sample_size=sample_size,
    )

    ## Calculate metrics
    prec, rec, f1 = detectFlare.event_level_scores(
        real_flares=flare_times, y_pred=data["anomaly"].values
    )
    stlif_metrics.append((prec, rec, f1))

    ## Run model: STLSigmaClip
    # Re-uses the 'resid' column of the STLIF output, so both detectors are
    # compared on the same residual series.
    anomalies = sigma_clip(data["resid"], sigma=3.0, consecutive_pts=3).ravel()

    ## Calculate metrics
    prec, rec, f1 = detectFlare.event_level_scores(
        real_flares=flare_times, y_pred=anomalies
    )
    sigma_metrics.append((prec, rec, f1))

## Report: STLIF
# Column-wise mean over the per-run (precision, recall, f1) tuples
avg_prec, avg_rec, avg_f1 = np.mean(stlif_metrics, axis=0)
print(
    f"After {n} runs:",
    "STLIF:",
    f"  Avg Precision: {avg_prec:.3f}",
    f"  Avg Recall:    {avg_rec:.3f}",
    f"  Avg F1 Score:  {avg_f1:.3f}",
    sep="\n",
)

## Report: sigma clipping (3 sigma, 3 consecutive points)
# Same averaging, applied to the sigma-clip scores
avg_prec, avg_rec, avg_f1 = np.mean(sigma_metrics, axis=0)
print(
    "3-3sigma:",
    f"  Avg Precision: {avg_prec:.3f}",
    f"  Avg Recall:    {avg_rec:.3f}",
    f"  Avg F1 Score:  {avg_f1:.3f}",
    sep="\n",
)

After 3 runs:
STLIF:
  Avg Precision: 1.000
  Avg Recall:    0.800
  Avg F1 Score:  0.880
3-3sigma:
  Avg Precision: 1.000
  Avg Recall:    0.600
  Avg F1 Score:  0.737
