<a href="https://colab.research.google.com/github/Miguel-LlamasLanza/Fink_earlyTDE_v2/blob/main/notebooks/Preprocessing_cuts_and_feature_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **1. Load data**

In [None]:
# Run this cell to download the data files this notebook needs.

import pathlib
import os

# Destination directory for the downloads; it is created below if missing.
# NOTE(review): '/content' looks like the Colab runtime directory — confirm before running elsewhere.
data_dir = '/content'
pathlib.Path(data_dir).mkdir(parents=True, exist_ok=True)

# Maps each local destination path to the id that is appended to the Google Drive URL below.
files_to_download = {
    os.path.join(data_dir, "mallorn_data_fink_SNR_Full_LCs.pkl"): "1DNk-sk3a2fC1v8dCx6KqyHDjtxMLWi2W",
    os.path.join(data_dir, "mallorn_data_fink_SNR_30d_LCs.pkl"): "1bb1fgySX2KRMFxWLzSpFbLfq3vUOtWtj"}

for fpath, fid in files_to_download.items():
    url = f"https://drive.google.com/uc?id={fid}"
    # Only fetch files that are not already on disk, so re-running the cell skips existing files.
    if not os.path.exists(fpath):
        print(f"Downloading {os.path.basename(fpath)}...")
        # IPython shell escape: {url} and {fpath} are interpolated from the Python variables above.
        !gdown "{url}" -O "{fpath}"
    else:
        print(f"{os.path.basename(fpath)} already exists, skipping.")

mallorn_data_fink_SNR_Full_LCs.pkl already exists, skipping.
mallorn_data_fink_SNR_30d_LCs.pkl already exists, skipping.


In [None]:
# Load data
import pandas as pd

# Read the 30-day light-curve table from the directory the download cell wrote
# it to (`data_dir`), instead of relying on the kernel's working directory
# happening to be that same directory. `os` and `data_dir` come from the
# download cell, which must be run first.
# NOTE: unpickling can execute arbitrary code — only load files you trust.
df = pd.read_pickle(os.path.join(data_dir, 'mallorn_data_fink_SNR_30d_LCs.pkl'))

In [None]:
# Preview the first rows of the loaded table.
df.head()

Unnamed: 0,object_id,redshift,ebv,SpecType,target,lightcurve,n_detections,lightcurve_preprocessed_30d
0,Dornhoth_fervain_onodrim,3.049,0.11,AGN,0,mjd time_t0 filter flux fl...,13,mjd time_t0 filter flux flu...
1,Dornhoth_galadh_ylf,0.4324,0.058,SN II,0,mjd time_t0 filter flux f...,11,mjd time_t0 filter flux f...
2,Elrim_melethril_thul,0.4673,0.577,AGN,0,mjd time_t0 filter flux fl...,20,mjd time_t0 filter flux flux...
3,Ithil_tobas_rodwen,0.6946,0.012,AGN,0,mjd time_t0 filter flux f...,82,mjd time_t0 filter flux fl...
4,Mirion_adar_Druadan,0.4161,0.058,AGN,0,mjd time_t0 filter flux f...,18,mjd time_t0 filter flux flu...


### **2. Define filtering functions**


In [114]:
import numpy as np

def get_slope(x, y, dy):
    """Weighted straight-line fit of ``y`` against ``x``; returns the slope.

    Parameters
    ----------
    x, y, dy : scalar or array-like
        Abscissa, ordinate and the uncertainty on ``y`` (used as ``1 / dy``
        weights in ``np.polyfit``). Samples where any of the three values is
        non-finite are ignored.

    Returns
    -------
    slope, slope_err
        Best-fit slope and its uncertainty, taken as the square root of the
        corresponding diagonal element of the unscaled covariance matrix.
        ``(0, 0)`` is returned when fewer than three usable samples remain
        or when all usable ``x`` values are identical.
    """
    x = np.atleast_1d(x)
    y = np.atleast_1d(y)
    dy = np.atleast_1d(dy)

    good = np.isfinite(x) & np.isfinite(y) & np.isfinite(dy)
    x_good, y_good, dy_good = x[good], y[good], dy[good]

    if len(x_good) < 3 or np.ptp(x_good) == 0:
        return 0, 0

    # Centre both axes on the *usable* samples only. Taking the mean over the
    # full ``y`` would turn the whole ordinate into NaN as soon as a single
    # (already masked-out) sample is NaN, and the fit would return NaN.
    p, cov = np.polyfit(
        x_good - np.mean(x_good),
        y_good - np.mean(y_good),
        1,
        w=1 / dy_good,
        cov="unscaled",
    )
    dp = np.sqrt(np.diag(cov))

    return p[0], dp[0]


def is_rising(
    jd,
    flux,
    fluxerr,
    filter,
    upper=None,
    nsigmas_rise=2,
    nsigmas_decay=1,
    nsigmas_slope=3,
    use_slope=True,
    verbose=False,
):
    """Decide whether a multi-band light curve shows a rise and no decay.

    Each band in ``u, g, r, i, z, y`` with at least two non-NaN flux values
    is inspected independently; the rise/decay flags are shared across bands.

    A *rise* is flagged when, in any band,
      * the last point exceeds any earlier point by more than
        ``nsigmas_rise`` combined errors, or
      * (``use_slope`` and >= 3 points) the fitted slope exceeds
        ``nsigmas_slope`` times its uncertainty, or
      * the last point exceeds any ``upper`` value taken at a NaN-flux
        position of that band by more than ``nsigmas_rise`` times the last
        point's error.

    A *decay* is flagged when, in any band,
      * any earlier point exceeds the last point by more than
        ``nsigmas_decay`` combined errors, or
      * any point is lower than its predecessor by more than
        ``nsigmas_decay`` combined errors.

    Parameters
    ----------
    jd, flux, fluxerr, filter : array-like
        Per-sample time, flux, flux error and band label. Lists, NumPy
        arrays and pandas Series are all accepted (coerced with
        ``np.asarray``). NaN in ``flux`` marks a non-detection.
        "Last"/"previous" refer to positional order; the samples are
        presumably expected to be time-ordered — this is not checked or
        enforced here.
    upper : array-like, optional
        Per-sample values aligned with ``flux``; only the entries at
        NaN-flux positions are used (presumably upper limits).
    nsigmas_rise, nsigmas_decay, nsigmas_slope : float
        Significance thresholds for the tests described above.
    use_slope : bool
        Enable the slope test (calls ``get_slope``).
    verbose : bool
        Print the fitted slopes and the final flags.

    Returns
    -------
    bool
        True when a rise was flagged and no decay was flagged.
    """
    # Coerce to arrays. Without this, list inputs make `flux == flux` and
    # `filter == filt` collapse to scalar booleans (so the function silently
    # returns False), and a pandas Series `flux` turns `[-1]` into a label
    # lookup instead of "last element".
    jd = np.asarray(jd)
    flux = np.asarray(flux)
    fluxerr = np.asarray(fluxerr)
    bands = np.asarray(filter)
    if upper is not None:
        upper = np.asarray(upper)

    # NaN is the only value that differs from itself.
    detected = flux == flux

    is_decay = False
    is_rise = False

    for filt in ['u', 'g', 'r', 'i', 'z', 'y']:

        in_band = bands == filt
        idx = detected & in_band
        uidx = ~detected & in_band

        if np.sum(idx) < 2:
            continue

        band_flux = flux[idx]
        band_err = fluxerr[idx]
        last_flux = band_flux[-1]
        last_err = band_err[-1]

        # Compare every earlier point with the last one.
        earlier_flux = band_flux[:-1]
        to_last_err = np.hypot(band_err[:-1], last_err)

        # Last point is significantly lower than the max?..
        if np.any(earlier_flux - last_flux > nsigmas_decay * to_last_err):
            is_decay = True

        # Last point is significantly higher than the min?..
        if np.any(last_flux - earlier_flux > nsigmas_rise * to_last_err):
            is_rise = True

        if np.sum(idx) >= 3 and use_slope:
            # Slope is significantly positive?..
            slope, serr = get_slope(jd[idx], band_flux, band_err)
            if verbose:
                print("slope", slope, "+/-", serr)
            if slope > nsigmas_slope * serr:
                is_rise = True

        # Any point is significantly lower than the previous?..
        step_drop = band_flux[:-1] - band_flux[1:]
        step_err = np.hypot(band_err[:-1], band_err[1:])

        if np.any(step_drop > nsigmas_decay * step_err):
            is_decay = True

        # Last point is significantly higher than any (prior) upper limit?..
        if upper is not None and np.sum(uidx) > 0:
            if np.any(last_flux - upper[uidx] > nsigmas_rise * last_err):
                is_rise = True

    if verbose:
        print("rise", is_rise, "decay", is_decay)

    return is_rise and not is_decay


### **3. Apply rising flags**

In [123]:
# Keep only the objects that have a preprocessed 30-day light curve.
df = df[~df['lightcurve_preprocessed_30d'].isna()]

# Evaluate the rising criterion once per object, in row order.
mask = []
for _, row in df.iterrows():
    # Fetch the light-curve table once instead of once per column.
    lc = row["lightcurve_preprocessed_30d"]

    mjd = lc["mjd"].to_numpy()
    flux = lc["flux_dered"].to_numpy()
    flux_err = lc["flux_err_dered"].to_numpy()
    # Named `bands` rather than `filter`, so the builtin `filter` is not
    # shadowed for every later cell of the notebook.
    bands = lc["filter"].to_numpy()

    mask.append(is_rising(mjd, flux, flux_err, bands, verbose=False))

# Index with an explicit boolean array: a plain (possibly empty) list would be
# interpreted as a column selection when `df` has no rows.
df_rising = df[np.asarray(mask, dtype=bool)]

print(len(df_rising))



694


### **4. Apply other cuts (e.g. a minimum of 5 points)**

In [124]:
# Keep only the rising objects whose preprocessed 30-day light curve has at
# least five entries (len() of the per-object light-curve table).
df_5points = df_rising.loc[
    df_rising["lightcurve_preprocessed_30d"].map(len).ge(5)
]
print(len(df_5points))

576


### **5. Apply feature extraction**