# Full Superphot+ pipeline for paper

## Step 1: Import light curves from CSV.

In [1]:
# Catalogue of classified objects; passed to generate_flux_files and later
# read with pandas to build the training CSV.
dataset_csv = "combined_classified_05_09_2023.csv"
# Directory handed to generate_flux_files and to the Figure 1 plotting call.
alerce_flux_files_dir = "alerce_datafiles"

In [2]:
# Run generate_flux_files on the classified catalogue, targeting
# alerce_flux_files_dir.
# NOTE(review): presumably this fetches one ALeRCE flux CSV per object into
# that directory -- confirm against the superphot_plus implementation.
from superphot_plus.data_generation.alerce import generate_flux_files
generate_flux_files(dataset_csv, alerce_flux_files_dir)

In [3]:
# Unclassified dataset: same generate_flux_files call as for the classified
# catalogue, writing into the same alerce_flux_files_dir.
unclassified_training_csv = "training_set_unclassified_overlap.csv"
generate_flux_files(unclassified_training_csv, alerce_flux_files_dir)

In [4]:
# Then clip/remove nan values/etc.
import glob, os
from superphot_plus.import_utils import import_lc, save_datafile, add_to_new_csv
# Directory of reformatted light curves; read again by the SNR and
# light-curve-fit plotting cells further down.
import_lc_dir = "reformatted_datafiles/"

# The preprocessing loop below is DISABLED: it is wrapped in a bare
# triple-quoted string, so nothing runs and the notebook just echoes the
# string as the cell output. Remove the quotes to regenerate the .npz files.
# As written, the loop skips any object whose "<name>.npz" already exists in
# import_lc_dir and only saves light curves for which import_lc returns a
# non-None time array.
"""
for fn in glob.glob(os.path.join(alerce_flux_files_dir, "*.csv")):
    name = fn.split("/")[-1][:-4]
    if os.path.exists(os.path.join(import_lc_dir, name+".npz")):
        continue
    t, f, ferr, b, ra, dec = import_lc(fn)
    if t is not None:
        save_datafile(name, t, f, ferr, b, import_lc_dir)
"""

'\nfor fn in glob.glob(os.path.join(alerce_flux_files_dir, "*.csv")):\n    name = fn.split("/")[-1][:-4]\n    if os.path.exists(os.path.join(import_lc_dir, name+".npz")):\n        continue\n    t, f, ferr, b, ra, dec = import_lc(fn)\n    if t is not None:\n        save_datafile(name, t, f, ferr, b, import_lc_dir)\n'

## Step 2: Fit light curves

In [5]:
# dynesty
# Folder holding the per-object posterior fits; used by the classification
# and plotting cells below.
dynesty_fit_folder = "dynesty_fits"
from superphot_plus.samplers.dynesty_sampler import DynestySampler
from superphot_plus.lightcurve import Lightcurve
from superphot_plus.surveys.surveys import Survey
from superphot_plus.posterior_samples import PosteriorSamples

# The fitting loop below is DISABLED: it is wrapped in a bare triple-quoted
# string, so no fits are run and the string is echoed as the cell output.
# As written, it skips objects whose "<name>_eqwt_dynesty.npz" already exists
# in dynesty_fit_folder and fits the rest with the ZTF priors.
"""
dynesty_sampler = DynestySampler()
for fn in glob.glob(os.path.join(import_lc_dir, "*.npz")):
    name = fn.split("/")[-1][:-4]
    if os.path.exists(os.path.join(dynesty_fit_folder, name+"_eqwt_dynesty.npz")):
        continue
    lc = Lightcurve.from_file(fn)
    priors = Survey.ZTF().priors
    samples = dynesty_sampler.run_single_curve(lc, priors)
    samples.save_to_file(dynesty_fit_folder)
"""

'\ndynesty_sampler = DynestySampler()\nfor fn in glob.glob(os.path.join(import_lc_dir, "*.npz")):\n    name = fn.split("/")[-1][:-4]\n    if os.path.exists(os.path.join(dynesty_fit_folder, name+"_eqwt_dynesty.npz")):\n        continue\n    lc = Lightcurve.from_file(fn)\n    priors = Survey.ZTF().priors\n    samples = dynesty_sampler.run_single_curve(lc, priors)\n    samples.save_to_file(dynesty_fit_folder)\n'

## Step 3: Create training CSV for classifier

In [6]:
import pandas as pd
from superphot_plus.file_utils import has_posterior_samples
from superphot_plus.import_utils import add_to_new_csv


def _write_fitted_subset(source_csv, output_csv, fits_dir="dynesty_fits", sampler="dynesty"):
    """Copy the fitted objects of ``source_csv`` into a fresh ``output_csv``.

    Reads the NAME, CLASS and Z columns of ``source_csv``, empties
    ``output_csv``, writes a ``NAME, LABEL, Z`` header and then one row per
    unique object for which ``has_posterior_samples`` is true. Rows are
    written through ``add_to_new_csv`` (presumably an append -- confirm
    against superphot_plus.import_utils).

    Parameters
    ----------
    source_csv : str
        CSV with NAME, CLASS and Z columns.
    output_csv : str
        Destination CSV; any existing content is discarded.
    fits_dir : str
        Directory passed to ``has_posterior_samples``.
    sampler : str
        Sampler name passed to ``has_posterior_samples``.
    """
    # Read the source before truncating the destination.
    source = pd.read_csv(source_csv)

    # Truncate the destination so re-running the cell never duplicates rows.
    with open(output_csv, "w"):
        pass
    add_to_new_csv("NAME", "LABEL", "Z", output_csv)

    # A set keeps the duplicate check O(1) per row instead of scanning a list.
    written = set()
    rows = zip(source.NAME.to_numpy(), source.CLASS.to_numpy(), source.Z.to_numpy())
    for name, label, redshift in rows:
        # Only the first occurrence of a name is kept.
        if name in written:
            continue
        if not has_posterior_samples(name, fits_dir=fits_dir, sampler=sampler):
            continue
        written.add(name)
        add_to_new_csv(name, label, redshift, output_csv)


# Spectroscopically classified training set.
training_csv = "training_set.csv"
_write_fitted_subset(dataset_csv, training_csv)

# Unclassified objects, filtered the same way.
unclassified_training_fitted = "training_set_unclassified_fitted.csv"
_write_fitted_subset(unclassified_training_csv, unclassified_training_fitted)



## Step 4: Train Classifier


In [7]:
from superphot_plus.classify_ztf import classify

# Hyperparameters for the classifier. goal_per_class, num_epochs,
# neurons_per_layer and num_layers are also interpolated into the
# confusion-matrix file names copied by the Figure 7/11/14/15 cells, so
# changing them here changes which PDFs those cells look for.
num_epochs = 100
num_folds = 5
goal_per_class = 500
neurons_per_layer = 128
num_layers = 3
log_file = "classification_log.txt"
# Same value as assigned in the Step 2 cell; repeated here.
dynesty_fit_folder = "dynesty_fits"

# Training is DISABLED: the classify(...) call is wrapped in a bare
# triple-quoted string, so nothing is trained and the string is echoed as the
# cell output. Later cells therefore rely on models and figures that already
# exist on disk.
"""
classify(
    [training_csv,],
    dynesty_fit_folder,
    goal_per_class,
    num_epochs,
    neurons_per_layer,
    num_layers,
    log_file,
    num_folds=num_folds,
)
"""



'\nclassify(\n    [training_csv,],\n    dynesty_fit_folder,\n    goal_per_class,\n    num_epochs,\n    neurons_per_layer,\n    num_layers,\n    log_file,\n    num_folds=num_folds,\n)\n'

## Step 5: Train redshift-inclusive classifier

In [8]:
from superphot_plus.classify_ztf import classify

# Redshift-inclusive training is DISABLED: the call is wrapped in a bare
# triple-quoted string and only echoed as the cell output. As written it would
# use the same hyperparameters as Step 4 with include_redshift=True.
# Its csv_path ("probs_new_z.csv") and cm_folder ("./figs/hp_cm_z") are the
# paths the plotting cells for Figures 14-16 read from.
"""
classify(
    [training_csv,],
    dynesty_fit_folder,
    goal_per_class,
    num_epochs,
    neurons_per_layer,
    num_layers,
    log_file,
    include_redshift=True,
    num_folds=num_folds,
    metrics_dir="./figs/metrics_z",
    models_dir="./models_z",
    csv_path="probs_new_z.csv",
    cm_folder="./figs/hp_cm_z",
)
"""



'\nclassify(\n    [training_csv,],\n    dynesty_fit_folder,\n    goal_per_class,\n    num_epochs,\n    neurons_per_layer,\n    num_layers,\n    log_file,\n    include_redshift=True,\n    num_folds=num_folds,\n    metrics_dir="./figs/metrics_z",\n    models_dir="./models_z",\n    csv_path="probs_new_z.csv",\n    cm_folder="./figs/hp_cm_z",\n)\n'

## Step 6: Classify new supernovae

In [9]:
from superphot_plus.classify_ztf import return_new_classifications
from superphot_plus.mlp import MLP, ModelConfig

# Pick a trained model from ./models. glob returns matches in arbitrary
# order, so sort them to make the choice reproducible, and fail with a clear
# message (instead of a bare IndexError) when no model has been trained yet.
model_paths = sorted(glob.glob("./models/superphot-model-*.pt"))
if not model_paths:
    raise FileNotFoundError(
        "No trained model matches './models/superphot-model-*.pt'; "
        "run the Step 4 training first."
    )
mlp_fn = model_paths[0]

# The model identifier is whatever sits between the fixed file-name prefix
# and the ".pt" suffix. Stripping both (rather than splitting on "-") keeps
# identifiers that themselves contain hyphens intact.
mlp_prefix = os.path.basename(mlp_fn)[len("superphot-model-"):-len(".pt")]
mlp_config_fn = f"./models/superphot-config-{mlp_prefix}.json"

save_file = "probs_unclassified.csv"
model, _, _ = MLP.load(mlp_fn, mlp_config_fn, data=None)
return_new_classifications(model, unclassified_training_fitted, dynesty_fit_folder, save_file, include_labels=True)

probs_unclassified.csv
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKELY NOT A SN
OBJECT LIKE

## Step 7: Generate paper plots

In [10]:
# Probability tables consumed by the plotting cells.
probs_classified = "probs_new.csv"
probs_unclassified = "probs_unclassified.csv"
probs_classified_z = "probs_new_z.csv"

# Output tables written by add_snr_to_prob_csv, one per input table above.
probs_snr = "probs_snr.csv"
probs_unclassified_snr = "probs_unclassified_snr.csv"
probs_z_snr = "probs_new_z_snr.csv"

from superphot_plus.plotting.utils import add_snr_to_prob_csv

# Same call for each (input table, output table) pair, in the original order.
for probs_in, probs_out in (
    (probs_classified, probs_snr),
    (probs_unclassified, probs_unclassified_snr),
    (probs_classified_z, probs_z_snr),
):
    add_snr_to_prob_csv(probs_in, import_lc_dir, probs_out)

In [11]:
# Figure 1
from superphot_plus.plotting.lightcurves import plot_lightcurve_clipping

fig_1_name = "ZTF18aaqfqaa"
# File expected in "figs" after the plotting call; renamed to the paper's
# figure number.
fig_1_src = f"figs/lc_clip_demo_{fig_1_name}.pdf"
fig_1_dst = "figs/fig1.pdf"

plot_lightcurve_clipping(fig_1_name, alerce_flux_files_dir, "figs")
os.rename(fig_1_src, fig_1_dst)

In [12]:
# Figure 2
from superphot_plus.plotting.classifier_results import plot_snr_hist

fig_2_src = "figs/snr_hist.pdf"
fig_2_dst = "figs/fig2.pdf"

plot_snr_hist(probs_snr, "figs")
# Rename the plot to the paper's figure number.
os.rename(fig_2_src, fig_2_dst)

In [13]:
# Figure 3
from superphot_plus.plotting.classifier_results import compare_mag_distributions

fig_3_src = "figs/appm_hist_compare.pdf"
fig_3_dst = "figs/fig3.pdf"

compare_mag_distributions(probs_snr, probs_unclassified_snr, "figs")
# Rename the plot to the paper's figure number.
os.rename(fig_3_src, fig_3_dst)

  return n/db/n.sum(), bin_edges
  return n/db/n.sum(), bin_edges


In [14]:
# Figure 4
from superphot_plus.plotting.lightcurves import plot_lc_fit
from superphot_plus.surveys.surveys import Survey
import matplotlib.pyplot as plt

def custom_container_fig4(sn_name, sn_type):
    """Return a zero-argument callback that sets the plot title.

    The callback calls ``plt.title`` with the text "<sn_name> (<sn_type>)".
    It is passed to ``plot_lc_fit`` as its ``custom_formatting`` argument.
    """
    title_text = f"{sn_name} ({sn_type})"

    def custom_formatting_fig4():
        plt.title(title_text)

    return custom_formatting_fig4

os.makedirs("figs/fig4", exist_ok=True)
# NOTE(review): sn_types lists six classes but sn_names only three objects,
# so only the first three types are used -- confirm whether three more
# objects are meant to be added.
sn_types = ["SN Ia", "SN Ia", "SN II", "SN IIn", "SLSN-I", "SN Ibc"]
sn_names = ["ZTF23aacxofz", "ZTF23aagfpxq", "ZTF21aajgdeu", ]

ztf_priors = Survey.ZTF().priors

# One fit plot per object, titled with its name and class.
for name, sn_type in zip(sn_names, sn_types):
    cf = custom_container_fig4(name, sn_type)
    plot_lc_fit(
        name,
        ztf_priors.reference_band,
        ztf_priors.ordered_bands,
        import_lc_dir,
        dynesty_fit_folder,
        "figs/fig4",
        sampling_method="dynesty",
        file_type="pdf",
        custom_formatting=cf,
    )

In [15]:
# Figure 5
from superphot_plus.plotting.sampling_results import plot_oversampling_1d
from superphot_plus.plotting.utils import read_probs_csv

fig_5_src = "figs/all_1d_hists.pdf"
fig_5_dst = "figs/fig5.pdf"

# Only the first two of the four values returned by read_probs_csv are used;
# names and labels are reused by the Figure 6 cell.
names, labels, _, _ = read_probs_csv(probs_classified)
plot_oversampling_1d(names, labels, dynesty_fit_folder, "figs")
os.rename(fig_5_src, fig_5_dst)

In [16]:
# Figure 6
from superphot_plus.plotting.sampling_results import compare_oversampling
from superphot_plus.supernova_class import SupernovaClass as SnClass

# Keyword arguments named up front so the call reads at a glance.
# NOTE(review): unlike the neighbouring figure cells, nothing here renames
# the output to a "fig6" file -- confirm that is intended.
fig_6_types = ["SLSN-I",]
fig_6_aux_bands = Survey.ZTF().priors.aux_bands

compare_oversampling(
    names,
    labels,
    dynesty_fit_folder,
    "figs",
    allowed_types=fig_6_types,
    aux_bands=fig_6_aux_bands,
    sampler="dynesty",
    goal_per_class=4000,
)


In [17]:
# Figure 7
import shutil

# Both source PDFs share this stem, built from the training hyperparameters.
fig_7_cm_stem = f"figs/hp_cm/cm_{goal_per_class}_{num_epochs}_{neurons_per_layer}_{num_layers}"
shutil.copy(fig_7_cm_stem + "_c.pdf", "figs/fig7_c.pdf")
shutil.copy(fig_7_cm_stem + "_p.pdf", "figs/fig7_p.pdf")

'figs/fig7_p.pdf'

In [18]:
# Figure 8 (TODO: this cell contains no code; the figure is not generated here)

In [19]:
# Figure 9 (TODO: this cell contains no code; the figure is not generated here)

In [20]:
# Figure 10
from superphot_plus.plotting.classifier_results import plot_snr_npoints_vs_accuracy

plot_snr_npoints_vs_accuracy(probs_snr, "figs")
# Rename both plots to the paper's figure numbering.
for fig_10_src, fig_10_dst in (
    ("figs/snr_vs_accuracy.pdf", "figs/fig10_snr.pdf"),
    ("figs/n_vs_accuracy.pdf", "figs/fig10_npoints.pdf"),
):
    os.rename(fig_10_src, fig_10_dst)

In [21]:
# Figure 11
import shutil

# Both source PDFs share this stem, built from the training hyperparameters.
fig_11_cm_stem = f"figs/hp_cm/cm_{goal_per_class}_{num_epochs}_{neurons_per_layer}_{num_layers}"
shutil.copy(fig_11_cm_stem + "_c_p07.pdf", "figs/fig11_c.pdf")
shutil.copy(fig_11_cm_stem + "_p_p07.pdf", "figs/fig11_p.pdf")

'figs/fig11_p.pdf'

In [22]:
# Figure 12
from superphot_plus.plotting.classifier_results import generate_roc_curve

fig_12_src = "figs/roc_all.pdf"
fig_12_dst = "figs/fig12.pdf"

generate_roc_curve(probs_classified, "figs")
# Rename the plot to the paper's figure number.
os.rename(fig_12_src, fig_12_dst)

In [23]:
# Figure 13
from superphot_plus.plotting.classifier_results import plot_redshifts_abs_mags

fig_13_src = "figs/abs_mag_hist.pdf"
fig_13_dst = "figs/fig13.pdf"

plot_redshifts_abs_mags(probs_snr, "figs")
# Rename the plot to the paper's figure number.
os.rename(fig_13_src, fig_13_dst)

0: 3978
1: 868
3: 79
2: 167
4: 218

0


In [24]:
# Figure 14 - redshift inclusive CM
import shutil

# Both source PDFs share this stem, built from the training hyperparameters.
fig_14_cm_stem = f"figs/hp_cm_z/cm_{goal_per_class}_{num_epochs}_{neurons_per_layer}_{num_layers}"
shutil.copy(fig_14_cm_stem + "_c.pdf", "figs/fig14_c.pdf")
shutil.copy(fig_14_cm_stem + "_p.pdf", "figs/fig14_p.pdf")

'figs/fig14_p.pdf'

In [25]:
# Figure 15 - redshift inclusive p > 0.7 CM
import shutil

# Both source PDFs share this stem, built from the training hyperparameters.
fig_15_cm_stem = f"figs/hp_cm_z/cm_{goal_per_class}_{num_epochs}_{neurons_per_layer}_{num_layers}"
shutil.copy(fig_15_cm_stem + "_c_p07.pdf", "figs/fig15_c.pdf")
shutil.copy(fig_15_cm_stem + "_p_p07.pdf", "figs/fig15_p.pdf")

'figs/fig15_p.pdf'

In [26]:
# Figure 16 - binary redshift inclusive
from superphot_plus.plotting.confusion_matrices import plot_snIa_confusion_matrix

# Second positional argument of the plotting call, named for readability.
fig_16_target = "figs/fig16"
plot_snIa_confusion_matrix(probs_classified_z, fig_16_target, p07=False)

In [27]:
# Figure 17 - agreement matrices (TODO: this cell contains no code; the figure is not generated here)

In [28]:
# Figure 18 - chisquared trends
# NOTE(review): the recorded run of this cell failed with
#   FileNotFoundError: 'dynesty_fits/ZTF17aabtvsy_eqwt.npz'
# The (disabled) Step 2 fitting loop checks for files named
# "<name>_eqwt_dynesty.npz", so this call appears to look up fits without the
# sampler suffix. Possibly a sampler argument needs to be passed here --
# confirm against the signature of plot_chisquared_vs_accuracy.
from superphot_plus.plotting.classifier_results import plot_chisquared_vs_accuracy

plot_chisquared_vs_accuracy(probs_classified, probs_unclassified, dynesty_fit_folder, "figs")

FileNotFoundError: [Errno 2] No such file or directory: 'dynesty_fits/ZTF17aabtvsy_eqwt.npz'