# Instantiate dataset, select 2 samples

In [1]:
from snanomaly.dataset import OSCFactory
# Dataset instance produced by the `OSC2018June` factory method.
dataset = OSCFactory.OSC2018June()

# Load the two sample candidates (in this order) from JSON files under `dataset.path`.
sn1, sn2 = (
    dataset.load_datapoint(dataset.path / filename)
    for filename in ("SN2005M.json", "[HP99] 456.json")
)

# Binning

In [2]:
# Import only the name this cell uses instead of `*`, so the notebook namespace stays explicit.
from snanomaly.preprocessing.binning import Binning

# Bin every band of `sn1` and print a short summary per band.
for band in sn1.photometry.bands.get_bands():
    print(f"### {band.name}: {band.nr_observations} ###")
    if band.nr_observations == 0:
        # Nothing to bin for this band; only the header line above is printed.
        continue

    binning = Binning(band=band, bin_width=3)
    print("Bin width:", binning.bin_width)
    print("First three bins:", binning.bins[:3])

    # Calling the `Binning` instance yields the binned band.
    binned = binning()
    print("Binned:", binned)

### B: 78 ###
Bin width: 3
First three bins: [53388. 53391. 53397.]
Binned: Band(B, 29 observations)
### R: 20 ###
Bin width: 3
First three bins: [53388. 53391. 53400.]
Binned: Band(R, 14 observations)
### I: 20 ###
Bin width: 3
First three bins: [53388. 53391. 53400.]
Binned: Band(I, 14 observations)
### g: 56 ###
Bin width: 3
First three bins: [53397. 53400. 53403.]
Binned: Band(g, 24 observations)
### r: 56 ###
Bin width: 3
First three bins: [53397. 53400. 53403.]
Binned: Band(r, 24 observations)
### i: 56 ###
Bin width: 3
First three bins: [53397. 53400. 53403.]
Binned: Band(i, 24 observations)
### g_pr: 0 ###
### r_pr: 0 ###
### i_pr: 0 ###


# Setup validation pipeline

In [3]:
from snanomaly.preprocessing.cleaning.checks.photometry import MinimumObservationsPerBand
from snanomaly.preprocessing.cleaning.validation_pipeline import ValidationPipeline
from snanomaly.models.sncandidate import Bandset

# The single check used in this notebook: at least 3 observations per band,
# evaluated for the BRI, gri and g'r'i' bandsets.
min_observations_check = MinimumObservationsPerBand(
    min_observations=3,
    bandsets=[Bandset.BRI, Bandset.gri, Bandset.gri_primed],
)

# Validation pipeline built from that check, with `fail_fast` enabled.
pipeline = ValidationPipeline(checks=[min_observations_check], fail_fast=True)

# Validating one candidate

In [4]:
# Reload the candidate from its JSON file, then run the pipeline on it.
sn1_path = dataset.path / "SN2005M.json"
sn1 = dataset.load_datapoint(sn1_path)
pipeline.validate(sn1)

# Print the results with `only_errors` switched off, followed by the candidate's bandsets.
pipeline.print_results(only_errors=False)
print(sn1.photometry.bands.available_bandsets)

### SN2005M ###
Passed.
{<Bandset.BRI: ('B', 'R', 'I')>, <Bandset.gri: ('g', 'r', 'i')>}


In [5]:
# Reload the second sample from its JSON file, then run the pipeline on it.
sn2_path = dataset.path / "[HP99] 456.json"
sn2 = dataset.load_datapoint(sn2_path)
pipeline.validate(sn2)

# Print the results with `only_errors` switched off.
pipeline.print_results(only_errors=False)

### [HP99] 456 ###
Check failed: [check=MinimumObservationsPerBand; message=No photometry data available]


# Filtering a collection of candidates

In [6]:
from tqdm import tqdm
from snanomaly import dirs

# Batch size passed to the loader; later cells also use it to estimate the number of batches for the progress bar.
batch_size = 12
# Load the dataset in batches of `batch_size`.
candidates = dataset.load_dataset(batch_size=batch_size)

### Validating just one batch

In [7]:
# Re-create the batch loader before filtering.
batch_size = 12
candidates = dataset.load_dataset(batch_size=batch_size)

# TODO: figure out why `available_bandsets` is cleared
valid_candidates = pipeline.filter_valid(candidates)

# Take only the first batch of valid candidates and list each name with its bandsets.
first_batch = next(valid_candidates)
for candidate in first_batch:
    print(candidate.name, candidate.photometry.bands.available_bandsets)

SDSS-II SN 21396 {<Bandset.gri_primed: ('g_pr', 'r_pr', 'i_pr')>}
SDSS-II SN 19334 {<Bandset.gri_primed: ('g_pr', 'r_pr', 'i_pr')>}
SDSS-II SN 21324 {<Bandset.gri_primed: ('g_pr', 'r_pr', 'i_pr')>}
SN2016cuv {<Bandset.gri: ('g', 'r', 'i')>}


### Just counting valid candidates

In [None]:
# Reload the batches: `candidates` was already advanced by the single-batch cell, and
# (presumably being a one-shot iterator — TODO confirm) would otherwise be partly consumed here.
candidates = dataset.load_dataset(batch_size=batch_size)
valid_candidates = pipeline.filter_valid(candidates)

# Ceiling division, so a final, partially filled batch is included in the progress-bar total.
nr_batches = (dataset.nr_datapoints + batch_size - 1) // batch_size

# Count the candidates in every batch yielded by the filter.
cnt_valid = 0
for batch in tqdm(valid_candidates, desc="Batches validated", total=nr_batches):
    cnt_valid += len(batch)

print(f"No. valid candidates: {cnt_valid}/{dataset.nr_datapoints} ({cnt_valid / dataset.nr_datapoints * 100:.2f}%)")

### Saving valid candidates to disk

In [13]:
# Reload the batches so this cell does not depend on how far earlier cells advanced `candidates`
# (presumably a one-shot iterator — TODO confirm); otherwise the CSV could miss candidates.
candidates = dataset.load_dataset(batch_size=batch_size)
valid_candidates = pipeline.filter_valid(candidates)

# NOTE(review): "canditates" looks like a typo for "candidates"; the file name is kept
# unchanged in case something else reads this path.
out_path = dirs.OUTPUTS / "valid_canditates.csv"
# Make sure the output directory exists before opening the file for writing.
out_path.parent.mkdir(parents=True, exist_ok=True)

# Bandsets exported as boolean columns, in the same order as the CSV header below.
exported_bandsets = (Bandset.BRI, Bandset.gri, Bandset.gri_primed)

# Ceiling division, so a final, partially filled batch is included in the progress-bar total.
nr_batches = (dataset.nr_datapoints + batch_size - 1) // batch_size

cnt_valid = 0
with out_path.open("w") as f:
    f.write("name,BRI,gri,g'r'i'\n")
    for batch in tqdm(valid_candidates, desc="Batches validated", total=nr_batches):
        cnt_valid += len(batch)
        for candidate in batch:
            # One "True"/"False" column per exported bandset.
            av_bandsets_str = ",".join(
                str(bs in candidate.photometry.bands.available_bandsets) for bs in exported_bandsets
            )
            f.write(f"{candidate.name},{av_bandsets_str}\n")

print(f"No. valid candidates: {cnt_valid}/{dataset.nr_datapoints} ({cnt_valid / dataset.nr_datapoints * 100:.2f}%)")

Batches validated:  78%|███████▊  | 2932/3763 [06:23<01:48,  7.64it/s]

No. valid candidates: 4782/45162 (10.59%)





# Visualization of binned light curves

In [None]:
### Visualization ###
from snanomaly.visualization.photometry import *

bands = sn1.photometry.bands
plot = PlotPhotometry()
plot.set_title(sn1.name)


def show_bandsets(*bandsets):
    """Put the given bandsets on the shared `plot` and display it."""
    plot.set_bands(list(bandsets))
    plot.show()


# Dummy errors written directly into the band data of `sn1`. TODO: remove
bands.B.e_flux[4] = 5 * 10 ** (-8)
bands.R.e_flux[6] = 3 * 10 ** (-8)

# NOTE(review): `Bandset` is called here with a list of bands, while the `Bandset` imported
# from snanomaly.models.sncandidate is used as an enum elsewhere in this notebook — confirm
# which `Bandset` is in scope after the star import at the top of this cell.
bs_BRI = Bandset([bands.B, bands.R, bands.I])
show_bandsets(bs_BRI)

bs_gri = Bandset([bands.g, bands.r, bands.i])
show_bandsets(bs_gri)

# Finally, both bandsets together in one figure.
show_bandsets(bs_BRI, bs_gri)

In [None]:
import plotly.offline as py
import plotly.graph_objects as go

# Title and axis labels are given as LaTeX strings.
layout_options = {
    "title": r"$\LaTeX\ Example$",
    "xaxis_title": r"$x$",
    "yaxis_title": r"$y$",
}

# Empty figure that only carries the layout above.
fig = go.Figure()
fig.update_layout(**layout_options)

# Displayed through plotly.offline's `iplot`; `fig.show()` is left disabled.
py.iplot(fig, filename="latex")

In [None]:
import plotly.offline as py
import plotly.graph_objects as go

# Same figure as the LaTeX example, but with plain-text title and axis labels.
layout_options = {
    "title": "No LaTeX Example",
    "xaxis_title": "x",
    "yaxis_title": "y",
}

# Empty figure that only carries the layout above.
fig = go.Figure()
fig.update_layout(**layout_options)

# Displayed through plotly.offline's `iplot`; `fig.show()` is left disabled.
py.iplot(fig, filename="latex")