In [1]:
import sys
sys.path.append("..")
from lsst.daf.butler import Butler
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import multiprocessing
import evals
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
# Butler collection that every query and butler.get call in this notebook reads from.
collection = "u/kmrakovc/runs/single_frame_injection_stars_full"
# Alternative collection, kept for quick switching (presumably a smaller test run -- confirm).
#collection = "u/kmrakovc/runs/single_frame_injection_test_full"
# Root of the Butler repository; passed to Butler(...) by the helper functions.
# NOTE(review): absolute, machine-specific path -- the notebook only runs where this directory exists.
repo_path ="/epyc/ssd/users/kmrakovc/DATA/rc2_subset/SMALL_HSC/"

In [3]:
def measure_one_src_catalog(butler, collection, src_ref, calexp_ref, index):
    """Convert the ``src`` catalog of a single exposure into a pandas DataFrame.

    Only records with ``parent == 0`` are kept. On top of the catalog's own
    columns the returned table carries:

    * ``snr`` -- ``base_PsfFlux_instFlux / base_PsfFlux_instFluxErr``;
    * ``magnitude`` / ``magnitudeErr`` -- columns 0 and 1 of the array returned
      by ``photoCalib.instFluxToMagnitude`` for ``base_PsfFlux``;
    * ``n`` -- the ``index`` argument, repeated on every row;
    * ``y`` / ``x`` -- rounded pixel positions. ``y`` receives the FIRST array
      returned by ``wcs.skyToPixelArray`` and ``x`` the SECOND one.

    Parameters
    ----------
    butler : object exposing ``get(name, dataId=..., collections=...)``
    collection : collection name forwarded to every ``butler.get`` call
    src_ref : reference whose ``dataId`` selects the ``src`` catalog
    calexp_ref : reference whose ``dataId`` selects ``calexp.photoCalib`` and
        ``calexp.wcs``
    index : value stored in the ``n`` column

    Returns
    -------
    pandas.DataFrame
        One row per kept source.
    """
    def fetch(dataset_name, ref):
        # Every lookup goes through the same collection.
        return butler.get(dataset_name, dataId=ref.dataId, collections=collection)

    catalog = fetch("src", src_ref)
    photocalib = fetch("calexp.photoCalib", calexp_ref)
    wcs = fetch("calexp.wcs", calexp_ref)

    top_level = catalog[catalog['parent'] == 0]

    inst_flux = np.array(top_level["base_PsfFlux_instFlux"])
    inst_flux_err = np.array(top_level["base_PsfFlux_instFluxErr"])
    mag_and_err = photocalib.instFluxToMagnitude(top_level, 'base_PsfFlux')
    # Sky coordinates are handed over as-is with degrees=False.
    pixel_positions = wcs.skyToPixelArray(np.array([top_level["coord_ra"]]),
                                          np.array([top_level["coord_dec"]]),
                                          degrees=False)

    table = top_level.asAstropy()
    extra_columns = (
        ("snr", inst_flux / inst_flux_err),
        ("magnitude", mag_and_err[:, 0]),
        ("magnitudeErr", mag_and_err[:, 1]),
        ("n", index),
        ("y", np.round(pixel_positions[0])),
        ("x", np.round(pixel_positions[1])),
    )
    for column_name, column_values in extra_columns:
        table[column_name] = column_values
    return table.to_pandas()

def measure_src_catalog(repo_path, collection, val_index=None, n_parallel=0):
    """Measure the ``src`` catalogs of a collection and stack them into one DataFrame.

    The ``calexp`` and ``src`` dataset references of the HSC instrument are
    queried from the collection, de-duplicated/sorted with ``np.unique`` and
    paired by position; each pair is handed to ``measure_one_src_catalog``.

    Parameters
    ----------
    repo_path : path passed to ``Butler(...)``
    collection : collection to query and read from
    val_index : iterable of int, optional
        Positions (into the sorted reference arrays) to process. ``None``
        processes every ``src`` reference.
    n_parallel : int
        When greater than 1, the work is spread over a
        ``multiprocessing.Pool`` of that size; otherwise it runs serially and
        prints a progress counter.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-exposure tables with a fresh 0..N-1 index.
        The ``n`` column holds the position of the exposure in the sorted
        reference arrays.
    """
    butler = Butler(repo_path)

    def sorted_refs(dataset_type):
        # np.unique both removes duplicates and sorts, which gives the two
        # reference arrays a common ordering.
        return np.unique(np.array(list(butler.registry.queryDatasets(dataset_type,
                                                                     collections=collection,
                                                                     instrument='HSC',
                                                                     findFirst=True))))

    calexp_ref = sorted_refs("calexp")
    src_ref = sorted_refs("src")
    if val_index is None:
        val_index = list(range(len(src_ref)))
    # Argument order must match measure_one_src_catalog(butler, collection,
    # src_ref, calexp_ref, index); the two references were previously swapped.
    # NOTE(review): pairing by position assumes both arrays have the same
    # length and ordering -- confirm for collections with missing datasets.
    parameters = [(butler, collection, src_ref[i], calexp_ref[i], i) for i in val_index]
    if n_parallel > 1:
        with multiprocessing.Pool(n_parallel) as pool:
            results = pool.starmap(measure_one_src_catalog, parameters)
    else:
        results = [None] * len(parameters)
        for i, p in enumerate(parameters):
            results[i] = measure_one_src_catalog(*p)
            # "\r" rewrites the same output line on every iteration.
            print("\r", i + 1, "/", len(parameters), end="")
    results = pd.concat(results).reset_index(drop=True)
    return results

In [4]:
def _rank_by_snr_distance(snr_values, target):
    """Return row positions ordered by increasing squared distance of SNR from ``target``."""
    return np.argsort(np.square(np.array(snr_values - target)))


def _prepare_cutout_sources(repo_path, collection, sources, cutout_size, calexp_type):
    """Open the butler, list the exposures and keep the sources that can be cut out.

    A source is kept when its SNR is positive and its ``x``/``y`` position lies
    more than ``3 * cutout_size`` pixels away from the image border (the image
    size is read from the first exposure of the collection).

    Returns ``(butler, calexp_ref, usable_sources, snr_column_name, mag_column_name)``.
    Raises ``ValueError`` when there is no SNR column or no usable source.
    """
    snr_candidates = [col for col in sources.columns if 'snr' in col]
    if not snr_candidates:
        raise ValueError("sources has no column whose name contains 'snr'")
    # The first matching column is the one used for selection and titles.
    snr_column_name = snr_candidates[0]
    mag_column_name = [col for col in sources.columns[sources.dtypes==float] if 'mag' in col]

    butler = Butler(repo_path)
    calexp_ref = np.unique(np.array(list(butler.registry.queryDatasets(calexp_type,
                                                                       collections=collection,
                                                                       instrument='HSC',
                                                                       findFirst=True))))
    calexp_dimensions = butler.get(calexp_type+".dimensions",
                                   dataId=calexp_ref[0].dataId,
                                   collections=collection)
    margin = 3*cutout_size
    # In this notebook "x" indexes image rows (first array axis), which is why
    # it is compared against dimensions.y, and "y" against dimensions.x.
    keep = ((sources[snr_column_name]>0)
            & (sources["x"]>margin) & (sources["y"]>margin)
            & (sources["x"]<calexp_dimensions.y-margin)
            & (sources["y"]<calexp_dimensions.x-margin))
    usable = sources[keep]
    if len(usable) == 0:
        raise ValueError("no source with positive SNR lies far enough from the image border")
    return butler, calexp_ref, usable, snr_column_name, mag_column_name


def _plot_cutout_row(butler, collection, calexp_ref, picks, snr_column_name, mag_column_name,
                     cutout_size, calexp_type, figsize):
    """Draw one row of cutouts, one panel per row of ``picks`` (in order)."""
    # squeeze=False keeps a 2-D axes array even for a single panel, so that
    # indexing works for any number of panels.
    fig, axes = plt.subplots(1, len(picks), figsize=figsize, squeeze=False)
    for position, ax in enumerate(axes[0]):
        source_info = picks.iloc[[position]]
        # "n" is the position of the exposure in the sorted reference array.
        exposure_index = int(source_info["n"].values[0])
        calexp = butler.get(calexp_type,
                            dataId=calexp_ref[exposure_index].dataId,
                            collections=collection).image.array
        x = int(source_info["x"].values[0])
        y = int(source_info["y"].values[0])
        cutout = calexp[x-cutout_size:x+cutout_size,
                        y-cutout_size:y+cutout_size]
        # Grey scale spans +/- 3 standard deviations around the mean of the
        # whole image, not of the cutout.
        image_mean = calexp.mean()
        image_std = calexp.std()
        ax.imshow(cutout, vmin=image_mean-3*image_std, vmax=image_mean+3*image_std, cmap="gray")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title("SNR="+str(round(source_info[snr_column_name].values[0],2)))
        x_label = ""
        for col_name in mag_column_name:
            x_label += col_name+":"+str(round(source_info[col_name].values[0],2))+"\n"
        ax.set_xlabel(x_label)
    fig.tight_layout()


def cutouts_by_snr_bins(repo_path, collection, sources, snr=(5, 10, 15, 20, 25, 30), cutout_size=20, calexp_type="calexp"):
    """Show, for each target SNR, a cutout of the source whose SNR is closest to it.

    Parameters
    ----------
    repo_path : path passed to ``Butler(...)``
    collection : collection to read images from
    sources : pandas.DataFrame
        Needs the columns ``n``, ``x``, ``y`` and a column whose name contains
        ``snr`` (the first such column is used). Float columns whose name
        contains ``mag`` are printed under each panel.
    snr : sequence of numbers
        Target SNR values; one panel is drawn per value.
    cutout_size : int
        Half-width of the cutout in pixels (the panel is ``2 * cutout_size``
        on a side).
    calexp_type : str
        Dataset type of the images to cut from.

    Raises
    ------
    ValueError
        If ``sources`` has no SNR column or no source survives the
        positive-SNR / border-margin selection.
    """
    butler, calexp_ref, usable, snr_column_name, mag_column_name = _prepare_cutout_sources(
        repo_path, collection, sources, cutout_size, calexp_type)
    targets = list(snr)
    nearest = [_rank_by_snr_distance(usable[snr_column_name], target)[0] for target in targets]
    _plot_cutout_row(butler, collection, calexp_ref, usable.iloc[nearest],
                     snr_column_name, mag_column_name, cutout_size, calexp_type,
                     figsize=(len(targets)*2.7, 1*4))


def cutouts_of_one_snr(repo_path, collection, sources, sources_num=5, snr=5, cutout_size=20, calexp_type="calexp"):
    """Show cutouts of the ``sources_num`` sources whose SNR is closest to ``snr``.

    Parameters are the same as for ``cutouts_by_snr_bins`` except:

    sources_num : int
        Number of panels requested. When fewer usable sources exist, only
        those are drawn and a warning is emitted.
    snr : number
        Single target SNR value.

    Raises
    ------
    ValueError
        If ``sources`` has no SNR column or no source survives the
        positive-SNR / border-margin selection.
    """
    import warnings

    butler, calexp_ref, usable, snr_column_name, mag_column_name = _prepare_cutout_sources(
        repo_path, collection, sources, cutout_size, calexp_type)
    # Rank once instead of re-sorting the whole table for every panel.
    ranking = _rank_by_snr_distance(usable[snr_column_name], snr)
    panel_count = min(sources_num, len(ranking))
    if panel_count < sources_num:
        warnings.warn("only " + str(panel_count) + " usable sources available, "
                      + str(sources_num) + " requested")
    _plot_cutout_row(butler, collection, calexp_ref, usable.iloc[ranking[:panel_count]],
                     snr_column_name, mag_column_name, cutout_size, calexp_type,
                     figsize=(panel_count*3, 1*3))

# Sources (not asteroids)

In [5]:
# Measure every src catalog of the collection; n_parallel=30 selects the
# multiprocessing.Pool branch of measure_src_catalog with 30 worker processes.
sources = measure_src_catalog (repo_path, collection, n_parallel=30)
# Bare last expression: display the combined table as the cell output.
sources

In [6]:
# Cutouts of the sources whose SNR is closest to 5 (sources_num keeps its default of 5 panels).
cutouts_of_one_snr (repo_path, collection, sources, snr=5)
# One cutout per listed target SNR: the source whose SNR is closest to each value.
cutouts_by_snr_bins (repo_path, collection, sources, snr=[5, 10, 15, 30, 50, 100])

In [7]:
# Distribution of the PSF-flux magnitudes of the measured sources,
# restricted to the 15-30 range in 50 bins.
fig, ax = plt.subplots()
ax.hist(sources["magnitude"], bins=50, range=(15, 30))
ax.set_xlabel("Magnitude")
# Label the count axis so the figure can be read on its own.
ax.set_ylabel("Number of sources")

In [8]:
# 2-D histogram of PSF-flux magnitude against SNR for the measured sources
# (magnitude 20-30, SNR 4-100, 50 bins per axis).
fig, ax = plt.subplots()
# hist2d returns (counts, xedges, yedges, image); the image is needed for the colorbar.
_, _, _, density_image = ax.hist2d(sources["magnitude"], sources["snr"], bins=50, range=[[20, 30], [4, 100]])
ax.set_xlabel("Magnitude")
ax.set_ylabel("SNR")
# Without a colorbar the colour-encoded counts cannot be read off the figure.
fig.colorbar(density_image, ax=ax, label="Number of sources")

# Asteroids

In [16]:
# Table produced by evals.eval_tools.recovered_sources, which is defined outside this notebook.
# NOTE(review): presumably one row per injected source with its recovery status -- confirm in evals.
inject_sources = evals.eval_tools.recovered_sources(repo_path, collection, n_parallel=30)
# Keep only the rows whose stack_detected flag equals 1.
detected_sources = inject_sources[inject_sources["stack_detected"]==1]
# Display the magnitude / surface-brightness / SNR columns of the kept rows.
detected_sources[["integrated_mag", "PSF_mag", "surface_brightness", "stack_magnitude", "stack_snr"]]

In [10]:
# Same two cutout views as for the measured sources, but for the detected injected
# sources and cut from the "injected_calexp" dataset type.
# The helpers pick the first column containing 'snr' and every float column containing 'mag'
# (NOTE(review): for this table that is presumably stack_snr -- confirm the column order).
cutouts_by_snr_bins (repo_path, collection, detected_sources, snr=[5, 10, 15, 30, 50, 100], calexp_type="injected_calexp")
cutouts_of_one_snr (repo_path, collection, detected_sources, snr=5, calexp_type="injected_calexp")