# Import Everything I Need

In [10]:
## Essential Imports: 
import os
import numpy as np
from pathlib import Path
from pzflow.examples import get_galaxy_data
import tables_io

## RAIL-Specific Imports: 
import rail
from rail.creation.degradation import LSSTErrorModel, InvRedshiftIncompleteness
from rail.creation.engines.flowEngine import FlowModeler, FlowCreator, FlowPosterior
from rail.core.data import TableHandle
from rail.core.stage import RailStage
from rail.core.utilStages import ColumnMapper, TableConverter

from rail.estimation.algos.flexzboost import Inform_FZBoost, FZBoost

from rail.evaluation.evaluator import Evaluator

## Data Storage:
# Grab RAIL's shared data store and switch on `allow_overwrite` for its class
# (presumably so that re-running a stage may replace an existing handle --
# confirm against the RAIL DataStore docs).
DS = RailStage.data_store
data_store_cls = DS.__class__
data_store_cls.allow_overwrite = True

## Data Formatting: (from different codes)
from rail.core.utils import RAILDIR

# Path of the pretrained flow that ships with RAIL's goldenspike example.
# NOTE(review): the flow-engine setup cell further down rebinds `flow_file`
# to a locally trained flow, so this value is not used after that cell runs.
pretrained_flow_relpath = 'examples/goldenspike/data/pretrained_flow.pkl'
flow_file = os.path.join(RAILDIR, pretrained_flow_relpath)
# Single-letter band names, in the order used for every per-band mapping below.
bands = list('ugrizy')
# band letter -> magnitude column name, e.g. 'u' -> 'mag_u_lsst'
band_dict = {b: 'mag_' + b + '_lsst' for b in bands}
# '<magnitude column>_err' -> 'mag_err_<band>_lsst',
# e.g. 'mag_u_lsst_err' -> 'mag_err_u_lsst'
rename_dict = {
    mag_col + '_err': 'mag_err_' + b + '_lsst'
    for b, mag_col in band_dict.items()
}

# Setting Up the Flow Engine 

In [11]:
# Folder (inside the current working directory) that holds this notebook's data products
DATA_DIR = Path().resolve() / "data"
DATA_DIR.mkdir(exist_ok=True)

# Example galaxy catalog from pzflow with its band columns renamed via band_dict
# (100,000 galaxies w/ 7 attributes for each: redshift & ugrizy)
catalog = get_galaxy_data().rename(band_dict, axis=1)

# Save the catalog to disk: the path is passed to tables_io without its
# extension, and the extension (minus the dot) is passed as the format
catalog_path = DATA_DIR / "base_catalog.pq"
catalog_stem = str(catalog_path.with_suffix(""))
catalog_format = catalog_path.suffix[1:]
tables_io.write(catalog, catalog_stem, catalog_format)

# The stage configuration below is given plain string paths
catalog_file = str(catalog_path)
flow_file = str(DATA_DIR / "trained_flow.pkl")

# [lower, upper] range for each band's magnitude column
mag_ranges = {
    "u": [17, 35],
    "g": [16, 32],
    "r": [15, 30],
    "i": [15, 30],
    "z": [14, 29],
    "y": [14, 28],
}

# Configuration for the stage that fits the flow to the catalog
flow_modeler_params = dict(
    name="flow_modeler",
    input=catalog_file,
    model=flow_file,
    seed=0,
    phys_cols={"redshift": [0, 3]},
    phot_cols={f"mag_{band}_lsst": limits for band, limits in mag_ranges.items()},
    calc_colors={"ref_column_name": "mag_i_lsst"},
)

# Build the stage from that configuration
flow_modeler = FlowModeler.make_stage(**flow_modeler_params)

# Fit the flow; left as the last expression so the resulting handle is displayed
flow_modeler.fit_model()

# With a fitted model in hand, the next step is to draw datasets from it (and degrade them)

No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


Inserting handle into data store.  input: /global/u2/a/acraffor/Photo-z-Stress-Test/data/base_catalog.pq, flow_modeler
Training 30 epochs 
Loss:
(0) 21.3266
(1) 3.9642
(2) 3.5612
(3) 1.6287
(4) -0.0812
(5) 0.2244
(6) -0.2572
(7) 0.3581
(8) -0.7755
(9) -1.4998
(10) -1.6085
(11) -1.7710
(12) -2.4134
(13) -2.2855
(14) -2.3888
(15) -1.3606
(16) -2.2746
(17) -1.7556
(18) -2.1972
(19) -3.2067
(20) -3.0266
(21) -3.2861
(22) -2.6768
(23) -3.3777
(24) -2.1020
(25) -2.9262
(26) -3.9119
(27) -3.4481
(28) -4.1449
(29) -3.9272
(30) -3.5161
Inserting handle into data store.  model_flow_modeler: /global/u2/a/acraffor/Photo-z-Stress-Test/data/inprogress_trained_flow.pkl, flow_modeler


<rail.core.data.FlowHandle at 0x7fe534055f40>

# Make Training and Test Datasets 

In [4]:
# Sample sizes and seeds for the two draws from the trained flow.
# The test draw gets its own seed so that it is independent of the training
# draw instead of reusing the same random stream.
N_TRAIN_SAMPLES = 10000
N_TEST_SAMPLES = 100000
TRAIN_SEED = 1235
TEST_SEED = 1234

# --- training set ---------------------------------------------------------

#take a sample of 10,000 galaxies from the model created above to use as a training set
flow_creator_train = FlowCreator.make_stage(
    name='flow_creator_train',
    model=flow_modeler.get_handle("model"),
    n_samples=N_TRAIN_SAMPLES,
    seed=TRAIN_SEED,
)

#an un-degraded training data set containing 10,000 galaxies
control_train_data = flow_creator_train.sample(N_TRAIN_SAMPLES, TRAIN_SEED)

# the inverse redshift degrader stage
inv_redshift = InvRedshiftIncompleteness.make_stage(
    name='inv_redshift',
    pivot_redshift=1.0,
)

# training set after the redshift-incompleteness degrader has been applied
# (calling a degrader stage on a sample runs it and returns the output handle)
deg_train_data = inv_redshift(control_train_data)

# --- test set -------------------------------------------------------------

#take a sample of 100,000 galaxies from the model created above to use as a test set
flow_creator_test = FlowCreator.make_stage(
    name='flow_creator_test',
    model=flow_modeler.get_handle("model"),
    n_samples=N_TEST_SAMPLES,
    seed=TEST_SEED,
)

#an un-degraded test data set containing 100,000 galaxies
control_test_data = flow_creator_test.sample(N_TEST_SAMPLES, TEST_SEED)

lsst_error_model_train = LSSTErrorModel.make_stage(
    name='lsst_error_model_train',
    bandNames=band_dict,
    seed=29,
)

# NOTE(review): the original cell left this assignment blank. The error-model
# stage defined directly above is applied to the test sample because of where
# it sits in the cell, even though its name says "train" -- confirm that this
# is the intended degradation for the test set.
deg_test_data = lsst_error_model_train(control_test_data)


# Degrade 10,000-Galaxy Catalog with Redshift Incompleteness 

In [5]:
# NOTE(review): this cell only constructs the two degrader stages; nothing is
# passed through them here. Both use the same variable names and settings as
# the stages built in the previous cell -- confirm whether this cell should
# apply them or be merged with that one.

# Settings for the LSST photometric error model stage
lsst_error_model_train_params = dict(
    name='lsst_error_model_train',
    bandNames=band_dict,
    seed=29,
)
lsst_error_model_train = LSSTErrorModel.make_stage(**lsst_error_model_train_params)

# Settings for the inverse-redshift incompleteness stage
inv_redshift_params = dict(
    name='inv_redshift',
    pivot_redshift=1.0,
)
inv_redshift = InvRedshiftIncompleteness.make_stage(**inv_redshift_params)

# Train FlexzBoost on Degraded Data 

In [6]:
###

# Perform Estimations with FlexzBoost

In [7]:
###

# Evaluate Using Performance Metrics 

In [8]:
###