# Imports 

In [30]:
## Essential Imports: 
import os
import qp
import numpy as np
from pathlib import Path
from pzflow.examples import get_galaxy_data
# import tables_io

## RAIL-Specific Imports: 
import rail
from rail.creation.degradation import LSSTErrorModel, InvRedshiftIncompleteness
from rail.creation.engines.flowEngine import FlowModeler, FlowCreator, FlowPosterior
from rail.core.data import TableHandle
from rail.core.stage import RailStage
from rail.core.utilStages import ColumnMapper, TableConverter

from rail.estimation.algos.flexzboost import Inform_FZBoost, FZBoost

from rail.evaluation.evaluator import Evaluator

## Data Storage:
# Grab the data store shared through RailStage and switch on overwriting at
# class level (presumably so re-running a stage may replace an existing
# handle -- confirm against the RAIL docs).
DS = RailStage.data_store
setattr(DS.__class__, "allow_overwrite", True)

## Data Formatting: (from different codes)
from rail.core.utils import RAILDIR

# Flow pickle under the RAIL installation directory.
# NOTE(review): `flow_file` is assigned again in the Model cell further down,
# so this value is replaced once that cell has run.
flow_file = os.path.join(
    RAILDIR,
    'examples/goldenspike/data/pretrained_flow.pkl',
)

# Single-letter band labels and the two column-name mappings built from them:
#   band_dict   : band label         -> mag_<band>_lsst
#   rename_dict : mag_<band>_lsst_err -> mag_err_<band>_lsst
bands = list('ugrizy')
band_dict = dict(zip(bands, (f'mag_{band}_lsst' for band in bands)))
rename_dict = dict(
    (f'mag_{band}_lsst_err', f'mag_err_{band}_lsst') for band in bands
)

# Model

In [31]:
# Directory (relative to the current working directory) for the files this
# notebook writes; created on first run.
DATA_DIR = Path().resolve() / "data"
DATA_DIR.mkdir(exist_ok=True)

# Keep the Path object and its string form under separate names so that
# `catalog_file` is always a str.
catalog_path = DATA_DIR / "base_catalog.pq"

# Example galaxy catalogue from pzflow (redshift plus the six ugrizy
# magnitudes), with the band columns renamed to mag_<band>_lsst.
catalog = get_galaxy_data().rename(band_dict, axis=1)

# Save the catalogue as parquet so the modeler stage can read it from disk.
# pandas is used directly here: the `tables_io` import at the top of the
# notebook is commented out, so the earlier `tables_io.write(...)` call raised
# NameError on a fresh kernel.
catalog.to_parquet(catalog_path)

# The stage configuration below is given plain string paths.
catalog_file = str(catalog_path)
flow_file = str(DATA_DIR / "trained_flow.pkl")

# Configuration for the FlowModeler stage.
# NOTE(review): the [low, high] pairs look like allowed value ranges per
# column, and `calc_colors` appears to select the reference band used when
# forming colours -- confirm both against the FlowModeler documentation.
# NOTE(review): the recorded training log for this cell shows the loss jumping
# to ~3.4e33 on several epochs; worth checking for numerical instability.
flow_modeler_params = {
    "name": "flow_modeler",
    "input": catalog_file,
    "model": flow_file,
    "seed": 0,
    "phys_cols": {"redshift": [0, 3]},
    "phot_cols": {
        "mag_u_lsst": [17, 35],
        "mag_g_lsst": [16, 32],
        "mag_r_lsst": [15, 30],
        "mag_i_lsst": [15, 30],
        "mag_z_lsst": [14, 29],
        "mag_y_lsst": [14, 28],
    },
    "calc_colors": {"ref_column_name": "mag_i_lsst"},
}

# Build the stage from the configuration above.
flow_modeler = FlowModeler.make_stage(**flow_modeler_params)

# Train the flow; the returned model handle is the cell's displayed output.
flow_modeler.fit_model()

Training 30 epochs 
Loss:
(0) 21.3266
(1) 6.7267
(2) 2.0761
(3) 2.6037
(4) -0.0680
(5) 0.4129
(6) 0.2506
(7) 0.1637
(8) -1.3346
(9) -1.7669
(10) -1.1823
(11) -1.6267
(12) 3402823273761818485311871060541440.0000
(13) 3402823273761818485311871060541440.0000
(14) -1.0711
(15) -0.6228
(16) 3402823273761818485311871060541440.0000
(17) 3402823273761818485311871060541440.0000
(18) -2.8045
(19) -3.3746
(20) 3402823273761818485311871060541440.0000
(21) -2.4881
(22) -3.2147
(23) -3.7188
(24) -3.4398
(25) -3.7955
(26) -3.3772
(27) 3402823273761818485311871060541440.0000
(28) -3.5247
(29) -4.1677
(30) -3.4874
Inserting handle into data store.  model_flow_modeler: /Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/inprogress_trained_flow.pkl, flow_modeler


<rail.core.data.FlowHandle at 0x17e79f160>

In [32]:
# Look up the "model" handle on the modeler stage; as the cell's last
# expression it is shown as the cell output.
flow_modeler.get_handle("model")

<rail.core.data.FlowHandle at 0x17e79f160>

# Training Set 

In [34]:
# Number of galaxies to draw for the training sample.
ntrain = 10_000

In [35]:
# Creator stage that samples from the flow held by `flow_modeler`.
flow_creator_train = FlowCreator.make_stage(**{
    "name": "flow_creator_train",
    "model": flow_modeler.get_handle("model"),
    "n_samples": ntrain,
    "seed": 372,
})

# Degrader stage, configured with a pivot redshift of 1.0.
inv_redshift = InvRedshiftIncompleteness.make_stage(**{
    "name": "inv_redshift_deg",
    "pivot_redshift": 1.0,
})

# Draw the training sample (same size and seed as configured on the stage),
# then pass it through the degrader.
orig_train = flow_creator_train.sample(ntrain, 372)
deg_train = inv_redshift(orig_train)

Inserting handle into data store.  output_flow_creator_train: inprogress_output_flow_creator_train.pq, flow_creator_train
Inserting handle into data store.  output_inv_redshift_deg: inprogress_output_inv_redshift_deg.pq, inv_redshift_deg


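Optional: uncomment and run the next cell only if you need `output_orig_train_posts` (redshift posteriors for the original, undegraded training sample).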

In [36]:
# flow_post_orig_train = FlowPosterior.make_stage(name='orig_train_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              data = orig_train)

# orig_train_pdfs = flow_post_orig_train.get_posterior(orig_train, column='redshift')

Inserting handle into data store.  output_orig_train_posts: inprogress_output_orig_train_posts.hdf5, orig_train_posts


Optional: uncomment and run the next cell only if you need `output_deg_train_posts` (redshift posteriors for the degraded training sample).

In [37]:
# flow_post_deg_train = FlowPosterior.make_stage(name='deg_train_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              data = deg_train)

# deg_train_pdfs = flow_post_deg_train.get_posterior(deg_train, column='redshift')

Inserting handle into data store.  output_deg_train_posts: inprogress_output_deg_train_posts.hdf5, deg_train_posts


# Test Set 

In [27]:
# Number of galaxies to draw for the test sample.
ntest = 100_000

In [41]:
# Creator stage that samples from the flow held by `flow_modeler`.
flow_creator_test = FlowCreator.make_stage(**{
    "name": "flow_creator_test",
    "model": flow_modeler.get_handle("model"),
    "n_samples": ntest,
    "seed": 1078,
})

# Error-model stage; `band_dict` supplies the band -> column-name mapping.
lsst_error_model = LSSTErrorModel.make_stage(**{
    "name": "lsst_error_model",
    "bandNames": band_dict,
    "seed": 39,
})

# Draw the test sample (same size and seed as configured on the stage),
# then pass it through the error model.
orig_test = flow_creator_test.sample(ntest, 1078)
deg_test = lsst_error_model(orig_test)

Inserting handle into data store.  output_flow_creator_test: inprogress_output_flow_creator_test.pq, flow_creator_test
Inserting handle into data store.  output_lsst_error_model: inprogress_output_lsst_error_model.pq, lsst_error_model


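Optional: uncomment and run the next cell only if you need `output_orig_test_posts` (redshift posteriors for the original, undegraded test sample).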

In [42]:
# flow_post_orig_test = FlowPosterior.make_stage(name='orig_test_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              data = orig_test)

# orig_test_pdfs = flow_post_orig_test.get_posterior(orig_test, column='redshift')

Inserting handle into data store.  output_orig_test_posts: inprogress_output_orig_test_posts.hdf5, orig_test_posts


Optional: uncomment and run the next cell only if you need `output_deg_test_posts` (redshift posteriors for the degraded test sample).

In [43]:
# flow_post_deg_test = FlowPosterior.make_stage(name='deg_test_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              data = deg_test)

# deg_test_pdfs = flow_post_deg_test.get_posterior(deg_test, column='redshift')

Inserting handle into data store.  output_deg_test_posts: inprogress_output_deg_test_posts.hdf5, deg_test_posts


  self._ycumul = (self._ycumul.T / self._ycumul[:,-1]).T


# Make tables

In [21]:
# Stage that renames columns according to `rename_dict`
# (mag_<band>_lsst_err -> mag_err_<band>_lsst).
col_remapper = ColumnMapper.make_stage(**{
    "name": "col_remapper",
    "columns": rename_dict,
})

# Stage that converts a table to the 'numpyDict' output format.
table_conv = TableConverter.make_stage(**{
    "name": "table_conv",
    "output_format": "numpyDict",
})