# Imports 

In [1]:
## Essential Imports: 
import os
import numpy as np
import qp
import tables_io
from pathlib import Path 
from pzflow.examples import get_galaxy_data
import ceci

## RAIL-Specific Imports: 
import rail

# old : from rail.creation.degradation import LSSTErrorModel, InvRedshiftIncompleteness


from rail.creation.degradation.lsst_error_model import LSSTErrorModel
from rail.creation.degradation.spectroscopic_degraders import InvRedshiftIncompleteness

import rail.creation 
import rail.creation.engines
from rail.creation.engines.flowEngine import FlowModeler, FlowCreator, FlowPosterior
from rail.core.data import TableHandle
from rail.core.stage import RailStage
from rail.core.utilStages import ColumnMapper, TableConverter

# old : from rail.estimation.algos.flexzboost import Inform_FZBoost, FZBoost

from rail.estimation.algos.train_z import TrainZEstimator, TrainZInformer
from rail.estimation.algos.cmnn import Inform_CMNNPDF, CMNNPDF
from rail.estimation.algos.gpz import GPzInformer, GPzEstimator 
from rail.estimation.algos.pzflow_nf import PZFlowInformer, PZFlowEstimator 
from rail.estimation.algos.flexzboost import FlexZBoostInformer, FlexZBoostEstimator  


from rail.evaluation.evaluator import Evaluator


## Data Storage: 
DS = RailStage.data_store
DS.__class__.allow_overwrite = True


### CMNN, PZFlow, FlexZBoost, GPZ, trainz for control

In [285]:
help(rail.estimation.algos)

Help on package rail.estimation.algos in rail.estimation:

NAME
    rail.estimation.algos

PACKAGE CONTENTS
    _gpz_util
    bpz_lite
    cmnn
    delightPZ
    delight_version (package)
    equal_count
    flexzboost
    gpz
    naive_stack
    point_est_hist
    pzflow_nf
    random_gauss
    train_z
    uniform_binning
    var_inf

FILE
    (built-in)




In [286]:
#from rail.stages import *
#rail.stages.import_and_attach_all()
#for val in RailStage.pipeline_stages.values():
#    print(val[0])

# Model

In [2]:
def makeModel():
    #path to access the data 
    DATA_DIR =  Path().resolve() / "data"
    DATA_DIR.mkdir(exist_ok=True)

    catalog_file = DATA_DIR / "base_catalog.pq"

    bands = ['u','g','r','i','z','y']
    band_dict = {band:f'mag_{band}_lsst' for band in bands}
    # band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}
    # band_dict_err = {f'mag_err_{band}_lsst' for band in bands}
    
    #array of galaxies w/ 7 attributes for each: redshift & ugrizy
    catalog = get_galaxy_data().rename(band_dict, axis=1) 

    #turns array into a table 
    tables_io.write(catalog, str(catalog_file.with_suffix("")), catalog_file.suffix[1:])

    catalog_file = str(catalog_file)
    flow_file = str(DATA_DIR / "trained_flow.pkl")

    print(flow_file)

    #we set up the stage 
    flow_modeler_params = {
        "name": "flow_modeler",
        "input": catalog_file,
        "model": flow_file,
        "seed": 0,
        "phys_cols": {"redshift": [0, 3]},
        "phot_cols": {
            "mag_u_lsst": [17, 35],
            "mag_g_lsst": [16, 32],
            "mag_r_lsst": [15, 30],
            "mag_i_lsst": [15, 30],
            "mag_z_lsst": [14, 29],
            "mag_y_lsst": [14, 28],
        },
        "calc_colors": {"ref_column_name": "mag_i_lsst"},
    }
    flow_modeler = FlowModeler.make_stage(**flow_modeler_params)
    # flow_modeler.fit_model()
    return flow_modeler, flow_file ##.get_handle("model")

In [4]:
modelData, flow_file = makeModel() 

/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/trained_flow.pkl


In [61]:
!ls

README.md                    [34mqp[m[m
Test_Pipeline.ipynb          [34mspecSelection_TEST1[m[m
[34mdata[m[m                         [34mspecSelection_TEST2[m[m
output_flow_creator_test.pq  trained_flow.pkl
output_flow_creator_train.pq [34muntitled folder[m[m


# Make Training Set and Test Set 

In [5]:
def trainSet(ntrain, seed):
    data = FlowCreator.make_stage(
            name = 'train_set',
            model = flow_file,
            n_samples = ntrain,
            seed = seed 
    )
    return data 

In [6]:
def testSet(ntest, seed):
    data = FlowCreator.make_stage(
            name = 'test_set',
            model = flow_file,
            n_samples = ntest,
            seed = seed 
    )
    return data #.sample(ntest, seed)

# Degraders

## Inverse Redshift Incompleteness

In [34]:
def invRedshift(pivot = 1.0):
    assert type(pivot) == float 
    degr = InvRedshiftIncompleteness.make_stage(
        name = 'inv_redshift',
        pivot_redshift = pivot
    )
    return degr 

pivot_ls = [1.0, 1.4]

In [35]:
# ## Choose pivot z's for inverse redshift incompleteness 

# ## seed1 and ndata should be the same as  seed1 and ntrain used to call bigF!! 
# ## Otherwise this might not be representative of the real data 

# def choosePivots(seed1, ndata):
#     nums = trainSet(ndata, seed1)
#     data = nums.sample(ndata, seed1)
#     data_pq = col_remap(data)
#     data_table = table_conv(data_pq)
#     table = tables_io.convertObj(data_table.data, tables_io.types.PD_DATAFRAME)
#     return np.asarray(table['redshift'])

# percentiles = np.arange(10, 100, 10)
# pivots = [] 

# for i in percentiles:
#     pivot = np.percentile(choosePivots(17, 100000), i) 
#     pivots.append(pivot)

In [36]:
# print(pivots)

## LSST Error 

In [37]:
bands = ['u','g','r','i','z','y']
band_dict = {band:f'mag_{band}_lsst' for band in bands}

def lsstError(dict, seed): 
    deg = LSSTErrorModel.make_stage(
        name='lsst_error',
        renameDict= dict, 
        ndFlag=np.nan,
        seed=seed,
    )
    return deg 

In [166]:
help(rail.creation.degradation.lsst_error_model)


Help on module rail.creation.degradation.lsst_error_model in rail.creation.degradation:

NAME
    rail.creation.degradation.lsst_error_model - The LSST Model for photometric errors.

CLASSES
    rail.creation.degrader.Degrader(rail.core.stage.RailStage)
        LSSTErrorModel
    
    class LSSTErrorModel(rail.creation.degrader.Degrader)
     |  LSSTErrorModel(args, comm=None)
     |  
     |  The LSST Model for photometric errors.
     |  
     |  This is a wrapper around the error model from PhotErr. The parameter
     |  docstring below is dynamically added by the installed version of PhotErr:
     |  
     |  Parameters for the LSST photometric error model.
     |  
     |  Default values taken from pages 11, 12, 26 of Ivezic 2019.
     |  
     |  
     |  Parameters
     |  ----------
     |  nYrObs : float
     |      Number of years of observations
     |  nVisYr : dict
     |      Mean number of visits per year in each band
     |  gamma : dict
     |      A band dependent par

## Quantity Cuts 

In [38]:
## write a dictionary with the different bands and magnitudes you want

def quantCuts(band, mag):
    quantity_cut = QuantityCut.make_stage(
        name='quantity_cut',    
        cuts={'mag_i_lsst': 25.0},
    )

In [39]:
qcuts_dict = {'mag_u_lsst': [...], 
              'mag_g_lsst': [...], 
              'mag_r_lsst': [...], 
              'mag_i_lsst': [...], 
              'mag_z_lsst': [...], 
              'mag_y_lsst': [...] }

## Survey-Based Degraders

In [40]:
from rail.creation.degradation.spectroscopic_selections import *

def specSelectBOSS(ntrain):
    degr = SpecSelection_BOSS.make_stage(
        name = 'specselection_boss',
        N_tot = ntrain
    )
    return degr 

def specSelectDEEP2(ntrain):
    degr = SpecSelection_DEEP2.make_stage(
        name = 'specselection_deep2',
        N_tot = ntrain
    )
    return degr 

def specSelectGAMA(ntrain):
    degr = SpecSelection_GAMA.make_stage(
        name = 'specselection_gama',
        N_tot = ntrain
    )
    return degr 

def specSelectHSC(ntrain):
    degr = SpecSelection_HSC.make_stage(
        name = 'specselection_HSC',
        N_tot = ntrain
    )
    return degr 

def specSelectVVDSf02(ntrain):
    degr = SpecSelection_VVDSf02.make_stage(
        name = 'specselection_VVDSf02',
        N_tot = ntrain
    )
    return degr 

def specSelectzCOSMOS(ntrain):
    degr = SpecSelection_zCOSMOS.make_stage(
        name = 'specselection_zCOSMOS',
        N_tot = ntrain
    )
    return degr 

In [41]:
spec_dict = {'BOSS': specSelectBOSS, 
             'DEEP2': specSelectDEEP2, 
             'GAMA': specSelectGAMA,
             'HSC': specSelectHSC, 
             'VVDSf02': specSelectVVDSf02, 
             'zCOSMOS': specSelectzCOSMOS } 

## Posts 

In [42]:
def getPosts(data, model, grid):
    posts = FlowPosterior.make_stage(
        name='get_posts'+str(data), 
        column='redshift',
        grid = grid,
        model = model,
        data = data
    )
    return posts #posts.get_posterior(data, column = 'redshift')

In [43]:
def makeGrid(zmin, zmax, nbins):
    import numpy as np
    grid = np.linspace(zmin, zmax, nbins + 1)
    return grid 

In [44]:
grid = makeGrid(0, 2.5, 100)

Only run if you need output_orig_train_posts

In [45]:
# flow_post_orig_train = FlowPosterior.make_stage(name='orig_train_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              data = orig_train)

# orig_train_pdfs = flow_post_orig_train.get_posterior(orig_train, column='redshift')

Only run if you need output_deg_train_posts ** rerun this cell!! 

In [46]:
# flow_post_deg_train = FlowPosterior.make_stage(name='deg_train_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              err_samples = 0,
#                                              data = deg_train)



# deg_train_pdfs = flow_post_deg_train.get_posterior(deg_train, column='redshift')

Only run if you need output_orig_test_posts

In [47]:
# flow_post_orig_test = FlowPosterior.make_stage(name='orig_test_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              data = orig_test)

# orig_test_pdfs = flow_post_orig_test.get_posterior(orig_test, column='redshift')

Only run if you need output_deg_test_posts

In [48]:
# flow_post_deg_test = FlowPosterior.make_stage(name='deg_test_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              data = deg_test)

# deg_test_pdfs = flow_post_deg_test.get_posterior(deg_test, column='redshift')

# Make tables

In [49]:
bands = ['u','g','r','i','z','y']
band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

def colRemapper(dict):
    col_remap = ColumnMapper.make_stage(
    name='col_remapper', 
    columns=dict,
    )
    return col_remap

def tableConverter():
    table_conv = TableConverter.make_stage(
    name='table_conv', 
    output_format='numpyDict',
    )
    return table_conv

In [50]:
col_remap = colRemapper(band_dict_err)
table_conv = tableConverter()

# Inform & Estimate

In [51]:
def informTrainZ():
    inf = TrainZInformer.make_stage(
    name = 'inform_TrainZ',
    model = 'trainz.pkl',
    hdf5_groupname=""
    )
    return inf

def estimateTrainZ(info):
    est = TrainZEstimator.make_stage(
    name = 'estimate_TrainZ',
    model = 'trainz.pkl', 
    hdf5_groupname=""
    )
    return est

In [167]:
help(rail.estimation.algos.cmnn)#Inform_CMNNPDF)

Help on module rail.estimation.algos.cmnn in rail.estimation.algos:

NAME
    rail.estimation.algos.cmnn

DESCRIPTION
    Implementation of the color-matched nearest neighbor (CMNN) algorithm
    See https://ui.adsabs.harvard.edu/abs/2018AJ....155....1G/abstract
    for more details

CLASSES
    rail.estimation.estimator.CatEstimator(rail.core.stage.RailStage)
        CMNNPDF
    rail.estimation.informer.CatInformer(rail.core.stage.RailStage)
        Inform_CMNNPDF
    
    class CMNNPDF(rail.estimation.estimator.CatEstimator)
     |  CMNNPDF(args, comm=None)
     |  
     |  Color Matched Nearest Neighbor Estimator
     |  Note that there are several modifications from the original CMNN, mainly that
     |  the original estimator dropped non-detections from the Mahalnobis distance
     |  calculation. However, there is information in a non-detection, so instead here
     |  I've replaced the non-detections with 1 sigma limit and a magnitude
     |  uncertainty of 1.0 and fixed the deg

In [52]:
def informCMNN():
    inf = Inform_CMNNPDF.make_stage(
    name = 'inform_CMNN',
    model = 'cmnn.pkl',
    hdf5_groupname=""
    # config_options = {'err_bands': ['mag_err_u_lsst', 
    #                                 'mag_err_g_lsst'
    #                                 'mag_err_r_lsst'
    #                                 'mag_err_i_lsst'
    #                                 'mag_err_z_lsst'
    #                                 'mag_err_y_lsst'] }
    )
    return inf

def estimateCMNN(info):
    est = CMNNPDF.make_stage(
    name = 'estimate_CMNN',
    model = 'cmnn.pkl', 
    hdf5_groupname=""
    )
    return est

In [53]:
def informGPz():
    inf = GPzInformer.make_stage(
    name = 'inform_GPz',
    model = 'gpz.pkl',
    hdf5_groupname=""
    # config_options = {'err_bands': ['mag_err_u_lsst', 
    #                                 'mag_err_g_lsst'
    #                                 'mag_err_r_lsst'
    #                                 'mag_err_i_lsst'
    #                                 'mag_err_z_lsst'
    #                                 'mag_err_y_lsst'] }

    )
    return inf

def estimateGPz(info):
    est = GPzEstimator.make_stage(
    name = 'estimate_GPz',
    model = 'gpz.pkl', 
    hdf5_groupname=""
    )
    return est

In [54]:
def informPZFlow():
    inf = PZFlowInformer.make_stage(
    name = 'inform_PZFlow',
    model = 'pzflow.pkl',
    hdf5_groupname=""
    )
    return inf

def estimatePZFlow(info):
    est = PZFlowEstimator.make_stage(
    name = 'estimate_PZFlow',
    model = 'pzflow.pkl', 
    hdf5_groupname=""
    )
    return est

In [55]:
def informFZBoost():
    info = FlexZBoostInformer.make_stage(
    name ='inform_FZBoost', 
    model ='fzboost.pkl', 
    hdf5_groupname='',
    )
    return info

def estimateFZBoost(info):
    est = FlexZBoostEstimator.make_stage(
    name='est_FZBoost', 
    nondetect_val=np.nan,
    model= info,
    hdf5_groupname='',
    aliases=dict(input='test_data', output='fzboost_estim'),
    nzbins = 100 
    )
    return est 

In [56]:
inf_est_dict = {'TrainZ': [informTrainZ, estimateTrainZ],
               'CMNN': [informCMNN, estimateCMNN], 
               'GPz': [informGPz, estimateGPz], 
               'PZFlow': [informPZFlow, estimatePZFlow], 
               'FZBoost': [informFZBoost, estimateFZBoost]}

In [57]:
# 'invz': invRedshift,

spec_dict = {'BOSS': specSelectBOSS, 
             'DEEP2': specSelectDEEP2, 
             'GAMA': specSelectGAMA,
             'HSC': specSelectHSC, 
             'VVDSf02': specSelectVVDSf02, 
             'zCOSMOS': specSelectzCOSMOS } 

inf_est_dict = {'TrainZ': [informTrainZ, estimateTrainZ],
               'CMNN': [informCMNN, estimateCMNN], 
               'GPz': [informGPz, estimateGPz], 
               'PZFlow': [informPZFlow, estimatePZFlow], 
               'FZBoost': [informFZBoost, estimateFZBoost] }

In [58]:
# import ceci 

# pr = ceci.Pipeline.read(path_lst_1[0])#parent_dir+directory+"/invz=0.33672517538070684_lsstErr_pzflow.yml")
# pr.run()

# ## 1) terminal: go to path up to invz_lsstErr_pzflow, then run these 2 lines 
# ## 2)  make list/txt file with list of paths to files made by big F

# ## do 1) 
# ## open virtual env
# ## python 
# ## import ceci 
# ## run the 2 lines of code above 


# ### at the end we can put this into a .py file that we can run at the command line 

# ## %cd ? 

In [59]:
## more config parameters/better config parameters
## have to give path above to estimator model instead of get_handle('model')
## fix truncated parameter printing in help(...)

# Big F's

In [62]:
# # Make sure to change the first argument of testSet
# # testData = testSet(ntest, seed2)

# testData = testSet(100, 39)
# testData.run()

# test_data = DS.read_file("test_Data", TableHandle, "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/output_test_set.pq")

In [63]:
# #lsstErr = lsstError(band_dict, seed3)

# bands = ['u','g','r','i','z','y']
# band_dict = {band: f"mag_{band}_lsst" for band in bands}
# band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

# lsstErr = lsstError(band_dict, 172)
# lsstErr.connect_input(test_data) ## might be wrong; passing in a file not a stage 
# lsstErr.run()

# lsst_Err = DS.read_file("test_Data", TableHandle, "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/output_lsst_error.pq")

In [64]:
## for inverse redshift incompleteness:

pivot_ls = [1.0, 1.4] 

name_ls = ['BOSS', 'DEEP2', 'GAMA', 'HSC', 'VVDSf02', 'zCOSMOS']

## TrainZ

In [77]:
def bigF0(degrader, name, pathname, ntrain, ntest, seed1, seed2, seed3, nbins, invzparam):
    
    ##things you need
    #grid = makeGrid(0, 2.5, nbins) 
    bands = ['u','g','r','i','z','y']
    band_dict = {band: f"mag_{band}_lsst" for band in bands}
    band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

    # modelData = makeModel()
    
    trainData = trainSet(ntrain, seed1)

    if degrader == invRedshift:
       deg = degrader(invzparam)
    else:
        deg = degrader(ntrain) 

    print('degrader is: '+str(deg))
    
    testData = testSet(ntest, seed2)

    lsstErr = lsstError(band_dict, seed3)
    infTrainZ = informTrainZ()
    estTrainZ = estimateTrainZ(infTrainZ)

    ##pipeline and yml
    pipe = ceci.Pipeline.interactive()
    stages = [
        trainData, 
        deg, 
        testData, 
        lsstErr,  
        infTrainZ, 
        estTrainZ]

    for stage in stages:
        pipe.add_stage(stage)

    deg.connect_input(trainData)
    lsstErr.connect_input(testData)

    infTrainZ.connect_input(deg) 
    estTrainZ.connect_input(infTrainZ, inputTag = 'model')
    estTrainZ.connect_input(lsstErr, inputTag = 'input') ## trucated out of docs :(

    pipe.initialize(
    dict(model=flow_file), dict(output_dir=".", log_dir=".", resume=False), None) 

    outpath = os.path.join(pathname, "% s_lsstErr_pzflow.yml" % name)
    pipe.save(outpath)
    return outpath 

### Specs

In [81]:
##run 

path_lst_0 = []
directory_0 = "specSelection_lsstErr_TrainZ"
parent_dir_0 = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_0 = os.path.join(parent_dir_0, directory_0)
os.makedirs(path_0, exist_ok=True)

In [95]:
for key in spec_dict:
    path_lst_0.append(bigF0(spec_dict[key], key, path_0, 1000000, 10000, 17, 39, 172, 10, 0))

degrader is: Applying the BOSS selection.
degrader is: Applying the DEEP2 selection.
degrader is: Applying the GAMA selection.
degrader is: Applying the HSC selection.
degrader is: Applying the VVDSf02 selection.
degrader is: Applying the zCOSMOS selection.


In [96]:
out_dir_0 = "outputs"
out_parent_dir_0 = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_TrainZ"
path_outs_0 = os.path.join(out_parent_dir_0, out_dir_0)
os.makedirs(path_outs_0, exist_ok=True)



os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_TrainZ/outputs")

ind = 0
for i in path_lst_0:
    if ind <= 5:
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_TrainZ/outputs")
        dir_0 = name_ls[ind]
        parent_0 = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_TrainZ/outputs"
        outpath_0 = os.path.join(parent_0, dir_0)
        os.makedirs(outpath_0, exist_ok=True)
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_TrainZ/outputs/"+dir_0)
        pr = ceci.Pipeline.read(i)
        pr.run()
        ind += 1
    else: 
        break


# import pandas as pd
# df2 = pd.read_parquet("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST2/outputs/output_specselection_boss.pq")


Executing test_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines.flowEngine.FlowCreator   --model=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/trained_flow.pkl   --name=test_set   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_TrainZ/BOSS_lsstErr_pzflow_config.yml   --output=./output_test_set.pq 
Output writing to ./test_set.out

Job test_set has completed successfully!

Executing lsst_error
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.degradation.lsst_error_model.LSSTErrorModel   --input=./output_test_set.pq   --name=lsst_error   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_TrainZ/BOSS_lsstErr_pzflow_config.yml   --output=./output_lsst_error.pq 
Output writing to ./lsst_error.out

Job lsst_error has completed successfully!

Executing train_set
Command is:
OMP_NUM_THREADS=1   p

In [97]:
import pandas as pd
df_BOSS = pd.read_parquet("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_TrainZ/outputs/BOSS/output_specselection_boss.pq")
df_DEEP2 = pd.read_parquet("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_TrainZ/outputs/DEEP2/output_specselection_deep2.pq")
df_GAMA = pd.read_parquet("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_TrainZ/outputs/GAMA/output_specselection_gama.pq")
df_HSC = pd.read_parquet("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_TrainZ/outputs/HSC/output_specselection_hsc.pq")
df_VVDSf02 = pd.read_parquet("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_TrainZ/outputs/VVDSf02/output_specselection_VVDSf02.pq")
df_zCOSMOS = pd.read_parquet("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_TrainZ/outputs/zCOSMOS/output_specselection_zCOSMOS.pq")

print(len(df_BOSS))
print(len(df_DEEP2))
print(len(df_GAMA))
print(len(df_HSC))
print(len(df_VVDSf02))
print(len(df_zCOSMOS))

1133
52105
4567
23902
137241
57704


### invz

In [78]:
path_lst_0_invz = []
directory_0_invz = "invz_lsstErr_TrainZ"
parent_dir_0_invz = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_0_invz = os.path.join(parent_dir_0_invz, directory_0_invz)
os.makedirs(path_0_invz, exist_ok=True)

In [79]:
for i in pivot_ls:
    path_lst_0_invz.append(bigF0(invRedshift, 'invz='+str(i), path_0_invz, 1000, 100, 17, 39, 172, 10, i))

degrader is: <rail.creation.degradation.spectroscopic_degraders.InvRedshiftIncompleteness object at 0x286f23ad0>
Inserting handle into data store.  output_inv_redshift: inprogress_output_inv_redshift.pq, inv_redshift
degrader is: <rail.creation.degradation.spectroscopic_degraders.InvRedshiftIncompleteness object at 0x286f21990>


In [80]:
out_dir_0_invz = "outputs"
out_parent_dir_0_invz = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_TrainZ"
path_outs_0_invz = os.path.join(out_parent_dir_0_invz, out_dir_0_invz)
os.makedirs(path_outs_0_invz, exist_ok=True)



os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_TrainZ/outputs")

ind = 0
for i in path_lst_0_invz:
    if ind < len(pivot_ls):
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_TrainZ/outputs")
        dir_0_invz = str(pivot_ls[ind])
        parent_0_invz = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_TrainZ/outputs"
        outpath_0_invz = os.path.join(parent_0_invz, dir_0_invz)
        os.makedirs(outpath_0_invz, exist_ok=True)
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_TrainZ/outputs/"+dir_0_invz)
        pr = ceci.Pipeline.read(i)
        pr.run()
        ind += 1
    else: 
        break


Executing test_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines.flowEngine.FlowCreator   --model=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/trained_flow.pkl   --name=test_set   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_TrainZ/invz=1.0_lsstErr_pzflow_config.yml   --output=./output_test_set.pq 
Output writing to ./test_set.out

Job test_set has completed successfully!

Executing lsst_error
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.degradation.lsst_error_model.LSSTErrorModel   --input=./output_test_set.pq   --name=lsst_error   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_TrainZ/invz=1.0_lsstErr_pzflow_config.yml   --output=./output_lsst_error.pq 
Output writing to ./lsst_error.out

Job lsst_error has completed successfully!

Executing train_set
Command is:
OMP_NUM_THREADS=1   python3 -m 

## CMNN

In [178]:
def bigF1(degrader, name, pathname, ntrain, ntest, seed1, seed2, seed3, nbins, invzparam):
    
    ##things you need
    #grid = makeGrid(0, 2.5, nbins) 
    bands = ['u','g','r','i','z','y']
    band_dict = {band: f"mag_{band}_lsst" for band in bands}
    band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

    # modelData = makeModel()
    
    trainData = trainSet(ntrain, seed1)

    if degrader == invRedshift:
       deg = degrader(invzparam)
    else:
        deg = degrader(ntrain)  
    
    testData = testSet(ntest, seed2)

    remapper = colRemapper(band_dict_err)

    lsstErr = lsstError(band_dict, seed3)
    infCMNN = informCMNN()
    estCMNN = estimateCMNN(infCMNN)

    ##pipeline and yml
    pipe = ceci.Pipeline.interactive()
    stages = [
        trainData, 
        deg, 
        remapper,
        testData, 
        lsstErr, 
        infCMNN, 
        estCMNN]

    for stage in stages:
        pipe.add_stage(stage)

    deg.connect_input(trainData)
    remapper.connect_input(deg)
    infCMNN.connect_input(remapper)

    lsstErr.connect_input(testData)
    remapper.connect_input(lsstErr)
    estCMNN.connect_input(infCMNN, inputTag = 'model')
    estCMNN.connect_input(remapper, inputTag = 'input') ## trucated out of docs :(

    pipe.initialize(
    dict(model=flow_file), dict(output_dir=".", log_dir=".", resume=False), None) 

    outpath = os.path.join(pathname, "% s_lsstErr_CMNN.yml" % name)
    pipe.save(outpath)
    return outpath 

### Specs

In [179]:
path_lst_1 = []
directory_1 = "specSelection_lsstErr_CMNN"
parent_dir_1 = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_1 = os.path.join(parent_dir_1, directory_1)
os.makedirs(path_1, exist_ok=True)

In [180]:
for key in spec_dict:
    path_lst_1.append(bigF1(spec_dict[key], key, path_1, 1000000, 100, 17, 39, 172, 10, 0))

In [181]:
out_dir_1 = "outputs"
out_parent_dir_1 = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_CMNN"
path_outs_1 = os.path.join(out_parent_dir_1, out_dir_1)
os.makedirs(path_outs_1, exist_ok=True)



os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_CMNN/outputs")

ind = 0
for i in path_lst_1:
    if ind <= 5:
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_CMNN/outputs")
        dir_1 = name_ls[ind]
        parent_1 = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_CMNN/outputs"
        outpath_1 = os.path.join(parent_1, dir_1)
        os.makedirs(outpath_1, exist_ok=True)
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_CMNN/outputs/"+dir_1)
        pr = ceci.Pipeline.read(i)
        pr.run()
        ind += 1
    else: 
        break



Executing test_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines.flowEngine.FlowCreator   --model=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/trained_flow.pkl   --name=test_set   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_CMNN/BOSS_lsstErr_CMNN_config.yml   --output=./output_test_set.pq 
Output writing to ./test_set.out

Job test_set has completed successfully!

Executing lsst_error
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.degradation.lsst_error_model.LSSTErrorModel   --input=./output_test_set.pq   --name=lsst_error   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_CMNN/BOSS_lsstErr_CMNN_config.yml   --output=./output_lsst_error.pq 
Output writing to ./lsst_error.out

Job lsst_error has completed successfully!

Executing col_remapper
Command is:
OMP_NUM_THREADS=1   python

### invz

In [119]:
path_lst_1_invz = []
directory = "invz_lsstErr_CMNN"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_1_invz = os.path.join(parent_dir, directory)
os.makedirs(path_1_invz, exist_ok=True)

In [120]:
for i in pivot_ls:
    path_lst_1_invz.append(bigF1(invRedshift, 'invz='+str(i), path_1_invz, 1000000, 100, 17, 39, 172, 10, i))

In [121]:
out_dir_1_invz = "outputs"
out_parent_dir_1_invz = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_CMNN"
path_outs_1_invz = os.path.join(out_parent_dir_1_invz, out_dir_1_invz)
os.makedirs(path_outs_1_invz, exist_ok=True)



os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_CMNN/outputs")

ind = 0
for i in path_lst_1_invz:
    if ind < len(pivot_ls):
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_CMNN/outputs")
        dir_1_invz = str(pivot_ls[ind])
        parent_1_invz = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_CMNN/outputs"
        outpath_1_invz = os.path.join(parent_1_invz, dir_1_invz)
        os.makedirs(outpath_1_invz, exist_ok=True)
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_CMNN/outputs/"+dir_1_invz)
        pr = ceci.Pipeline.read(i)
        pr.run()
        ind += 1
    else: 
        break


Executing test_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines.flowEngine.FlowCreator   --model=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/trained_flow.pkl   --name=test_set   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_CMNN/invz=1.0_lsstErr_CMNN_config.yml   --output=./output_test_set.pq 
Output writing to ./test_set.out

Job test_set has completed successfully!

Executing lsst_error
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.degradation.lsst_error_model.LSSTErrorModel   --input=./output_test_set.pq   --name=lsst_error   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_CMNN/invz=1.0_lsstErr_CMNN_config.yml   --output=./output_lsst_error.pq 
Output writing to ./lsst_error.out

Job lsst_error has completed successfully!

Executing train_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rai


*************************************************
Error running pipeline stage inform_CMNN.

Standard output and error streams in ./inform_CMNN.out
*************************************************


Job test_set has completed successfully!

Executing lsst_error
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.degradation.lsst_error_model.LSSTErrorModel   --input=./output_test_set.pq   --name=lsst_error   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_CMNN/invz=1.4_lsstErr_CMNN_config.yml   --output=./output_lsst_error.pq 
Output writing to ./lsst_error.out



KeyboardInterrupt: 

## GPz

In [104]:
def bigF2(degrader, name, pathname, ntrain, ntest, seed1, seed2, seed3, nbins, invzparam):
    
    ##things you need
    #grid = makeGrid(0, 2.5, nbins) 
    bands = ['u','g','r','i','z','y']
    band_dict = {band: f"mag_{band}_lsst" for band in bands}
    band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

    # modelData = makeModel()
    
    trainData = trainSet(ntrain, seed1)

    if degrader == invRedshift:
       deg = degrader(invzparam)
    else:
        deg = degrader(ntrain)  
    
    testData = testSet(ntest, seed2)

    remapper = colRemapper(band_dict_err)

    lsstErr = lsstError(band_dict, seed3)
    infGPz = informGPz()
    estGPz = estimateGPz(infGPz)

    ##pipeline and yml
    pipe = ceci.Pipeline.interactive()
    stages = [
        trainData, 
        deg, 
        remapper,
        testData, 
        lsstErr,  
        infGPz, 
        estGPz]

    for stage in stages:
        pipe.add_stage(stage)

    deg.connect_input(trainData)
    remapper.connect_inpus(deg)
    infGPz.connect_input(remapper) 

    lsstErr.connect_input(testData)
    remapper.connect_input(lsstErr) 
    estGPz.connect_input(infGPz, inputTag = 'model')
    estGPz.connect_input(remapper, inputTag = 'input') ## trucated out of docs :(

    pipe.initialize(
    dict(model=flow_file), dict(output_dir=".", log_dir=".", resume=False), None) 

    outpath = os.path.join(pathname, "% s_lsstErr_GPz.yml" % name)
    pipe.save(outpath)
    return outpath 

### Specs

In [105]:
path_lst_2 = []
directory = "specSelection_lsstErr_GPz"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_2 = os.path.join(parent_dir, directory)
os.makedirs(path_2, exist_ok=True)

In [117]:
for key in spec_dict:
    path_lst_2.append(bigF2(spec_dict[key], key, path_2, 1000000, 100, 17, 39, 172, 10, 0))

In [118]:
out_dir_2 = "outputs"
out_parent_dir_2 = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_GPz"
path_outs_2 = os.path.join(out_parent_dir_2, out_dir_2)
os.makedirs(path_outs_2, exist_ok=True)



os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_GPz/outputs")

ind = 0
for i in path_lst_2:
    if ind <= 5:
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_GPz/outputs")
        dir_2 = name_ls[ind]
        parent_2 = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_GPz/outputs"
        outpath_2 = os.path.join(parent_2, dir_2)
        os.makedirs(outpath_2, exist_ok=True)
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_GPz/outputs/"+dir_2)
        pr = ceci.Pipeline.read(i)
        pr.run()
        ind += 1
    else: 
        break



Executing test_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines.flowEngine.FlowCreator   --model=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/trained_flow.pkl   --name=test_set   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_GPz/BOSS_lsstErr_GPz_config.yml   --output=./output_test_set.pq 
Output writing to ./test_set.out

Job test_set has completed successfully!

Executing lsst_error
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.degradation.lsst_error_model.LSSTErrorModel   --input=./output_test_set.pq   --name=lsst_error   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_GPz/BOSS_lsstErr_GPz_config.yml   --output=./output_lsst_error.pq 
Output writing to ./lsst_error.out

Job lsst_error has completed successfully!

Executing train_set
Command is:
OMP_NUM_THREADS=1   python3 -m ce


*************************************************
Error running pipeline stage inform_GPz.

Standard output and error streams in ./inform_GPz.out
*************************************************


Job test_set has completed successfully!

Executing lsst_error
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.degradation.lsst_error_model.LSSTErrorModel   --input=./output_test_set.pq   --name=lsst_error   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_GPz/DEEP2_lsstErr_GPz_config.yml   --output=./output_lsst_error.pq 
Output writing to ./lsst_error.out

Job lsst_error has completed successfully!

Executing train_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines.flowEngine.FlowCreator   --model=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/trained_flow.pkl   --name=train_set   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_GPz/DEEP2_lsstErr_GPz_config.yml   --output=./output_train_set.pq 
Output writing to ./train_set.out

Job train_set has completed successfully!

Executing specsel


*************************************************
Error running pipeline stage inform_GPz.

Standard output and error streams in ./inform_GPz.out
*************************************************


Job test_set has completed successfully!

Executing lsst_error
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.degradation.lsst_error_model.LSSTErrorModel   --input=./output_test_set.pq   --name=lsst_error   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_GPz/GAMA_lsstErr_GPz_config.yml   --output=./output_lsst_error.pq 
Output writing to ./lsst_error.out

Job lsst_error has completed successfully!

Executing train_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines.flowEngine.FlowCreator   --model=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/trained_flow.pkl   --name=train_set   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_GPz/GAMA_lsstErr_GPz_config.yml   --output=./output_train_set.pq 
Output writing to ./train_set.out



KeyboardInterrupt: 

### invz

In [None]:
path_lst_2_invz = []
directory = "invz_lsstErr_GPz"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_2_invz = os.path.join(parent_dir, directory)
os.makedirs(path_2_invz, exist_ok=True)

In [None]:
for i in pivot_ls:
    path_lst_2_invz.append(bigF2(invRedshift, 'invz='+str(i), path_1_invz, 1000000, 100, 17, 39, 172, 10, i))

In [None]:
out_dir_2_invz = "outputs"
out_parent_dir_2_invz = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_GPz"
path_outs_2_invz = os.path.join(out_parent_dir_2_invz, out_dir_2_invz)
os.makedirs(path_outs_2_invz, exist_ok=True)



os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_GPz/outputs")

ind = 0
for i in path_lst_2_invz:
    if ind < len(pivot_ls):
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_GPz/outputs")
        dir_2_invz = str(pivot_ls[ind])
        parent_2_invz = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_GPz/outputs"
        outpath_2_invz = os.path.join(parent_2_invz, dir_2_invz)
        os.makedirs(outpath_2_invz, exist_ok=True)
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_GPz/outputs/"+dir_2_invz)
        pr = ceci.Pipeline.read(i)
        pr.run()
        ind += 1
    else: 
        break

## PZFlow

In [None]:
def bigF3(degrader, name, pathname, ntrain, ntest, seed1, seed2, seed3, nbins, invzparam):
    
    ##things you need
    #grid = makeGrid(0, 2.5, nbins) 
    bands = ['u','g','r','i','z','y']
    band_dict = {band: f"mag_{band}_lsst" for band in bands}
    band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

    # modelData = makeModel()
    
    trainData = trainSet(ntrain, seed1)

    if degrader == invRedshift:
       deg = degrader(invzparam)
    else:
        deg = degrader(ntrain)  
    
    testData = testSet(ntest, seed2)

    lsstErr = lsstError(band_dict, seed3)
    infPZFlow = informPZFlow()
    estPZFlow = estimatePZFlow(infPZFlow)

    ##pipeline and yml
    pipe = ceci.Pipeline.interactive()
    stages = [
        trainData, 
        deg, 
        testData, 
        lsstErr,  
        infPZFlow, 
        estPZFlow]

    for stage in stages:
        pipe.add_stage(stage)

    deg.connect_input(trainData)
    lsstErr.connect_input(testData)

    infPZFlow.connect_input(deg) 
    estPZFlow.connect_input(infPZFlow, inputTag = 'model')
    estPZFlow.connect_input(lsstErr, inputTag = 'input') ## trucated out of docs :( 

    pipe.initialize(
    dict(model=flow_file), dict(output_dir=".", log_dir=".", resume=False), None) 

    outpath = os.path.join(pathname, "% s_lsstErr_PZFlow.yml" % name)
    pipe.save(outpath)
    return outpath 

In [None]:
# help(rail.creation.degradation.spectroscopic_selections)

### Specs

In [None]:
##run 

path_lst_3 = []
directory = "specSelection_lsstErr_PZFlow"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_3 = os.path.join(parent_dir, directory)
os.makedirs(path_3, exist_ok=True)

In [None]:
for key in spec_dict:
    path_lst_3.append(bigF3(spec_dict[key], key, path_3, 1000000, 100, 17, 39, 172, 10, 0))

In [None]:
out_dir_3 = "outputs"
out_parent_dir_3 = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_PZFlow"
path_outs_3 = os.path.join(out_parent_dir_3, out_dir_3)
os.makedirs(path_outs_3, exist_ok=True)



os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_PZFlow/outputs")

ind = 0
for i in path_lst_3:
    if ind <= 5:
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_PZFlow/outputs")
        dir_3 = name_ls[ind]
        parent_3 = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_PZFlow/outputs"
        outpath_3 = os.path.join(parent_3, dir_3)
        os.makedirs(outpath_3, exist_ok=True)
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_PZFlow/outputs/"+dir_3)
        pr = ceci.Pipeline.read(i)
        pr.run()
        ind += 1
    else: 
        break


### invz

In [None]:
path_lst_3_invz = []
directory = "invz_lsstErr_PZFlow"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_3_invz = os.path.join(parent_dir, directory)
os.makedirs(path_3_invz, exist_ok=True)

In [None]:
for i in pivot_ls:
    path_lst_3_invz.append(bigF3(invRedshift, 'invz='+str(i), path_3_invz, 1000000, 100, 17, 39, 172, 10, i))

In [None]:
out_dir_3_invz = "outputs"
out_parent_dir_3_invz = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_PZFlow"
path_outs_3_invz = os.path.join(out_parent_dir_3_invz, out_dir_3_invz)
os.makedirs(path_outs_3_invz, exist_ok=True)



os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_PZFlow/outputs")

ind = 0
for i in path_lst_3_invz:
    if ind < len(pivot_ls):
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_PZFlow/outputs")
        dir_3_invz = str(pivot_ls[ind])
        parent_3_invz = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_PZFlow/outputs"
        outpath_3_invz = os.path.join(parent_3_invz, dir_3_invz)
        os.makedirs(outpath_3_invz, exist_ok=True)
        os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/invz_lsstErr_PZFlow/outputs/"+dir_3_invz)
        pr = ceci.Pipeline.read(i)
        pr.run()
        ind += 1
    else: 
        break

## FlexZBoost

In [None]:
def bigF4(degrader, name, pathname, ntrain, ntest, seed1, seed2, seed3, nbins, invzparam):
    
    ##things you need
    #grid = makeGrid(0, 2.5, nbins) 
    bands = ['u','g','r','i','z','y']
    band_dict = {band: f"mag_{band}_lsst" for band in bands}
    band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

    # modelData = makeModel()
    
    trainData = trainSet(ntrain, seed1)

    if degrader == invRedshift:
       deg = degrader(invzparam)
    else:
        deg = degrader(ntrain)  
    
    testData = testSet(ntest, seed2)

    lsstErr = lsstError(band_dict, seed3)
    infFZBoost = informFZBoost()
    estFZBoost = estimateFZBoost(infFZBoost)

    ##pipeline and yml
    pipe = ceci.Pipeline.interactive()
    stages = [
        trainData, 
        deg, 
        testData, 
        lsstErr,  
        infFZBoost, 
        estFZBoost]

    for stage in stages:
        pipe.add_stage(stage)

    deg.connect_input(trainData)
    lsstErr.connect_input(testData)

    infFZBoost.connect_input(deg) 
    estFZBoost.connect_input(infFZBoost, inputTag = 'model')
    estFZBoost.connect_input(lsstErr, inputTag = 'input') ## trucated out of docs :(

    pipe.initialize(
    dict(model=flow_file), dict(output_dir=".", log_dir=".", resume=False), None) 

    outpath = os.path.join(pathname, "% s_lsstErr_FZBoost.yml" % name)
    pipe.save(outpath)
    return outpath 

In [None]:
path_lst_4 = []
directory = "specSelection_lsstErr_FZBoost"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_4 = os.path.join(parent_dir, directory)
os.makedirs(path_4, exist_ok=True)

In [None]:
for key in spec_dict:
    path_lst_4.append(bigF4(spec_dict[key], key, path_4, 1000, 100, 17, 39, 172, 10, 0))

Inserting handle into data store.  model_inform_FZBoost: inprogress_fzboost.pkl, inform_FZBoost


In [None]:
path_lst_4_invz = []
directory = "invz_lsstErr_FZBoost"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_4_invz = os.path.join(parent_dir, directory)
os.makedirs(path_4_invz, exist_ok=True)

In [None]:
for i in pivot_ls:
    path_lst_4_invz.append(bigF4(invRedshift, 'invz='+str(i), path_1_invz, 1000, 100, 17, 39, 172, 10, i))

## Running things

In [None]:
help(os.makedirs)


Help on function makedirs in module os:

makedirs(name, mode=511, exist_ok=False)
    makedirs(name [, mode=0o777][, exist_ok=False])
    
    Super-mkdir; create a leaf directory and all intermediate ones.  Works like
    mkdir, except that any intermediate path segment (not just the rightmost)
    will be created if it does not exist. If the target directory already
    exists, raise an OSError if exist_ok is False. Otherwise no exception is
    raised.  This is recursive.



# Test

## Test #1

In [70]:
def bigF_TEST1(degrader, name, pathname, ntrain, ntest, seed1, seed2, seed3, nbins):
    
    ##things you need
    #grid = makeGrid(0, 2.5, nbins) 
    bands = ['u','g','r','i','z','y']
    band_dict = {band: f"mag_{band}_lsst" for band in bands}
    band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

    # modelData = makeModel()
    
    trainData = trainSet(ntrain, seed1)
    deg = degrader(ntrain) 
    
    testData = testSet(ntest, seed2)

    lsstErr = lsstError(band_dict, seed3)
    infPZFlow = informPZFlow()
    estPZFlow = estimatePZFlow(infPZFlow)

    ##pipeline and yml
    pipe = ceci.Pipeline.interactive()
    stages = [
        trainData, 
        deg, 
        testData, 
        lsstErr,  
        infPZFlow, 
        estPZFlow]  

    for stage in stages:
        pipe.add_stage(stage)

    deg.connect_input(trainData)
    lsstErr.connect_input(testData)

    infPZFlow.connect_input(deg) 
    estPZFlow.connect_input(infPZFlow, inputTag = 'model')
    estPZFlow.connect_input(lsstErr, inputTag = 'input') ## trucated out of docs :( 

    pipe.initialize(
    dict(model=flow_file), dict(output_dir=".", log_dir=".", resume=False), None) 

    outpath = os.path.join(pathname, "% s_TEST1.yml" % name)
    pipe.save(outpath)
    return outpath 

In [71]:

path_lst_TEST1 = []
directory = "specSelection_TEST1"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_TEST1 = os.path.join(parent_dir, directory)
os.makedirs(path_TEST1, exist_ok=True)


for key in spec_dict:
    path_lst_TEST1.append(bigF_TEST1(spec_dict[key], key, path_TEST1, 1000, 1000, 104, 12, 327, 10))

Inserting handle into data store.  model_inform_PZFlow: inprogress_pzflow.pkl, inform_PZFlow


In [72]:
def bigF_TEST2(degrader, name, pathname, ntrain, ntest, seed1, seed2, seed3, nbins):
    
    ##things you need
    #grid = makeGrid(0, 2.5, nbins) 
    bands = ['u','g','r','i','z','y']
    band_dict = {band: f"mag_{band}_lsst" for band in bands}
    band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

    # modelData = makeModel()
    
    trainData = trainSet(ntrain, seed1)
    deg = degrader(ntrain) 
    
    testData = testSet(ntest, seed2)

    lsstErr = lsstError(band_dict, seed3)
    infPZFlow = informPZFlow()
    estPZFlow = estimatePZFlow(infPZFlow)

    ##pipeline and yml
    pipe = ceci.Pipeline.interactive()
    stages = [
        trainData, 
        deg, 
        testData, 
        lsstErr,  
        infPZFlow, 
        estPZFlow]


    for stage in stages:
        pipe.add_stage(stage)

    deg.connect_input(trainData)
    lsstErr.connect_input(testData)

    infPZFlow.connect_input(deg) 
    estPZFlow.connect_input(infPZFlow, inputTag = 'model')
    estPZFlow.connect_input(lsstErr, inputTag = 'input') ## trucated out of docs :( 
    #estPZFlow.connect_input(lsst_Err, inputTag = 'input') ## might be wrong, passing in file instead of stage, need to debug w alex

    pipe.initialize(
    dict(model=flow_file), dict(output_dir=".", log_dir=".", resume=False), None) 

    outpath = os.path.join(pathname, "% s_TEST2.yml" % name)
    pipe.save(outpath)
    return outpath 

In [73]:

path_lst_TEST2 = []
directory = "specSelection_TEST2"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_TEST2 = os.path.join(parent_dir, directory)
os.makedirs(path_TEST2, exist_ok=True)


for key in spec_dict:
    path_lst_TEST2.append(bigF_TEST2(spec_dict[key], key, path_TEST2, 10000, 10000, 104, 12, 327, 10))

In [None]:
#________________________#

In [74]:
out_dir1 = "outputs"
out_parent_dir1 = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST1"
path_outs1 = os.path.join(out_parent_dir1, out_dir1)
os.makedirs(path_outs1, exist_ok=True)


In [75]:
print(path_TEST1)

/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST1


In [76]:

os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST1/outputs")

pr = ceci.Pipeline.read(path_TEST1+"/BOSS_TEST1.yml")
pr.run()


Executing test_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines.flowEngine.FlowCreator   --model=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/trained_flow.pkl   --name=test_set   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST1/BOSS_TEST1_config.yml   --output=./output_test_set.pq 
Output writing to ./test_set.out

Job test_set has completed successfully!

Executing lsst_error
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.degradation.lsst_error_model.LSSTErrorModel   --input=./output_test_set.pq   --name=lsst_error   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST1/BOSS_TEST1_config.yml   --output=./output_lsst_error.pq 
Output writing to ./lsst_error.out

Job lsst_error has completed successfully!

Executing train_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines

KeyboardInterrupt: 

In [None]:
import pandas as pd
df1train = pd.read_parquet("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST1/outputs/output_train_set.pq")

df1train

Unnamed: 0,redshift,mag_u_lsst,mag_g_lsst,mag_r_lsst,mag_i_lsst,mag_z_lsst,mag_y_lsst
0,0.453446,26.440466,25.532093,24.414080,24.132780,23.951639,23.614315
1,1.614395,26.199156,25.898083,25.544622,25.158127,24.919113,24.583817
2,1.532331,27.018871,26.491842,26.029282,25.345669,24.971943,24.457464
3,0.695185,25.286282,24.142662,22.950605,21.883846,21.517628,21.255428
4,0.697118,27.651823,27.095114,26.331165,25.550674,25.334547,25.198923
...,...,...,...,...,...,...,...
995,1.731998,29.733955,29.171751,28.277988,27.197586,26.220169,25.771671
996,0.822214,26.690687,26.424179,25.911663,25.186234,24.857292,24.752474
997,0.904075,27.091158,26.829113,26.207962,25.369640,24.944141,24.778233
998,1.430669,28.732162,27.865288,27.377495,26.528309,26.075668,25.367645


In [None]:
import pandas as pd
df1 = pd.read_parquet("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST1/outputs/output_specselection_boss.pq")

df1

Unnamed: 0,redshift,mag_u_lsst,mag_g_lsst,mag_r_lsst,mag_i_lsst,mag_z_lsst,mag_y_lsst
291,0.332584,22.746304,20.570354,19.191704,18.38974,18.079622,17.820402


In [None]:
import pandas as pd
df2train = pd.read_parquet("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST2/outputs/output_train_set.pq")

df2train

Unnamed: 0,redshift,mag_u_lsst,mag_g_lsst,mag_r_lsst,mag_i_lsst,mag_z_lsst,mag_y_lsst
0,1.342823,25.937267,25.592773,25.302280,24.184923,23.383533,22.579002
1,1.191652,28.780466,28.075939,27.267124,26.668451,25.890570,25.420044
2,1.751760,26.832052,26.458172,26.087759,25.595860,24.988527,24.666149
3,0.620976,22.530554,22.241764,21.103287,20.136244,19.746765,19.453854
4,0.793243,27.564707,26.932222,26.285490,25.502998,25.277523,25.163303
...,...,...,...,...,...,...,...
9995,1.981384,27.963957,27.702507,27.336897,27.156370,26.714178,26.362583
9996,0.744960,27.017117,26.490191,25.695547,24.839457,24.637497,24.520649
9997,1.101568,27.205582,26.924608,26.450403,26.079315,25.530327,25.305614
9998,0.928054,28.203192,27.457575,26.576130,25.856819,25.445122,25.309990


In [None]:
out_dir2 = "outputs"
out_parent_dir2 = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST2"
path_outs2 = os.path.join(out_parent_dir2, out_dir2)
os.makedirs(path_outs2, exist_ok=True)



os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST2/outputs")

pr = ceci.Pipeline.read(path_TEST2+"/BOSS_TEST2.yml")
pr.run()

import pandas as pd
df2 = pd.read_parquet("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST2/outputs/output_specselection_boss.pq")

df2


Executing test_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines.flowEngine.FlowCreator   --model=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/trained_flow.pkl   --name=test_set   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST2/BOSS_TEST2_config.yml   --output=./output_test_set.pq 
Output writing to ./test_set.out

Job test_set has completed successfully!

Executing lsst_error
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.degradation.lsst_error_model.LSSTErrorModel   --input=./output_test_set.pq   --name=lsst_error   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST2/BOSS_TEST2_config.yml   --output=./output_lsst_error.pq 
Output writing to ./lsst_error.out

Job lsst_error has completed successfully!

Executing train_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines

Unnamed: 0,redshift,mag_u_lsst,mag_g_lsst,mag_r_lsst,mag_i_lsst,mag_z_lsst,mag_y_lsst
1286,0.591147,22.445744,20.724831,19.164146,18.284557,17.949516,17.694904
4494,0.498089,25.742825,22.528442,20.879122,19.909256,19.45573,19.245495
4836,0.441687,23.170385,20.473461,18.878864,18.125809,17.771408,17.569096
5332,0.299266,24.022921,21.591301,20.290333,19.444916,19.08745,18.843006
6753,0.53585,25.292589,22.514803,20.829006,19.857273,19.471964,19.235981
7243,0.508251,24.524036,22.378204,20.889675,19.961979,19.593729,19.318655
9625,0.569059,23.863209,22.356947,20.861506,19.825939,19.414465,19.127625
9705,0.568569,23.138166,21.716282,20.204254,19.35837,19.036844,18.774906


## Test #2

In [190]:
def FTEST(pathname, ntrain, ntest, seed1, seed2, seed3, nbins):
    
    ##things you need

    # modelData = makeModel()
    
    trainData = trainSet(ntrain, seed1)
    #deg = lsstError(band_dict, seed3) 
    
    testData = testSet(ntest, seed2)

    # lsstErr = lsstError(band_dict, seed3)
    infFZBoost = informFZBoost()
    estFZBoost = estimateFZBoost(infFZBoost)

    ##pipeline and yml
    pipe = ceci.Pipeline.interactive()
    stages = [
        trainData, 
        #deg, 
        testData, 
        #lsstErr,  
        infFZBoost, 
        estFZBoost]  

    for stage in stages:
        pipe.add_stage(stage)

    # deg.connect_input(trainData)
    #lsstErr.connect_input(testData)

    infFZBoost.connect_input(trainData) 
    estFZBoost.connect_input(infFZBoost, inputTag = 'model')
    estFZBoost.connect_input(testData, inputTag = 'input') ## trucated out of docs :( 

    pipe.initialize(
    dict(model=flow_file), dict(output_dir=".", log_dir=".", resume=False), None) 

    outpath = os.path.join(pathname, "% s_TEST.yml" % "no_degrader")
    pipe.save(outpath)
    return outpath 

In [191]:
path_lst_TEST3 = []
directory_TEST3 = "specSelection_TEST3"
parent_dir_TEST3 = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_TEST3 = os.path.join(parent_dir_TEST3, directory_TEST3)
os.makedirs(path_TEST3, exist_ok=True)


# for key in spec_dict:
#     path_lst_TEST3.append(FTEST(spec_dict[key], key, path_TEST1, 1000, 1000, 104, 12, 327, 10))

path_lst_TEST3.append(FTEST(path_TEST3, 10000, 100, 104, 12, 327, 10))

In [192]:
out_dir_TEST = "outputs_FZBoost"
out_parent_dir_TEST = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST3"
path_outs_TEST = os.path.join(out_parent_dir_TEST, out_dir_TEST)
os.makedirs(path_outs_TEST, exist_ok=True)

In [193]:

os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST3/outputs_FZBoost")

pr = ceci.Pipeline.read(path_TEST3+"/no_degrader_TEST.yml")
pr.run()

ConstructorError: could not determine a constructor for the tag 'tag:yaml.org,2002:python/object:rail.estimation.algos.flexzboost.FlexZBoostInformer'
  in "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST3/no_degrader_TEST_config.yml", line 32, column 10

In [163]:
import pandas as pd
test_df = pd.read_parquet("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST3/outputs_GPz/output_lsst_error.pq")

test_df

Unnamed: 0,redshift,mag_g_lsst,mag_g_lsst_err,mag_i_lsst,mag_i_lsst_err,mag_r_lsst,mag_r_lsst_err,mag_u_lsst,mag_u_lsst_err,mag_y_lsst,mag_y_lsst_err,mag_z_lsst,mag_z_lsst_err
0,0.965879,25.879404,0.057823,24.656401,0.028527,25.367927,0.036107,26.197949,0.226304,23.746738,0.051104,24.103603,0.030794
1,0.112687,26.847807,0.135247,26.121641,0.104139,26.529480,0.100909,27.294345,0.533706,25.402500,0.215698,26.351736,0.219303
2,0.704666,27.559750,0.246778,25.592196,0.065307,26.581457,0.105604,28.976660,1.501604,25.789174,0.296218,25.392386,0.096273
3,0.793990,26.650420,0.113975,25.222062,0.047021,25.966401,0.061397,26.901584,0.397699,24.729052,0.121445,25.014646,0.069001
4,0.546315,24.110855,0.012791,22.613920,0.006719,23.241883,0.007272,24.459181,0.050363,22.060726,0.012195,22.297977,0.007802
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,1.339472,27.394182,0.215147,25.858046,0.082613,26.823286,0.130335,,,24.499371,0.099389,25.131967,0.076545
9996,1.262786,25.282655,0.034108,23.820874,0.014115,24.422184,0.016017,25.878391,0.173117,22.580313,0.018460,23.134816,0.013621
9997,0.511737,24.683479,0.020304,22.474778,0.006385,23.168497,0.007034,27.234214,0.510776,21.834788,0.010365,22.022427,0.006859
9998,0.312853,24.656233,0.019842,23.954590,0.015711,24.076688,0.012212,25.410581,0.115890,23.788194,0.053020,23.778379,0.023198
