# Imports 

In [3]:
## Essential Imports: 
import os
import numpy as np
import qp
import tables_io
from pathlib import Path 
from pzflow.examples import get_galaxy_data
import ceci

## RAIL-Specific Imports: 
import rail

# old : from rail.creation.degradation import LSSTErrorModel, InvRedshiftIncompleteness


from rail.creation.degradation.lsst_error_model import LSSTErrorModel
from rail.creation.degradation.spectroscopic_degraders import InvRedshiftIncompleteness

import rail.creation 
import rail.creation.engines
from rail.creation.engines.flowEngine import FlowModeler, FlowCreator, FlowPosterior
from rail.core.data import TableHandle
from rail.core.stage import RailStage
from rail.core.utilStages import ColumnMapper, TableConverter

# old : from rail.estimation.algos.flexzboost import Inform_FZBoost, FZBoost

from rail.estimation.algos.train_z import TrainZEstimator, TrainZInformer
from rail.estimation.algos.cmnn import Inform_CMNNPDF, CMNNPDF
from rail.estimation.algos.gpz import GPzInformer, GPzEstimator 
from rail.estimation.algos.pzflow_nf import PZFlowInformer, PZFlowEstimator 
from rail.estimation.algos.flexzboost import FlexZBoostInformer, FlexZBoostEstimator  


from rail.evaluation.evaluator import Evaluator


## Data Storage: 
DS = RailStage.data_store
DS.__class__.allow_overwrite = True


### CMNN, PZFlow, FlexZBoost, GPZ, trainz for control

In [4]:
help(rail.estimation.algos)

Help on package rail.estimation.algos in rail.estimation:

NAME
    rail.estimation.algos

PACKAGE CONTENTS
    _gpz_util
    bpz_lite
    cmnn
    delightPZ
    delight_version (package)
    equal_count
    flexzboost
    gpz
    naive_stack
    point_est_hist
    pzflow_nf
    random_gauss
    train_z
    uniform_binning
    var_inf

FILE
    (built-in)




In [5]:
# help(rail.creation.engines)

In [6]:
#from rail.stages import *
#rail.stages.import_and_attach_all()
#for val in RailStage.pipeline_stages.values():
#    print(val[0])

# Model

In [7]:
def makeModel():
    #path to access the data 
    DATA_DIR = Path().resolve() / "data"
    DATA_DIR.mkdir(exist_ok=True)

    catalog_file = DATA_DIR / "base_catalog.pq"

    bands = ['u','g','r','i','z','y']
    band_dict = {band:f'mag_{band}_lsst' for band in bands}
    
    #array of galaxies w/ 7 attributes for each: redshift & ugrizy
    catalog = get_galaxy_data().rename(band_dict, axis=1) 

    #turns array into a table 
    tables_io.write(catalog, str(catalog_file.with_suffix("")), catalog_file.suffix[1:])

    catalog_file = str(catalog_file)
    flow_file = str(DATA_DIR / "trained_flow.pkl")

    print(flow_file)

    #we set up the stage 
    flow_modeler_params = {
        "name": "flow_modeler",
        "input": catalog_file,
        "model": flow_file,
        "seed": 0,
        "phys_cols": {"redshift": [0, 3]},
        "phot_cols": {
            "mag_u_lsst": [17, 35],
            "mag_g_lsst": [16, 32],
            "mag_r_lsst": [15, 30],
            "mag_i_lsst": [15, 30],
            "mag_z_lsst": [14, 29],
            "mag_y_lsst": [14, 28],
        },
        "calc_colors": {"ref_column_name": "mag_i_lsst"},
    }
    flow_modeler = FlowModeler.make_stage(**flow_modeler_params)
    # flow_modeler.fit_model()
    return flow_modeler, flow_file ##.get_handle("model")

In [8]:
modelData, flow_file = makeModel() 

/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/trained_flow.pkl


# Make Training Set and Test Set 

In [9]:
def trainSet(ntrain, seed):
    data = FlowCreator.make_stage(
            name = 'train_set',
            model = flow_file,
            n_samples = ntrain,
            seed = seed 
    )
    return data 

In [10]:
def testSet(ntest, seed):
    data = FlowCreator.make_stage(
            name = 'test_set',
            model = flow_file,
            n_samples = ntest,
            seed = seed 
    )
    return data #.sample(ntest, seed)

# Degraders

## Inverse Redshift Incompleteness

In [11]:
def invRedshift(pivot = 1.0):
    assert type(pivot) == float 
    degr = InvRedshiftIncompleteness.make_stage(
        name = 'inv_redshift',
        pivot_redshift = pivot
    )
    return degr 

pivot_ls = [1.0, 1.4]

In [12]:
# ## Choose pivot z's for inverse redshift incompleteness 

# ## seed1 and ndata should be the same as  seed1 and ntrain used to call bigF!! 
# ## Otherwise this might not be representative of the real data 

# def choosePivots(seed1, ndata):
#     nums = trainSet(ndata, seed1)
#     data = nums.sample(ndata, seed1)
#     data_pq = col_remap(data)
#     data_table = table_conv(data_pq)
#     table = tables_io.convertObj(data_table.data, tables_io.types.PD_DATAFRAME)
#     return np.asarray(table['redshift'])

# percentiles = np.arange(10, 100, 10)
# pivots = [] 

# for i in percentiles:
#     pivot = np.percentile(choosePivots(17, 100000), i) 
#     pivots.append(pivot)

In [13]:
# print(pivots)

## LSST Error 

In [14]:
bands = ['u','g','r','i','z','y']
band_dict = {band:f'mag_{band}_lsst' for band in bands}

def lsstError(dict, seed): #tvis = 1, nYrObs = 1, airmass = 1, extendedSource = 1, sigmaSys = 1, magLim = 1, ndFlag = 1, A_min = 1, A_max = 1):
    deg = LSSTErrorModel.make_stage(
        name='lsst_error',
        renameDict= dict, 
        ndFlag=np.nan,
        seed=seed,
    )
    return deg 

## Quantity Cuts 

In [15]:
## write a dictionary with the different bands and magnitudes you want

def quantCuts(band, mag):
    quantity_cut = QuantityCut.make_stage(
        name='quantity_cut',    
        cuts={'mag_i_lsst': 25.0},
    )

In [16]:
qcuts_dict = {'mag_u_lsst': [...], 
              'mag_g_lsst': [...], 
              'mag_r_lsst': [...], 
              'mag_i_lsst': [...], 
              'mag_z_lsst': [...], 
              'mag_y_lsst': [...] }

## Survey-Based Degraders

In [17]:
from rail.creation.degradation.spectroscopic_selections import *

def specSelectBOSS(ntrain):
    degr = SpecSelection_BOSS.make_stage(
        name = 'specselection_boss',
        N_tot = ntrain
    )
    return degr 

def specSelectDEEP2(ntrain):
    degr = SpecSelection_DEEP2.make_stage(
        name = 'specselection_deep2',
        N_tot = ntrain
    )
    return degr 

def specSelectGAMA(ntrain):
    degr = SpecSelection_GAMA.make_stage(
        name = 'specselection_gama',
        N_tot = ntrain
    )
    return degr 

def specSelectHSC(ntrain):
    degr = SpecSelection_HSC.make_stage(
        name = 'specselection_HSC',
        N_tot = ntrain
    )
    return degr 

def specSelectVVDSf02(ntrain):
    degr = SpecSelection_VVDSf02.make_stage(
        name = 'specselection_VVDSf02',
        N_tot = ntrain
    )
    return degr 

def specSelectzCOSMOS(ntrain):
    degr = SpecSelection_zCOSMOS.make_stage(
        name = 'specselection_zCOSMOS',
        N_tot = ntrain
    )
    return degr 

In [18]:
spec_dict = {'BOSS': specSelectBOSS, 
             'DEEP2': specSelectDEEP2, 
             'GAMA': specSelectGAMA,
             'HSC': specSelectHSC, 
             'VVDSf02': specSelectVVDSf02, 
             'zCOSMOS': specSelectzCOSMOS } 

## Posts 

In [19]:
def getPosts(data, model, grid):
    posts = FlowPosterior.make_stage(
        name='get_posts'+str(data), 
        column='redshift',
        grid = grid,
        model = model,
        data = data
    )
    return posts #posts.get_posterior(data, column = 'redshift')

In [20]:
def makeGrid(zmin, zmax, nbins):
    import numpy as np
    grid = np.linspace(zmin, zmax, nbins + 1)
    return grid 

In [21]:
grid = makeGrid(0, 2.5, 100)

Only run if you need output_orig_train_posts

In [22]:
# flow_post_orig_train = FlowPosterior.make_stage(name='orig_train_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              data = orig_train)

# orig_train_pdfs = flow_post_orig_train.get_posterior(orig_train, column='redshift')

Only run if you need output_deg_train_posts ** rerun this cell!! 

In [23]:
# flow_post_deg_train = FlowPosterior.make_stage(name='deg_train_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              err_samples = 0,
#                                              data = deg_train)



# deg_train_pdfs = flow_post_deg_train.get_posterior(deg_train, column='redshift')

Only run if you need output_orig_test_posts

In [24]:
# flow_post_orig_test = FlowPosterior.make_stage(name='orig_test_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              data = orig_test)

# orig_test_pdfs = flow_post_orig_test.get_posterior(orig_test, column='redshift')

Only run if you need output_deg_test_posts

In [25]:
# flow_post_deg_test = FlowPosterior.make_stage(name='deg_test_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              data = deg_test)

# deg_test_pdfs = flow_post_deg_test.get_posterior(deg_test, column='redshift')

# Make tables

In [26]:
bands = ['u','g','r','i','z','y']
band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

def colRemapper(dict):
    col_remap = ColumnMapper.make_stage(
    name='col_remapper', 
    columns=dict,
    )
    return col_remap

def tableConverter():
    table_conv = TableConverter.make_stage(
    name='table_conv', 
    output_format='numpyDict',
    )
    return table_conv

In [27]:
col_remap = colRemapper(band_dict_err)
table_conv = tableConverter()

# Inform & Estimate

In [28]:
def informTrainZ():
    inf = TrainZInformer.make_stage(
    name = 'inform_TrainZ',
    model = 'trainz.pkl',
    hdf5_groupname=""
    )
    return inf

def estimateTrainZ(info):
    est = TrainZEstimator.make_stage(
    name = 'estimate_TrainZ',
    model = 'trainz.pkl', 
    hdf5_groupname=""
    )
    return est

In [29]:
def informCMNN():
    inf = Inform_CMNNPDF.make_stage(
    name = 'inform_CMNN',
    model = 'cmnn.pkl',
    hdf5_groupname=""
    )
    return inf

def estimateCMNN(info):
    est = CMNNPDF.make_stage(
    name = 'estimate_CMNN',
    model = 'cmnn.pkl', 
    hdf5_groupname=""
    )
    return est

In [30]:
def informGPz():
    inf = GPzInformer.make_stage(
    name = 'inform_GPz',
    model = 'gpz.pkl',
    hdf5_groupname=""
    )
    return inf

def estimateGPz(info):
    est = GPzEstimator.make_stage(
    name = 'estimate_GPz',
    model = 'gpz.pkl', 
    hdf5_groupname=""
    )
    return est

In [31]:
def informPZFlow():
    inf = PZFlowInformer.make_stage(
    name = 'inform_PZFlow',
    model = 'pzflow.pkl',
    hdf5_groupname=""
    )
    return inf

def estimatePZFlow(info):
    est = PZFlowEstimator.make_stage(
    name = 'estimate_PZFlow',
    model = 'pzflow.pkl', 
    hdf5_groupname=""
    )
    return est

In [32]:
def informFZBoost():
    info = FlexZBoostInformer.make_stage(
    name ='inform_FZBoost', 
    model ='fzboost.pkl', 
    hdf5_groupname='',
    )
    return info

def estimateFZBoost(info):
    est = FlexZBoostEstimator.make_stage(
    name='est_FZBoost', 
    nondetect_val=np.nan,
    model= info,
    hdf5_groupname='',
    aliases=dict(input='test_data', output='fzboost_estim'),
    nzbins = 100 
    )
    return est 

In [33]:
inf_est_dict = {'TrainZ': [informTrainZ, estimateTrainZ],
               'CMNN': [informCMNN, estimateCMNN], 
               'GPz': [informGPz, estimateGPz], 
               'PZFlow': [informPZFlow, estimatePZFlow], 
               'FZBoost': [informFZBoost, estimateFZBoost]}

In [34]:
# 'invz': invRedshift,

spec_dict = {'BOSS': specSelectBOSS, 
             'DEEP2': specSelectDEEP2, 
             'GAMA': specSelectGAMA,
             'HSC': specSelectHSC, 
             'VVDSf02': specSelectVVDSf02, 
             'zCOSMOS': specSelectzCOSMOS } 

inf_est_dict = {'TrainZ': [informTrainZ, estimateTrainZ],
               'CMNN': [informCMNN, estimateCMNN], 
               'GPz': [informGPz, estimateGPz], 
               'PZFlow': [informPZFlow, estimatePZFlow], 
               'FZBoost': [informFZBoost, estimateFZBoost] }

In [36]:
# import ceci 

# pr = ceci.Pipeline.read(path_lst_1[0])#parent_dir+directory+"/invz=0.33672517538070684_lsstErr_pzflow.yml")
# pr.run()

# ## 1) terminal: go to path up to invz_lsstErr_pzflow, then run these 2 lines 
# ## 2)  make list/txt file with list of paths to files made by big F

# ## do 1) 
# ## open virtual env
# ## python 
# ## import ceci 
# ## run the 2 lines of code above 


# ### at the end we can put this into a .py file that we can run at the command line 

# ## %cd ? 

In [37]:
## more config parameters/better config parameters
## have to give path above to estimator model instead of get_handle('model')
## fix truncated parameter printing in help(...)

# Big F's

In [38]:
# Make sure to change the first argument of testSet
# testData = testSet(ntest, seed2)

testData = testSet(100, 39)
testData.run()

test_data = DS.read_file("test_Data", TableHandle, "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/output_test_set.pq")

Inserting handle into data store.  model: /Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/trained_flow.pkl, test_set
Inserting handle into data store.  output_test_set: inprogress_output_test_set.pq, test_set


In [39]:
#lsstErr = lsstError(band_dict, seed3)

bands = ['u','g','r','i','z','y']
band_dict = {band: f"mag_{band}_lsst" for band in bands}
band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

lsstErr = lsstError(band_dict, 172)
lsstErr.connect_input(test_data) ## might be wrong; passing in a file not a stage 
lsstErr.run()

lsst_Err = DS.read_file("test_Data", TableHandle, "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/output_lsst_error.pq")

Inserting handle into data store.  input: None, lsst_error


KeyError: 'Unknown file format , supported types are{list(FILE_FORMAT_SUFFIXS.keys())}'

In [None]:
## for inverse redshift incompleteness:

pivot_ls = [1.0, 1.4] 

## TrainZ

In [None]:
def bigF0(degrader, name, pathname, ntrain, ntest, seed1, seed2, seed3, nbins):
    
    ##things you need
    #grid = makeGrid(0, 2.5, nbins) 
    bands = ['u','g','r','i','z','y']
    band_dict = {band: f"mag_{band}_lsst" for band in bands}
    band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

    # modelData = makeModel()
    
    trainData = trainSet(ntrain, seed1)
    deg = degrader(ntrain) 
    
    # testData = testSet(ntest, seed2)

    #lsstErr = lsstError(band_dict, seed3)
    infTrainZ = informTrainZ()
    estTrainZ = estimateTrainZ(infTrainZ)

    ##pipeline and yml
    pipe = ceci.Pipeline.interactive()
    stages = [
        trainData, 
        deg, 
        #testData, 
        #lsstErr,  
        infTrainZ, 
        estTrainZ]

    for stage in stages:
        pipe.add_stage(stage)

    deg.connect_input(trainData)
    lsstErr.connect_input(testData)

    infTrainZ.connect_input(deg) 
    estTrainZ.connect_input(infTrainZ, inputTag = 'model')
    estTrainZ.connect_input(lsstErr, inputTag = 'input') ## trucated out of docs :(

    pipe.initialize(
    dict(model=flow_file), dict(output_dir=".", log_dir=".", resume=False), None) 

    outpath = os.path.join(pathname, "% s_lsstErr_pzflow.yml" % name)
    pipe.save(outpath)
    return outpath 

In [None]:
##run 

path_lst_0 = []
directory = "specSelection_lsstErr_TrainZ"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_0 = os.path.join(parent_dir, directory)
os.makedirs(path_0, exist_ok=True)

In [None]:
for key in spec_dict:
    path_lst_0.append(bigF0(spec_dict[key], key, path_0, 1000, 100, 17, 39, 172, 10))

In [None]:
path_lst_0_invz = []
directory = "invz_lsstErr_TrainZ"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_0_invz = os.path.join(parent_dir, directory)
os.makedirs(path_0_invz, exist_ok=True)

In [None]:
for i in pivot_ls:
    path_lst_0_invz.append(bigF0(invRedshift(i), 'invz='+str(i), path_0_invz, 1000, 100, 17, 39, 172, 10))

## CMNN

In [None]:
def bigF1(degrader, name, pathname, ntrain, ntest, seed1, seed2, seed3, nbins):
    
    ##things you need
    #grid = makeGrid(0, 2.5, nbins) 
    bands = ['u','g','r','i','z','y']
    band_dict = {band: f"mag_{band}_lsst" for band in bands}
    band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

    # modelData = makeModel()
    
    trainData = trainSet(ntrain, seed1)
    deg = degrader(ntrain) 
    
    # testData = testSet(ntest, seed2)

    #lsstErr = lsstError(band_dict, seed3)
    infCMNN = informCMNN()
    estCMNN = estimateCMNN(infCMNN)

    ##pipeline and yml
    pipe = ceci.Pipeline.interactive()
    stages = [
        trainData, 
        deg, 
        #testData, 
        #lsstErr,  
        infCMNN, 
        estCMNN]

    for stage in stages:
        pipe.add_stage(stage)

    deg.connect_input(trainData)
    lsstErr.connect_input(testData)

    infCMNN.connect_input(deg) 
    estCMNN.connect_input(infCMNN, inputTag = 'model')
    estCMNN.connect_input(lsstErr, inputTag = 'input') ## trucated out of docs :(

    pipe.initialize(
    dict(model=flow_file), dict(output_dir=".", log_dir=".", resume=False), None) 

    outpath = os.path.join(pathname, "% s_lsstErr_CMNN.yml" % name)
    pipe.save(outpath)
    return outpath 

In [None]:
path_lst_1 = []
directory = "specSelection_lsstErr_CMNN"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_1 = os.path.join(parent_dir, directory)
os.makedirs(path_1, exist_ok=True)

In [None]:
for key in spec_dict:
    path_lst_1.append(bigF1(spec_dict[key], key, path_1, 1000, 100, 17, 39, 172, 10))

In [None]:
path_lst_1_invz = []
directory = "invz_lsstErr_CMNN"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_1_invz = os.path.join(parent_dir, directory)
os.makedirs(path_1_invz, exist_ok=True)

In [None]:
for i in pivot_ls:
    path_lst_1_invz.append(bigF1(invRedshift(i), 'invz='+str(i), path_1_invz, 1000, 100, 17, 39, 172, 10))

## GPz

In [None]:
def bigF2(degrader, name, pathname, ntrain, ntest, seed1, seed2, seed3, nbins):
    
    ##things you need
    #grid = makeGrid(0, 2.5, nbins) 
    bands = ['u','g','r','i','z','y']
    band_dict = {band: f"mag_{band}_lsst" for band in bands}
    band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

    # modelData = makeModel()
    
    trainData = trainSet(ntrain, seed1)
    deg = degrader(ntrain) 
    
    # testData = testSet(ntest, seed2)

    #lsstErr = lsstError(band_dict, seed3)
    infGPz = informGPz()
    estGPz = estimateGPz(infGPz)

    ##pipeline and yml
    pipe = ceci.Pipeline.interactive()
    stages = [
        trainData, 
        deg, 
        #testData, 
        #lsstErr,  
        infGPz, 
        estGPz]

    for stage in stages:
        pipe.add_stage(stage)

    deg.connect_input(trainData)
    lsstErr.connect_input(testData)

    infGPz.connect_input(deg) 
    estGPz.connect_input(infGPz, inputTag = 'model')
    estGPz.connect_input(lsstErr, inputTag = 'input') ## trucated out of docs :(

    pipe.initialize(
    dict(model=flow_file), dict(output_dir=".", log_dir=".", resume=False), None) 

    outpath = os.path.join(pathname, "% s_lsstErr_GPz.yml" % name)
    pipe.save(outpath)
    return outpath 

In [None]:
path_lst_2 = []
directory = "specSelection_lsstErr_GPz"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_2 = os.path.join(parent_dir, directory)
os.makedirs(path_2, exist_ok=True)

In [None]:
for key in spec_dict:
    path_lst_1.append(bigF2(spec_dict[key], key, path_2, 1000, 100, 17, 39, 172, 10))

In [None]:
path_lst_2_invz = []
directory = "invz_lsstErr_GPz"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_2_invz = os.path.join(parent_dir, directory)
os.makedirs(path_2_invz, exist_ok=True)

In [None]:
for i in pivot_ls:
    path_lst_2_invz.append(bigF2(invRedshift(i), 'invz='+str(i), path_1_invz, 1000, 100, 17, 39, 172, 10))

## PZFlow

In [None]:
def bigF3(degrader, name, pathname, ntrain, ntest, seed1, seed2, seed3, nbins):
    
    ##things you need
    #grid = makeGrid(0, 2.5, nbins) 
    bands = ['u','g','r','i','z','y']
    band_dict = {band: f"mag_{band}_lsst" for band in bands}
    band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

    # modelData = makeModel()
    
    trainData = trainSet(ntrain, seed1)
    deg = degrader(ntrain) 
    
    # testData = testSet(ntest, seed2)

    #lsstErr = lsstError(band_dict, seed3)
    infPZFlow = informPZFlow()
    estPZFlow = estimatePZFlow(infPZFlow)

    ##pipeline and yml
    pipe = ceci.Pipeline.interactive()
    stages = [
        trainData, 
        deg, 
        #testData, 
        #lsstErr,  
        infPZFlow, 
        estPZFlow]

    for stage in stages:
        pipe.add_stage(stage)

    deg.connect_input(trainData)
    # lsstErr.connect_input(testData)

    infPZFlow.connect_input(deg) 
    estPZFlow.connect_input(infPZFlow, inputTag = 'model')
    # estPZFlow.connect_input(lsstErr, inputTag = 'input') ## trucated out of docs :( 
    estPZFlow.connect_input(lsst_Err, inputTag = 'input') ## might be wrong, passing in file instead of stage, need to debug w alex

    pipe.initialize(
    dict(model=flow_file), dict(output_dir=".", log_dir=".", resume=False), None) 

    outpath = os.path.join(pathname, "% s_lsstErr_PZFlow.yml" % name)
    pipe.save(outpath)
    return outpath 

In [None]:
# help(rail.creation.degradation.spectroscopic_selections)

In [None]:
##run 

path_lst_3 = []
directory = "specSelection_lsstErr_PZFlow"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_3 = os.path.join(parent_dir, directory)
os.makedirs(path_3, exist_ok=True)

In [None]:
for key in spec_dict:
    path_lst_3.append(bigF3(spec_dict[key], key, path_3, 1000, 100, 17, 39, 172, 10))

ValueError: 
Some required inputs to the pipeline could not be found,
(or possibly your pipeline is cyclic):

Stage lsst_error is missing input(s): output_test_set
Stage estimate_PZFlow is missing input(s): output_lsst_error


In [None]:
path_lst_3_invz = []
directory = "invz_lsstErr_PZFlow"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_3_invz = os.path.join(parent_dir, directory)
os.makedirs(path_3_invz, exist_ok=True)

In [None]:
for i in pivot_ls:
    path_lst_3_invz.append(bigF3(invRedshift(i), 'invz='+str(i), path_1_invz, 1000, 100, 17, 39, 172, 10))

## FlexZBoost

In [None]:
def bigF4(degrader, name, pathname, ntrain, ntest, seed1, seed2, seed3, nbins):
    
    ##things you need
    #grid = makeGrid(0, 2.5, nbins) 
    bands = ['u','g','r','i','z','y']
    band_dict = {band: f"mag_{band}_lsst" for band in bands}
    band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

    # modelData = makeModel()
    
    trainData = trainSet(ntrain, seed1)
    deg = degrader(ntrain) 
    
    # testData = testSet(ntest, seed2)

    #lsstErr = lsstError(band_dict, seed3)
    infFZBoost = informFZBoost()
    estFZBoost = estimateFZBoost(infFZBoost)

    ##pipeline and yml
    pipe = ceci.Pipeline.interactive()
    stages = [
        trainData, 
        deg, 
        #testData, 
        #lsstErr,  
        infFZBoost, 
        estFZBoost]

    for stage in stages:
        pipe.add_stage(stage)

    deg.connect_input(trainData)
    lsstErr.connect_input(testData)

    infFZBoost.connect_input(deg) 
    estFZBoost.connect_input(infFZBoost, inputTag = 'model')
    estFZBoost.connect_input(lsstErr, inputTag = 'input') ## trucated out of docs :(

    pipe.initialize(
    dict(model=flow_file), dict(output_dir=".", log_dir=".", resume=False), None) 

    outpath = os.path.join(pathname, "% s_lsstErr_FZBoost.yml" % name)
    pipe.save(outpath)
    return outpath 

In [None]:
path_lst_4 = []
directory = "specSelection_lsstErr_FZBoost"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_4 = os.path.join(parent_dir, directory)
os.makedirs(path_4, exist_ok=True)

In [None]:
for key in spec_dict:
    path_lst_4.append(bigF4(spec_dict[key], key, path_4, 1000, 100, 17, 39, 172, 10))

In [None]:
path_lst_4_invz = []
directory = "invz_lsstErr_FZBoost"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_4_invz = os.path.join(parent_dir, directory)
os.makedirs(path_4_invz, exist_ok=True)

In [None]:
for i in pivot_ls:
    path_lst_4_invz.append(bigF4(invRedshift(i), 'invz='+str(i), path_1_invz, 1000, 100, 17, 39, 172, 10))

## Running things

In [None]:
pr = ceci.Pipeline.read(path_2+"/BOSS_lsstErr_pzflow.yml")
pr.run()



Executing test_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines.flowEngine.FlowCreator   --model=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/trained_flow.pkl   --name=test_set   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_pzflow/BOSS_lsstErr_pzflow_config.yml   --output=./output_test_set.pq 
Output writing to ./test_set.out

Job test_set has completed successfully!

Executing lsst_error
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.degradation.lsst_error_model.LSSTErrorModel   --input=./output_test_set.pq   --name=lsst_error   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_lsstErr_pzflow/BOSS_lsstErr_pzflow_config.yml   --output=./output_lsst_error.pq 
Output writing to ./lsst_error.out

Job lsst_error has completed successfully!

Executing train_set
Command is:
OMP_NUM_THREADS=1   p

0

In [None]:
help(os.makedirs)


Help on function makedirs in module os:

makedirs(name, mode=511, exist_ok=False)
    makedirs(name [, mode=0o777][, exist_ok=False])
    
    Super-mkdir; create a leaf directory and all intermediate ones.  Works like
    mkdir, except that any intermediate path segment (not just the rightmost)
    will be created if it does not exist. If the target directory already
    exists, raise an OSError if exist_ok is False. Otherwise no exception is
    raised.  This is recursive.



# Test

In [77]:
def bigF_TEST1(degrader, name, pathname, ntrain, ntest, seed1, seed2, seed3, nbins):
    
    ##things you need
    #grid = makeGrid(0, 2.5, nbins) 
    bands = ['u','g','r','i','z','y']
    band_dict = {band: f"mag_{band}_lsst" for band in bands}
    band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

    # modelData = makeModel()
    
    trainData = trainSet(ntrain, seed1)
    deg = degrader(ntrain) 
    
    testData = testSet(ntest, seed2)

    lsstErr = lsstError(band_dict, seed3)
    infPZFlow = informPZFlow()
    estPZFlow = estimatePZFlow(infPZFlow)

    ##pipeline and yml
    pipe = ceci.Pipeline.interactive()
    stages = [
        trainData, 
        deg, 
        testData, 
        lsstErr,  
        # infPZFlow, 
        # estPZFlow]
    ]    

    for stage in stages:
        pipe.add_stage(stage)

    deg.connect_input(trainData)
    lsstErr.connect_input(testData)

    # infPZFlow.connect_input(deg) 
    # estPZFlow.connect_input(infPZFlow, inputTag = 'model')
    # estPZFlow.connect_input(lsstErr, inputTag = 'input') ## trucated out of docs :( 
    # estPZFlow.connect_input(lsst_Err, inputTag = 'input') ## might be wrong, passing in file instead of stage, need to debug w alex

    pipe.initialize(
    dict(model=flow_file), dict(output_dir=".", log_dir=".", resume=False), None) 

    outpath = os.path.join(pathname, "% s_TEST1.yml" % name)
    pipe.save(outpath)
    return outpath 

In [78]:

path_lst_TEST1 = []
directory = "specSelection_TEST1"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_TEST1 = os.path.join(parent_dir, directory)
os.makedirs(path_TEST1, exist_ok=True)


for key in spec_dict:
    path_lst_TEST1.append(bigF_TEST1(spec_dict[key], key, path_TEST1, 1000, 100, 17, 39, 172, 10))

In [79]:
def bigF_TEST2(degrader, name, pathname, ntrain, ntest, seed1, seed2, seed3, nbins):
    
    ##things you need
    #grid = makeGrid(0, 2.5, nbins) 
    bands = ['u','g','r','i','z','y']
    band_dict = {band: f"mag_{band}_lsst" for band in bands}
    band_dict_err = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

    # modelData = makeModel()
    
    trainData = trainSet(ntrain, seed1)
    deg = degrader(ntrain) 
    
    testData = testSet(ntest, seed2)

    lsstErr = lsstError(band_dict, seed3)
    #infPZFlow = informPZFlow()
    #estPZFlow = estimatePZFlow(infPZFlow)

    ##pipeline and yml
    pipe = ceci.Pipeline.interactive()
    stages = [
        trainData, 
        deg, 
        testData, 
        lsstErr,  
        #infPZFlow, 
        #estPZFlow]
    ]

    for stage in stages:
        pipe.add_stage(stage)

    deg.connect_input(trainData)
    lsstErr.connect_input(testData)

    # infPZFlow.connect_input(deg) 
    # estPZFlow.connect_input(infPZFlow, inputTag = 'model')
    # estPZFlow.connect_input(lsstErr, inputTag = 'input') ## trucated out of docs :( 
    # estPZFlow.connect_input(lsst_Err, inputTag = 'input') ## might be wrong, passing in file instead of stage, need to debug w alex

    pipe.initialize(
    dict(model=flow_file), dict(output_dir=".", log_dir=".", resume=False), None) 

    outpath = os.path.join(pathname, "% s_TEST2.yml" % name)
    pipe.save(outpath)
    return outpath 

In [80]:

path_lst_TEST2 = []
directory = "specSelection_TEST2"
parent_dir = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/"
path_TEST2 = os.path.join(parent_dir, directory)
os.makedirs(path_TEST2, exist_ok=True)


for key in spec_dict:
    path_lst_TEST2.append(bigF_TEST2(spec_dict[key], key, path_TEST2, 1000, 100, 17, 39, 172, 10))

In [None]:
#________________________#

In [81]:
out_dir1 = "outputs"
out_parent_dir1 = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST1"
path_outs1 = os.path.join(out_parent_dir1, out_dir1)
os.makedirs(path_outs1, exist_ok=True)


In [84]:

os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST1/outputs")

pr = ceci.Pipeline.read(path_TEST1+"/BOSS_TEST1.yml")
pr.run()


Executing test_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines.flowEngine.FlowCreator   --model=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/trained_flow.pkl   --name=test_set   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST1/BOSS_TEST1_config.yml   --output=./output_test_set.pq 
Output writing to ./test_set.out

Job test_set has completed successfully!

Executing lsst_error
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.degradation.lsst_error_model.LSSTErrorModel   --input=./output_test_set.pq   --name=lsst_error   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST1/BOSS_TEST1_config.yml   --output=./output_lsst_error.pq 
Output writing to ./lsst_error.out

Job lsst_error has completed successfully!

Executing train_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines

0

In [96]:
import pandas as pd
df1 = pd.read_parquet("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST1/outputs/output_specselection_boss.pq")

df1

Unnamed: 0,redshift,mag_u_lsst,mag_g_lsst,mag_r_lsst,mag_i_lsst,mag_z_lsst,mag_y_lsst
470,0.678802,22.524094,22.298061,21.065384,20.072044,19.666212,19.417404
557,0.49628,22.490469,21.483559,20.207443,19.443588,19.127865,18.890064


In [98]:
out_dir2 = "outputs"
out_parent_dir2 = "/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST2"
path_outs2 = os.path.join(out_parent_dir2, out_dir2)
os.makedirs(path_outs2, exist_ok=True)



os.chdir("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST2/outputs")

pr = ceci.Pipeline.read(path_TEST2+"/BOSS_TEST2.yml")
pr.run()

import pandas as pd
df2 = pd.read_parquet("/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST2/outputs/output_specselection_boss.pq")

df2


Executing test_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines.flowEngine.FlowCreator   --model=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/data/trained_flow.pkl   --name=test_set   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST2/BOSS_TEST2_config.yml   --output=./output_test_set.pq 
Output writing to ./test_set.out

Job test_set has completed successfully!

Executing lsst_error
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.degradation.lsst_error_model.LSSTErrorModel   --input=./output_test_set.pq   --name=lsst_error   --config=/Users/alicec03/Desktop/Summer_Research/Photo-z-Stress-Test/Photo-z-Stress-Test/specSelection_TEST2/BOSS_TEST2_config.yml   --output=./output_lsst_error.pq 
Output writing to ./lsst_error.out

Job lsst_error has completed successfully!

Executing train_set
Command is:
OMP_NUM_THREADS=1   python3 -m ceci rail.creation.engines

Unnamed: 0,redshift,mag_u_lsst,mag_g_lsst,mag_r_lsst,mag_i_lsst,mag_z_lsst,mag_y_lsst
470,0.678802,22.524094,22.298061,21.065384,20.072044,19.666212,19.417404
557,0.49628,22.490469,21.483559,20.207443,19.443588,19.127865,18.890064


In [99]:
df1 == df2

Unnamed: 0,redshift,mag_u_lsst,mag_g_lsst,mag_r_lsst,mag_i_lsst,mag_z_lsst,mag_y_lsst
470,True,True,True,True,True,True,True
557,True,True,True,True,True,True,True


In [100]:
df2 - df1

Unnamed: 0,redshift,mag_u_lsst,mag_g_lsst,mag_r_lsst,mag_i_lsst,mag_z_lsst,mag_y_lsst
470,0.0,0.0,0.0,0.0,0.0,0.0,0.0
557,0.0,0.0,0.0,0.0,0.0,0.0,0.0
