# Imports 

In [1]:
def imports():
    ## Essential Imports: 
    import os
    import qp
    import numpy as np
    import tables.io
    from pathlib import Path
    import ceci
    
    ## RAIL-Specific Imports: 
    import rail
    from rail.creation.degradation import LSSTErrorModel, InvRedshiftIncompleteness
    from rail.creation.engines.flowEngine import FlowModeler, FlowCreator, FlowPosterior
    from rail.core.data import TableHandle
    from rail.core.stage import RailStage
    from rail.core.utilStages import ColumnMapper, TableConverter
    from rail.estimation.algos.flexzboost import Inform_FZBoost, FZBoost
    from rail.evaluation.evaluator import Evaluator


    ## Data Storage: 
    DS = RailStage.data_store
    DS.__class__.allow_overwrite = True
  

In [2]:
# imports()

# Model

In [3]:
def makeModel():
    imports()
    
    #path to access the data 
    DATA_DIR = Path().resolve() / "data"
    DATA_DIR.mkdir(exist_ok=True)

    catalog_file = DATA_DIR / "base_catalog.pq"

    #array of galaxies w/ 7 attributes for each: redshift & ugrizy
    catalog = get_galaxy_data().rename(band_dict, axis=1) 

    #turns array into a table 
    tables_io.write(catalog, str(catalog_file.with_suffix("")), catalog_file.suffix[1:])

    catalog_file = str(catalog_file)
    flow_file = str(DATA_DIR / "trained_flow.pkl")

    #we set up the stage 
    flow_modeler_params = {
        "name": "flow_modeler",
        "input": catalog_file,
        "model": flow_file,
        "seed": 0,
        "phys_cols": {"redshift": [0, 3]},
        "phot_cols": {
            "mag_u_lsst": [17, 35],
            "mag_g_lsst": [16, 32],
            "mag_r_lsst": [15, 30],
            "mag_i_lsst": [15, 30],
            "mag_z_lsst": [14, 29],
            "mag_y_lsst": [14, 28],
        },
        "calc_colors": {"ref_column_name": "mag_i_lsst"},
    }
    flow_modeler = FlowModeler.make_stage(**flow_modeler_params)
    flow_modeler.fit_model()
    return flow_modeler.get_handle("model")

In [4]:
# modelData = makeModel()

# Training Set 

In [5]:
def trainingSet(modeldata, ntrain, seed):
    data = FlowCreator.make_stage(
            name = 'stuff',
            model = modeldata,
            n_samples = ntrain,
            seed = seed 
    )
    return data.sample(ntrain, seed)

def invRedshift(data, pivot = 1.0):
    deg = InvRedshiftIncompleteness.make_stage(
        name = 'stuff',
        pivot_redshift = pivot
    )
    return deg(data) 

In [6]:
# trainData = invRedshift(trainingSet(modelData, 10000, 372), 1.0)

In [7]:
def getPosts(data, grid):
    posts = FlowPosterior.make_stage(
        name='stuff', 
        column='redshift',
        grid = grid,
        model = model_data,
        data = data
    )
    return posts.get_posterior(data, column = 'redshift')

In [8]:
def makeGrid(zmin, zmax, nbins):
    import numpy as np
    grid = np.linspace(zmin, zmax, nbins + 1)
    return grid 

In [9]:
# grid = makeGrid(0, 2.5, 100)

In [10]:
# trainPosts = getPosts(trainData, grid)

## Posts 

Only run if you need output_orig_train_posts

In [11]:
# flow_post_orig_train = FlowPosterior.make_stage(name='orig_train_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              data = orig_train)

# orig_train_pdfs = flow_post_orig_train.get_posterior(orig_train, column='redshift')

Only run if you need output_deg_train_posts ** rerun this cell!! 

In [12]:
# flow_post_deg_train = FlowPosterior.make_stage(name='deg_train_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              err_samples = 0,
#                                              data = deg_train)



# deg_train_pdfs = flow_post_deg_train.get_posterior(deg_train, column='redshift')

# Test Set 

In [13]:
def testSet(ntest, seed):
    data = FlowCreator.make_stage(
            name = 'stuff',
            model = model_data,
            n_samples = ntest,
            seed = seed 
    )
    return data.sample(ntest, seed)


## you need to ask alex about where you can find the defaults for these params 
def lsstError(data, seed, tvis = 1, nYrObs = 1, airmass = 1, extendedSource = 1, sigmaSys = 1, magLim = 1, ndFlag = 1, A_min = 1, A_max = 1):
    bands = ['u','g','r','i','z','y']
    band_dict = {band:f'mag_{band}_lsst' for band in bands}
    
    deg = LSSTErrorModel.make_stage(
        name='stuff',
        bandNames=band_dict, 
        seed=seed,
    )
    return deg(data)

In [14]:
# testData = lsstError(testSet(100000, 1078), 39)

In [15]:
# testPosts = getPosts(testData, grid)

## Posts

Only run if you need output_orig_test_posts

In [16]:
# flow_post_orig_test = FlowPosterior.make_stage(name='orig_test_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              data = orig_test)

# orig_test_pdfs = flow_post_orig_test.get_posterior(orig_test, column='redshift')

Only run if you need output_deg_test_posts

In [17]:
# flow_post_deg_test = FlowPosterior.make_stage(name='deg_test_posts', 
#                                              column='redshift',
#                                              grid = np.linspace(0, 2.5, 101),
#                                              model=flow_file,
#                                              data = deg_test)

# deg_test_pdfs = flow_post_deg_test.get_posterior(deg_test, column='redshift')

# Make tables

In [18]:
def makeTable(datafile):
    import tables_io
    rename_dict = {f'mag_{band}_lsst_err':f'mag_err_{band}_lsst' for band in bands}

    col_remapper = ColumnMapper.make_stage(
    name='col_remapper', 
    columns=rename_dict,
    )
    table_conv = TableConverter.make_stage(
    name='table_conv', 
    output_format='numpyDict',
    )
    pq = col_remapper(datafile)
    tabledata = table_conv(pq)
    table = tables_io.convertObj(tabledata.data, tables_io.types.PD_DATAFRAME)
    return table


In [19]:
# trainTable = makeTable(trainData)
# testTable = makeTable(testData)

# Estimation 

In [20]:
def informFZBoost(data):
    info = Inform_FZBoost.make_stage(
    name ='informFZBoost', 
    model ='fzboost.pkl', 
    hdf5_groupname='',
    )
    info.inform(data)
    return info

In [21]:
# informedEst = informFZBoost(trainData)

In [22]:
def estimateFZBoost(data, info, nbins):
    est = FZBoost.make_stage(
    name='estFZBoost', 
    nondetect_val=np.nan,
    model= info.get_handle('model'), 
    hdf5_groupname='',
    aliases=dict(input='test_data', output='fzboost_estim'),
    nzbins = nbins ,
    zmax = zmax
    )
    return est.estimate(data)

In [23]:
# estData = estimateFZBoost(testData, informedEst, 100)

In [24]:
def bigF():
    imports()
    grid = makeGrid(0, 2.5, 100) 
    modelData = makeModel()
    trainData = invRedshift(trainingSet(modelData, 10, 372), 1.0)
    trainPosts = getPosts(trainData, grid)
    testData = lsstError(testSet(100, 1078), 39)
    testPosts = getPosts(testData, grid)
    trainTable = makeTable(trainData)
    testTable = makeTable(testData)
    informedEst = informFZBoost(trainData)
    estData = estimateFZBoost(testData, informedEst)
    pipe = ceci.Pipeline.interactive()
    stages = [modelData, trainData, trainPosts, testData, testPosts, trainTable, testTable, informedEst, estData]
    for stage in stages:
        pipe.add_stage(stage)

In [25]:
bigF()

NameError: name 'Path' is not defined