# Papermill and Scrapbook notebook execution pipeline

This notebook uses papermill and scrapbook to run the CN calculation notebook and the PrecipTable generation notebook:

* The CN calculation source notebook is located at: /home/src/data/CN_Calculator.ipynb. It takes a HUC12 number as input and outputs the spatially averaged curve number (CN).

* The PrecipTable generation source notebook is located at: /home/src/data/hydromet/notebooks/pluvial/PrecipTable_CN_Reduced.ipynb. It takes a HUC number and the calculated CN as input. The output of this notebook is a Precipitation and Runoff table for a series of different return periods.

In [None]:
# Import libraries
import argparse 
import inspect
import logging
import os
import pathlib as pl
import sys
import warnings; warnings.filterwarnings('ignore')
import papermill as pm 
from utils import *
import scrapbook as sb
import config_notebook as config_src

# if "/home/" not in sys.path:
#     sys.path.append("/home/")

## Pass the parameters to the CN calculation notebook using Papermill

In [None]:
# Parameters forwarded through Papermill to the CN calculation notebook
# (they are passed in the `parameters=` argument of `pm.execute_notebook`).
# List of HUC12 codes (as strings) for which the CN is computed.
# huc12_list = ['120903010306', '080102050106','051002010201','043001060504']
huc12_list = ['051002010201']

#huc12_list = ['120903010306']
# Whether to plot figures of land use, HSG, and curve number.
plot_figs = True
# Option selecting how dual HSG classes and nodata are handled; see the
# soilvar_CN function (presumably defined in the CN calculation notebook -- confirm).
dual_HSG_num = 3
## Whether to use Danielle's CN look-up table (higher CN estimates, e.g. Wetland CNs are 98 in this table for all HSGs). True by default.
FEMA_option = True

In [None]:
flood_type = 'flash'
# Source notebook that computes the curve number (CN)
input_notebook_path = './CN_Calculator.ipynb'
# Fail early if Papermill cannot read the source notebook
pm.inspect_notebook(input_notebook_path)
# NOTE(review): DefaultAzureCredential, upload_blob and logger are not imported
# explicitly in this notebook; presumably they come from `from utils import *` -- confirm.
credentials = DefaultAzureCredential()

# Run the CN notebook once per HUC12 and upload the executed notebook
for huc12 in huc12_list:

    config = config_src.create_config(huc12, flood_type)
    log_out_dir = config.log.dir/("huc" + huc12)/('res'+config.res)
    output_notebook_filename_CN = f'CN_huc_{huc12}_out.ipynb'
    output_notebook_path_CN = pl.Path('./'+str(log_out_dir))/output_notebook_filename_CN
    # Blob "folder" for this HUC: the last character of config.res is replaced by 'm'
    blob_path = pl.Path(("huc" + huc12), ("res" + config.res[:-1] +'m'))

    # Papermill does not create missing output folders, so make sure it exists
    output_notebook_path_CN.parent.mkdir(parents=True, exist_ok=True)

    print(f'Calculating CN for HUC: {huc12}')

    CN_parameters = dict(
        huc12=huc12,
        plot_figs=plot_figs,
        dual_HSG_num=dual_HSG_num,
        FEMA_option=FEMA_option,
    )

    # Execute the notebook, retrying once on failure. Only `Exception` is caught
    # so that Ctrl-C / SystemExit stop the run instead of triggering a retry,
    # and the first failure is logged rather than silently discarded.
    for attempt in (1, 2):
        try:
            pm.execute_notebook(
                input_notebook_path,
                output_notebook_path_CN,
                parameters=CN_parameters,
                log_output=True,
            )
            break
        except Exception:
            if attempt == 2:
                # Second failure: propagate, as the original retry did
                raise
            logger.exception(f'CN notebook failed for HUC {huc12}; retrying once')

    logger.info('upload notebook file')
    upload_blob(
        output_notebook_path_CN,
        config.log.storage,
        config.log.container,
        # Blob names use forward slashes regardless of the local OS
        (blob_path/output_notebook_filename_CN).as_posix(),
        credentials,
        overwrite=True
    )

## Pass the calculated CN as a parameter along with HUC12 number to the PrecipTable generation notebook via Papermill

In [None]:
# Source notebook that computes the Precipitation/Runoff table
input_notebook_path_2 = './hydromet/notebooks/pluvial/PrecipTable_CN_Reduced.ipynb'
# Fail early if Papermill cannot read the source notebook
pm.inspect_notebook(input_notebook_path_2)

# Directory where the executed PrecipTable notebooks are saved (created if missing)
PrecipTable_output_dir = '/home/jovyan/app/data/PrecipTable_papermill'
os.makedirs(PrecipTable_output_dir, exist_ok=True)

# Per-HUC results; only filled by the commented-out bookkeeping at the end of the loop
Huc_list = []
CN_list = []
PrecipTable_list = []

# Run the PrecipTable notebook once per HUC12, feeding it the HUC and CN scraps
# recorded in that HUC's executed CN notebook
for huc12 in huc12_list:

    # Locate the executed CN notebook. The CN cell writes it to
    # ./<log dir>/huc<huc12>/res<res>/CN_huc_<huc12>_out.ipynb, so look there first.
    # NOTE(review): this re-derives the path with create_config(huc12, flood_type),
    # exactly as the CN cell does -- confirm create_config is cheap/idempotent.
    cn_config = config_src.create_config(huc12, flood_type)
    cn_log_dir = cn_config.log.dir/("huc" + huc12)/('res'+cn_config.res)
    source_dir = pl.Path('./'+str(cn_log_dir))/f'CN_huc_{huc12}_out.ipynb'
    if not source_dir.is_file():
        # Fall back to the legacy location used by earlier versions of this pipeline
        source_dir = pl.Path("/home/jovyan/app/data/CN_papermill/CN_{}_output.ipynb".format(huc12))
    nb = sb.read_notebook(str(source_dir))

    # Pull out the saved scraps (HUC and CN) from the executed CN notebook;
    # the CN is rounded to the nearest integer before being passed on
    HUC_num = nb.scraps['HUC'].data
    CN_val = round(nb.scraps['CN'].data)

    print('Creating PrecipTable for HUC: {0}, with CN calculated as: {1}'.format(HUC_num, CN_val))

    # Output notebook for this HUC, inside PrecipTable_output_dir
    output_notebook_filename_2 = f'PrecipTable_{HUC_num}_output.ipynb'
    output_notebook_path_2 = pl.Path(PrecipTable_output_dir)/output_notebook_filename_2

    # Execute papermill for this HUC to obtain the PrecipTable
    #if not pl.Path.is_file(output_notebook_path_2):
    pm.execute_notebook(
        input_notebook_path_2,
        output_notebook_path_2,
        parameters=dict(HUC=HUC_num, CN=CN_val),
        log_output=True,
    )

    # Read the notebook scraps we just created
    #nb_2 = sb.read_notebook(str(output_notebook_path_2))
    # Extract the table created to a Pandas dataframe
    #Table_2 = nb_2.scraps['Runoff_Table'].data

    # Save the outputs to the current notebook's list variables
    #Huc_list.append(HUC_num)
    #CN_list.append(CN_val)
    #PrecipTable_list.append(Table_2)

    # sb.glue(f'CN_{HUC_num}', CN_val)
    # sb.glue(f'Runoff_table_{HUC_num}', Table_2, 'pandas')

# Also save the Huc_list info as a scrap of the current notebook
#sb.glue('Huc_list', Huc_list)

Below is the old code that reads all CN_output notebooks at once. It has been replaced by the new code above, which reads the notebooks one by one. This is faster because irrelevant HUCs are skipped.

In [None]:
# # Declare the directory to read notebooks (where those notebooks are saved)
# source_dir = "/home/jovyan/app/data/CN_papermill"
# # Read all notebooks under this directory
# sbook = sb.read_notebooks(source_dir)

# # Specify the source code notebook for computing Precipitation/Runoff Table
# input_notebook_path_2 = './hydromet/notebooks/pluvial/PrecipTable_CN_Reduced.ipynb'
# # Check the validity of this source code notebook
# pm.inspect_notebook(input_notebook_path_2)

# # Define the directory to save output notebooks
# PrecipTable_output_dir = '/home/jovyan/app/data/PrecipTable_papermill'
# # If this directory does not exist, create one
# if not os.path.exists(PrecipTable_output_dir):
#     os.makedirs(PrecipTable_output_dir, exist_ok=True)

# # Define a list to save huc12 number for each notebooks
# Huc_list = []
# CN_list = []
# PrecipTable_list = []


# # Loop over each notebook (Huc12 number and calculated CN are two parameters)
# for nb in sbook.notebooks:
    
#     # Pull out the saved scraps (HUC and CN) from the saved CN notebook
#     HUC_num = nb.scraps['HUC'].data
#     CN_val = round(nb.scraps['CN'].data)

#     print('Creating PrecipTable for HUC: {0}, with CN calculated as: {1}'.format(HUC_num, CN_val))
    
#     # Declare a new notebook directory to be saved for PrecipTable notebook for this huc
#     output_notebook_filename_2 = f'PrecipTable_{HUC_num}_output.ipynb'
#     output_notebook_path_2 = pl.Path('/home/jovyan/app/data/PrecipTable_papermill')/pl.Path(output_notebook_filename_2)

#     # Execute papermill for this huc to obtain PrecipTable 
#     #if not pl.Path.is_file(output_notebook_path_2):
#     pm.execute_notebook(
#     input_notebook_path_2,
#     output_notebook_path_2,
#     parameters=dict(HUC=HUC_num, CN=CN_val),
#     log_output=True)
    
#     # Read the notebook scraps we just created
#     nb_2 = sb.read_notebook(str(output_notebook_path_2))
#     # Extract the table created to a Pandas dataframe
#     Table_2 = nb_2.scraps['Runoff_Table'].data

#     # Save the outputs to the current notebook's list variables
#     Huc_list.append(HUC_num)
#     CN_list.append(CN_val)
#     PrecipTable_list.append(Table_2)

#     # sb.glue(f'CN_{HUC_num}', CN_val)
#     # sb.glue(f'Runoff_table_{HUC_num}', Table_2, 'pandas')

# # Also save the Huc_list info as a scrap of the current notebook
# #sb.glue('Huc_list', Huc_list)

## Save PrecipTable and CN for each Huc in a dictionary

In [None]:
# nb_current = sb.read_notebook('/home/src/data/CN_PrecipTable_Papermill.ipynb')
# dict_CN_PrecipTable = {}
# for huc in Huc_list:
#     dict_CN_PrecipTable["CN_{0}".format(huc)] = nb_current.scraps[f'CN_{huc}'].data
#     dict_CN_PrecipTable["Table_{0}".format(huc)] = nb_current.scraps[f'Runoff_table_{huc}'].data

In [None]:
# # Dictionary contains: "CN_{Huc}" and "Table_{Huc}"
# dict_CN_PrecipTable = {}
# for i in range(len(Huc_list)):
#     dict_CN_PrecipTable["CN_{0}".format(Huc_list[i])] = CN_list[i]
#     dict_CN_PrecipTable["Table_{0}".format(Huc_list[i])] = PrecipTable_list[i]

In [None]:
# # Give a Huc number to test the results saved in the dictionary
# Huc_test = huc12_list[2]
# print(f'For Huc {Huc_test}: CN equals to {dict_CN_PrecipTable["CN_{0}".format(Huc_test)]}')
# dict_CN_PrecipTable["Table_{0}".format(Huc_test)] 