In [8]:
# imports
import sys
import numpy as np
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

# * home brew imports
sys.path.append('/')
from model_functions import build_model_input
from mc_functions import get_daily_param_permutations, process_input_batch, get_mc_summary

In [9]:
# read the sample input table; the 'DateTime' column is parsed as dates on load
sample_csv_path = '/content/sample_input.csv'
data = pd.read_csv(sample_csv_path, parse_dates=['DateTime'])

In [10]:
# Monte-Carlo summary per day.
#
# For every unique value in `data.Date`:
#   1. build the daily model input,
#   2. create permuted copies of that input (error manifestations),
#   3. run the parameter estimation on the permutations in worker threads,
#   4. summarize every output column and tag the rows with the date.
# Days for which a ValueError is raised are reported and skipped.
# The per-day summaries are collected in a list and concatenated once at the
# end, instead of growing a DataFrame with pd.concat on every iteration.

batch_size = 10        # number of permuted inputs handed to one worker task
n_permutations = 100   # number of permuted input sets generated per day

# column names are the same for every day, so define them once
# * col names of the estimation output
output_cols = ['gpp24', 'er24', 'sigma', 'r', 'rmse', 'mae']
# * col names of the statistics in the summary
summarize_vars = ['mu', 'sigma', 'p5', 'p50', 'p95']

# one summary frame per successfully processed day
daily_summaries = []

for date in data.Date.drop_duplicates():

    # wrap in 'try' to catch dates on which not enough input data is available
    try:

        # grab daily input
        daily_input, obs_ddo = build_model_input(input_df=data, date=str(date))

        # create batch of 'n' inputs w/ permutations based on sigma for
        # k600, t_water, do_conc_sat
        input_batches = get_daily_param_permutations(
            daily_input, n=n_permutations
        )

        # use multi-threading to compute 'batch_size' inputs per task;
        # leaving the 'with' block waits until every task has finished
        with ThreadPoolExecutor(max_workers=None) as executor:
            all_results = [
                executor.submit(
                    process_input_batch,
                    input_batches[batch_start: batch_start + batch_size],
                    obs_ddo,
                )
                for batch_start in range(0, len(input_batches), batch_size)
            ]

        # collect and combine results from all batches (submission order);
        # an exception raised inside a worker is re-raised by .result()
        final_results = []
        for future in all_results:
            final_results.extend(future.result())

        # format output
        parameter_stats = pd.DataFrame(final_results)
        parameter_stats.columns = output_cols

        # summarize output: one single-row frame per output column
        summary_rows = []
        for col in output_cols:
            single_row_df = pd.DataFrame([get_mc_summary(parameter_stats[col])])
            single_row_df.columns = summarize_vars
            single_row_df['variable'] = col
            summary_rows.append(single_row_df)

        # single concat per day; the index is intentionally left as-is
        # (each row keeps index 0, as in the previous implementation)
        summarized_daily_out = pd.concat(summary_rows)
        summarized_daily_out['Date'] = date
        daily_summaries.append(summarized_daily_out)

    # if inputs for day have wrong lengths, error is caught and
    # day is skipped
    except ValueError:
        print(f'skipped date {date}')

# combine all days at once; fall back to an empty frame if every day was skipped
# (pd.concat raises on an empty list)
mc_summary_days = pd.concat(daily_summaries) if daily_summaries else pd.DataFrame()

print(mc_summary_days.head(3))

  nll = -(-n/2 * np.log(2 * np.pi * sigma**2) - np.sum(residuals**2) / (2 * sigma**2))
  nll = -(-n/2 * np.log(2 * np.pi * sigma**2) - np.sum(residuals**2) / (2 * sigma**2))
  nll = -(-n/2 * np.log(2 * np.pi * sigma**2) - np.sum(residuals**2) / (2 * sigma**2))
  nll = -(-n/2 * np.log(2 * np.pi * sigma**2) - np.sum(residuals**2) / (2 * sigma**2))
  nll = -(-n/2 * np.log(2 * np.pi * sigma**2) - np.sum(residuals**2) / (2 * sigma**2))
  nll = -(-n/2 * np.log(2 * np.pi * sigma**2) - np.sum(residuals**2) / (2 * sigma**2))
  nll = -(-n/2 * np.log(2 * np.pi * sigma**2) - np.sum(residuals**2) / (2 * sigma**2))
  nll = -(-n/2 * np.log(2 * np.pi * sigma**2) - np.sum(residuals**2) / (2 * sigma**2))
  nll = -(-n/2 * np.log(2 * np.pi * sigma**2) - np.sum(residuals**2) / (2 * sigma**2))
  nll = -(-n/2 * np.log(2 * np.pi * sigma**2) - np.sum(residuals**2) / (2 * sigma**2))
  nll = -(-n/2 * np.log(2 * np.pi * sigma**2) - np.sum(residuals**2) / (2 * sigma**2))
  nll = -(-n/2 * np.log(2 * np.pi * sigma**

skipped date 1978-07-23
           mu     sigma          p5         p50         p95 variable  \
0  166.468256  0.146371  166.216405  166.468671  166.685002    gpp24   
0  253.427576  5.340920  243.625417  253.643224  261.733098     er24   
0    5.924074  0.098047    5.784861    5.931348    6.089973    sigma   

         Date  
0  1978-07-14  
0  1978-07-14  
0  1978-07-14  
