<a href="https://colab.research.google.com/github/david-levin11/Verification_Notebooks/blob/main/NBM_TimeSeries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Title--Please Make a Copy of this Notebook Prior To Editing!**
<br/>
Description--Write your description of what the notebook will do and any pitfalls or caveats here.

- David Levin, Arctic Testbed & Proving Ground, Anchorage Alaska

##**2 - Install and Import Packages**
This will take about a minute to run.

In [4]:
!pip install boto3
!pip install pygrib
!pip install swifter

import os
import sys
import requests
import boto3
import pygrib
import json
import time
import pandas as pd
import numpy as np
import xarray as xr
from datetime import datetime, timedelta
import matplotlib.pyplot as plt

from functools import partial
from multiprocessing import Pool, cpu_count
from multiprocessing import set_start_method, get_context
######################### General Config #########################

# @markdown <FONT SIZE=5>**1. Please Provide Your Synoptic API Token...**
user_token = "c6c8a66a96094960aabf1fed7d07ccf0" # @param {type:"string"}

# @markdown <FONT SIZE=5>**2. Select NBM Run**
# @markdown <br><FONT SIZE=3>NBM 4.1 and 4.2 available as a threaded dataset
# @markdown for PMaxT, PMinT, PQPF from 1/18/2023 to present
start_date = "2024-07-04" # @param {type:"date"}
# @markdown <FONT SIZE=5>**2. Select Run Start Time (UTC hour)**
utctime =12 #@param {type:"slider", min:0, max:23, step:1}
# @markdown <FONT SIZE=5>**2. Select Number Of Forecast Hours (0-240)**
run_hours = 72 #@param {type:"slider", min:0, max:240, step:1}
end_date = (datetime.strptime(start_date, '%Y-%m-%d') + timedelta(hours=run_hours)).strftime('%Y-%m-%d')
# @markdown <FONT SIZE=5>**3. For Which Element?**
element = "wind" # @param ["maxt", "mint", "qpf24", "qpf12", "qpf06", "wind", "windgust", "rh", "td"]

# @markdown <FONT SIZE=5>**4. What NBM Region?**
nbm_selection ='Alaska' # @param ["Alaska", "CONUS", "Pacific", "Puerto Rico"]
nbm_region_dict = {'Alaska':'ak', 'CONUS':'co', 'Pacific':'hi', "Puerto Rico":"pr"}
nbm_region = nbm_region_dict[nbm_selection]

# Split element/interval
interval_selection = int(element[-2:]) if "qpf" in element else False
element = element[:3] if "qpf" in element else element
#@markdown <FONT SIZE=5>**Which Site(s)?  (Enter a single four letter site identifier or a comma seperated list)**
ob_sites = 'pajn' # @param {type:"string"}
sites = [ob_sites]

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# GLOBAL VARIABLES AND GENERAL CONFIG                                         #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

# Multiprocess settings
process_pool_size = 20 #cpu_count()*16
print(f'Process Pool Size: {process_pool_size}')

# Backend APIs
metadata_api = "https://api.synopticdata.com/v2/stations/metadata?"
qc_api = "https://api.synopticdata.com/v2/stations/qcsegments?"

# Data Query APIs
timeseries_api = "https://api.synopticdata.com/v2/stations/timeseries?"
statistics_api = "https://api.synopticlabs.org/v2/stations/statistics?"
precipitation_api = "https://api.synopticdata.com/v2/stations/precipitation?"


# Assign API to element name
synoptic_apis = {
    'qpf':precipitation_api,
    'maxt':timeseries_api,
    'mint':timeseries_api,
    'wind':timeseries_api,
    'windgust':timeseries_api,
    'temp':timeseries_api,
    'rh': timeseries_api,
    'td': timeseries_api}

# Assign synoptic variable to element name
synoptic_vars = {
    'qpf':None,
    'maxt':'air_temp',
    'mint':'air_temp',
    'wind': 'wind_speed',
    'windgust': 'wind_gust',
    'rh':'relative_humidity',
    'td':'dew_point_temperature'
}

synoptic_vars_out = {
    'qpf':'OBSERVATIONS.precipitation',
    'maxt':'STATISTICS.air_temp_set_1.maximum',
    'mint':'STATISTICS.air_temp_set_1.minimum',}


ob_hours = {
    'qpf':[['0000', '0000'], ['1200', '1200']],
    'maxt':[['1200', '0600']],
    'mint':[['0000', '1800']]}

# NBM Globals
aws_bucket_nbm = 'noaa-nbm-grib2-pds'
aws_bucket_urma = 'noaa-urma-pds'

# Where to place the grib files (subdirs can be added in local) (not used)
output_dir = '/nas/atpg/data/nbm_verification/timeseries/'


# Which grib variables do each element correlate with
nbm_vars = {'qpf':'APCP',
                  'maxt':'TMP',
                  'mint':'TMP',
                  'wind': 'WIND',
                  'windgust': 'GUST',
                  'rh': 'RH',
                  'td': 'DPT'}

# Which grib levels do each element correlate with
nbm_levs = {'qpf':'surface',
               'maxt':'2 m above ground',
               'mint':'2 m above ground',
               'wind': '2 m above ground',
               'windgust': '2 m above ground',
               'rh': '2 m above ground',
               'td': '2 m above ground'}

# Convert user input to datetime objects
start_date, end_date = [datetime.strptime(date+' 0000', '%Y-%m-%d %H%M')
    for date in [start_date, end_date]]

# Bracket start date by 4.1 implementation
if start_date < datetime(2023, 1, 18, 0, 0, 0):
    start_date = datetime(2023, 1, 18, 0, 0, 0)


# Build synoptic arg dict
synoptic_api_args = {

    'api':synoptic_apis[element],
    'element':element,
    'interval':interval_selection if element == 'qpf' else False,
    'stid':sites,

    'vars_query':None if element == 'qpf'
        else f'{synoptic_vars[element]}',
    'days_offset':1 if element != 'mint' else 0}

# Build nbm/urma arg dict
nbm_request_args = {
    'interval':interval_selection if element == 'qpf' else False,
    'nbm_area':nbm_region,
    'element':element,
    'var':nbm_vars[element],
    'level':nbm_levs[element]}

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# FUNCTIONS AND METHODS (GENERAL)                                             #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def mkdir_p(check_dir):
    from pathlib import Path
    check_dir = output_dir + check_dir
    Path(check_dir).mkdir(parents=True, exist_ok=True)
    return check_dir

  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# FUNCTIONS AND METHODS (SYNOPTIC API)                                        #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def fetch_obs_from_API(start, end, utime, city, var, api, output_type='csv'):

    valid = True

    trtime = f'{utime:02d}'
    trstart = start.strftime('%Y%m%d')+trtime+'00'
    trend = end.strftime('%Y%m%d')+trtime+'00'


    element_label = var if var != 'qpf' else \
    'qpe' + f'{interval_selection:02d}'

    output_file = mkdir_p(f'obs_{output_type}/') +\
    f'obs.{element_label}.'+\
    f'.{start}.{end}.{output_type}'

    if os.path.isfile(output_file):
        print(f'File {output_file} already exists.')
        return output_file
    else:
      json_file = mkdir_p('obs_json/')+\
      f'obs.{element_label}.' +\
      f'.{start}.{end}.json'

      api_query_args = {
          'api_token': f'&token={user_token}',
          'stid': f'&stid={city}',
          'start': f'&start={trstart}',
          'end': f'&end={trend}',
          'vars': f'&vars={synoptic_vars[var]}',
          'format': f'&format=json',
          'units': '&units=english'
      }
      api_query = api + ''.join(
                [api_query_args[k] for k in api_query_args.keys()])

      print(f'Polling API for: {api_query}')

      status_code, response_count = None, 0
      while (status_code != 200) & (response_count <= 10):
          print(f'HTTP:{status_code}, #:{response_count}')

          # Don't sleep first try, sleep increasing amount for each retry
          time.sleep(2*response_count)

          response = requests.get(api_query)
          # response.raise_for_status()

          status_code = response.status_code
          response_count += 1

      try:
          response_dataframe = pd.json_normalize(
              response.json()['STATION'])
      except:
          valid = False
      else:
          with open(json_file, 'wb+') as wfp:
              wfp.write(response.content)

    if valid:
        # Check ACTIVE flag (Can disable in config above if desired)
        response_dataframe = response_dataframe[
            response_dataframe['STATUS'] == "ACTIVE"]

        # Un-nest the QPF totals
        if var == 'qpf':
            response_dataframe['TOTAL'] = [i[0]['total']
                for i in response_dataframe['OBSERVATIONS.precipitation']]

        if output_type == 'pickle':
        # Save out df as pickle
            response_dataframe.to_pickle(output_file)

        elif output_type == 'csv':
        # Save out df as csv
            response_dataframe.to_csv(output_file)

        return None

    return

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# FUNCTIONS AND METHODS (NBM)                                                 #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

def ll_to_index(loclat, loclon, datalats, datalons):
    # index, loclat, loclon = loclatlon
    abslat = np.abs(datalats-loclat)
    abslon = np.abs(datalons-loclon)
    c = np.maximum(abslon, abslat)
    latlon_idx_flat = np.argmin(c)
    latlon_idx = np.unravel_index(latlon_idx_flat, datalons.shape)
    return latlon_idx

def generate_hourly_intervals(start_time, end_time):
    # Calculate the total number of hours between the start and end times
    total_hours = int((end_time - start_time).total_seconds() // 3600)

    # Generate the list of formatted strings
    intervals = [f"F{str(i+1).zfill(3)}" for i in range(total_hours)]

    return intervals
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# INPUT-BASED GLOBAL VARIABLES AND CONFIG                                     #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

if __name__ == "__main__":
  for site in sites:
    fetch_obs_from_API(start_date, end_date, utctime, site, element, timeseries_api)


print(generate_hourly_intervals(start_date, end_date))

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# DATA ACQUISITION                                                            #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #



Process Pool Size: 20
File /nas/atpg/data/nbm_verification/timeseries/obs_csv/obs.wind..2024-07-04 00:00:00.2024-07-07 00:00:00.csv already exists.
['F001', 'F002', 'F003', 'F004', 'F005', 'F006', 'F007', 'F008', 'F009', 'F010', 'F011', 'F012', 'F013', 'F014', 'F015', 'F016', 'F017', 'F018', 'F019', 'F020', 'F021', 'F022', 'F023', 'F024', 'F025', 'F026', 'F027', 'F028', 'F029', 'F030', 'F031', 'F032', 'F033', 'F034', 'F035', 'F036', 'F037', 'F038', 'F039', 'F040', 'F041', 'F042', 'F043', 'F044', 'F045', 'F046', 'F047', 'F048', 'F049', 'F050', 'F051', 'F052', 'F053', 'F054', 'F055', 'F056', 'F057', 'F058', 'F059', 'F060', 'F061', 'F062', 'F063', 'F064', 'F065', 'F066', 'F067', 'F068', 'F069', 'F070', 'F071', 'F072']
