In [None]:
import pandas as pd
import numpy as np
import os
import os.path as osp
import subprocess
from src.data_funcs import get_stids
import json
from datetime import datetime, timedelta
from synoptic.services import stations_timeseries
from src.data_funcs import time_intp

In [None]:
# Load setup: API tokens are read from a local JSON file rather than hardcoded here
with open('etc/tokens.json') as tokens_file:  # context manager closes the file handle promptly
    tokens = json.load(tokens_file)
meso_token = tokens['mesowest']

# User arguments
start = "202306010000"  # start of the collection window, formatted as %Y%m%d%H%M
end = "202308312300"  # end of the collection window, same format
bbox = [37,-111,46,-95]  # passed to get_stids; presumably [lat_min, lon_min, lat_max, lon_max] — TODO confirm

In [None]:
# Hourly timestamps spanning the requested window; their count is the
# expected number of rows per station used by the coverage checks
times = pd.date_range(start, end, freq="1h")

In [None]:
# Station IDs to query, one request per entry in the collection loop;
# presumably the stations inside bbox for the window — verify against get_stids
sts = get_stids(start, end, bbox, meso_token)

In [None]:
# Sensor variables a station must report in order to be kept
raws_vars = [
    "air_temp",
    "relative_humidity",
    "precip_accum",
    "fuel_moisture",
    "wind_speed",
    "solar_radiation",
]

# Parse both window endpoints from their %Y%m%d%H%M strings
t0, t1 = (datetime.strptime(stamp, "%Y%m%d%H%M") for stamp in (start, end))

# Keyword arguments reused for every stations_timeseries request
params = {
    "stid": ["PLACEHOLDER"],  # replaced with the current station inside the loop
    "vars": raws_vars,
    "start": t0,
    # padded by one hour since the request doesn't include the end date exactly
    "end": t1 + timedelta(hours=1),
}

In [None]:
# Loop over STIDs and keep only stations that report every required sensor
# with at least 90% hourly coverage. Each accepted station contributes one
# DataFrame (sensor columns + derived time fields + static metadata) to `dfs`.
dfs = [] # empty list to collect data
for st in sts:

    print("~"*50)
    print(f"Collecting RAWS data for {st}")
    params["stid"] = [st]
    try:
        df_temp = stations_timeseries(verbose="HIDE", **params)

        # Skip the station unless it has all sensor data
        if not all(col in df_temp.columns for col in raws_vars):
            print(f"Not full data at {st}: missing sensor timeseries")
            continue

        # Skip the station unless it has over 90% time coverage, to limit interpolation
        if df_temp.shape[0] < .9*len(times):
            print(f"Not sufficient data at {st}: More than 10% missing data for time period for station")
            continue

        print(f"Number of observations at {st}: {df_temp.shape[0]}")

        # Capture station metadata BEFORE reshaping: asfreq/interpolate return new
        # frames, and pandas documents `attrs` as experimental, so relying on it
        # surviving those calls could raise an uncaught KeyError and abort the loop.
        station_meta = dict(df_temp.attrs)

        # Fill in missing rows with NA, linear interpolate missing values
        if df_temp.shape[0] != len(times):
            print("Interpolating missing hourly values")
            df_temp = df_temp.asfreq("1h").interpolate(method="linear")

        # Add Derived Time Fields: day of year, hour of day
        df_temp["hour"] = df_temp.index.hour
        df_temp["doy"] = df_temp.index.dayofyear

        # Add Static Data Fields Repeated in time: elevation, lon, lat, stid
        df_temp["elev"] = station_meta["ELEVATION"]
        df_temp["lon"] = station_meta["longitude"]
        df_temp["lat"] = station_meta["latitude"]
        df_temp["stid"] = station_meta["STID"]

        # Append station data to collection in bbox
        dfs.append(df_temp)
        print(f"Collected FMC data for {st}")
    except AssertionError as e:
        # Treated as a recoverable per-station failure: report it and move on
        print("AssertionError caught:", e)
