# Fuel Field Observations from Oklahoma

The purpose of this notebook is to clean and format data received from JD Carlson (via Derek Vanderkamp) on fuel moisture field observations conducted in Oklahoma in 1996-1997.

## Background

- Part of a publication in 2007
- Used to calibrate the Nelson model, which is used by many agencies

## Setup

In [None]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from src.utils import time_intp, read_yml

In [None]:
# Output directory (created in the Save section before anything is written)
output_dir = "data/processed_data"

# Old-name -> new-name column mapping, applied in the rename step below
nlist = read_yml("etc/nlists/carlson_fielddata.yaml")

# Raw spreadsheet of field observations
df = pd.read_excel("data/oklahoma_Carlson_data.xlsx")

In [None]:
# Preview only the first rows instead of dumping the whole raw table
df.head()

## Process

Standardize column names and convert temperature from °C to K.

In [None]:
# Rename columns to the standardized names given in `nlist`.
# `DataFrame.rename` silently ignores mapping keys that are not present, so
# warn about them explicitly; otherwise a typo in the mapping goes unnoticed.
# (Requires `import warnings` in the imports cell.)
missing = set(nlist.keys()) - set(df.columns)
if missing:
    warnings.warn(f"The following old names were not found in DataFrame columns: {missing}")

df = df.rename(columns=nlist)

# Units: add 273.15 to the temperature column (C -> K).
# Bracket indexing is used so a missing "temp" column fails with a clear
# KeyError instead of relying on attribute-style column access.
# NOTE: this cell is not idempotent; running it twice adds the offset twice
# (and the rename check above will then warn about every old name).
df["temp"] = df["temp"] + 273.15

## Explore

Carlson Data from Derek Vanderkamp:

- Includes weather data and fuel moisture data.
- Weather data and fuel moisture data not exactly lined up in time
- Separate rows with missing weather or missing fuel moisture if not at the same time

**Goal** (note: this process is run separately for the 1h, 10h, 100h, and 1000h fuel classes):

- Separate weather from FMC data
- Sort by time
- Write separately

In [None]:
# Number of distinct values per identifier column.
# dropna=False keeps NaN (if any) counted as one value, like len(unique()).
print(f"Unique sites: {df['site'].nunique(dropna=False)}")
print(f"Unique subsites: {df['subsite'].nunique(dropna=False)}")
print(f"Unique res: {df['res'].nunique(dropna=False)}")

In [None]:
# Show the column labels currently in df
df.columns

In [None]:
# Define Variable Sets
tvars = ["year", "month", "doy", "mday", "hod", "min", "date"]
wvars = ["solar", "rain", "rh", "temp", "vap.press", "vpd",
         "wind", "vap.den"]
fvars = ["fm1", "fm10", "fm100", "fm1000"] # 1h, 10h, 100h, and 1000h

### Separate Datasets

Note: filtering FMC data by fuel class

In [None]:
def get_fm_class(df0, fuel_class,
                 tvars = ["year", "month", "doy", "mday", "hod", "min", "date"],
                 wvars = ["solar", "rain", "rh", "temp", "vap.press", "vpd", "wind", "vap.den"]):
    """Extract the observations of one fuel class from a combined table.

    Parameters
    ----------
    df0 : pandas.DataFrame
        Table holding the columns listed in ``tvars`` plus the fuel-moisture
        column of the requested class.
    fuel_class : {"1h", "10h", "100h", "1000h"}
        Fuel class to extract; mapped to column ``fm1``, ``fm10``, ``fm100``
        or ``fm1000`` respectively.
    tvars : list of str
        Time columns carried over to the result, in this order.
    wvars : list of str
        Not used by this function; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        A copy containing ``tvars`` followed by the selected fuel-moisture
        column, restricted to rows where that column is not missing. The
        original index labels are preserved.

    Raises
    ------
    ValueError
        If ``fuel_class`` is not one of the supported classes.
    """
    fm_columns = {"1h": "fm1", "10h": "fm10", "100h": "fm100", "1000h": "fm1000"}

    # Fail loudly on an unknown class instead of returning an unfiltered table
    if fuel_class not in fm_columns:
        raise ValueError(
            f"Unknown fuel_class {fuel_class!r}; expected one of {list(fm_columns)}"
        )
    target = fm_columns[fuel_class]

    # Work on the frame that was passed in (not a notebook-level global)
    has_reading = df0[target].notna()
    return df0.loc[has_reading, list(tvars) + [target]].copy()

In [None]:
# One table per fuel class, built in the order 1h, 10h, 100h, 1000h
fm1, fm10, fm100, fm1000 = (
    get_fm_class(df, fuel_class=size_class)
    for size_class in ("1h", "10h", "100h", "1000h")
)

In [None]:
# Extract weather data
weather = df[tvars + wvars]
weather = weather[~(weather.rh.isna()) & ~(weather.temp.isna())]

In [None]:
# Chronological ordering keys
order_cols = ["year", "month", "mday", "hod", "min"]

# Sort every table by time and renumber its rows from 0
fm1, fm10, fm100, fm1000, weather = (
    table.sort_values(order_cols).reset_index(drop=True)
    for table in (fm1, fm10, fm100, fm1000, weather)
)

### Fix Date
The dates are in Central time with DST jumps. They need to be aligned with UTC for the hod and doy variables used to train RNN models.

In [None]:
# Compare to date column in data frame, manually extract
print(f"Number of Missing Dates: {np.sum(weather.date.isna())}")
display(weather[weather.date.isna()][tvars])

These are the dates associated with the DST jump forward.

In [None]:
# Time columns of the weather rows recorded on 1996-04-07
display(
    weather.loc[
        weather["year"].eq(1996) & weather["month"].eq(4) & weather["mday"].eq(7),
        tvars,
    ]
)

In [None]:
# Time columns of the weather rows recorded on 1997-04-06
display(
    weather.loc[
        weather["year"].eq(1997) & weather["month"].eq(4) & weather["mday"].eq(6),
        tvars,
    ]
)

## Fix Dates

Convert to UTC accounting for DST jumps

Round to nearest whole hour for FM data

In [None]:
# Check weather data 1 hr spacing
sort_cols = ["year", "month", "mday", "hod", "min"]
weather = weather.sort_values(sort_cols).reset_index(drop=True)
d = weather[["hod", "min"]].copy()
d["dh"] = (d["hod"].shift(-1) - d["hod"]) % 24
print(d["dh"].shape) 
print(d["dh"].value_counts()) # Expect all 1's with 1 NA value at end

In [None]:
# Timestamp given to weather row 0; every later row is offset by its index in hours
anchor_local = pd.Timestamp("1996-03-26 15:00:00")
# NOTE(review): fixed 7 h local -> UTC shift; confirm against the stations' time zone
anchor_utc = anchor_local + pd.Timedelta(7, unit="h")

# Assumes the index is 0..n-1 with exactly one row per hour (see the spacing check)
hours_from_anchor = pd.to_timedelta(weather.index, unit="h")
weather["utc"] = anchor_utc + hours_from_anchor

# Hour of day and day of year taken from the UTC timestamp
weather["hod_utc"] = weather["utc"].dt.hour
# NOTE(review): this column holds the day of year despite the "hod_" prefix
weather["hod_doy"] = weather["utc"].dt.dayofyear

In [None]:
# Stand-alone copy of the weather UTC timestamps with a fresh 0..n-1 index
utc_ref = weather[["utc"]].copy().reset_index(drop=True)

In [None]:
def add_utc_features(fm, utc_offset_hours=7):
    """Add UTC timestamp columns to a fuel-moisture table.

    Parameters
    ----------
    fm : pandas.DataFrame
        Table with the columns ``year``, ``month``, ``mday``, ``hod`` and
        ``min`` describing the local observation time.
    utc_offset_hours : int, default 7
        Hours added to the local timestamp to obtain the provisional UTC time.
        NOTE(review): a single fixed offset is applied to every row; confirm
        this matches how ``weather.utc`` is built across the DST changes.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.Series)
        A copy of ``fm`` with four added columns, and the local timestamps
        they were derived from (``NaT`` where the parts do not form a valid
        date). The added columns are:

        - ``utc_prov``: local timestamp plus ``utc_offset_hours``
        - ``utc_rounded``: ``utc_prov`` rounded to the nearest hour
        - ``hod_utc``: hour of ``utc_rounded``
        - ``hod_doy``: day of year of ``utc_rounded`` (name kept as is because
          it is part of the written output)
    """
    # Assemble the timestamp from its parts; unparseable rows become NaT
    parts = pd.DataFrame({
        "year":   fm["year"],
        "month":  fm["month"],
        "day":    fm["mday"],
        "hour":   fm["hod"],
        "minute": fm["min"],
    }).astype("Int64")
    local = pd.to_datetime(parts, errors="coerce")

    out = fm.copy()
    # provisional UTC (same frame as weather.utc)
    out["utc_prov"] = local + pd.Timedelta(hours=utc_offset_hours)
    # round to nearest physical hour
    out["utc_rounded"] = out["utc_prov"].dt.round("h")
    # ML features
    out["hod_utc"] = out["utc_rounded"].dt.hour
    out["hod_doy"] = out["utc_rounded"].dt.dayofyear
    return out, local


# Same processing for every fuel class; the *_local series are kept available
fm1, fm1_local = add_utc_features(fm1)
fm10, fm10_local = add_utc_features(fm10)
fm100, fm100_local = add_utc_features(fm100)
fm1000, fm1000_local = add_utc_features(fm1000)

## Calc Eqs in Weather

In [None]:
# To confirm Kelvin
# First few temperature values, to eyeball that they are in Kelvin
weather["temp"].head()

In [None]:
# Equilibria
rh = weather.rh
temp = weather.temp
Ed = 0.924 * rh**0.679 + 0.000499 * np.exp(0.1 * rh) + 0.18 * (21.1 + 273.15 - temp) * (1 - np.exp(-0.115 * rh))
Ew = 0.618 * rh**0.753 + 0.000454 * np.exp(0.1 * rh) + 0.18 * (21.1 + 273.15 - temp) * (1 - np.exp(-0.115 * rh))

weather["Ed"] = Ed
weather["Ew"] = Ew

## Save

In [None]:
# Write Out
# Write Out
# Every path is derived from output_dir so the directory that is created is
# the one the files are written to.
os.makedirs(output_dir, exist_ok=True)

# One workbook per fuel class
fm1.to_excel(os.path.join(output_dir, "ok_1h.xlsx"), index=False)
fm10.to_excel(os.path.join(output_dir, "ok_10h.xlsx"), index=False)
fm100.to_excel(os.path.join(output_dir, "ok_100h.xlsx"), index=False)
fm1000.to_excel(os.path.join(output_dir, "ok_1000h.xlsx"), index=False)

# Weather table, including the UTC and equilibrium columns added above
weather.to_excel(os.path.join(output_dir, "dvdk_weather.xlsx"), index=False)