# Building Training Data for FMC Models

The purpose of this notebook is to combine the weather data from OK Mesonet, and from Van der Kamp when necessary, with the field observations from Carlson into datasets for training and evaluating models of FMC for various fuel classes.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import sys
sys.path.append("src")
from utils import read_yml

In [None]:
# Fuel-moisture tables, unpacked in the same order as the paths below
# (1h, 10h, 100h, 1000h).
# NOTE(review): presumably the Carlson field observations per fuel class — confirm.
df1, df10, df100, df1000 = map(
    pd.read_excel,
    (
        "data/processed_data/ok_1h.xlsx",
        "data/processed_data/ok_10h.xlsx",
        "data/processed_data/ok_100h.xlsx",
        "data/processed_data/ok_1000h.xlsx",
    ),
)

# Weather tables.
# NOTE(review): "dvdk" presumably refers to Van der Kamp — confirm.
dweather = pd.read_excel("data/processed_data/dvdk_weather.xlsx")
mweather = pd.read_excel("data/processed_data/mesonet.xlsx")

In [None]:
# Weather columns that are selected from the Mesonet table (together with
# "date") and checked for missing values further down.
required_weather_vars = [
    "Ed",
    "Ew",
    "solar",
    "wind",
    "rain",
]

## Join Weather

The OK Mesonet data is missing air temperature for parts of the study period, so it is joined with air temperature data from Van der Kamp.

* Identify missing data
* Linearly interpolate sections of missing data if less than or equal to 3 consecutive hours
* Fill in remaining missing data from Van der Kamp where applicable

### Handle Missing Observations

Identify short, isolated gaps in the data that can be filled by interpolation.

In [None]:
# Keep only the timestamp and the required weather columns, then flag every
# row in which at least one of the selected columns is NaN.
dfw = mweather[["date", *required_weather_vars]]
mask = dfw.isnull().any(axis="columns")

n_obs = len(mweather)
n_incomplete = mask.sum()
print(f"Number of Half-Hourly Observations from OK Mesonet: {n_obs}")
print(f"Number of Half-Hour Periods with Missing Key Variables: {n_incomplete}")

#### Missing Solar

Total missing times: 6

Longest temporal streak of missing data: 1 hr (2 half-hourly observations)

Methodology: manual linear interpolation

In [None]:
# Count the rows with no solar reading, then show those rows as the cell output.
solar_missing = mweather["solar"].isna()
print(f"Number of Missing Solar Radiation Observations: {solar_missing.sum()}")
mweather[solar_missing]

In [None]:
# Interp Values
def _interp_solar_gap(first, last):
    """Linearly fill the solar gap at row positions ``first..last`` (inclusive).

    The fill values lie on the straight line joining the observation just
    before the gap (position ``first - 1``) and the one just after it
    (position ``last + 1``). Both the reads and the write are positional
    (``iloc``), so the result does not depend on the index labels.
    Re-running is safe: the bracketing observations are never modified.

    Parameters
    ----------
    first, last : int
        Positional bounds of the gap in ``mweather``, both inclusive.

    Returns
    -------
    numpy.ndarray
        The values written into the ``solar`` column, in row order.

    Raises
    ------
    ValueError
        If either bracketing observation is missing, because the gap cannot
        then be interpolated and NaN would silently be written back.
    """
    solar_col = mweather.columns.get_loc("solar")
    left = mweather.iloc[first - 1, solar_col]
    right = mweather.iloc[last + 1, solar_col]
    if pd.isna(left) or pd.isna(right):
        raise ValueError(
            f"Cannot interpolate solar rows {first}-{last}: "
            "a bracketing observation is missing"
        )

    n_missing = last - first + 1
    # linspace includes both known endpoints; keep only the interior points.
    filled = np.linspace(left, right, n_missing + 2)[1:-1]
    mweather.iloc[first:last + 1, solar_col] = filled
    return filled


# Row positions of the four gaps (6 missing half-hourly observations in total).
# The intp* names are reused by the plotting cell that follows.
(intp1,) = _interp_solar_gap(5595, 5595)
intp2a, intp2b = _interp_solar_gap(9535, 9536)
intp3a, intp3b = _interp_solar_gap(24026, 24027)
(intp4,) = _interp_solar_gap(26967, 26967)

In [None]:
# Visual check of the interpolated solar values: one panel per gap, showing
# the surrounding observations in the default colour and the filled values in red.
fmt = mdates.DateFormatter('%Y-%m-%d %H:%M')
fig, ax = plt.subplots(figsize=(10, 16), nrows=4, ncols=1)
# Not attached to any axis below (each axis gets its own locator instance);
# the name is kept only in case a later cell refers to it.
locator = mdates.AutoDateLocator()

# One entry per panel, as positional row slices into mweather:
# (context window, rows bracketing the gap, gap rows, interpolated values)
panels = [
    (slice(5580, 5610), slice(5594, 5597), slice(5595, 5596), [intp1]),
    (slice(9525, 9545), slice(9534, 9538), slice(9535, 9537), [intp2a, intp2b]),
    (slice(24015, 24035), slice(24025, 24029), slice(24026, 24028), [intp3a, intp3b]),
    (slice(26955, 26975), slice(26966, 26969), slice(26967, 26968), [intp4]),
]

for a, (context, bracket, gap, values) in zip(ax, panels):
    a.plot(mweather.iloc[context].date, mweather.iloc[context].solar, 'o-')
    a.plot(mweather.iloc[bracket].date, mweather.iloc[bracket].solar, 'r-')
    # For a single-point gap 'ro-' draws just the marker, as 'ro' did.
    a.plot(mweather.iloc[gap].date, values, 'ro-')

    # A Locator keeps a reference to the axis it is attached to, so every
    # axis must get a fresh instance instead of sharing one.
    a.xaxis.set_major_locator(mdates.AutoDateLocator())
    a.xaxis.set_major_formatter(fmt)
    a.tick_params(axis='x', rotation=45)
    a.grid()
    a.set_ylabel(r"Solar Radiation ($\text{Wm}^{-2}$)")

fig.suptitle("Interpolated Solar Radiation", fontsize=16)
fig.tight_layout()

#### Missing Wind

In [None]:
# Count the rows with no wind reading, then show those rows as the cell output.
wind_missing = mweather["wind"].isna()
print(f"Number of Missing Wind Speed Observations: {wind_missing.sum()}")
mweather[wind_missing]