### Merge KHSE and CO-OPS winds using the transfer function determined in `first_merge_...`

In [5]:
import pandas as pd
import numpy as np

# ============================================================
# USER SETTINGS
# ============================================================
# Input files: the hourly (vector-mean) wind time series produced earlier.
# Both need a UTC time column, a speed column and a direction-FROM column.
KHSE_WIND_CSV = "isd_KHSE_only/KHSE_area_ISD_wind_merged.csv"
COOPS_WIND_CSV = "coops_8654467/COOPS_8654467_wind_metric_GMT.csv"  # CO-OPS met wind file

# Destination of the merged hourly series.
OUT_CSV = "KHSE_merged_with_COOPSfill_regimeAware_hourly.csv"

# Column names looked up in the input tables (edit to match your files).
TIME_COL = "time"
U_COL = "wind_speed_mps"      # wind speed (m/s)
D_COL = "wind_dir_from_deg"   # direction the wind blows FROM (deg)

# Frequency used for resampling and for the merged time grid.
FREQ = "1h"

# Linear transfer CO-OPS -> KHSE, one (slope, intercept) pair per direction
# sector:  U_kh = A * U_co + B
A_N = 0.37  # north sector slope
B_N = 1.48  # north sector intercept
A_S = 0.87  # south sector slope
B_S = 0.69  # south sector intercept

# ============================================================
# HELPERS
# ============================================================
def met_from_to_deg(dir_from):
    """Turn a direction-FROM angle (deg) into the opposite direction-TO angle.

    The result is wrapped into [0, 360).
    """
    opposite = dir_from + 180.0
    return opposite % 360.0

def wind_to_uv(U, dir_from_deg):
    """Split meteorological wind (speed, direction FROM) into components.

    Parameters
    ----------
    U : array-like
        Wind speed.
    dir_from_deg : array-like
        Direction the wind blows FROM, in degrees (wrapped modulo 360).

    Returns
    -------
    (u, v) : tuple of ndarray
        u is the eastward component, v the northward component. A NaN in
        either input gives NaN in both outputs.
    """
    speed = np.asarray(U, float)
    from_deg = np.asarray(dir_from_deg, float) % 360.0
    # Components point where the air is going, so flip FROM into TO first.
    to_rad = np.deg2rad(met_from_to_deg(from_deg))
    return speed * np.sin(to_rad), speed * np.cos(to_rad)

def uv_to_wind(u, v):
    """Convert east/north components to meteorological speed and direction FROM.

    Parameters
    ----------
    u, v : array-like
        Eastward and northward wind components.

    Returns
    -------
    (U, dfrom) : tuple of ndarray
        Speed and direction FROM in degrees within [0, 360). When both
        components are zero the speed is 0 and the direction is simply what
        arctan2 yields for zero inputs, so it carries no physical meaning.
    """
    east = np.asarray(u, float)
    north = np.asarray(v, float)

    speed = np.sqrt(east * east + north * north)

    # Compass bearing the air moves TOWARD, measured clockwise from north.
    heading_to = np.rad2deg(np.arctan2(east, north))
    heading_to = (heading_to + 360.0) % 360.0

    return speed, (heading_to + 180.0) % 360.0

def is_north_sector(dfrom):
    """Flag directions in the north sector: [270, 360) or [0, 90) degrees.

    The input is wrapped modulo 360 before the comparison.
    """
    wrapped = dfrom % 360.0
    in_north_east = wrapped < 90.0
    in_north_west = wrapped >= 270.0
    return in_north_west | in_north_east

def apply_piecewise_transfer(U_co, Dfrom_co):
    """Map CO-OPS wind speed to a KHSE-equivalent speed, sector by sector.

    Directions flagged by ``is_north_sector`` use ``A_N * U + B_N``; all other
    directions use ``A_S * U + B_S``. The direction itself is not modified, so
    the caller can reuse it to rebuild u, v.

    Parameters
    ----------
    U_co : array-like
        CO-OPS wind speed.
    Dfrom_co : array-like
        CO-OPS direction FROM (deg), same shape as ``U_co``.

    Returns
    -------
    ndarray
        Adjusted speed, floored at 0. Entries whose speed or direction is not
        finite are NaN.
    """
    speed = np.asarray(U_co, float)
    direction = np.asarray(Dfrom_co, float)

    usable = np.isfinite(speed) & np.isfinite(direction)
    north = is_north_sector(direction)

    adjusted = np.full_like(speed, np.nan, dtype=float)
    sector_fits = (
        (usable & north, A_N, B_N),
        (usable & ~north, A_S, B_S),
    )
    for sector_mask, slope, intercept in sector_fits:
        adjusted[sector_mask] = slope * speed[sector_mask] + intercept

    # Guard against negative speeds (rare, but possible when extrapolating).
    floored = np.maximum(adjusted, 0.0)
    return np.where(np.isfinite(adjusted), floored, np.nan)

# ============================================================
# LOAD HOURLY DATA
# ============================================================

# CO-OPS file: header=0 together with names=... replaces the file's own header
# row. The first three columns take the names configured in USER SETTINGS so
# the lookups below keep working when TIME_COL / U_COL / D_COL are changed;
# the remaining four columns are named but not used below.
co = pd.read_csv(
    COOPS_WIND_CSV,
    names=[TIME_COL, U_COL, D_COL, "Direction.1", "Gust", "X", "R"],
    header=0,
    parse_dates=[TIME_COL],
)
# Force tz-aware UTC timestamps and numeric columns; unparseable cells become
# NaT / NaN instead of raising.
co[TIME_COL] = pd.to_datetime(co[TIME_COL], utc=True, errors="coerce")
co[U_COL] = pd.to_numeric(co[U_COL], errors="coerce")
co[D_COL] = pd.to_numeric(co[D_COL], errors="coerce")
# Rows without a valid timestamp cannot be placed on the time axis.
co = co.dropna(subset=[TIME_COL]).set_index(TIME_COL).sort_index()


# KHSE file: uses its own header row, same cleaning as above.
kh = pd.read_csv(KHSE_WIND_CSV, parse_dates=[TIME_COL])
kh[TIME_COL] = pd.to_datetime(kh[TIME_COL], utc=True, errors="coerce")
kh[U_COL] = pd.to_numeric(kh[U_COL], errors="coerce")
kh[D_COL] = pd.to_numeric(kh[D_COL], errors="coerce")
kh = kh.dropna(subset=[TIME_COL]).set_index(TIME_COL).sort_index()


# (Optional) enforce exact hourly grid via resample mean on u,v if needed:
# If your CSVs are already hourly, you can skip this. If not, this makes them hourly correctly.
# Convert to u,v, resample, reconstruct.
for df in (kh, co):
    df["u"], df["v"] = wind_to_uv(df[U_COL].values, df[D_COL].values)

# Average the components (not speed/direction) so the hourly value is a
# vector mean.
kh_uv = kh[["u","v"]].resample(FREQ).mean(numeric_only=True)
co_uv = co[["u","v"]].resample(FREQ).mean(numeric_only=True)

# Speed and direction FROM of the hourly mean vectors.
kh_U, kh_D = uv_to_wind(kh_uv["u"].values, kh_uv["v"].values)
co_U, co_D = uv_to_wind(co_uv["u"].values, co_uv["v"].values)

kh_hr = pd.DataFrame({"U": kh_U, "Dfrom": kh_D, "u": kh_uv["u"], "v": kh_uv["v"]}, index=kh_uv.index)
co_hr = pd.DataFrame({"U": co_U, "Dfrom": co_D, "u": co_uv["u"], "v": co_uv["v"]}, index=co_uv.index)

print("KHSE hourly span:", kh_hr.index.min(), "to", kh_hr.index.max(), "n=", len(kh_hr))
print("COOPS hourly span:", co_hr.index.min(), "to", co_hr.index.max(), "n=", len(co_hr))

# ============================================================
# APPLY REGIME-AWARE TRANSFER TO COOPS SPEED, RECONSTRUCT u,v
# ============================================================
# Sector-dependent speed adjustment of the hourly CO-OPS record.
co_hr["U_adj"] = apply_piecewise_transfer(
    co_hr["U"].to_numpy(),
    co_hr["Dfrom"].to_numpy(),
)

# Rebuild the components from the adjusted speed while keeping the CO-OPS
# direction unchanged.
u_adj, v_adj = wind_to_uv(co_hr["U_adj"].to_numpy(), co_hr["Dfrom"].to_numpy())
co_hr["u_adj"], co_hr["v_adj"] = u_adj, v_adj

# ============================================================
# MERGE: KHSE PRIMARY, COOPS_ADJ FILL
# ============================================================
# Common hourly UTC axis spanning the earliest start and latest end of the
# two records.
t0 = min(kh_hr.index.min(), co_hr.index.min())
t1 = max(kh_hr.index.max(), co_hr.index.max())
tidx = pd.date_range(start=t0.floor(FREQ), end=t1.ceil(FREQ), freq=FREQ, tz="UTC")

out = pd.DataFrame(index=tidx)
out.index.name = "time"

# KHSE columns get a "_kh" suffix; the CO-OPS columns keep their names.
kh_cols = kh_hr[["U", "Dfrom", "u", "v"]].add_suffix("_kh")
co_cols = co_hr[["U_adj", "Dfrom", "u_adj", "v_adj"]]
out = out.join(kh_cols, how="left").join(co_cols, how="left")

# KHSE wins wherever it has a value; gaps are taken from adjusted CO-OPS.
merge_plan = (
    ("U_merged", "U_kh", "U_adj"),
    ("u_merged", "u_kh", "u_adj"),
    ("v_merged", "v_kh", "v_adj"),
)
for merged_name, primary_name, fill_name in merge_plan:
    out[merged_name] = out[primary_name].fillna(out[fill_name])

# Direction is derived from the merged components.
U_m, D_m = uv_to_wind(out["u_merged"].to_numpy(), out["v_merged"].to_numpy())
out["Dfrom_merged"] = D_m

# Source labeling (object dtype so unlabeled hours stay missing)
has_kh = out["U_kh"].notna()
has_fill = out["U_adj"].notna()
out["src_merged"] = pd.Series(index=out.index, dtype="object")
out.loc[has_kh, "src_merged"] = "KHSE"
out.loc[~has_kh & has_fill, "src_merged"] = "COOPS_fill_adj"

# ============================================================
# SUMMARY
# ============================================================
# Hour-by-hour availability masks on the merged grid.
kh_present = out["U_kh"].notna()
mg_present = out["U_merged"].notna()
# Hours that have a merged value although KHSE itself is missing.
filled = mg_present & ~kh_present

n_hours = len(out)
print("\nHourly completeness:")
print("KHSE present hours:", int(kh_present.sum()), "of", n_hours)
print("Merged present hours:", int(mg_present.sum()), "of", n_hours)
print("Hours filled from COOPS (adj):", int(filled.sum()))

# ============================================================
# WRITE CSV
# ============================================================
# Keep only the merged columns and give them their output names.
output_names = {
    "U_merged": "wind_speed_mps",
    "Dfrom_merged": "wind_dir_from_deg",
    "u_merged": "u_east_mps",
    "v_merged": "v_north_mps",
}
kept_columns = ["time", "U_merged", "Dfrom_merged", "u_merged", "v_merged", "src_merged"]
save = out.reset_index().loc[:, kept_columns].rename(columns=output_names)
save.to_csv(OUT_CSV, index=False)

print("\nWrote merged wind time series:")
print(OUT_CSV)


KHSE hourly span: 1957-03-01 05:00:00+00:00 to 2025-08-27 04:00:00+00:00 n= 600384
COOPS hourly span: 2010-04-01 00:00:00+00:00 to 2025-12-23 16:00:00+00:00 n= 137897

Hourly completeness:
KHSE present hours: 507261 of 603228
Merged present hours: 527973 of 603228
Hours filled from COOPS (adj): 20712

Wrote merged wind time series:
KHSE_merged_with_COOPSfill_regimeAware_hourly.csv
