In [165]:
import os
import xarray as xr
import numpy as np
from netCDF4 import Dataset
from collections import defaultdict
from wrf import getvar, ALL_TIMES, interplevel, to_np, latlon_coords, get_cartopy, extract_times, ll_to_xy, to_np
from functions.listwrfouts import listWrfouts
from functions.injectmissincoords import inject_missing_coordinates_from_geo_em
from functions.plots import plot_T2_15facet, plot_PSFC_15facet, plot_TP_15facet, plot_WS10_15facet, plot_RH2_15facet
from functools import partial
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import cartopy.crs as ccrs
import gc
import pandas as pd
from matplotlib.colors import BoundaryNorm
import cmaps
from datetime import timedelta

In [None]:
def process_set(set_name, base_dir, geo_em_base):
    """Open every d01/d02 wrfout file belonging to one simulation set.

    The archive ``<base_dir>/<set_name>/<set_name>_wrfouts.tar.gz`` and a
    ``temp_extract_dir`` folder next to it are handed to ``listWrfouts``.
    The paths it returns are grouped by the domain tag found in the file
    name, sorted, passed one by one through
    ``inject_missing_coordinates_from_geo_em`` together with
    ``<geo_em_base>/geo_em.<domain>.nc``, and finally opened read-only.

    Parameters
    ----------
    set_name : str
        Name of the set; used both as sub-folder and as archive prefix.
    base_dir : str
        Folder that contains one sub-folder per set.
    geo_em_base : str
        Folder that contains the ``geo_em.d01.nc`` / ``geo_em.d02.nc`` files.

    Returns
    -------
    tuple
        ``(set_name, result)`` where ``result`` is a ``defaultdict(list)``
        mapping ``"d01"`` / ``"d02"`` to the list of open ``Dataset`` handles,
        in sorted path order. The handles are left open; the caller owns them.

    Raises
    ------
    Exception
        Whatever ``listWrfouts``, the injection helper or ``Dataset`` raise.
        Handles opened before the failure are closed before re-raising.
    """
    archive_path = os.path.join(base_dir, set_name, f"{set_name}_wrfouts.tar.gz")
    extract_dir = os.path.join(base_dir, set_name, "temp_extract_dir")

    # Announce the step *before* running it, so the log shows what is in
    # progress while the (potentially slow) listing is still working.
    print(f"Listing wrfouts from {archive_path}")
    files = listWrfouts(archive_path, extract_dir)
    print(f"Listed wrfouts from {archive_path}")

    # Group by domain tag; files without a d01/d02 tag in their name are ignored.
    domain_files = defaultdict(list)
    for f in files:
        fname = os.path.basename(f)
        if "d01" in fname:
            domain_files["d01"].append(f)
        elif "d02" in fname:
            domain_files["d02"].append(f)

    result = defaultdict(list)
    try:
        for dom, paths in domain_files.items():
            # Sorted paths keep the per-domain handle list in a stable order.
            paths.sort()
            geo_em_path = os.path.join(geo_em_base, f"geo_em.{dom}.nc")
            for path in paths:
                print(f"Injecting missing values into {path}, from {geo_em_path}")
                inject_missing_coordinates_from_geo_em(path, geo_em_path)
            # Open only after every file of the domain has been patched.
            for path in paths:
                result[dom].append(Dataset(path, mode="r"))
    except Exception:
        # Do not leak the handles that were opened before the failure.
        for handles in result.values():
            for handle in handles:
                handle.close()
        raise

    return set_name, result

In [2]:
# Load the wrfout files of all 15 sets of the 23112024 case into `df`,
# keyed as df[<set name>][<domain>] -> list of open netCDF4 Dataset handles.
base_dir = r"D:\istanbul_wrfouts\23112024"
sets = [f"SET{i}" for i in range(1, 16)]
df = {}

# The loop variable is deliberately not called `set`: that would replace the
# built-in `set` type for every cell executed afterwards.
for set_name in sets:
    df[set_name] = {"d01": [], "d02": []}

    wrfouts_list = listWrfouts(
        os.path.join(base_dir, set_name, f"{set_name}_wrfouts.tar.gz"),
        os.path.join(base_dir, set_name, "temp_extract_dir"),
    )
    # Sorted (as process_set does) so each domain's handles are in a stable
    # order that does not depend on how listWrfouts happens to return them.
    for f in sorted(wrfouts_list):
        fname = os.path.basename(f)
        if "d01" in fname:
            df[set_name]["d01"].append(Dataset(f))
        elif "d02" in fname:
            df[set_name]["d02"].append(Dataset(f))

    # One collection per set is enough; it was previously buried in the d02 branch.
    gc.collect()

In [None]:
# Load sets of the 20012024 case through process_set, which also patches the
# wrfout files with coordinates from the geo_em files before opening them.
# Only SET12..SET15 are requested here, so `df` holds just those four keys.
base_dir = r"D:\istanbul_wrfouts\20012024"
geo_em_base = r"D:\istanbul_wrfouts"
sets = [f"SET{i}" for i in range(12, 16)]

df = {}

# The loop variable is deliberately not called `set`: that would replace the
# built-in `set` type for every cell executed afterwards.
for requested_set in sets:
    set_name, data = process_set(requested_set, base_dir, geo_em_base)
    df[set_name] = data
    gc.collect()

In [7]:
# Station table. header=None makes pandas read the first line as data, so the
# column names are supplied explicitly through `names`.
station_coords = pd.read_csv(
    "station_coords.csv",
    sep=",",
    header=None,
    names=["StationName", "Latitude", "Longitude", "StationID"],
)

In [None]:
def get_point_data(wrfin, var, lat, lon):
    """Return the time series of one surface variable at a lat/lon point.

    The grid indices for the point come from ``ll_to_xy`` and the fields from
    ``getvar`` with ``timeidx=ALL_TIMES``; ``wrfin`` is passed to both
    unchanged. Timestamps are always taken from ``wrfin`` itself, so the
    index matches the data regardless of which set or domain is passed in.

    Parameters
    ----------
    wrfin
        WRF input accepted by ``wrf.getvar`` / ``wrf.ll_to_xy``.
    var : str
        One of ``"tp"``, ``"t2"``, ``"psfc"``, ``"ws10"``.
    lat, lon : float
        Latitude and longitude of the point.

    Returns
    -------
    pandas.Series
        Indexed by every output time except the last one.

        * ``"tp"``   - RAINC + RAINNC truncated to int and differenced along
          time, i.e. the amount between a timestamp and the next; name ``"tp"``.
        * ``"t2"``   - T2 minus 273.15, rounded to 2 decimals; name ``"T2"``.
        * ``"psfc"`` - PSFC divided by 100, rounded to int; name ``"PSFC"``.
        * ``"ws10"`` - first component of ``wspd_wdir10``, rounded to int;
          name ``"WS10"``.

    Raises
    ------
    ValueError
        If ``var`` is not one of the supported names.
    """
    supported_vars = ["tp", "t2", "psfc", "ws10"]
    # Validate first so an unsupported name fails before any file is read.
    if var not in supported_vars:
        raise ValueError(f"Variable '{var}' is not supported. Supported variables are: {supported_vars}")

    x_y = ll_to_xy(wrfin, latitude=lat, longitude=lon)

    # Timestamps must come from the same input as the values. The last one is
    # dropped so that every variable shares the index of the differenced "tp".
    time_index = getvar(wrfin, "times", timeidx=ALL_TIMES).to_index()[:-1]

    if var == "tp":
        rainc = getvar(wrfin, "RAINC", timeidx=ALL_TIMES)
        rainnc = getvar(wrfin, "RAINNC", timeidx=ALL_TIMES)
        accumulated = (rainc + rainnc)[:, x_y[1], x_y[0]]
        # NOTE(review): astype(int) truncates the accumulated totals before
        # differencing, so sub-unit amounts are lost - confirm this is wanted.
        deacc_values = np.diff(to_np(accumulated).astype(int), axis=0)
        return pd.Series(deacc_values, index=time_index, name="tp")

    if var == "ws10":
        # wspd_wdir10 stacks speed and direction; index 0 selects the first.
        values = getvar(wrfin, "wspd_wdir10", timeidx=ALL_TIMES)[0]
    else:
        values = getvar(wrfin, var.upper(), timeidx=ALL_TIMES)
    point_values = to_np(values[:, x_y[1], x_y[0]])

    if var == "t2":
        converted = np.round(point_values - 273.15, 2)
    elif var == "psfc":
        converted = np.round(point_values / 100).astype(int)
    else:  # "ws10"
        converted = np.round(point_values).astype(int)

    return pd.Series(converted[:-1], time_index, name=var.upper())

In [176]:
# Write one CSV per (loaded set, station) with the d02 point time series.
csv_path = os.path.join(os.getcwd(), "csv_files")
os.makedirs(csv_path, exist_ok=True)

column_order = ["İstasyon_Adi", "İstasyon_Numarasi", "Enlem", "Boylam", "Tarih",
                "Yagis(mm)", "2_Metre_Sicaklik(C)", "Yüzey_Basinci(mb)", "10_Metre_Rüzgar(m/s)"]

# Iterate over the sets that are actually present in `df` instead of a fixed
# SET1..SET15 list, so the cell also works when only some sets were loaded.
# The loop variable is not called `set`, to keep the built-in usable.
for set_name in df:
    d02_files = df[set_name]["d02"]
    first_file = d02_files[0]

    # Case label: date of this set's first output time plus one day
    # (presumably the run starts the day before the event - TODO confirm).
    case = pd.to_datetime(getvar(first_file, "times", timeidx=0).values)
    case = (case + timedelta(days=1)).strftime("%Y%m%d")

    for _, row in station_coords.iterrows():
        df_name = (
            f"case_{case}_mp{first_file.MP_PHYSICS}_pbl{first_file.BL_PBL_PHYSICS}"
            f"_station_{row['StationID']}.csv"
        )
        lat, lon = row["Latitude"], row["Longitude"]

        # Same order as the column names assigned right below.
        series = [get_point_data(d02_files, var, lat, lon) for var in ("tp", "t2", "psfc", "ws10")]
        df_point = pd.concat(series, axis=1)
        df_point.columns = ["Yagis(mm)", "2_Metre_Sicaklik(C)", "Yüzey_Basinci(mb)", "10_Metre_Rüzgar(m/s)"]

        df_point["İstasyon_Adi"] = row["StationName"]
        df_point["İstasyon_Numarasi"] = row["StationID"]
        df_point["Enlem"] = row["Latitude"]
        df_point["Boylam"] = row["Longitude"]

        # The time index becomes a regular column, renamed from "Time" to "Tarih".
        df_point = df_point.reset_index().rename(columns={"Time": "Tarih"})
        df_point = df_point[column_order]

        # NOTE(review): the default index=True also writes the 0..n-1 row
        # number as an unnamed first column - confirm consumers expect it.
        df_point.to_csv(os.path.join(csv_path, df_name), sep=",")