In [65]:
import os
import xarray as xr
import numpy as np
from netCDF4 import Dataset
from collections import defaultdict
from wrf import getvar, ALL_TIMES, interplevel, to_np, latlon_coords, get_cartopy, extract_times, ll_to_xy, to_np
from functions.listwrfouts import listWrfouts
from functions.injectmissincoords import inject_missing_coordinates_from_geo_em
from functions.plots import plot_T2_15facet, plot_PSFC_15facet, plot_TP_15facet, plot_WS10_15facet, plot_RH2_15facet
from functions.get_point_data import get_point_data
from functions.errors import *
from functools import partial
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import cartopy.crs as ccrs
import gc
import pandas as pd
from matplotlib.colors import BoundaryNorm
import cmaps
from datetime import timedelta
import matplotlib.dates as mdates
import re

In [None]:
def process_set(set_name, base_dir, geo_em_base):
    """Open the d01/d02 wrfout files that belong to one simulation set.

    ``listWrfouts`` is called with the archive
    ``<base_dir>/<set_name>/<set_name>_wrfouts.tar.gz`` and the sibling
    ``temp_extract_dir`` folder. Each returned path whose basename contains
    ``"d01"`` or ``"d02"`` is passed to
    ``inject_missing_coordinates_from_geo_em`` together with
    ``<geo_em_base>/geo_em.<domain>.nc`` and is then opened read-only.
    Paths matching neither domain tag are ignored.

    Parameters
    ----------
    set_name : str
        Name of the set; used both as folder name and archive prefix.
    base_dir : str
        Directory that contains one sub-folder per set.
    geo_em_base : str
        Directory that contains the ``geo_em.d01.nc`` / ``geo_em.d02.nc`` files.

    Returns
    -------
    tuple
        ``(set_name, datasets)`` where ``datasets`` is a ``defaultdict(list)``
        mapping the domain tag to its ``Dataset`` objects, ordered by path.
        The datasets are returned open; closing them is left to the caller.
    """
    set_dir = os.path.join(base_dir, set_name)
    archive_path = os.path.join(set_dir, f"{set_name}_wrfouts.tar.gz")
    extract_dir = os.path.join(set_dir, "temp_extract_dir")

    # Announce the listing step before it runs so the log reflects what is
    # actually in progress.
    print(f"Listing wrfouts from {archive_path}")
    wrfout_paths = listWrfouts(archive_path, extract_dir)

    domain_files = defaultdict(list)
    for path in wrfout_paths:
        name = os.path.basename(path)
        # First matching tag wins, i.e. "d01" takes precedence over "d02".
        for dom in ("d01", "d02"):
            if dom in name:
                domain_files[dom].append(path)
                break

    result = defaultdict(list)
    print(f"Listed wrfouts from {archive_path}")

    for dom, paths in domain_files.items():
        paths.sort()
        geo_em_path = os.path.join(geo_em_base, f"geo_em.{dom}.nc")
        for path in paths:
            print(f"Injecting missing values into {path}, from {geo_em_path}")
            inject_missing_coordinates_from_geo_em(path, geo_em_path)
        result[dom] = [Dataset(path, mode="r") for path in paths]

    return set_name, result

# Read WRF outputs 

In [3]:
# Open every d01/d02 wrfout of SET1..SET15 directly from the paths returned
# by listWrfouts (no geo_em coordinate injection in this cell).
base_dir = r"D:\istanbul_wrfouts\20012024"
sets = [f"SET{i}" for i in range(1, 16)]
df = {}

# The loop variable is a notebook-wide global, so it is called `set_name`
# instead of `set` to keep the builtin usable in later cells.
for set_name in sets:
    df[set_name] = {"d01": [], "d02": []}

    wrfouts_list = listWrfouts(
        os.path.join(base_dir, set_name, f"{set_name}_wrfouts.tar.gz"),
        os.path.join(base_dir, set_name, "temp_extract_dir"),
    )
    for f in wrfouts_list:
        if "d01" in os.path.basename(f):
            df[set_name]["d01"].append(Dataset(f))
        elif "d02" in os.path.basename(f):
            df[set_name]["d02"].append(Dataset(f))

    # One collection pass per set rather than one per opened d02 file.
    gc.collect()

In [None]:
# Build `df`: set name -> {domain -> [open Dataset, ...]} using process_set,
# which also injects the geo_em coordinates into each wrfout file.
base_dir = r"D:\istanbul_wrfouts\20012024"
geo_em_base = r"D:\istanbul_wrfouts"
sets = [f"SET{i}" for i in range(1, 16)]

df = {}

# `set_id` instead of `set`: a module-level loop variable named `set` would
# shadow the builtin for every later cell.
for set_id in sets:
    set_name, data = process_set(set_id, base_dir, geo_em_base)
    df[set_name] = data
    gc.collect()

In [None]:
# Smoke test: call get_point_data for the d02 datasets of every set and report
# the sets for which it fails. The return value is intentionally discarded.
# NOTE(review): the two numeric arguments look like latitude/longitude — confirm
# against get_point_data's signature.
for i in range(1, 16):
    set_name = f"SET{i}"
    try:
        get_point_data(df[set_name]["d02"], "t2", 41.223333, 29.165833)
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl-C still interrupts the
        # cell, and show why the set failed instead of only its name.
        print(f"{set_name}: {type(exc).__name__}: {exc}")

: 

# Read CSV files

In [None]:
# Synthetic 72-step series: a sinusoid with a 24-step period that swings
# between 10.0 and 25.0 around 17.5, rounded to two decimals. It is the `T2`
# array handed to the compute_* error metrics elsewhere in this notebook.
hours = np.arange(72)
T2 = (17.5 + 7.5 * np.sin(2 * np.pi * hours / 24 - np.pi / 2)).round(2)

In [66]:
def errors_to_csv(case: str, variable: str) -> None:
    """Write per-physics MAE / RMSE / Bias for one case and variable to a CSV.

    For every entry of ``sets_mapping["physics"]`` the ``csv_files`` folder in
    the current working directory is searched for ``*.csv`` files whose name
    contains both ``case_<case>`` and the physics name. Each match is read,
    keyed by the station id found in its file name (the text between
    ``station_`` and the next ``.``), and its ``variable`` column is scored
    with ``compute_mae`` / ``compute_rmse`` / ``compute_bias`` against the
    module-level ``T2`` array. The per-station scores are averaged (NaNs
    skipped) and rounded to four decimals.

    The summary is written once to ``error_csvs/<case>_<variable>.csv`` under
    the current working directory, with characters that are invalid in file
    names replaced by ``_`` in ``variable``.

    TODO (translated from the original note): observation data still has to be
    added. Each station will get its own observations; maybe a new mapping can
    be applied?

    Parameters
    ----------
    case : str
        Case identifier as it appears after ``case_`` in the CSV file names.
    variable : str
        Column to evaluate in every station CSV.

    Notes
    -----
    Relies on ``sets_mapping``, ``T2`` and the ``compute_*`` functions being
    present in the notebook namespace; none of them is defined here.
    NOTE(review): ``compute_*`` presumably come from ``functions.errors`` —
    confirm.
    """
    csv_files_path = os.path.join(os.getcwd(), "csv_files")
    error_csvs_path = os.path.join(os.getcwd(), "error_csvs")
    safe_variable = re.sub(r'[<>:"/\\|?*]', '_', variable)

    # The directory content does not depend on the physics option, so list it
    # once; sorting makes the "last file wins" rule for duplicate ids stable.
    if os.path.isdir(csv_files_path):
        csv_filenames = sorted(os.listdir(csv_files_path))
    else:
        csv_filenames = []

    summary_rows = []
    for physics in sets_mapping["physics"]:
        station_frames = {}
        for filename in csv_filenames:
            if f"case_{case}" in filename and f"{physics}" in filename and filename.endswith(".csv"):
                # Parse the id from the bare file name: a "station_" or "."
                # in a parent directory must not leak into the station id.
                station_id = filename.split("station_")[1].split(".")[0]
                station_frames[station_id] = pd.read_csv(os.path.join(csv_files_path, filename))

        results_for_each_physics = pd.DataFrame(
            [
                {
                    "StationId": str(station_id),
                    "MAE": compute_mae(frame[variable], T2),
                    "RMSE": compute_rmse(frame[variable], T2),
                    "Bias": compute_bias(frame[variable], T2),
                }
                for station_id, frame in station_frames.items()
            ],
            columns=["StationId", "MAE", "RMSE", "Bias"],
        )

        # pandas' mean skips NaNs and yields NaN, without a warning, when a
        # physics option has no usable station.
        station_means = results_for_each_physics[["MAE", "RMSE", "Bias"]].astype(float).mean()
        summary_rows.append({
            "Physics": physics,
            "MAE": round(station_means["MAE"], 4),
            "RMSE": round(station_means["RMSE"], 4),
            "Bias": round(station_means["Bias"], 4),
        })

    results = pd.DataFrame(summary_rows, columns=["Physics", "MAE", "RMSE", "Bias"])

    # Write the finished table a single time instead of after every physics.
    os.makedirs(error_csvs_path, exist_ok=True)
    results.to_csv(os.path.join(error_csvs_path, f"{case}_{safe_variable}.csv"), index=False, float_format="%.4f")

In [None]:
# Per-physics MAE / RMSE / Bias of the "2_Metre_Sicaklik(C)" column for one
# case, scored against the module-level `T2` array and written to
# model_evaluation_results.csv in the current working directory.
csv_path = os.path.join(os.getcwd(), "csv_files")
case = "20241123"

# Directory content is independent of the physics option: list it once.
# Sorting keeps the "last file wins" rule for duplicate station ids stable.
csv_filenames = sorted(os.listdir(csv_path)) if os.path.isdir(csv_path) else []

summary_rows = []
for physics in sets_mapping["physics"]:
    # Named `station_frames`, not `df`, so the module-level `df` holding the
    # opened WRF datasets is not overwritten by this cell.
    station_frames = {}
    for filename in csv_filenames:
        if f"case_{case}" in filename and f"{physics}" in filename and filename.endswith(".csv"):
            # Parse the id from the bare file name so that a "station_" or "."
            # in a parent directory cannot end up in the station id.
            station_id = filename.split("station_")[1].split(".")[0]
            station_frames[station_id] = pd.read_csv(os.path.join(csv_path, filename))

    results_for_each_physics = pd.DataFrame(
        [
            {
                "StationId": str(station_id),
                "MAE": compute_mae(frame["2_Metre_Sicaklik(C)"], T2),
                "RMSE": compute_rmse(frame["2_Metre_Sicaklik(C)"], T2),
                "Bias": compute_bias(frame["2_Metre_Sicaklik(C)"], T2),
            }
            for station_id, frame in station_frames.items()
        ],
        columns=["StationId", "MAE", "RMSE", "Bias"],
    )

    # pandas' mean skips NaNs and returns NaN, without a warning, when no
    # station matched this physics option.
    station_means = results_for_each_physics[["MAE", "RMSE", "Bias"]].astype(float).mean()
    summary_rows.append({
        "Physics": physics,
        "MAE": round(station_means["MAE"], 4),
        "RMSE": round(station_means["RMSE"], 4),
        "Bias": round(station_means["Bias"], 4),
    })

results = pd.DataFrame(summary_rows, columns=["Physics", "MAE", "RMSE", "Bias"])

# Written once after the loop instead of being rewritten for every physics.
results.to_csv("model_evaluation_results.csv", index=False, float_format="%.4f")

In [67]:
# Produce one error CSV per evaluated column for a single case.
case = "20241123"
variables = [
    "2_Metre_Sicaklik(C)",
    "10_Metre_Ruzgar(m/s)",
    "Yagis(mm)",
]
for variable in variables:
    errors_to_csv(variable=variable, case=case)