In [1]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
from matplotlib import path
import matplotlib.pyplot as plt
import time
import xarray as xr
from shapely.geometry import Point

In [2]:
# Load ERA5 data
# Build the path with os.path.join so the notebook also runs on non-Windows
# systems (a literal ".\raw\..." only resolves on Windows).
filename = os.path.join(".", "raw", "data_stream-wave_stepType-instant.nc")

# Open the file in a context manager so the NetCDF handle is released as soon
# as the requested period has been read into memory. The slice is applied
# before .load() so only that period is read from disk.
with xr.open_dataset(filename) as era5_file:
    era5 = era5_file.sel(valid_time=slice('2020-01-01', '2023-12-31')).load()

# Display the loaded dataset as the cell output
era5

In [3]:
# Bounding-box polygon(s) used further down to select ERA5 grid points
bbox_filepath = os.path.join(".", "bbox_wave.geojson")
bbox_gdf = gpd.read_file(bbox_filepath)

# Short names used by the rest of the notebook.
# NOTE(review): `tp` is taken from ERA5 `mwp`; confirm this is the intended
# wave-period variable.
era5_name_map = {
    "valid_time": "time",
    "swh": "hs",
    "mwp": "tp",
    "longitude": "lon",
    "latitude": "lat",
}


def _has_name(dataset, name):
    """Return True if `name` is a variable, coordinate or dimension of `dataset`."""
    return name in dataset.variables or name in dataset.dims


# Only apply the renames that are still pending so that re-running this cell
# does not fail on names that were already renamed by a previous execution.
pending_renames = {
    old: new
    for old, new in era5_name_map.items()
    if _has_name(era5, old) and not _has_name(era5, new)
}
era5 = era5.rename(pending_renames)  # returns a new Dataset; no explicit copy needed

# Still fail loudly if an expected variable is genuinely absent from the file.
missing_names = [new for new in era5_name_map.values() if new not in era5.variables]
if missing_names:
    raise ValueError(f"ERA5 dataset is missing expected variables: {missing_names}")

era5 = era5.set_coords(["lon", "lat"])

In [4]:
# Filter points within the bounding box
# Build one shapely point per (lat, lon) grid node of the ERA5 dataset.
lon_grid, lat_grid = np.meshgrid(era5["lon"].values, era5["lat"].values)
points_gdf = gpd.GeoDataFrame(
    geometry=gpd.points_from_xy(lon_grid.ravel(), lat_grid.ravel()),
    crs="EPSG:4326",
)

# `unary_union` is deprecated in recent geopandas releases in favour of
# `union_all()`; fall back to the old attribute on versions that lack it.
if hasattr(bbox_gdf, "union_all"):
    bbox_geometry = bbox_gdf.union_all()
else:
    bbox_geometry = bbox_gdf.unary_union

points_within_bbox = points_gdf[points_gdf.within(bbox_geometry)]
if points_within_bbox.empty:
    raise ValueError("No ERA5 grid points fall inside the bounding box.")

# Point-wise selection: sharing the "index" dimension between both indexers
# yields one entry per selected point instead of the full lat x lon grid.
era5 = era5.sel(
    lon=xr.DataArray(points_within_bbox.geometry.x.values, dims="index"),
    lat=xr.DataArray(points_within_bbox.geometry.y.values, dims="index"),
    method="nearest",
)

  points_within_bbox = points_gdf[points_gdf.within(bbox_gdf.unary_union)]


In [5]:
# Discard every position along `index` that contains a NaN; what remains should
# be only the offshore points that lie inside the bounding box.
era5 = era5.dropna(dim="index", how="any")

In [6]:
# Make sure the filtered dataset is held in memory. Being the last expression
# of the cell, the dataset returned by load() is also shown as the cell output.
era5.load()

In [7]:
# Location of the observation site, ordered as (lat, lon)
obs_coord = [
    28.935,   # lat
    -95.295,  # lon
]

In [9]:
# Function to find the nearest index for a given value in an array
def find_nearest(array, value):
    """Return the index of the element of ``array`` that is closest to ``value``.

    Parameters
    ----------
    array : array-like
        Values to search. Lists and tuples are accepted as well as numpy
        arrays. For multi-dimensional input the returned index refers to the
        flattened array.
    value : scalar
        Target value.

    Returns
    -------
    numpy integer
        Position of the closest element. NaN entries are ignored.

    Raises
    ------
    ValueError
        If ``array`` is empty or contains only NaNs.
    """
    values = np.asarray(array)
    if values.size == 0:
        raise ValueError("find_nearest() requires a non-empty array")
    # nanargmin skips NaNs; plain argmin would return the position of a NaN.
    idx = np.nanargmin(np.abs(values - value))
    return idx

# Coordinates of every remaining ERA5 point (one entry per position along `index`)
lat_values = era5['lat'].values  # Latitude values
lon_values = era5['lon'].values  # Longitude values
if lat_values.size == 0:
    raise ValueError("No ERA5 points available to match against the observation site.")

obs_lat, obs_lon = obs_coord  # obs_coord is ordered (lat, lon)

# Great-circle (haversine) distance from each point to the observation site,
# expressed in degrees of arc. A plain Euclidean distance on raw degrees treats
# a degree of longitude as long as a degree of latitude. The longitude
# difference is wrapped into [-180, 180) so the result is the same whether the
# dataset stores longitudes as -180..180 or 0..360.
lat_rad = np.radians(lat_values)
obs_lat_rad = np.radians(obs_lat)
dlat_rad = lat_rad - obs_lat_rad
dlon_rad = np.radians((lon_values - obs_lon + 180.0) % 360.0 - 180.0)
haversine = (
    np.sin(dlat_rad / 2.0) ** 2
    + np.cos(lat_rad) * np.cos(obs_lat_rad) * np.sin(dlon_rad / 2.0) ** 2
)
# Clip guards arcsin against round-off pushing the value marginally above 1.
distances = np.degrees(2.0 * np.arcsin(np.sqrt(np.clip(haversine, 0.0, 1.0))))

# Find the index of the minimum distance
closest_idx = int(distances.argmin())

# Extract data for the closest point using the index
closest_point_data = era5.isel(index=closest_idx)

# Combine 'time', 'hs' and 'tp' into a single DataFrame
combined_df = pd.DataFrame({
    'time': closest_point_data['time'].values,
    'hs': closest_point_data['hs'].values.flatten(),  # Flatten the data
    'tp': closest_point_data['tp'].values.flatten(),
})

# Save the combined DataFrame to a CSV file
dataset_path = r"./Clean/wave.csv"
# Create the output folder on first run; to_csv does not create missing directories.
os.makedirs(os.path.dirname(dataset_path), exist_ok=True)
combined_df.to_csv(dataset_path, index=False)

print("Combined data has been successfully saved to 'wave.csv'.")

Combined data has been successfully saved to 'wave.csv'.
