## Libraries Used

In [1]:
import ee
import os
import wxee
import logging
import xarray as xr
import numpy as np
from datetime import datetime, timedelta

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
ee.Initialize()

## Base Parameters

In [3]:
base_dir = "/Users/dhruvyadav/Desktop/Research/Manmeet Sir Research/AI NWS/Data Collection"

In [4]:
surface_variables = [
    "temperature_2m_above_ground",
    "specific_humidity_2m_above_ground",
    "relative_humidity_2m_above_ground",
    "u_component_of_wind_10m_above_ground",
    "v_component_of_wind_10m_above_ground",
    "precipitable_water_entire_atmosphere"
]

In [5]:
region_polygon = ee.Geometry.Rectangle([-99, 29, -97, 31])

This is the Region Polygon

<img src="image copy.png" height="400">

In [6]:
forecast_hours = [0, 6, 12, 18]
scale_km = 25000
num_cores = 8

In [11]:
start_date = datetime(2025, 2, 1)
end_date   = datetime(2025, 3, 1)

In [12]:
year_str = start_date.strftime("%Y")       
month_name = start_date.strftime("%B")

year_dir = os.path.join(base_dir, year_str)
month_dir = os.path.join(year_dir, month_name)
data_dir = os.path.join(month_dir, "Data")

os.makedirs(data_dir, exist_ok=True)

## Logging

In [13]:
log_file = os.path.join(month_dir, "run.log")

for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)


logger = logging.getLogger("logger")
logger.setLevel(logging.INFO)
logger.propagate = False

fh = logging.FileHandler(log_file, mode="w")
fh.setLevel(logging.INFO)
logger.addHandler(fh)

sh = logging.StreamHandler()
sh.setLevel(logging.INFO)
logger.addHandler(sh)

logger.info("==============================================")
logger.info(f"Starting processing for {month_name} {year_str}")
logger.info(f"Data directory: {data_dir}")
logger.info(f"Log file: {log_file}")

Starting processing for February 2025
Starting processing for February 2025
Data directory: /Users/dhruvyadav/Desktop/Research/Manmeet Sir Research/AI NWS/Data Collection/2025/February/Data
Data directory: /Users/dhruvyadav/Desktop/Research/Manmeet Sir Research/AI NWS/Data Collection/2025/February/Data
Log file: /Users/dhruvyadav/Desktop/Research/Manmeet Sir Research/AI NWS/Data Collection/2025/February/run.log
Log file: /Users/dhruvyadav/Desktop/Research/Manmeet Sir Research/AI NWS/Data Collection/2025/February/run.log


## Download Data

In [14]:
current = start_date
while current < end_date:
    next_day = current + timedelta(days=1)
    current_str = current.strftime("%Y-%m-%d")
    next_day_str = next_day.strftime("%Y-%m-%d")
    
    logger.info(f"\nProcessing date: {current_str}")
    xr_list = []  # Container for xarray datasets for each forecast hour

    for hour in forecast_hours:
        logger.info(f"  Processing forecast hour: {hour}")
        subset = (
            ee.ImageCollection("NOAA/GFS0P25")
            .filterDate(current_str, next_day_str)
            .filterBounds(region_polygon)
            .select(surface_variables)
            .filterMetadata("forecast_hours", "equals", hour)
        )
        
        count_hour = subset.size().getInfo()
        logger.info(f"  Images found for forecast hour {hour}: {count_hour}")
        
        if count_hour == 0:
            logger.info(f"  No data for forecast hour {hour} on {current_str}, skipping.")
            continue
        
        try:
            xr_hour = subset.wx.to_xarray(
                region=region_polygon,
                scale=scale_km,
                progress=True,
                num_cores=num_cores,
                masked=True
            )

            xr_hour = xr_hour.assign_coords(forecast_hour=hour)
            xr_list.append(xr_hour)
            logger.info(f"  xarray dataset for forecast hour {hour} info:\n{xr_hour}")
        except Exception as e:
            logger.error(f"  Error downloading data for forecast hour {hour} on {current_str}: {e}")
            continue

    if len(xr_list) == 0:
        logger.info(f"No valid forecast data found for {current_str}, skipping day.")
        current = next_day
        continue

    # Concatenate individual forecast hour datasets along a new 'forecast_hour' dimension
    combined_xr = xr.concat(xr_list, dim="forecast_hour")
    logger.info(f"\nCombined xarray dataset info for {current_str}:\n{combined_xr}")
    
    # Saving the NetCDF file
    out_file = os.path.join(data_dir, f"gfs_{current_str}_combined.nc")
    combined_xr.to_netcdf(out_file)
    logger.info(f"Saved combined NetCDF file: {out_file}")
    
    current = next_day

logger.info(f"Finished processing for {month_name} {year_str}")
logger.info("==============================================")


Processing date: 2025-02-01

Processing date: 2025-02-01
  Processing forecast hour: 0
  Processing forecast hour: 0
  Images found for forecast hour 0: 4
  Images found for forecast hour 0: 4
Requesting data: 100%|██████████| 4/4 [00:02<00:00,  1.65it/s]
Downloading data: 100%|██████████| 4/4 [00:01<00:00,  2.30it/s]
  xarray dataset for forecast hour 0 info:
<xarray.Dataset> Size: 19kB
Dimensions:                               (time: 4, x: 10, y: 10)
Coordinates:
  * time                                  (time) datetime64[ns] 32B 2025-02-0...
  * x                                     (x) float64 80B -98.93 ... -96.91
  * y                                     (y) float64 80B 31.1 30.88 ... 29.08
    spatial_ref                           int64 8B 0
    forecast_hour                         int64 8B 0
Data variables:
    temperature_2m_above_ground           (time, y, x) float64 3kB 14.94 ... ...
    specific_humidity_2m_above_ground     (time, y, x) float64 3kB 0.003361 ....
    relat