In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import os


def _build_time_index(years, months, days, times_of_day):
    """Return a DatetimeIndex of every valid year/month/day/time combination.

    Combinations that do not exist in the calendar (e.g. Feb 30, Apr 31) are
    turned into NaT by ``errors="coerce"`` and then dropped, so the result is
    ordered by date first and time of day second.
    """
    candidates = [
        f"{y}-{m}-{d} {t}"
        for y in years
        for m in months
        for d in days
        for t in times_of_day
    ]
    parsed = pd.to_datetime(candidates, format="%Y-%m-%d %H:%M", errors="coerce")
    return parsed.dropna()


def _mock_value_range(var_name):
    """Return the ``(low, high)`` sampling bounds for *var_name*, or ``None``.

    ``None`` means no keyword matched and the caller should fall back to
    generic random data in ``[0, 1)``.
    """
    # Wave parameters must be matched FIRST: the variable
    # "significant_height_of_combined_wind_waves_and_swell" also contains
    # "wind", so testing "wind" first would hand it the signed wind-component
    # range and produce negative wave heights.
    if "wave" in var_name:
        wave_ranges = (
            ("height", (0, 10)),       # meters
            ("period", (0, 20)),       # seconds
            ("direction", (0, 360)),   # degrees
        )
        return next(
            (bounds for key, bounds in wave_ranges if key in var_name), None
        )

    general_ranges = (
        ("temperature", (250, 310)),      # Kelvin
        ("pressure", (95000, 105000)),    # Pascals
        ("wind", (-20, 20)),              # m/s, signed components
        ("precipitation", (0, 0.01)),     # meters
    )
    return next(
        (bounds for key, bounds in general_ranges if key in var_name), None
    )


def create_mock_era5_dataset(output_filename="mock_era5_data.nc"):
    """
    Creates a mock NetCDF dataset locally using xarray, mimicking the structure
    of the ERA5 single-levels dataset specified in the user's request.

    The dataset holds eleven variables on a 2.5-degree global grid for every
    day of 2001 at 12:00 and 18:00, filled with uniformly distributed random
    values in a plausible range for each variable. All values are float64, so
    the default configuration needs several hundred megabytes of memory and
    disk space.

    Saving is best-effort: if writing the file fails, the error is printed and
    the in-memory dataset is still returned.

    Args:
        output_filename (str): The name of the NetCDF file to save.

    Returns:
        xarray.Dataset: The generated dataset with dimensions
        ``(time, latitude, longitude)``.
    """
    # Variables, year, months, days, and times from the user's request
    variables = [
        "10m_u_component_of_wind",
        "10m_v_component_of_wind",
        "2m_dewpoint_temperature",
        "2m_temperature",
        "mean_sea_level_pressure",
        "mean_wave_direction",
        "mean_wave_period",
        "sea_surface_temperature",
        "significant_height_of_combined_wind_waves_and_swell",
        "surface_pressure",
        "total_precipitation",
    ]
    year = ["2001"]
    months = [f"{m:02d}" for m in range(1, 13)]
    days = [f"{d:02d}" for d in range(1, 32)]
    times_of_day = ["12:00", "18:00"]

    # Time coordinate: every calendar-valid date combined with each time of day
    times = _build_time_index(year, months, days, times_of_day)

    # Spatial coordinates at a coarse resolution for demonstration purposes.
    # The latitude stop value lies just past -90 so that -90 itself is included.
    latitudes = np.arange(90, -90.25, -2.5)   # 90N to 90S, step of 2.5 degrees
    longitudes = np.arange(0, 360, 2.5)       # 0E to 357.5E, step of 2.5 degrees

    time_dim = len(times)
    lat_dim = len(latitudes)
    lon_dim = len(longitudes)
    shape = (time_dim, lat_dim, lon_dim)

    print(f"Generating mock data for {time_dim} time steps, {lat_dim} latitudes, {lon_dim} longitudes.")

    coords = {
        "time": times,
        "latitude": latitudes,
        "longitude": longitudes,
    }

    # Generate one random field per variable
    data_vars = {}
    for var_name in variables:
        bounds = _mock_value_range(var_name)
        if bounds is None:
            # No keyword matched: generic random data in [0, 1)
            mock_data = np.random.rand(*shape)
        else:
            low, high = bounds
            mock_data = np.random.uniform(low, high, shape)

        data_vars[var_name] = xr.DataArray(
            mock_data,
            coords=coords,
            dims=["time", "latitude", "longitude"],
            name=var_name,
            attrs={"units": "mock_units", "long_name": f"Mock {var_name}"},
        )

    # Assemble the Dataset with global attributes
    dataset = xr.Dataset(
        data_vars=data_vars,
        coords=coords,
        attrs={
            "title": "Mock ERA5 Single Levels Data",
            "source": "Generated locally with xarray, not from CDS",
            "Conventions": "CF-1.6",
            "history": f"Created on {pd.Timestamp.now()}",
        },
    )

    # Save the dataset to a NetCDF file. This is deliberately best-effort:
    # a failed write is reported but does not prevent returning the dataset.
    try:
        dataset.to_netcdf(output_filename)
        print(f"\nSuccessfully created mock NetCDF file: {output_filename}")
        print(f"File size: {os.path.getsize(output_filename) / (1024*1024):.2f} MB")
    except Exception as e:
        print(f"\nError saving NetCDF file: {e}")

    return dataset


# --- Script entry point ---
if __name__ == "__main__":
    # Build the mock dataset and write it to the default output file.
    mock_ds = create_mock_era5_dataset()

    # To verify the result, re-open the written file and print its summary:
    # print("\nLoading the created file to verify:")
    # loaded_ds = xr.open_dataset("mock_era5_data.nc")
    # print(loaded_ds)

Generating mock data for 730 time steps, 73 latitudes, 144 longitudes.

Successfully created mock NetCDF file: mock_era5_data.nc
File size: 644.03 MB
