In [1]:
import numpy as np
import xarray as xr
import netCDF4 as nc

# Open the input NetCDF file for reading
input_file = nc.Dataset('/Users/tobias/Downloads/Mipas/MIPAS-E_IMK.200411.V8R_CH4_261.nc', 'r')

# Create a new NetCDF file with the same dimensions and variables as the input file
output_file = nc.Dataset('altout.nc', 'w', format='NETCDF4')

for name, dimension in input_file.dimensions.items():
    output_file.createDimension(name, len(dimension))

for name, variable in input_file.variables.items():
    if name == 'altitude':
        # Rename the altitude variable to height
        output_variable = output_file.createVariable('height', variable.dtype, variable.dimensions)
    else:
        output_variable = output_file.createVariable(name, variable.dtype, variable.dimensions)
    # Copy the variable attributes
    output_variable.setncatts({k: variable.getncattr(k) for k in variable.ncattrs()})
    # Copy the variable data
    output_variable[:] = variable[:]

height = output_file.variables['height']
altitude = output_file.createVariable('altitude', height.dtype, ('altitude',))
altitude.standard_name = height.standard_name #"altitude"
altitude.long_name = height.long_name #"Altitude over sea level"
altitude.units = height.units #"km"

# Compute the mean of the height variable along the time dimension
altitude[:] = np.nanmean(height[:], axis=1)

# Create an xarray Dataset from the output file
ds = xr.open_dataset(xr.backends.NetCDF4DataStore(output_file))

ds.load()

# Close the files
input_file.close()
output_file.close()

# Access the longitude variable in the xarray Dataset

# Resample time to monthly intervals
monthly_time = ds.time.resample(time="1M").mean().values

# Define the bin edges for latitude and longitude
lat_bins = np.linspace(-90, 90, 19)
lon_bins = np.linspace(-180, 180, 37)

# Bin the latitude and longitude values
lat_indices = np.digitize(ds.latitude.values, lat_bins)
lon_indices = np.digitize(ds.longitude.values, lon_bins)

# Initialize an empty array to store the binned CH4 values
n_altitudes = ds.height.shape[0]
n_lats = len(lat_bins) - 1
n_lons = len(lon_bins) - 1
n_time = 1
ch4_binned = np.empty((n_altitudes, n_lats, n_lons, n_time))
ch4_binned[:] = np.nan

# Loop over altitudes and fill in the binned CH4 values
for alt_idx in range(n_altitudes):
    ch4_alt = ds.target.values[alt_idx]
    for lat_idx in range(n_lats):
        for lon_idx in range(n_lons):
            mask = (lat_indices == lat_idx+1) & (lon_indices == lon_idx+1)
            if np.any(mask):
                ch4_binned[alt_idx, lat_idx, lon_idx, 0] = np.nanmean(ch4_alt[mask])
            else:
                ch4_binned[alt_idx, lat_idx, lon_idx, 0] = np.nan

# Create a new xarray dataset with the binned CH4 values
ds_binned = xr.Dataset(
    {"ch4": (["altitude", "latitude", "longitude", "time"], ch4_binned)},
    coords={
        "altitude": ds.height.values,
        "latitude": lat_bins[:-1] + np.diff(lat_bins)/2,
        "longitude": lon_bins[:-1] + np.diff(lon_bins)/2,
        "time": monthly_time
    },
    attrs=ds.attrs,
)


# Resample time to monthly intervals
monthly_time = ds.time.resample(time="1M").mean().values

# Define the bin edges for latitude and longitude
lat_bins = np.linspace(-90, 90, 19)
lon_bins = np.linspace(-180, 180, 37)

# Bin the latitude and longitude values
lat_indices = np.digitize(ds.latitude.values, lat_bins)
lon_indices = np.digitize(ds.longitude.values, lon_bins)

# Initialize an empty array to store the binned CH4 values
n_altitudes = ds.height.shape[0]
n_lats = len(lat_bins) - 1
n_lons = len(lon_bins) - 1
n_time = 1
ch4_binned = np.empty((n_altitudes, n_lats, n_lons, n_time))
ch4_binned[:] = np.nan

# Loop over altitudes and fill in the binned CH4 values
for alt_idx in range(n_altitudes):
    ch4_alt = ds.target.values[alt_idx]
    for lat_idx in range(n_lats):
        for lon_idx in range(n_lons):
            mask = (lat_indices == lat_idx+1) & (lon_indices == lon_idx+1)
            if np.any(mask):
                ch4_binned[alt_idx, lat_idx, lon_idx, 0] = np.nanmean(ch4_alt[mask])
            else:
                ch4_binned[alt_idx, lat_idx, lon_idx, 0] = np.nan

# Create a new xarray dataset with the binned CH4 values
ds_binned = xr.Dataset(
    {"ch4": (["altitude", "latitude", "longitude", "time"], ch4_binned)},
    coords={
        "altitude": ds.height.values,
        "latitude": lat_bins[:-1] + np.diff(lat_bins)/2,
        "longitude": lon_bins[:-1] + np.diff(lon_bins)/2,
        "time": monthly_time
    },
    attrs=ds.attrs,
)


MissingDimensionsError: cannot set variable 'altitude' with 2-dimensional data without explicit dimension names. Pass a tuple of (dims, data) instead.

In [2]:
import numpy as np
import xarray as xr
import netCDF4 as nc

# Open the input NetCDF file for reading
input_file = '/Users/tobias/Downloads/Mipas/MIPAS-E_IMK.200411.V8R_CH4_261.nc'

# Create a new NetCDF file with the same dimensions and variables as the input file
output_file = 'MIPAS-E_IMK.200411.V8R_CH4_261_corr.nc'


toexclude = ['altitude', 'ExcludeVar2']

with nc.Dataset(input_file,'r') as src, nc.Dataset(output_file, "w") as dst:
    # copy global attributes all at once via dictionary
    dst.setncatts(src.__dict__)
    # copy dimensions
    for name, dimension in src.dimensions.items():
        dst.createDimension(
            name, (len(dimension) if not dimension.isunlimited() else None))
    # copy all file data except for the excluded
    for name, variable in src.variables.items():
        if name == 'altitude':
        # Rename the altitude variable to height
            x = dst.createVariable('height', variable.dtype, variable.dimensions)
            dst['height'][:] = src[name][:]
            dst['height'].setncatts(src[name].__dict__)
            x = dst.createVariable('altitude', variable.dtype, ('altitude',))
            dst[name][:] = np.nanmean(src[name][:], axis=1)
            dst['height'].setncatts(src['altitude'].__dict__)
        if name not in toexclude:
            x = dst.createVariable(name, variable.datatype, variable.dimensions)
            dst[name][:] = src[name][:]
#            dst[name].setncatts(src[name].__dict__)
            # copy variable attributes all at once via dictionary
            dst[name].setncatts(src[name].__dict__)
#            for attr in variable.ncattrs():
#                x.setncattr(attr, variable.getncattr(attr))

    ds = xr.open_dataset(xr.backends.NetCDF4DataStore(dst))

    ds.load()


    
    
# Resample time to monthly intervals
monthly_time = ds.time.resample(time="1M").mean().values

# Define the bin edges for latitude and longitude
lat_bins = np.linspace(-90, 90, 19)
lon_bins = np.linspace(-180, 180, 37)

# Bin the latitude and longitude values
lat_indices = np.digitize(ds.latitude.values, lat_bins)
lon_indices = np.digitize(ds.longitude.values, lon_bins)

# Initialize an empty array to store the binned CH4 values
n_altitudes = ds.height.shape[0]
n_lats = len(lat_bins) - 1
n_lons = len(lon_bins) - 1
n_time = 1
ch4_binned = np.empty((n_altitudes, n_lats, n_lons, n_time))
ch4_binned[:] = np.nan

# Loop over altitudes and fill in the binned CH4 values
for alt_idx in range(n_altitudes):
    ch4_alt = ds.target.values[alt_idx]
    for lat_idx in range(n_lats):
        for lon_idx in range(n_lons):
            mask = (lat_indices == lat_idx+1) & (lon_indices == lon_idx+1)
            if np.any(mask):
                ch4_binned[alt_idx, lat_idx, lon_idx, 0] = np.nanmean(ch4_alt[mask])
            else:
                ch4_binned[alt_idx, lat_idx, lon_idx, 0] = np.nan

# Create a new xarray dataset with the binned CH4 values
ds_binned = xr.Dataset(
    {"ch4": (["altitude", "latitude", "longitude", "time"], ch4_binned)},
    coords={
        "altitude": ds.height.values,
        "latitude": lat_bins[:-1] + np.diff(lat_bins)/2,
        "longitude": lon_bins[:-1] + np.diff(lon_bins)/2,
        "time": monthly_time
    },
    attrs=ds.attrs,
)




MissingDimensionsError: cannot set variable 'altitude' with 2-dimensional data without explicit dimension names. Pass a tuple of (dims, data) instead.

In [None]:
ds

In [None]:
ds.target