In [1]:
import xarray as xr
import numpy as np

# Path to the incorrect NetCDF file
incorrect_file_path = "/media/duilio/8277-C610/OGGM/insumos/CHELSA/corrected_chelsa/tas_chelsa.nc"

# Path to the raster file for coordinates
raster_file_path = "/media/duilio/8277-C610/OGGM/insumos/CHELSA/pr/processed/CHELSA_pr_01_1979_V.2.1.tif"

# Open the incorrect NetCDF file
incorrect_ds = xr.open_dataset(incorrect_file_path)

# Open the raster file to get latitude and longitude information
raster_ds = xr.open_rasterio(raster_file_path)

# Create a new dataset with the correct format
correct_ds = xr.Dataset()

# Copy variables from the incorrect dataset to the correct dataset
correct_ds['time'] = incorrect_ds['time']
correct_ds['tas'] = xr.DataArray((incorrect_ds['data'].values.astype(float)/10)-273.15, coords=[correct_ds['time'], raster_ds['y'], raster_ds['x']], dims=['time', 'lat', 'lon'])
correct_ds=correct_ds.where(correct_ds['tas']!= np.min(correct_ds['tas'].values))
# Add attributes to the variables
correct_ds['tas'].attrs['long_name'] = "Monthly Mean Air Temperature"
correct_ds['tas'].attrs['units'] = "Celsius (scaled by a factor of 10)"
 #correct_ds['tas'].attrs['_FillValue'] =  -273.15  # Adjusted fill value

correct_ds['lat'] = raster_ds['y']
correct_ds['lon'] = raster_ds['x']

correct_ds['lat'].attrs['long_name'] = "latitude"
correct_ds['lat'].attrs['units'] = "degrees_north"

correct_ds['lon'].attrs['long_name'] = "longitude"
correct_ds['lon'].attrs['units'] = "degrees_east"

correct_ds['time'].attrs['units'] = "months since 1979-01-01 00:00:00"
correct_ds['time'].attrs['calendar'] = "proleptic_gregorian"


correct_file_path = "/media/duilio/8277-C610/OGGM/insumos/CHELSA/tas_chelsa_converted.nc"
correct_ds.to_netcdf(correct_file_path)

# Close the datasets
incorrect_ds.close()
raster_ds.close()


  raster_ds = xr.open_rasterio(raster_file_path)


In [2]:

import xarray as xr
import numpy as np

# Path to the incorrect NetCDF file
incorrect_file_path = "/media/duilio/8277-C610/OGGM/insumos/CHELSA/corrected_chelsa/pr_chelsa.nc"

# Path to the raster file for coordinates
raster_file_path = "/media/duilio/8277-C610/OGGM/insumos/CHELSA/pr/processed/CHELSA_pr_01_1979_V.2.1.tif"

# Open the incorrect NetCDF file
incorrect_ds = xr.open_dataset(incorrect_file_path)

# Open the raster file to get latitude and longitude information
raster_ds = xr.open_rasterio(raster_file_path)

# Create a new dataset with the correct format
correct_ds = xr.Dataset()

# Copy variables from the incorrect dataset to the correct dataset
correct_ds['time'] = incorrect_ds['time']
correct_ds['pr'] = xr.DataArray(incorrect_ds['data']/100, coords=[correct_ds['time'], raster_ds['y'], raster_ds['x']], dims=['time', 'lat', 'lon'])
#correct_ds=correct_ds.where(correct_ds['pr']!= 0.01)
# Add attributes to the variables
correct_ds['pr'].attrs['long_name'] = "Monthly Total Precipitation"
correct_ds['pr'].attrs['units'] = "mm per month"
correct_ds['pr'].attrs['_FillValue'] =  np.nan  # Adjusted fill value

correct_ds['lat'] = raster_ds['y']
correct_ds['lon'] = raster_ds['x']

correct_ds['lat'].attrs['long_name'] = "latitude"
correct_ds['lat'].attrs['units'] = "degrees_north"

correct_ds['lon'].attrs['long_name'] = "longitude"
correct_ds['lon'].attrs['units'] = "degrees_east"

correct_ds['time'].attrs['units'] = "months since 1979-01-01 00:00:00"
correct_ds['time'].attrs['calendar'] = "proleptic_gregorian"


correct_file_path = "/media/duilio/8277-C610/OGGM/insumos/CHELSA/pr_chelsa_converted.nc"
correct_ds.to_netcdf(correct_file_path)

# Close the datasets
incorrect_ds.close()
raster_ds.close()


  raster_ds = xr.open_rasterio(raster_file_path)


In [12]:
import xarray as xr
import numpy as np
import pandas as pd
# Path to the incorrect NetCDF file
incorrect_file_path = "/media/duilio/8277-C610/OGGM/insumos/GCM_BH5/CSIRO4_tas_monthly.nc"

# Open the incorrect NetCDF file
incorrect_ds = xr.open_dataset(incorrect_file_path)

# Add attributes to the variables
incorrect_ds['tas'].attrs['long_name'] = "Monthly Mean Air Temperature"
incorrect_ds['tas'].attrs['units'] = "Celsius"


incorrect_ds['lat'].attrs['long_name'] = "latitude"
incorrect_ds['lat'].attrs['units'] = "degrees_north"

incorrect_ds['lon'].attrs['long_name'] = "longitude"
incorrect_ds['lon'].attrs['units'] = "degrees_east"

incorrect_ds['time'].attrs['units'] = "months since 2020-01-31 00:00:00"
incorrect_ds['time'].attrs['calendar'] = "proleptic_gregorian"


correct_file_path = "/media/duilio/8277-C610/OGGM/insumos/GCM_BH5/input_cluster/CSIRO4_tas_monthly.nc"

incorrect_ds.to_netcdf(correct_file_path)
# Close the datasets
incorrect_ds.close()



ValueError: failed to prevent overwriting existing key units in attrs on variable 'time'. This is probably an encoding field used by xarray to describe how a variable is serialized. To proceed, remove this key from the variable's attributes manually.

In [26]:
def netcdf_attributes_correction(incorrect_file_path,output_file,variable):
    import xarray as xr
    # Open the incorrect NetCDF file
    incorrect_ds = xr.open_dataset(incorrect_file_path)
    # Add attributes to the variables
    if variable=='tas':
        incorrect_ds[variable].attrs['long_name'] = "Monthly Mean Air Temperature"
        incorrect_ds[variable].attrs['units'] = "Celsius"
    elif variable=='prcp':
        # Rename the 'prcp' variable to 'pr'
        incorrect_ds['pr'] = incorrect_ds['prcp'].copy()
        incorrect_ds = incorrect_ds.drop_vars('prcp')

        # Create a new 'pr' variable with the desired attributes
        pr_attrs = {
            'long_name': "Monthly Total Precipitation",
            'units': "mm per month",
            '_FillValue': np.nan
        }

        # Create a new variable with the desired attributes
        pr_variable = xr.DataArray(incorrect_ds['pr'], attrs=pr_attrs)

        # Replace the old 'pr' variable with the new one
        incorrect_ds['pr'] = pr_variable
    else: 
        print('wrong variable select prcp or tas')       
    # Longitude and latitude attributes
    incorrect_ds['lat'].attrs['long_name'] = "latitude"
    incorrect_ds['lat'].attrs['units'] = "degrees_north"
    incorrect_ds['lon'].attrs['long_name'] = "longitude"
    incorrect_ds['lon'].attrs['units'] = "degrees_east"

    # Check if 'units' key already exists in the 'time' variable's attributes
    time_attrs = incorrect_ds['time'].attrs.copy()
    time_attrs['units'] = "months since 2020-01-31 00:00:00"
    time_attrs['calendar'] = "proleptic_gregorian"
    incorrect_ds = incorrect_ds.drop_vars('time')
    # Add the 'time' variable back with the modified attributes
    incorrect_ds['time'] = xr.DataArray(np.arange(len(incorrect_ds['time'])),
                                        dims='time',
                                        attrs=time_attrs)
    incorrect_ds['time'].attrs['units'] = "months since 2020-01-31 00:00:00"
    incorrect_ds['time'].attrs['calendar'] = "proleptic_gregorian"
    incorrect_ds.to_netcdf(output_file)
    # Close the datasets
    incorrect_ds.close()

incorrect_file_path = "/media/duilio/8277-C610/OGGM/insumos/GCM_BH5/MIROC_prcp_monthly.nc"
output_file= "/media/duilio/8277-C610/OGGM/insumos/GCM_BH5/input_cluster/MIROC_pr_monthly.nc"
variable='prcp'

netcdf_attributes_correction(incorrect_file_path,output_file,variable)

In [17]:
variable='tas'
incorrect_ds[variable]