<a href="https://colab.research.google.com/github/ameyagumaste/Python_NETCDF_Arctic/blob/main/merge_CDF_Files_Apply_CF_Conventions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Merge Arctic NetCDF Files and Apply CF Conventions

import os
from datetime import datetime as dt
from datetime import timezone

import numpy as np
import xarray as xr

# 0. Create sample NetCDF files with simulated data to be merged
#
# One file is written per time step; each holds salinity, temperature and
# chlorophyll-a on a (depth, latitude, longitude) grid with a length-1 time
# dimension so the files can later be concatenated along 'time'.
output_folder = './arctic_data/'
os.makedirs(output_folder, exist_ok=True)

latitude = [78.5, 79.0, 79.5]
longitude = [30.0, 31.0]
depth = [0, 10, 20]

time = np.datetime64("2022-01-01")
time_range = [time + np.timedelta64(i, 'D') for i in range(3)]

# Seed the generator so Restart & Run All reproduces the same sample data.
SEED = 42
rng = np.random.default_rng(SEED)

# Derive the array shape from the coordinate lists so the two cannot drift apart.
grid_dims = ['depth', 'latitude', 'longitude']
grid_shape = (len(depth), len(latitude), len(longitude))

for i, t in enumerate(time_range):  # Create one sample file per time step
    salinity = rng.uniform(30, 35, size=grid_shape)
    temperature = rng.uniform(-2, 5, size=grid_shape)
    chlorophyll = rng.uniform(0, 2, size=grid_shape)

    ds = xr.Dataset(
        data_vars={
            'salinity': (grid_dims, salinity),
            'temperature': (grid_dims, temperature),
            'chlorophyll_a': (grid_dims, chlorophyll),
        },
        coords={
            'depth': depth,
            'latitude': latitude,
            'longitude': longitude,
            'time': t
        }
    )
    # Promote the scalar time coordinate to a length-1 dimension.
    ds = ds.expand_dims('time')
    # os.path.join avoids the doubled separator the f-string produced
    # (output_folder already ends with '/').
    ds.to_netcdf(os.path.join(output_folder, f"sample_data_{i+1}.nc"))

# 1. Define folder and list of NetCDF files to merge
# os.listdir returns entries in arbitrary order; sort the names so the merge
# input order is deterministic across platforms and runs.
nc_files = sorted(f for f in os.listdir(output_folder) if f.endswith('.nc'))

# 2. Load and merge NetCDF datasets
# combine='nested' concatenates the files in exactly the order they are passed,
# so the paths are built from the sorted list.
nc_paths = [os.path.join(output_folder, f) for f in nc_files]
merged_ds = xr.open_mfdataset(nc_paths, combine='nested', concat_dim='time')

# File-name order is lexicographic (e.g. '..._10.nc' sorts before '..._2.nc'),
# so sort on the time coordinate itself to guarantee a chronological axis.
merged_ds = merged_ds.sortby('time')

# 3. Apply CF-convention compliant attributes
# Replaces any global attributes carried over from the input files with a
# single set of ACDD/CF discovery metadata for the merged product.
merged_ds.attrs = {
    'title': 'Merged Arctic Observational Dataset',
    'summary': 'Combined NetCDF datasets for Arctic research with CF-compliant metadata.',
    'creator_name': 'Ameya Gumaste',
    'creator_email': 'ameyagumaste@gmail.com',
    'institution': 'UVSQ - Université Paris-Saclay',
    # datetime.utcnow() is deprecated since Python 3.12 and returns a naive
    # datetime; an aware UTC "now" yields the same formatted string, so the
    # literal 'Z' suffix stays truthful.
    'date_created': dt.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    'Conventions': 'ACDD-1.3, CF-1.8',
    'project': 'Arctic Climate Data Integration',
    'license': 'https://creativecommons.org/licenses/by/4.0/'
}

# Add attributes for common Arctic coordinates
# Table of CF metadata per coordinate name. Every coordinate declares its
# axis (X/Y/Z/T) so the time axis is identified as consistently as the spatial
# ones. No 'units' is set on time: this script leaves the time units to
# whatever xarray writes when the dataset is saved.
coordinate_attrs = {
    'latitude': {
        'standard_name': 'latitude', 'long_name': 'Latitude', 'units': 'degrees_north', 'axis': 'Y'
    },
    'longitude': {
        'standard_name': 'longitude', 'long_name': 'Longitude', 'units': 'degrees_east', 'axis': 'X'
    },
    'depth': {
        'standard_name': 'depth', 'long_name': 'Depth below sea surface', 'units': 'meters', 'positive': 'down', 'axis': 'Z'
    },
    'time': {
        'standard_name': 'time', 'long_name': 'Time of observation', 'axis': 'T'
    },
}

for coord_name, cf_attrs in coordinate_attrs.items():
    # Only annotate coordinates that are actually present in the merged data.
    if coord_name in merged_ds:
        # Assign a copy so the table above is never aliased by the dataset.
        merged_ds[coord_name].attrs = dict(cf_attrs)

# Add variable-level CF attributes using a loop
# Ordered (keyword, attributes) table: the first keyword found in a variable's
# name decides which attribute set is merged into that variable's attrs.
# Variables matching no keyword are left untouched.
# NOTE(review): chlorophyll units are 'ug m-3' while the simulated values are
# 0-2; confirm this is the intended unit.
variable_attr_rules = (
    ('salinity', {
        'standard_name': 'sea_water_salinity',
        'long_name': 'Sea Water Salinity',
        'units': '1e-3',
        'valid_min': 0.0,
        'valid_max': 40.0,
        'coverage_content_type': 'physicalMeasurement'
    }),
    ('temperature', {
        'standard_name': 'sea_water_temperature',
        'long_name': 'Sea Water Temperature',
        'units': 'degree_Celsius',
        'coverage_content_type': 'physicalMeasurement'
    }),
    ('chlorophyll', {
        'standard_name': 'mass_concentration_of_chlorophyll_a_in_sea_water',
        'long_name': 'Chlorophyll-a Concentration',
        'units': 'ug m-3',
        'coverage_content_type': 'physicalMeasurement'
    }),
)

for var in merged_ds.data_vars:
    # Pick the first rule whose keyword occurs in the variable name, if any.
    matched_attrs = next(
        (rule_attrs for keyword, rule_attrs in variable_attr_rules if keyword in var),
        None,
    )
    if matched_attrs is not None:
        # update() merges into the existing attrs rather than replacing them.
        merged_ds[var].attrs.update(matched_attrs)

# 4. Export the merged and formatted dataset
# Written to the working directory rather than the sample-data folder.
merged_nc_path = "merged_arctic_cf_dataset.nc"
merged_ds.to_netcdf(merged_nc_path)
print("Merged CF-compliant dataset saved as 'merged_arctic_cf_dataset.nc'")

# 5. Save a CSV demo from merged dataset
# Take the salinity field at the first time step, flatten it to one row per
# grid point, and turn the index levels into ordinary columns.
first_step_salinity = merged_ds['salinity'].isel(time=0)
csv_df = first_step_salinity.to_dataframe()
csv_df = csv_df.reset_index()
salinity_csv_path = "salinity_demo.csv"
csv_df.to_csv(salinity_csv_path, index=False)
print("Demo CSV saved as 'salinity_demo.csv'")



Merged CF-compliant dataset saved as 'merged_arctic_cf_dataset.nc'
Demo CSV saved as 'salinity_demo.csv'
