In [1]:
import numpy as np
import xarray as xr
from netCDF4 import Dataset
import os

def combine_netcdf_files(path, output_file, exclude=None):
    file_list = [path + "/" + filename for filename in os.listdir(path)]
    file_list.sort()
    # Open the first file to get dimensions and variables
    first_file = Dataset(file_list[0], 'r')
    dimensions = {dim: len(first_file.dimensions[dim]) for dim in first_file.dimensions}
    variables = first_file.variables
    dimensions['time'] = len(file_list)
    # Create the output NetCDF file
    with Dataset(output_file, 'w', format='NETCDF4') as output:
        # Create dimensions
        for dim_name, dim_size in dimensions.items():
            output.createDimension(dim_name, dim_size)

        output.createVariable('lat', np.float32,('lat',))
        output.createVariable('lon', np.float32,('lon',))

        output.variables['lat'][:] = variables['lat'][:]
        output.variables['lon'][:] = variables['lon'][:]
        
        # Combine data from all input files
        for var_name, var in variables.items():
            if var_name in exclude:
                continue
            print(var)
            arr = []
            for file_path in file_list:
                print(file_path)
                src = xr.open_dataset(file_path)
                arr.append(src[var_name].values)
            arr = np.array(arr)
            arr = arr.astype(np.float32)
            concatenated = np.transpose(arr, (1, 2, 0))
            print(concatenated)
            dtype = var.dtype
            print(np.sum(np.isnan(concatenated)))
            dimensions = var.dimensions
            output.createVariable(var_name, np.float32, ('lat', 'lon', 'time'))
            output.variables[var_name][:] = concatenated

In [2]:
combine_netcdf_files("STSData/MODIS-SST", "MODIS_combined_SST_monthly.nc", exclude = ["lat", "lon", "palette"])

<class 'netCDF4._netCDF4.Variable'>
uint8 qual_sst(lat, lon)
    long_name: Quality Levels, Sea Surface Temperature
    _FillValue: 255
    missing_value: 255
unlimited dimensions: 
current shape = (720, 1440)
filling off
STSData/MODIS-SST/AQUA_MODIS.20110901_20110930.L3m.MO.SST.x_sst.nc
STSData/MODIS-SST/AQUA_MODIS.20111001_20111031.L3m.MO.SST.x_sst.nc
STSData/MODIS-SST/AQUA_MODIS.20111101_20111130.L3m.MO.SST.x_sst.nc
STSData/MODIS-SST/AQUA_MODIS.20111201_20111231.L3m.MO.SST.x_sst.nc
STSData/MODIS-SST/AQUA_MODIS.20120101_20120131.L3m.MO.SST.x_sst.nc
STSData/MODIS-SST/AQUA_MODIS.20120201_20120229.L3m.MO.SST.x_sst.nc
STSData/MODIS-SST/AQUA_MODIS.20120301_20120331.L3m.MO.SST.x_sst.nc
STSData/MODIS-SST/AQUA_MODIS.20120401_20120430.L3m.MO.SST.x_sst.nc
STSData/MODIS-SST/AQUA_MODIS.20120501_20120531.L3m.MO.SST.x_sst.nc
STSData/MODIS-SST/AQUA_MODIS.20120601_20120630.L3m.MO.SST.x_sst.nc
STSData/MODIS-SST/AQUA_MODIS.20120701_20120731.L3m.MO.SST.x_sst.nc
STSData/MODIS-SST/AQUA_MODIS.20120801_201

In [4]:
combine_netcdf_files("STSData/SSS", "OISSS_combined_SSS_monthly.nc", exclude = ["latitude", "longitude", "time"])

ValueError: cannot find dimension lat in this group or parent groups

In [170]:
from ascat_averaged import ASCATAveraged

def read_data(filename):
    dataset = ASCATAveraged(filename, missing=missing)
    if not dataset.variables: sys.exit('file not found')
    return dataset

def get_uv(dataset):
    wspd = dataset.variables['windspd'][:,:]
    wdir = dataset.variables['winddir'][:,:]
    
    from bytemaps import get_uv
    u,v = get_uv(wspd,wdir)
    bad = np.where(wspd<0)
    u[bad] = -999
    v[bad] = -999
    return u, v

def create_nc(path, output_file):
    file_list = [path + "/" + filename for filename in os.listdir(path)]
    file_list.sort()
    first_file = read_data(file_list[0])
    with Dataset(output_file, 'w', format='NETCDF4') as output:
        # Create dimensions
        output.createDimension('longitude', 1440)
        output.createDimension('lat', 720)
        output.createDimension('time', 720)

        output.createVariable('lat', np.float32,('lat',))
        output.createVariable('longitude', np.float32,('longitude',))

        output.variables['lat'][:] = first_file.variables['lat']
        output.variables['longitude'][:] = first_file.variables['longitude']
        
        u_agg = []
        v_agg = []
        for file_path in file_list:
            print(file_path)
            src = read_data(file_path)
            u, v = get_uv(src)
            u_agg.append(u)
            v_agg.append(v)
        u_agg = np.array(u_agg)
        v_agg = np.array(v_agg)
        u_final = np.transpose(u_agg, (1, 2, 0))
        v_final = np.transpose(v_agg, (1, 2, 0))
        output.createVariable(var_name, np.float32, ('lat', 'longitude', 'time'))
        output.createVariable(var_name, np.float32, ('lat', 'longitude', 'time'))
        output.variables[var_name][:] = concatenated

In [165]:
create_nc("STSData/ASCAT-windraw", "ASCAT_combined_wind_monthly.nc")

STSData/ASCAT-windraw/ascat_201109_v02.1.gz
STSData/ASCAT-windraw/ascat_201110_v02.1.gz
STSData/ASCAT-windraw/ascat_201111_v02.1.gz
STSData/ASCAT-windraw/ascat_201112_v02.1.gz
STSData/ASCAT-windraw/ascat_201201_v02.1.gz
STSData/ASCAT-windraw/ascat_201202_v02.1.gz
STSData/ASCAT-windraw/ascat_201203_v02.1.gz
STSData/ASCAT-windraw/ascat_201204_v02.1.gz
STSData/ASCAT-windraw/ascat_201205_v02.1.gz
STSData/ASCAT-windraw/ascat_201206_v02.1.gz
STSData/ASCAT-windraw/ascat_201207_v02.1.gz
STSData/ASCAT-windraw/ascat_201208_v02.1.gz
STSData/ASCAT-windraw/ascat_201209_v02.1.gz
STSData/ASCAT-windraw/ascat_201210_v02.1.gz
STSData/ASCAT-windraw/ascat_201211_v02.1.gz
STSData/ASCAT-windraw/ascatb_201212_v02.1.gz
STSData/ASCAT-windraw/ascatb_201301_v02.1.gz
STSData/ASCAT-windraw/ascatb_201302_v02.1.gz
STSData/ASCAT-windraw/ascatb_201303_v02.1.gz
STSData/ASCAT-windraw/ascatb_201304_v02.1.gz
STSData/ASCAT-windraw/ascatb_201305_v02.1.gz
STSData/ASCAT-windraw/ascatb_201306_v02.1.gz
STSData/ASCAT-windraw/asc