In [2]:
import xarray as xr
import glob
import numpy as np

In [4]:
file_paths=glob.glob('/Users/karolina/Desktop/oc/AREX2022netcdf/*.nc')

In [6]:
datasets=[xr.open_dataset(fp) for fp in file_paths]

In [8]:
print(datasets)

[<xarray.Dataset> Size: 19kB
Dimensions:    (time: 1, n_levels: 257)
Coordinates:
  * time       (time) datetime64[ns] 8B 2022-07-24T01:18:42
Dimensions without coordinates: n_levels
Data variables:
    latitude   (time) float64 8B ...
    longitude  (time) float64 8B ...
    pres       (time, n_levels) float64 2kB ...
    temp       (time, n_levels) float64 2kB ...
    cond       (time, n_levels) float64 2kB ...
    psal       (time, n_levels) float64 2kB ...
    fluo       (time, n_levels) float64 2kB ...
    oxy        (time, n_levels) float64 2kB ...
    oxysat     (time, n_levels) float64 2kB ...
    ptemp      (time, n_levels) float64 2kB ...
    sigmath    (time, n_levels) float64 2kB ...
Attributes: (12/25)
    title:                   IOPAN CTD data file
    abstract:                IOPAN CTD data file from the AREX cruise with RV...
    topiccategory:           oceans
    keywords:                Oceanography Pressure Temperature Conductivity S...
    activity_type:          

In [10]:
max_levels=max(ds.sizes.get('n_levels', 0) for ds in datasets)
print(max_levels)

3631


In [12]:
aligned_datasets=[
    ds.pad(n_levels=(0, max_levels-ds.sizes.get('n_levels', 0)), constant_values=np.nan)
    if 'n_levels' in ds.sizes else ds
    for ds in datasets
]

In [14]:
print(aligned_datasets)

[<xarray.Dataset> Size: 261kB
Dimensions:    (time: 1, n_levels: 3631)
Coordinates:
  * time       (time) datetime64[ns] 8B 2022-07-24T01:18:42
Dimensions without coordinates: n_levels
Data variables:
    latitude   (time) float64 8B ...
    longitude  (time) float64 8B ...
    pres       (time, n_levels) float64 29kB 2.0 3.0 4.0 5.0 ... nan nan nan nan
    temp       (time, n_levels) float64 29kB 8.239 8.24 8.248 ... nan nan nan
    cond       (time, n_levels) float64 29kB 35.99 35.99 36.0 ... nan nan nan
    psal       (time, n_levels) float64 29kB 34.53 34.53 34.53 ... nan nan nan
    fluo       (time, n_levels) float64 29kB 0.6613 1.091 1.078 ... nan nan nan
    oxy        (time, n_levels) float64 29kB 7.038 7.048 7.056 ... nan nan nan
    oxysat     (time, n_levels) float64 29kB 106.9 107.1 107.2 ... nan nan nan
    ptemp      (time, n_levels) float64 29kB 8.239 8.239 8.247 ... nan nan nan
    sigmath    (time, n_levels) float64 29kB 26.87 26.87 26.87 ... nan nan nan
Attributes: (

In [16]:
combined_ds=xr.concat(aligned_datasets, dim='station', join='outer')

In [18]:
selected_vars=['ptemp', 'psal', 'longitude', 'latitude']

In [20]:
filtered_datasets= [ds[selected_vars] for ds in aligned_datasets]

In [22]:
for i, ds in enumerate(datasets):
    ds=ds.assign_coords(station_name=i)
    aligned_datasets.append(ds)

In [24]:
print(combined_ds)

<xarray.Dataset> Size: 16GB
Dimensions:    (station: 251, time: 251, n_levels: 3631)
Coordinates:
  * time       (time) datetime64[ns] 2kB 2022-06-20T10:48:54 ... 2022-07-27T0...
Dimensions without coordinates: station, n_levels
Data variables:
    latitude   (station, time) float64 504kB nan nan nan nan ... nan nan nan nan
    longitude  (station, time) float64 504kB nan nan nan nan ... nan nan nan nan
    pres       (station, time, n_levels) float64 2GB nan nan nan ... nan nan nan
    temp       (station, time, n_levels) float64 2GB nan nan nan ... nan nan nan
    cond       (station, time, n_levels) float64 2GB nan nan nan ... nan nan nan
    psal       (station, time, n_levels) float64 2GB nan nan nan ... nan nan nan
    fluo       (station, time, n_levels) float64 2GB nan nan nan ... nan nan nan
    oxy        (station, time, n_levels) float64 2GB nan nan nan ... nan nan nan
    oxysat     (station, time, n_levels) float64 2GB nan nan nan ... nan nan nan
    ptemp      (station, t

In [26]:
station_names=[
    ds.attrs.get('station_name', f"Station_{i+1}") for i, ds in enumerate(aligned_datasets)
]


In [28]:
print(station_names)

['Z1', 'N0', 'K7', 'WB11', 'V3', 'O-2', 'S11', 'EX9', 'Z15', 'H9', 'EX2', 'K1', 'N-6', 'NB4', 'V26', 'Y1', 'V19', 'EB2-10P', 'EB2-4P', 'O8', 'V13', 'Y15', 'K17', 'N4', 'S8', 'Z11', 'EB2-1P', 'O-7', 'S15', 'K-3', 'N-10', 'H13', 'EX4P', 'Z5', 'WB7', 'PhM3', 'N2', 'K11', 'H18', 'HV3', 'V9', 'WBY1', 'S5', 'Y5', 'H3', 'NB8P', 'V30', 'O4', 'EB2-6P', 'EB2-12P', 'EB2-2', 'Z12', 'O-6', 'S16', 'V21', 'N-11', 'H12', 'EX4', 'Z6', 'WB6', 'N1P', 'K10', 'H19', 'HV3t', 'V8', 'WB16', 'S4', 'Y4', 'H2', 'WB1', 'V31', 'O5', 'EB2-12', 'EB2-7', 'Z2', 'N-1', 'K6', 'WB10', 'V2', 'O-1', 'S12', 'Z15', 'EX10', 'H8', 'EX1', 'K0', 'N-7', 'NB5', 'V27', 'Y10', 'V18', 'EB2-5', 'EB2-10', 'V38', 'V12', 'Y16', 'K16', 'N4up', 'S7P', 'HV2', 'S17', 'Z9', 'EB2-2P', 'EB2-7P', 'EB2-14', 'O2', 'S1', 'S9', 'S0', 'S8P', 'HV1', 'S18', 'EB2-3', 'Z10', 'EB2-13', 'EB2-8', 'O3', 'N-4', 'K3', 'WB15', 'V7', 'V17', 'Y11', 'K13', 'N3', 'V24', 'NB2', 'H17', 'EX7P', 'Y9', 'H4', 'WB4', 'V34', 'V25', 'NB3', 'H16', 'EX7', 'Y8', 'H7', 'WB5', '

In [94]:
combined_ds['station_name']=('station', station_names)

In [36]:
combined_ds=xr.concat(filtered_datasets, dim='station')

In [98]:
combined_ds.to_netcdf('/users/karolina/desktop/seccombined.nc')

In [100]:
print(combined_ds.dims)



In [44]:
station_names=station_names[:len(filtered_datasets)]

In [46]:
filtered_datasets=[
    ds[selected_vars].assign_coords(station_name=station_names[i])
    for i, ds in enumerate(filtered_datasets)
]

In [50]:
combined_ds=xr.concat(filtered_datasets, dim='station')

In [52]:
combined_ds=combined_ds.assign_coords(station=('station', station_names))

In [56]:
output_file='/users/karolina/desktop/combined_data.nc'
combined_ds.to_netcdf(output_file)

In [58]:
file_path='/users/karolina/desktop/combined_data.nc'
ds=xr.open_dataset(file_path)

In [60]:
print(ds)

<xarray.Dataset> Size: 4GB
Dimensions:       (station: 251, time: 251, n_levels: 3631)
Coordinates:
  * time          (time) datetime64[ns] 2kB 2022-06-20T10:48:54 ... 2022-07-2...
    station_name  (station) <U7 7kB ...
  * station       (station) <U7 7kB 'Z1' 'N0' 'K7' 'WB11' ... 'V33' 'Y6' 'H5'
Dimensions without coordinates: n_levels
Data variables:
    ptemp         (station, time, n_levels) float64 2GB ...
    psal          (station, time, n_levels) float64 2GB ...
    longitude     (station, time) float64 504kB ...
    latitude      (station, time) float64 504kB ...
Attributes: (12/25)
    title:                   IOPAN CTD data file
    abstract:                IOPAN CTD data file from the AREX cruise with RV...
    topiccategory:           oceans
    keywords:                Oceanography Pressure Temperature Conductivity S...
    activity_type:           Cruise
    conventions:             CF-1.0
    ...                      ...
    Author_name:             Agnieszka Beszczyns

In [92]:
print(ds.coords)
print(ds.dims)

Coordinates:
  * time          (time) datetime64[ns] 2kB 2022-06-20T10:48:54 ... 2022-07-2...
    station_name  (station) <U7 7kB ...
  * station       (station) <U7 7kB 'Z1' 'N0' 'K7' 'WB11' ... 'V33' 'Y6' 'H5'


In [62]:
print(ds.data_vars)

Data variables:
    ptemp      (station, time, n_levels) float64 2GB ...
    psal       (station, time, n_levels) float64 2GB ...
    longitude  (station, time) float64 504kB ...
    latitude   (station, time) float64 504kB ...


In [68]:
ds_station_k2=ds.sel(station='K-2')
print(ds_station_k2)

<xarray.Dataset> Size: 15MB
Dimensions:       (time: 251, n_levels: 3631)
Coordinates:
  * time          (time) datetime64[ns] 2kB 2022-06-20T10:48:54 ... 2022-07-2...
    station_name  <U7 28B ...
    station       <U7 28B 'K-2'
Dimensions without coordinates: n_levels
Data variables:
    ptemp         (time, n_levels) float64 7MB ...
    psal          (time, n_levels) float64 7MB ...
    longitude     (time) float64 2kB ...
    latitude      (time) float64 2kB ...
Attributes: (12/25)
    title:                   IOPAN CTD data file
    abstract:                IOPAN CTD data file from the AREX cruise with RV...
    topiccategory:           oceans
    keywords:                Oceanography Pressure Temperature Conductivity S...
    activity_type:           Cruise
    conventions:             CF-1.0
    ...                      ...
    Author_name:             Agnieszka Beszczynska-Möller
    Author_email:            abesz@iopan.pl
    distribution_statement:  These data are public and 

In [70]:
print(ds_station_k2['ptemp'])

<xarray.DataArray 'ptemp' (time: 251, n_levels: 3631)> Size: 7MB
[911381 values with dtype=float64]
Coordinates:
  * time          (time) datetime64[ns] 2kB 2022-06-20T10:48:54 ... 2022-07-2...
    station_name  <U7 28B ...
    station       <U7 28B 'K-2'
Dimensions without coordinates: n_levels
Attributes:
    short_name:     potential temperature
    long_name:      Ocean potential temperature (ITS-90 deg.C)
    standard_name:  sea_water_potential_temperature
    units:          degrees Celsius


In [76]:
datasets=[xr.open_dataset(fp) for fp in file_path]


  engine = plugins.guess_engine(filename_or_obj)
  engine = plugins.guess_engine(filename_or_obj)
  engine = plugins.guess_engine(filename_or_obj)


ValueError: did not find a match in any of xarray's currently installed IO backends ['netcdf4', 'scipy']. Consider explicitly selecting one of the installed engines via the ``engine`` parameter, or installing additional IO dependencies, see:
https://docs.xarray.dev/en/stable/getting-started-guide/installing.html
https://docs.xarray.dev/en/stable/user-guide/io.html

In [78]:
print(datasets)

[<xarray.Dataset> Size: 19kB
Dimensions:    (time: 1, n_levels: 257)
Coordinates:
  * time       (time) datetime64[ns] 8B 2022-07-24T01:18:42
Dimensions without coordinates: n_levels
Data variables:
    latitude   (time) float64 8B ...
    longitude  (time) float64 8B ...
    pres       (time, n_levels) float64 2kB 2.0 3.0 4.0 ... 256.0 257.0 258.0
    temp       (time, n_levels) float64 2kB 8.239 8.24 8.248 ... 2.657 2.658
    cond       (time, n_levels) float64 2kB 35.99 35.99 36.0 ... 31.38 31.38
    psal       (time, n_levels) float64 2kB 34.53 34.53 34.53 ... 34.88 34.88
    fluo       (time, n_levels) float64 2kB 0.6613 1.091 1.078 ... 0.1213 0.1217
    oxy        (time, n_levels) float64 2kB 7.038 7.048 7.056 ... 6.693 6.693
    oxysat     (time, n_levels) float64 2kB 106.9 107.1 107.2 ... 89.33 89.33
    ptemp      (time, n_levels) float64 2kB 8.239 8.239 8.247 ... 2.642 2.643
    sigmath    (time, n_levels) float64 2kB 26.87 26.87 26.87 ... 27.82 27.82
Attributes: (12/25)
    

In [102]:
file_path='/users/karolina/desktop/seccombined.nc'
ds=xr.open_dataset(file_path)

In [104]:
print(ds)

<xarray.Dataset> Size: 4GB
Dimensions:       (station: 251, time: 251, n_levels: 3631)
Coordinates:
  * time          (time) datetime64[ns] 2kB 2022-06-20T10:48:54 ... 2022-07-2...
    station_name  (station) <U7 7kB ...
  * station       (station) <U7 7kB 'Z1' 'N0' 'K7' 'WB11' ... 'V33' 'Y6' 'H5'
Dimensions without coordinates: n_levels
Data variables:
    ptemp         (station, time, n_levels) float64 2GB ...
    psal          (station, time, n_levels) float64 2GB ...
    longitude     (station, time) float64 504kB ...
    latitude      (station, time) float64 504kB ...
Attributes: (12/25)
    title:                   IOPAN CTD data file
    abstract:                IOPAN CTD data file from the AREX cruise with RV...
    topiccategory:           oceans
    keywords:                Oceanography Pressure Temperature Conductivity S...
    activity_type:           Cruise
    conventions:             CF-1.0
    ...                      ...
    Author_name:             Agnieszka Beszczyns

In [113]:
selected_vars=['ptemp', 'psal', 'longitude', 'latitude']
combined_ds_selected=combined_ds[selected_vars]
print(combined_ds_selected)

<xarray.Dataset> Size: 4GB
Dimensions:       (station: 251, time: 251, n_levels: 3631)
Coordinates:
  * time          (time) datetime64[ns] 2kB 2022-06-20T10:48:54 ... 2022-07-2...
    station_name  (station) <U7 7kB 'Z1' 'N0' 'K7' 'WB11' ... 'V33' 'Y6' 'H5'
  * station       (station) <U7 7kB 'Z1' 'N0' 'K7' 'WB11' ... 'V33' 'Y6' 'H5'
Dimensions without coordinates: n_levels
Data variables:
    ptemp         (station, time, n_levels) float64 2GB nan nan nan ... nan nan
    psal          (station, time, n_levels) float64 2GB nan nan nan ... nan nan
    longitude     (station, time) float64 504kB nan nan nan nan ... nan nan nan
    latitude      (station, time) float64 504kB nan nan nan nan ... nan nan nan
Attributes: (12/25)
    title:                   IOPAN CTD data file
    abstract:                IOPAN CTD data file from the AREX cruise with RV...
    topiccategory:           oceans
    keywords:                Oceanography Pressure Temperature Conductivity S...
    activity_type: 

In [119]:
for ds in aligned_datasets:
    if 'n_levels' not in ds.coords:
        ds.coords['n_levels']= np.arange(0, max_levels)

ValueError: conflicting sizes for dimension 'n_levels': length 3631 on 'n_levels' and length 257 on {'time': 'time', 'n_levels': 'pres'}

In [121]:
print(aligned_datasets[0]. dims)

