In [6]:
import xarray as xr
import numpy as np
import glob

In [8]:
file_path='/users/karolina/desktop/oc/AREX2022netcdf/*.nc'

In [10]:
file_list=glob.glob(file_path)

In [12]:
datasets=[xr.open_dataset(fp) for fp in file_list]

In [13]:
print(len(datasets))

251


In [16]:
max_levels = max(ds.sizes.get('n_levels', 0) for ds in datasets)
print("Max number of n_levels:", max_levels)

Max number of n_levels: 3631


In [42]:
aligned_datasets=[
    ds.pad(n_levels=(0, max_levels-ds.sizes["n_levels"]), constant_values=np.nan)
    if "n_levels" in ds.sizes else ds
    for ds in datasets
]
print(aligned_datasets[:5])

[<xarray.Dataset> Size: 261kB
Dimensions:    (time: 1, n_levels: 3631)
Coordinates:
  * time       (time) datetime64[ns] 8B 2022-07-24T01:18:42
Dimensions without coordinates: n_levels
Data variables:
    latitude   (time) float64 8B 78.17
    longitude  (time) float64 8B 10.99
    pres       (time, n_levels) float64 29kB 2.0 3.0 4.0 5.0 ... nan nan nan nan
    temp       (time, n_levels) float64 29kB 8.239 8.24 8.248 ... nan nan nan
    cond       (time, n_levels) float64 29kB 35.99 35.99 36.0 ... nan nan nan
    psal       (time, n_levels) float64 29kB 34.53 34.53 34.53 ... nan nan nan
    fluo       (time, n_levels) float64 29kB 0.6613 1.091 1.078 ... nan nan nan
    oxy        (time, n_levels) float64 29kB 7.038 7.048 7.056 ... nan nan nan
    oxysat     (time, n_levels) float64 29kB 106.9 107.1 107.2 ... nan nan nan
    ptemp      (time, n_levels) float64 29kB 8.239 8.239 8.247 ... nan nan nan
    sigmath    (time, n_levels) float64 29kB 26.87 26.87 26.87 ... nan nan nan
Attribute

In [20]:
print(len(aligned_datasets))

251


In [22]:
variables = ['psal', 'ptemp', 'latitude', 'longitude']


selected_data = []

for ds in aligned_datasets:
    try:
        if 'station_name' in ds:
            station_name=ds['station_name'].values
        elif 'station_name' in ds.attrs:
            station_name=ds.attrs['station_name']
            
        station_data = {
            'station_name': station_name,
            'latitude': ds['latitude'].values if 'latitude' in ds else np.nan,
            'longitude': ds['longitude'].values if 'longitude' in ds else np.nan,
            'psal': ds['psal'].values if 'psal' in ds else np.nan,
            'ptemp': ds['ptemp'].values if 'ptemp' in ds else np.nan
        }
        selected_data.append(station_data)
    except Exception as e:
        print(f"Data processing error: {e}")



In [38]:
for i, data in enumerate(selected_data[:5]): #For first 5 stations
    print(data)

{'station_name': 'Z1', 'latitude': array([78.17433333]), 'longitude': array([10.9905]), 'psal': array([[34.5255, 34.5256, 34.5275, ...,     nan,     nan,     nan]]), 'ptemp': array([[8.2388, 8.2394, 8.2472, ...,    nan,    nan,    nan]])}
{'station_name': 'N0', 'latitude': array([76.50316667]), 'longitude': array([11.0115]), 'psal': array([[34.9856, 34.9866, 34.9885, ...,     nan,     nan,     nan]]), 'ptemp': array([[6.8736, 6.8905, 6.8492, ...,    nan,    nan,    nan]])}
{'station_name': 'K7', 'latitude': array([74.99983333]), 'longitude': array([13.177]), 'psal': array([[34.9499, 34.9497, 34.9498, ...,     nan,     nan,     nan]]), 'ptemp': array([[7.9576, 7.953 , 7.9505, ...,    nan,    nan,    nan]])}
{'station_name': 'WB11', 'latitude': array([80.48166667]), 'longitude': array([12.1655]), 'psal': array([[32.568 , 32.5677, 32.5667, ...,     nan,     nan,     nan]]), 'ptemp': array([[1.8288, 1.8178, 1.7556, ...,    nan,    nan,    nan]])}
{'station_name': 'V3', 'latitude': array([7

In [32]:
for i, data in enumerate(selected_data[:5]): #For first 5 station
    print(f"Stacja {i+1}:")
    print(f"  Nazwa: {data['station_name']}")
    print(f"  Szerokość geograficzna: {data['latitude'][0]}")
    print(f"  Długość geograficzna: {data['longitude'][0]}")
    print(f"  Salinity (psal): {data['psal'][0][:5]} ...")  #Only first 5 values
    print(f"  Potential Temperature (ptemp): {data['ptemp'][0][:5]} ...") #Only first 5 values
    print("-" * 50)

Stacja 1:
  Nazwa: Z1
  Szerokość geograficzna: 78.17433333333334
  Długość geograficzna: 10.9905
  Salinity (psal): [34.5255 34.5256 34.5275 34.5296 34.5335] ...
  Potential Temperature (ptemp): [8.2388 8.2394 8.2472 8.2509 8.2638] ...
--------------------------------------------------
Stacja 2:
  Nazwa: N0
  Szerokość geograficzna: 76.50316666666667
  Długość geograficzna: 11.0115
  Salinity (psal): [34.9856 34.9866 34.9885 34.9877 34.9853] ...
  Potential Temperature (ptemp): [6.8736 6.8905 6.8492 6.8708 6.9006] ...
--------------------------------------------------
Stacja 3:
  Nazwa: K7
  Szerokość geograficzna: 74.99983333333333
  Długość geograficzna: 13.177
  Salinity (psal): [34.9499 34.9497 34.9498 34.9495 34.9495] ...
  Potential Temperature (ptemp): [7.9576 7.953  7.9505 7.9462 7.9446] ...
--------------------------------------------------
Stacja 4:
  Nazwa: WB11
  Szerokość geograficzna: 80.48166666666667
  Długość geograficzna: 12.1655
  Salinity (psal): [32.568  32.5677 3

In [71]:
#stations beginning with 'K'
filtered_data = [data for data in selected_data if data['station_name'].startswith('K')]
#total number 
print(f"Number of stations with names beginning with 'K': {len(filtered_data)}")

for i, data in enumerate(filtered_data):
    print(f"Stacja {i+1}: {data['station_name']}")

Number of stations with names beginning with 'K': 22
Stacja 1: K7
Stacja 2: K1
Stacja 3: K17
Stacja 4: K-3
Stacja 5: K11
Stacja 6: K10
Stacja 7: K6
Stacja 8: K0
Stacja 9: K16
Stacja 10: K3
Stacja 11: K13
Stacja 12: K2
Stacja 13: K12
Stacja 14: K9
Stacja 15: K5
Stacja 16: K-1
Stacja 17: K15
Stacja 18: K4
Stacja 19: K-2
Stacja 20: K14
Stacja 21: K8
Stacja 22: K18


In [69]:
for i, data in enumerate(filtered_data[:5]):  #For first 5 stations 
    print(f"Stacja {i+1}: {data['station_name']}")
    print(f"  Latitude: {data['latitude'] if 'latitude' in data and data['latitude'] is not None else 'Brak danych'}")
    print(f"  Longitude: {data['longitude'] if 'longitude' in data and data['longitude'] is not None else 'Brak danych'}")
    print(f"  psal: {data['psal'] if 'psal' in data and data['psal'] is not None else 'Brak danych'}")
    print(f"  ptemp: {data['ptemp'] if 'ptemp' in data and data['ptemp'] is not None else 'Brak danych'}")
    print("-" * 50)


Stacja 1: K7
  Latitude: [74.99983333]
  Longitude: [13.177]
  psal: [[34.9499 34.9497 34.9498 ...     nan     nan     nan]]
  ptemp: [[7.9576 7.953  7.9505 ...    nan    nan    nan]]
--------------------------------------------------
Stacja 2: K1
  Latitude: [74.99916667]
  Longitude: [16.07733333]
  psal: [[34.9098 34.9105 34.9104 ...     nan     nan     nan]]
  ptemp: [[8.0409 8.0418 8.0401 ...    nan    nan    nan]]
--------------------------------------------------
Stacja 3: K17
  Latitude: [74.99966667]
  Longitude: [3.9985]
  psal: [[34.9912 34.9911 34.9916 ...     nan     nan     nan]]
  ptemp: [[5.8268 5.8283 5.8291 ...    nan    nan    nan]]
--------------------------------------------------
Stacja 4: K-3
  Latitude: [74.99783333]
  Longitude: [17.98566667]
  psal: [[34.8817 34.8831 34.8834 ...     nan     nan     nan]]
  ptemp: [[6.7795 6.7994 6.8014 ...    nan    nan    nan]]
--------------------------------------------------
Stacja 5: K11
  Latitude: [74.99933333]
  Longit