[Jupyter Book](https://geo-smart.github.io/oceanography/intro.html) and [GitHub repo](https://github.com/geo-smart/oceanography).


# Data Loader


If the data referenced in this book is not available, the sections of this notebook can be used to load it.


## Shallow Profiler Oregon Slope Base January 2022

In [1]:
import netCDF4
import xarray as xr
import s3fs
from   shallowprofiler import *
from   charts import *

# Anonymous (unsigned) S3 access
fs = s3fs.S3FileSystem(anon=True)

# detail = False: names only, for the entries directly under 'ooi-data'
streamlist = fs.listdir('ooi-data', detail = False)

# ReadProfileMetadata arrives via one of the star imports in this cell
# (presumably shallowprofiler; TODO confirm)
profiles = ReadProfileMetadata()

# 'streamlist' as the last line of the cell will list out the streams; organized by site and instrument
# For more details on the nomenclature run the following cell.


Jupyter Notebook running Python 3


In [2]:
def InstrumentBreakout(s):
    """
    Print the fixed-position fields of a stream name on one line.

    's' is expected to look like
    'ooi-data/RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample':
    the character offsets below skip the 9-character 'ooi-data/' prefix, the
    '-' separators and the 'streamed' token. Offsets past the end of a shorter
    string simply yield empty fields. Returns None.
    """
    # (start, stop) offsets of each field; a stop of None runs to the end of the string
    bounds = ((9, 11), (11, 13), (13, 15), (15, 17), (18, 20), (20, 22), (22, 23), (24, 26),
              (27, 32), (32, 33), (33, 36), (46, None))
    fields = [s[start:stop] for start, stop in bounds]
    print('Field breakout:', *fields)

# List every stream whose name contains 'SF01A' (Oregon Slope Base shallow profiler)
print('List Oregon Slope Base Profiler streams:\n')
for s in streamlist:
    if 'SF01A' in s: print(s)
print()

# Instrument code (upper case, second-to-last name segment) / stream name prefix (lower case, last segment)
print('Translation:')
print('  CTDPF / ctdpf                 CTD')
print('  PHSEN / phsen                 pH')
print('  FLORT / flort                 Fluorometer { FDOM, Chlor-A, Backscatter }')
print('  OPTAA / optaa                 Spectrophotometer')
print('  PARAD / parad                 PAR')
print('  SPKIR / spkir                 Spectral Irradiance')
print('  NUTNR / nutnr_a_dark_sample   Nitrate: Dark sample (explain please)')
print('  NUTNR / nutnr_a_sample        Nitrate: ...')
print('  VELPT / velpt                 Velocity (current) ambiguous: on SCIP or affixed to platform?')
print('  PCO2W / pco2w                 pCO2')
print()

# Index 15 (0-based, so the 16th entry) of streamlist. Assumed to be a Shallow Profiler,
# Oregon Slope Base stream; this depends on the order in which the bucket is listed, so
# check the 'Full stream name' printed below.
stream_choice = 15

print()
print('Full stream name:', streamlist[stream_choice])
print()
InstrumentBreakout(streamlist[stream_choice])
print()
print('The first field has CE for Coastal Endurance or RS for Regional Cabled Array.')
print('The Oregon Offshore site is a CE site; the other two are RS sites.')
print('Fields 5, 6 and 7 give us shallow profiler site and choice of profiler or platform.')
print()
indenter = 4
print(' '*indenter + 'PC 01 B --> Oregon Offshore 200m Platform')
print(' '*indenter + 'SF 01 B --> Oregon Offshore Profiler')
print(' '*indenter + 'PC 01 A --> Oregon Slope Base 200m Platform')
print(' '*indenter + 'SF 01 A --> Oregon Slope Base Profiler')
print(' '*indenter + 'PC 03 A --> Axial Base 200m Platform')
print(' '*indenter + 'SF 03 A --> Axial Base Profiler')
print()
print('Non-shallow-profiler examples:')
# Site number is written with the digit zero, matching the 01 / 03 codes in the table above
print('  DP 03 A is the Axial Base Deep Profiler')
print('  LJ 01 A is the Oregon Slope Base Seafloor')
print()

List Oregon Slope Base Profiler streams:

ooi-data/RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample
ooi-data/RS01SBPS-SF01A-2D-PHSENA101-streamed-phsen_data_record
ooi-data/RS01SBPS-SF01A-3A-FLORTD101-streamed-flort_d_data_record
ooi-data/RS01SBPS-SF01A-3B-OPTAAD101-streamed-optaa_sample
ooi-data/RS01SBPS-SF01A-3C-PARADA101-streamed-parad_sa_sample
ooi-data/RS01SBPS-SF01A-3D-SPKIRA101-streamed-spkir_data_record
ooi-data/RS01SBPS-SF01A-4A-NUTNRA101-streamed-nutnr_a_dark_sample
ooi-data/RS01SBPS-SF01A-4A-NUTNRA101-streamed-nutnr_a_sample
ooi-data/RS01SBPS-SF01A-4B-VELPTD102-streamed-velpt_velocity_data
ooi-data/RS01SBPS-SF01A-4F-PCO2WA101-streamed-pco2w_a_sami_data_record

Translation:
  CTDPF / ctdpf                 CTD
  PHSEN / phsen                 pH
  FLORT / flort                 Fluorometer { FDOM, Chlor-A, Backscatter }
  OPTAA / optaa                 Spectrophotometer
  PARAD / parad                 PAR
  SPKIR / spkir                 Spectral Irradiance
  NUTNR / nutnr_

In [3]:
def loadData(stream_name):
    """
    Open one instrument stream as an xarray Dataset.

    'stream_name' is the store path as it appears in 'streamlist', for example
    'ooi-data/RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample'.
    A new anonymous S3 filesystem is created on every call; the store is
    opened with consolidated zarr metadata. Returns the Dataset.
    """
    store = s3fs.S3FileSystem(anon=True).get_mapper(stream_name)
    return xr.open_zarr(store, consolidated=True)

def ShallowProfilerDataReduce(ds, t0, t1, keepers, rename):
    """
    Reduce a shallow profiler stream Dataset ('ds') to a few renamed data variables
    over a time window, ready to be written to a NetCDF file holding just
    time/sensor-value/depth over a period typically no longer than a month.

    Steps:
      - drop every dimension other than 'time' (and the variables that use it)
      - check that the single remaining dimension and coordinate is 'time'
      - select the time window t0..t1
      - drop data vars not listed in 'keepers'; rename the others
      - drop all Dataset-level attributes

    Parameters:
      ds        Dataset for one instrument stream
      t0, t1    start / end of the window, used as ds.sel(time=slice(t0, t1))
      keepers   list of data variable names to retain; a name that is not present
                in 'ds' is simply absent from the result (no error is reported)
      rename    list of new names parallel to 'keepers': rename[i] replaces keepers[i]

    Returns:
      (Dataset, 'looks ok') on success, or (False, <reason>) when a structural
      check fails. Callers should test the first element before using it.
    """
    if not len(ds.dims) == 1:
        # Drop all non-time dimensions in a single call. Dropping them one by one can
        # raise: removing a dimension also removes the variables that use it, which may
        # make a later dimension disappear before its own drop_dims call.
        ds = ds.drop_dims([dim for dim in ds.dims if dim != 'time'])
        if not len(ds.dims) == 1: return False, "Failed to drop non-time dimensions"
    if not list(ds.dims)[0] == 'time':
        return False, "Dataset dim is not 'time' (as assumed)"
    if not len(ds.coords) == 1:
        return False, "Dataset coords count is not 1 (as assumed)"
    if not list(ds.coords)[0] == 'time':
        return False, "Dataset coord is not 'time' (as assumed)"

    ds = ds.sel(time=slice(t0, t1))

    # drop extraneous data vars (drop_vars replaces the deprecated Dataset.drop)
    ds = ds.drop_vars([name for name in ds.data_vars if name not in keepers])

    # rename the others in one call
    new_names = {name: rename[keepers.index(name)] for name in ds.data_vars}
    if new_names:
        ds = ds.rename(new_names)

    # Assign a fresh dict instead of popping keys in place, so that an attrs dict
    # possibly shared with the caller's Dataset is never modified.
    ds.attrs = {}
    return ds, 'looks ok'

In [4]:
# Keep only the streams whose name contains 'SF01A' (Oregon Slope Base shallow profiler)
osb_profiler_streams = list(filter(lambda sname: 'SF01A' in sname, streamlist))

# Point out the CTD and the current (velocity) streams among them
for s in osb_profiler_streams:
    for key, label in (('ctdpf', 'CTD:'), ('velpt', 'Current:')):
        if key in s:
            print(label, s)

print("There are", len(osb_profiler_streams), "Oregon Slope Base profiler streams")

CTD: ooi-data/RS01SBPS-SF01A-2A-CTDPFA102-streamed-ctdpf_sbe43_sample
Current: ooi-data/RS01SBPS-SF01A-4B-VELPTD102-streamed-velpt_velocity_data
There are 10 Oregon Slope Base profiler streams


### Go through all 10 osb profiler streams in sequence

In this order: 

- ctdpf
- phsen
- flort
- optaa
- parad
- spkir
- nutnr_a_dark_sample
- nutnr_a_sample
- velpt
- pco2w


#### 1 of 10: **ctdpf** i.e. CTD

In [None]:
# Locate the CTD stream (first name containing 'ctdpf'), open it and subset it to January 2022
for s in osb_profiler_streams:
    if 'ctdpf' in s:
        print('Found CTD:', s)
        stream_ctd = s
        break

ds = loadData(stream_ctd)                             # lazy load
t0, t1 = '2022-01-01T00', '2022-01-31T23'             # January 2022: t1 is the last hour of 31 January
ds = ds.sel(time=slice(t0, t1))                       # Subset the full time range to one month
print(ds.time[0], '              ', ds.time[-1])      # verify selected one month time range
ds                                                    # get a 'data variable' list of sensors/metadata for this instrument

We pull five sensor time series datasets from the CTD stream. These are named data variables and
they will be renamed using the table found in the **Data** chapter. Each sensor dataset also contains
time (as dimension/coordinate) and depth (as a second data variable). 
Depth is the `sea_water_pressure` data variable renamed. 

```
corrected_dissolved_oxygen              do              depth  
sea_water_density                       density         depth                     
sea_water_electrical_conductivity       conductivity    depth       
sea_water_practical_salinity            salinity        depth
sea_water_temperature                   temp            depth
```

In [None]:
# Reduce the CTD stream to five two-variable datasets (sensor + depth) for January 2022
t0, t1 = '2022-01-01T00', '2022-01-31T23'
ctd_pressure = 'sea_water_pressure'            # renamed to 'depth' in every reduced dataset

ds_do, reply1           = ShallowProfilerDataReduce(ds, t0, t1, ['corrected_dissolved_oxygen', ctd_pressure], ['do', 'depth'])
ds_density, reply2      = ShallowProfilerDataReduce(ds, t0, t1, ['sea_water_density', ctd_pressure], ['density', 'depth'])
ds_conductivity, reply3 = ShallowProfilerDataReduce(ds, t0, t1, ['sea_water_electrical_conductivity', ctd_pressure], ['conductivity', 'depth'])
ds_salinity, reply4     = ShallowProfilerDataReduce(ds, t0, t1, ['sea_water_practical_salinity', ctd_pressure], ['salinity', 'depth'])
ds_temp, reply5         = ShallowProfilerDataReduce(ds, t0, t1, ['sea_water_temperature', ctd_pressure], ['temp', 'depth'])

show_replies  = False                          # set True to print the status string of each reduction
show_datasets = False                          # set True to print each reduced dataset

if show_replies:
    for status in (reply1, reply2, reply3, reply4, reply5):
        print(status)

if show_datasets:
    for reduced in (ds_do, ds_density, ds_conductivity, ds_salinity, ds_temp):
        print(reduced)

# Write each reduced dataset to its own NetCDF file
ctd_outputs = (
    (ds_do,           './data/rca/sensors/osb/do_jan_2022.nc'),
    (ds_density,      './data/rca/sensors/osb/density_jan_2022.nc'),
    (ds_conductivity, './data/rca/sensors/osb/conductivity_jan_2022.nc'),
    (ds_salinity,     './data/rca/sensors/osb/salinity_jan_2022.nc'),
    (ds_temp,         './data/rca/sensors/osb/temp_jan_2022.nc'),
)
for reduced, path in ctd_outputs:
    reduced.to_netcdf(path)

In [None]:
# Open the January 2022 temperature file (the path the CTD reduction cell writes to)
ds_temp = xr.open_dataset('./data/rca/sensors/osb/temp_jan_2022.nc')

# temperature: ascent versus descent
# Both traces are given the same temperature and (negated) depth data; they differ in
# label, color and the 'ascent' / 'descent' argument.
# NOTE(review): meaning of the [0] list and the trailing 6, 4 is defined by ChartTwoSensors
# (presumably profile indices and figure size; TODO confirm)
fig,axs = ChartTwoSensors(profiles, [ranges['temperature'], ranges['temperature']], [0], 
                          ds_temp.temp, -ds_temp.depth,
                          'T-Ascent',   colors['temperature'],    'ascent',
                          ds_temp.temp, -ds_temp.depth,
                          'T-Descent',  'green',                  'descent', 6, 4)

#### 2 of 10: **phsen** i.e. pH

In [None]:
# Locate the pH stream (first name containing instrument_key), open it and subset it to January 2022
instrument_key = 'phsen'
for s in osb_profiler_streams:
    if instrument_key in s:
        print('Found this instrument stream:', s)
        instrument_stream = s
        break

ds = loadData(instrument_stream)                      # lazy load
t0, t1 = '2022-01-01T00', '2022-01-31T23'             # January 2022: t1 is the last hour of 31 January
ds = ds.sel(time=slice(t0, t1))                       # Subset the full time range to one month
print(ds.time[0], '              ', ds.time[-1])      # verify selected one month time range
ds                                                    # get a 'data variable' list of sensors/metadata for this instrument

We pull one sensor time series dataset from this stream. See the CTD section above
and the Data chapter. The data variable of interest is `ph_seawater` which will be
renamed ph. Depth will be a rename of the `int_ctd_pressure` data variable. 
This stream has multiple dimensions so there is a preliminary step to isolate
just time, ph_seawater and int_ctd_pressure. 

In [None]:
# Reduce the pH stream to ph + depth for January 2022, report the result and write it to NetCDF
t0, t1 = '2022-01-01T00', '2022-01-31T23'
ph_keepers   = ['ph_seawater', 'int_ctd_pressure']    # data variables to retain
ph_new_names = ['ph', 'depth']                        # their names in the reduced dataset
ds_ph, reply = ShallowProfilerDataReduce(ds, t0, t1, ph_keepers, ph_new_names)
print(reply)
print(ds_ph)

ds_ph.to_netcdf('./data/rca/sensors/osb/ph_jan_2022.nc')

In [None]:
# Open the January 2022 pH file (the path the pH reduction cell writes to)
ds_ph = xr.open_dataset('./data/rca/sensors/osb/ph_jan_2022.nc')

# ph: is measured on descent
# NOTE(review): [3, 8, 12, 17] and the trailing 6, 4 are interpreted by ChartSensor
# (presumably profile indices and figure size; TODO confirm)
fig, axes = ChartSensor(profiles, ranges['ph'], [3, 8, 12, 17], ds_ph.ph, -ds_ph.depth, 'profile pH', 'black', 'descent', 6, 4)

#### 3 of 10: **flort** i.e. fluorometer: Chlor-A, FDOM, particulate backscatter

In [None]:
# Locate the fluorometer stream (first name containing instrument_key), open it and subset it to January 2022
instrument_key = 'flort'
for s in osb_profiler_streams:
    if instrument_key in s:
        print('Found this instrument stream:', s)
        instrument_stream = s
        break

ds = loadData(instrument_stream)                      # lazy load
t0, t1 = '2022-01-01T00', '2022-01-31T23'             # January 2022: t1 is the last hour of 31 January
ds = ds.sel(time=slice(t0, t1))                       # Subset the full time range to one month
print(ds.time[0], '              ', ds.time[-1])      # verify selected one month time range
ds                                                    # get a 'data variable' list of sensors/metadata for this instrument

We pull three sensor time series datasets from this stream. See the CTD section above
and the Data chapter. Data variables: 

```
fluorometric_cdom           >      fdom, depth
fluorometric_chlorophyll_a  >      chlora, depth
optical_backscatter         >      backscatter, depth
```

Depth from `int_ctd_pressure`.

In [None]:
# Reduce the fluorometer stream to three two-variable datasets (sensor + depth) for January 2022,
# write each to NetCDF, then re-open each from disk
t0, t1 = '2022-01-01T00', '2022-01-31T23'
flort_pressure = 'int_ctd_pressure'            # renamed to 'depth' in every reduced dataset

ds_fdom, reply        = ShallowProfilerDataReduce(ds, t0, t1, ['fluorometric_cdom', flort_pressure], ['fdom', 'depth'])
ds_chlora, reply      = ShallowProfilerDataReduce(ds, t0, t1, ['fluorometric_chlorophyll_a', flort_pressure], ['chlora', 'depth'])
ds_backscatter, reply = ShallowProfilerDataReduce(ds, t0, t1, ['optical_backscatter', flort_pressure], ['backscatter', 'depth'])

fdom_path        = './data/rca/sensors/osb/fdom_jan_2022.nc'
chlora_path      = './data/rca/sensors/osb/chlora_jan_2022.nc'
backscatter_path = './data/rca/sensors/osb/backscatter_jan_2022.nc'

ds_fdom.to_netcdf(fdom_path)
ds_chlora.to_netcdf(chlora_path)
ds_backscatter.to_netcdf(backscatter_path)

ds_fdom        = xr.open_dataset(fdom_path)
ds_chlora      = xr.open_dataset(chlora_path)
ds_backscatter = xr.open_dataset(backscatter_path)

In [None]:
# One chart per fluorometer sensor: fdom, chlora, backscatter. Depth is negated on the way in.
# 'fig, axes' is rebound by each call, so only the last chart's handles remain afterwards.
# NOTE(review): [0] and the trailing 6, 4 are interpreted by ChartSensor
# (presumably profile indices and figure size; TODO confirm)
fig, axes = ChartSensor(profiles, ranges['fdom'],        [0], ds_fdom.fdom,               -ds_fdom.depth,        'profile fdom', 'black', 'ascent', 6, 4)
fig, axes = ChartSensor(profiles, ranges['chlora'],      [0], ds_chlora.chlora,           -ds_chlora.depth,      'profile chlora', 'black', 'ascent', 6, 4)
fig, axes = ChartSensor(profiles, ranges['backscatter'], [0], ds_backscatter.backscatter, -ds_backscatter.depth, 'profile backscatter', 'black', 'ascent', 6, 4)

#### 4 of 10: **optaa** i.e. spectrophotometer 2 signals x 83 channels

In [None]:
# Locate the spectrophotometer stream (first name containing instrument_key), open it and subset it to January 2022
instrument_key = 'optaa'
for s in osb_profiler_streams:
    if instrument_key in s:
        print('Found this instrument stream:', s)
        instrument_stream = s
        break

ds = loadData(instrument_stream)                      # lazy load
t0, t1 = '2022-01-01T00', '2022-01-31T23'             # January 2022: t1 is the last hour of 31 January
ds = ds.sel(time=slice(t0, t1))                       # Subset the full time range to one month
print(ds.time[0], '              ', ds.time[-1])      # verify selected one month time range
ds                                                    # get a 'data variable' list of sensors/metadata for this instrument

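...paused here: this stream will require dedicated code that retains the wavelength dimension, because `ShallowProfilerDataReduce` (defined above) drops every dimension other than time...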

#### 5 of 10: **parad** i.e. PAR (photosynthetically available radiation)

In [None]:
# Locate the PAR stream (first name containing instrument_key), open it and subset it to January 2022
instrument_key = 'parad'
for s in osb_profiler_streams:
    if instrument_key in s:
        print('Found this instrument stream:', s)
        instrument_stream = s
        break

ds = loadData(instrument_stream)                      # lazy load
t0, t1 = '2022-01-01T00', '2022-01-31T23'             # January 2022: t1 is the last hour of 31 January
ds = ds.sel(time=slice(t0, t1))                       # Subset the full time range to one month
print(ds.time[0], '              ', ds.time[-1])      # verify selected one month time range
ds                                                    # get a 'data variable' list of sensors/metadata for this instrument

Found this instrument stream: ooi-data/RS01SBPS-SF01A-3C-PARADA101-streamed-parad_sa_sample


#### 6 of 10: **spkir** i.e. spectral irradiance

In [None]:
# Locate the spectral irradiance stream (first name containing instrument_key), open it and subset it to January 2022
instrument_key = 'spkir'
for s in osb_profiler_streams:
    if instrument_key in s:
        print('Found this instrument stream:', s)
        instrument_stream = s
        break

ds = loadData(instrument_stream)                      # lazy load
t0, t1 = '2022-01-01T00', '2022-01-31T23'             # January 2022: t1 is the last hour of 31 January
ds = ds.sel(time=slice(t0, t1))                       # Subset the full time range to one month
print(ds.time[0], '              ', ds.time[-1])      # verify selected one month time range
ds                                                    # get a 'data variable' list of sensors/metadata for this instrument

#### 7 of 10: **nutnr_a_dark_sample** i.e. nitrate, dark sample (explanation to be added)

In [None]:
# Locate the nitrate dark-sample stream (first name containing instrument_key), open it and subset it to January 2022
instrument_key = 'nutnr_a_dark_sample'
for s in osb_profiler_streams:
    if instrument_key in s:
        print('Found this instrument stream:', s)
        instrument_stream = s
        break

ds = loadData(instrument_stream)                      # lazy load
t0, t1 = '2022-01-01T00', '2022-01-31T23'             # January 2022: t1 is the last hour of 31 January
ds = ds.sel(time=slice(t0, t1))                       # Subset the full time range to one month
print(ds.time[0], '              ', ds.time[-1])      # verify selected one month time range
ds                                                    # get a 'data variable' list of sensors/metadata for this instrument

#### 8 of 10: **nutnr_a_sample** i.e. nitrate

In [None]:
# Locate the nitrate sample stream (first name containing instrument_key), open it and subset it to January 2022.
# 'nutnr_a_sample' is not a substring of 'nutnr_a_dark_sample', so the dark-sample stream is not matched here.
instrument_key = 'nutnr_a_sample'
for s in osb_profiler_streams:
    if instrument_key in s:
        print('Found this instrument stream:', s)
        instrument_stream = s
        break

ds = loadData(instrument_stream)                      # lazy load
t0, t1 = '2022-01-01T00', '2022-01-31T23'             # January 2022: t1 is the last hour of 31 January
ds = ds.sel(time=slice(t0, t1))                       # Subset the full time range to one month
print(ds.time[0], '              ', ds.time[-1])      # verify selected one month time range
ds                                                    # get a 'data variable' list of sensors/metadata for this instrument

#### 9 of 10: **velpt** i.e. current velocity

In [None]:
# Locate the current velocity stream (first name containing instrument_key), open it and subset it to January 2022
instrument_key = 'velpt'
for s in osb_profiler_streams:
    if instrument_key in s:
        print('Found this instrument stream:', s)
        instrument_stream = s
        break

ds = loadData(instrument_stream)                      # lazy load
t0, t1 = '2022-01-01T00', '2022-01-31T23'             # January 2022: t1 is the last hour of 31 January
ds = ds.sel(time=slice(t0, t1))                       # Subset the full time range to one month
print(ds.time[0], '              ', ds.time[-1])      # verify selected one month time range
ds                                                    # get a 'data variable' list of sensors/metadata for this instrument

#### 10 of 10: **pco2w** i.e. pCO2

In [None]:
# Locate the pCO2 stream (first name containing instrument_key), open it and subset it to January 2022
instrument_key = 'pco2w'
for s in osb_profiler_streams:
    if instrument_key in s:
        print('Found this instrument stream:', s)
        instrument_stream = s
        break

ds = loadData(instrument_stream)                      # lazy load
t0, t1 = '2022-01-01T00', '2022-01-31T23'             # January 2022: t1 is the last hour of 31 January
ds = ds.sel(time=slice(t0, t1))                       # Subset the full time range to one month
print(ds.time[0], '              ', ds.time[-1])      # verify selected one month time range
ds                                                    # get a 'data variable' list of sensors/metadata for this instrument