# Converting Slocum data to a standard DataFrame

In [1]:
from IPython.lib.pretty import pprint
import logging
# Surface gutils' own log messages while the notebook runs.
# The handler list is *replaced* rather than appended to, so re-running this
# cell never stacks up duplicate handlers (which would repeat every message).
stream_handler = logging.StreamHandler()  # no stream given -> logging's default stream
logger = logging.getLogger('gutils')
logger.setLevel(logging.DEBUG)
logger.handlers = [stream_handler]

In [3]:
import sys
from pathlib import Path

# Just a hack to be able to `import gutils`: the directory two levels above
# the current working directory is put on the module search path.
# NOTE: everything here is resolved from the working directory, so the kernel
# has to be started from the folder this notebook lives in.
repo_root = Path('.').absolute().parent.parent
if str(repo_root) not in sys.path:
    # Guard keeps sys.path free of duplicate entries when the cell is re-run
    sys.path.append(str(repo_root))

# Sample Slocum binary files stored under the gutils test resources
binary_folder = repo_root / 'gutils' / 'tests' / 'resources' / 'slocum' / 'real' / 'binary'

In [4]:
bass_binary = binary_folder / 'bass-20160909T1733'
!ls $bass_binary

8e6d1b16.cac		    usf-bass-2016-252-1-23.tbd
991560ed.cac		    usf-bass-2016-252-1-24.sbd
da485e91.cac		    usf-bass-2016-252-1-24.tbd
usf-bass-2016-252-0-0.tbd   usf-bass-2016-252-1-2.sbd
usf-bass-2016-252-1-0.sbd   usf-bass-2016-252-1-2.tbd
usf-bass-2016-252-1-0.tbd   usf-bass-2016-252-1-3.sbd
usf-bass-2016-252-1-10.sbd  usf-bass-2016-252-1-3.tbd
usf-bass-2016-252-1-10.tbd  usf-bass-2016-252-1-4.sbd
usf-bass-2016-252-1-11.sbd  usf-bass-2016-252-1-4.tbd
usf-bass-2016-252-1-11.tbd  usf-bass-2016-252-1-5.sbd
usf-bass-2016-252-1-12.sbd  usf-bass-2016-252-1-5.tbd
usf-bass-2016-252-1-12.tbd  usf-bass-2016-252-1-6.sbd
usf-bass-2016-252-1-13.sbd  usf-bass-2016-252-1-6.tbd
usf-bass-2016-252-1-13.tbd  usf-bass-2016-252-1-7.sbd
usf-bass-2016-252-1-14.sbd  usf-bass-2016-252-1-7.tbd
usf-bass-2016-252-1-14.tbd  usf-bass-2016-252-1-8.sbd
usf-bass-2016-252-1-15.sbd  usf-bass-2016-252-1-8.tbd
usf-bass-2016-252-1-15.tbd  usf-bass-2016-252-1-9.sbd
usf-bass-2016-252-1-16.sbd  usf-bass-2016-252-1-9.tbd
u

# SlocumMerger

Convert binary (`*.*bd`, e.g. `.sbd` and `.tbd`) files into ASCII.

### Merge a subset of binary files

If you know the flight/science pair you wish to merge, pass the two filenames explicitly via `globs`.

In [4]:
import tempfile
from gutils.slocum import SlocumMerger

# Fresh scratch directory that receives the converted ASCII files.
# (mkdtemp does not clean up after itself; remove the directory when done.)
ascii_output = tempfile.mkdtemp()

# The one flight/science pair to merge, selected explicitly by filename
flight_science_pair = [
    'usf-bass-2016-252-1-12.sbd',
    'usf-bass-2016-252-1-12.tbd',
]
merger = SlocumMerger(str(bass_binary), ascii_output, globs=flight_science_pair)

# convert() hands back the merge results: each one references the newly
# produced ASCII file and records which binary files went into it.
merge_results = merger.convert()

Converted usf-bass-2016-252-1-12.sbd,usf-bass-2016-252-1-12.tbd to usf_bass_2016_252_1_12_sbd.dat


### Merge all files in a directory

When no `globs` are given, the merger pairs up the science and flight files in the directory on its own.

In [5]:
# No `globs` this time: the merger is pointed at the whole source directory
# and writes into the same scratch directory as before.
merger = SlocumMerger(str(bass_binary), ascii_output)

# As before, each merge result references the ASCII file that was produced
# together with the binary files that went into it.
merge_results = merger.convert()

Converted usf-bass-2016-252-1-0.sbd,usf-bass-2016-252-1-0.tbd to usf_bass_2016_252_1_0_sbd.dat
Converted usf-bass-2016-252-1-10.sbd,usf-bass-2016-252-1-10.tbd to usf_bass_2016_252_1_10_sbd.dat
Converted usf-bass-2016-252-1-11.sbd,usf-bass-2016-252-1-11.tbd to usf_bass_2016_252_1_11_sbd.dat
Converted usf-bass-2016-252-1-12.sbd,usf-bass-2016-252-1-12.tbd to usf_bass_2016_252_1_12_sbd.dat
Converted usf-bass-2016-252-1-13.sbd,usf-bass-2016-252-1-13.tbd to usf_bass_2016_252_1_13_sbd.dat
Converted usf-bass-2016-252-1-14.sbd,usf-bass-2016-252-1-14.tbd to usf_bass_2016_252_1_14_sbd.dat
Converted usf-bass-2016-252-1-15.sbd,usf-bass-2016-252-1-15.tbd to usf_bass_2016_252_1_15_sbd.dat
Converted usf-bass-2016-252-1-16.sbd,usf-bass-2016-252-1-16.tbd to usf_bass_2016_252_1_16_sbd.dat
Converted usf-bass-2016-252-1-17.sbd,usf-bass-2016-252-1-17.tbd to usf_bass_2016_252_1_17_sbd.dat
Converted usf-bass-2016-252-1-18.sbd,usf-bass-2016-252-1-18.tbd to usf_bass_2016_252_1_18_sbd.dat
Converted usf-bass-2016

### What does the ASCII file look like?

In [6]:
ascii_file = merge_results[0]['ascii']
!cat $ascii_file

dbd_label: DBD_ASC(dinkum_binary_data_ascii)file
encoding_ver: 2
num_ascii_tags: 14
all_sensors: 0
filename: usf-bass-2016-252-1-0
the8x3_filename: 02470000
filename_extension: sbd
filename_label: usf-bass-2016-252-1-0-sbd(02470000)
mission_name: SLOPE.MI
fileopen_time: Fri_Sep__9_13:40:04_2016
sensors_per_cycle: 33
num_label_lines: 3
num_segments: 1
segment_filename_0: usf-bass-2016-252-1-0
c_heading c_wpt_lat m_altitude m_avg_speed m_ballast_pumped m_battery m_battpos m_depth m_depth_rate m_gps_lat m_gps_lon m_heading m_lat m_leakdetect_voltage m_lon m_mission_avg_speed_climbing m_mission_avg_speed_diving m_pitch m_present_time m_roll m_vacuum m_vehicle_temp m_water_depth m_water_vx m_water_vy sci_bbfl2s_bb_scaled sci_bbfl2s_cdom_scaled sci_bbfl2s_chlor_scaled sci_m_present_time sci_oxy3835_oxygen sci_water_cond sci_water_pressure sci_water_temp 
rad lat m m/s cc volts in m m/s lat lon rad lat volts lon m/s m/s rad timestamp rad inHg degC m m/s m/s nodim ppb ug/l timestamp nodim s/m 

# SlocumReader

### Load the ASCII file into a pandas DataFrame

In [7]:
import json
from gutils.slocum import SlocumReader

# Parse the merged ASCII file; the reader exposes the detected mode, the
# source path and the header block (as a dict-like `metadata`).
slocum_data = SlocumReader(ascii_file)

reader_summary = (
    ('Mode:  ', slocum_data.mode),
    ('ASCII: ', slocum_data.ascii_file),
    # Pretty-print the header metadata as indented JSON for readability
    ('Headers: ', json.dumps(slocum_data.metadata, indent=4)),
)
for label, value in reader_summary:
    print(label, value)

Mode:   rt
ASCII:  /tmp/tmp1klm6yhu/usf_bass_2016_252_1_0_sbd.dat
Headers:  {
    "dbd_label": "DBD_ASC(dinkum_binary_data_ascii)file",
    "encoding_ver": "2",
    "num_ascii_tags": "14",
    "all_sensors": "0",
    "filename": "usf-bass-2016-252-1-0",
    "the8x3_filename": "02470000",
    "filename_extension": "sbd",
    "filename_label": "usf-bass-2016-252-1-0-sbd(02470000)",
    "mission_name": "SLOPE.MI",
    "fileopen_time": "Fri_Sep__9_13:40:04_2016",
    "sensors_per_cycle": "33",
    "num_label_lines": "3",
    "num_segments": "1",
    "segment_filename_0": "usf-bass-2016-252-1-0"
}


In [8]:
# Every sensor column present in the parsed DataFrame, as a plain Python list
list(slocum_data.data.columns)

['c_heading',
 'c_wpt_lat',
 'm_altitude',
 'm_avg_speed',
 'm_ballast_pumped',
 'm_battery',
 'm_battpos',
 'm_depth',
 'm_depth_rate',
 'm_gps_lat',
 'm_gps_lon',
 'm_heading',
 'm_lat',
 'm_leakdetect_voltage',
 'm_lon',
 'm_mission_avg_speed_climbing',
 'm_mission_avg_speed_diving',
 'm_pitch',
 'm_present_time',
 'm_roll',
 'm_vacuum',
 'm_vehicle_temp',
 'm_water_depth',
 'm_water_vx',
 'm_water_vy',
 'sci_bbfl2s_bb_scaled',
 'sci_bbfl2s_cdom_scaled',
 'sci_bbfl2s_chlor_scaled',
 'sci_m_present_time',
 'sci_oxy3835_oxygen',
 'sci_water_cond',
 'sci_water_pressure',
 'sci_water_temp']

In [9]:
# A handful of raw time / depth / position / science columns, enough to see
# how sparse the unprocessed records are.
raw_preview_columns = [
    'sci_m_present_time',
    'm_depth',
    'm_gps_lat',
    'm_gps_lon',
    'sci_water_pressure',
    'sci_water_temp',
]
slocum_data.data[raw_preview_columns].head(20)

Unnamed: 0,sci_m_present_time,m_depth,m_gps_lat,m_gps_lon,sci_water_pressure,sci_water_temp
0,,0.452354,2821.1215,-8017.0038,,
1,,0.187865,,,,
2,,0.246208,,,,
3,,,2821.1215,-8017.0038,,
4,,0.05951,,,,
5,,0.012835,,,,
6,,0.168417,,,,
7,,0.246208,,,,
8,,0.40568,,,,
9,1473428000.0,,,,0.0,0.0


### Standardize into a glider-independent DataFrame

* Lossless (adds columns)
* Common axis names
* Common variable names used in computations of density, salinity, etc.
* Interpolates GPS coordinates
* Converts to decimal degrees
* Calculates depth from pressure if available
* Calculates pressure from depth if need be
* Calculates density and salinity

In [10]:
# Build the standardized DataFrame from the parsed Slocum data
standard = slocum_data.standardize()
# Which columns were added?  (names in `standard` that the raw frame lacks)
set(standard.columns) - set(slocum_data.data.columns)

{'conductivity',
 'density',
 'drv_m_gps_lat',
 'drv_m_gps_lon',
 'pressure',
 'salinity',
 't',
 'temperature',
 'u_orig',
 'v_orig',
 'x',
 'y',
 'z'}

In [11]:
# The standardized axis columns plus pressure and temperature, for a
# side-by-side look against the raw preview of the same records.
standard_preview_columns = ['t', 'z', 'y', 'x', 'pressure', 'temperature']
standard[standard_preview_columns].head(20)

Unnamed: 0,t,z,y,x,pressure,temperature
0,2016-09-09 13:39:20.610660,,28.352025,-80.283397,,
1,2016-09-09 13:40:12.190980,,28.352025,-80.283397,,
2,2016-09-09 13:40:26.168790,,28.352025,-80.283397,,
3,2016-09-09 13:40:31.282230,,28.352025,-80.283397,,
4,2016-09-09 13:40:52.248500,,28.352071,-80.283521,,
5,2016-09-09 13:41:07.697850,,28.352104,-80.28361,,
6,2016-09-09 13:41:12.875520,,28.352115,-80.283639,,
7,2016-09-09 13:41:18.172790,,28.352128,-80.283674,,
8,2016-09-09 13:41:23.478300,,28.352139,-80.283704,,
9,2016-09-09 13:41:24.495510,0.0,28.352141,-80.28371,0.0,0.0


## Now the data can (and should) be compared and manipulated in the same way as other glider data