Valid for ping 0.28.1 (10/01/2025)

## Data access advanced concepts

In [1]:
%matplotlib widget
import numpy as np
import themachinethatgoesping as pingmachine
import time
from matplotlib import pyplot as plt
from tqdm.auto import tqdm

### 1. Advanced use of find_files()

In [2]:
# find_files() walks the input folder recursively, i.e. its subfolders are searched too
folder = r'C:\Users\Schimel_Alexandre\Data\MBES\Kongsberg all'
endings = [".all","wcd"]  # file endings to look for
files = pingmachine.echosounders.index_functions.find_files(folder, endings)
print(len(files))

Found 482 files
482


In [3]:
# find_files() also accepts a list of folders as input
folders = [
    r'C:\Users\Schimel_Alexandre\Data\MBES\Kongsberg all\EM302',
    r'C:\Users\Schimel_Alexandre\Data\MBES\Kongsberg all\EM710',
    r'C:\Users\Schimel_Alexandre\Data\MBES\Kongsberg all\EM2040',
]
files = pingmachine.echosounders.index_functions.find_files(folders, [".all","wcd"])
print(len(files))

Found 335 files
335


In [4]:
# Pairs of files (e.g. .all and .wcd) don't have to be in the same folder.
# Remember that they are only paired later, by a File Handler.

### 2. Advanced use of File Handler
#### 2.1 Indexing

In [5]:
folder = r"C:\Users\Schimel_Alexandre\Data\MBES\Kongsberg all\EM2040\KV-Meritaito_2024_EM2040_Sorvest-F-1-1-extracts-WCD"
files = pingmachine.echosounders.index_functions.find_files(folder, [".all","wcd"])
print(len(files))

# A file handler indexes the datagrams in the files, which can take a while.
# The duration is measured with time.perf_counter(): it is a monotonic, high-resolution
# clock meant for timing short intervals, whereas time.time() is wall-clock time that
# can jump and may be too coarse for sub-second measurements.
start_time = time.perf_counter()
fileHandler = pingmachine.echosounders.kongsbergall.KongsbergAllFileHandler(files)
end_time = time.perf_counter()
print(f"Execution time: {end_time-start_time} seconds")

Found 8 files
8
indexing files ⠐ 100% [00m:00s<00m:00s] [.._20240427_083531.all (1/8)]                              
indexing files ⠠ 100% [00m:00s<00m:00s] [.._20240430_140406.wcd (8/8)]                              
indexing files ⢀ 100% [00m:00s<00m:00s] [Found: 7463 datagrams in 8 files (258MB)]                                          
Initializing ping interface ⠄ 75% [00m:00s<00m:00s] [Done]                                              
Execution time: 0.22498559951782227 seconds


In [6]:
# If you know you may reuse those files in the future, you can save the index to a file on your machine.
# This is called caching and will speed up future calls of a file handler with the same files.

# First, get the names of the cached index files from the names of the data files, using get_index_paths()
index_files = pingmachine.echosounders.index_functions.get_index_paths(file_paths=files)
# The cached indexes do not exist yet, but you can request the file handler to save them using the index_paths argument.
# time.perf_counter() (monotonic, high-resolution) is used instead of time.time() so that
# this short duration is measured reliably.
start_time = time.perf_counter()
fileHandler = pingmachine.echosounders.kongsbergall.KongsbergAllFileHandler(files, index_paths=index_files)
end_time = time.perf_counter()
print(f"Execution time (from scratch): {end_time-start_time} seconds")

# Notice that a new "index" folder was created, containing the cached indexes

indexing files ⠄ 99% [00m:00s<00m:00s] [Found: 7463 datagrams in 8 files (258MB)]                                          
Initializing ping interface ⢀ 75% [00m:00s<00m:00s] [Done]                                              
Execution time (from scratch): 0.1719512939453125 seconds


In [7]:
# Now, the next time you want to read the same files, you can request the file handler to use the cached index.
# The call is the same as before: index_paths points at the cached index files.
# time.perf_counter() (monotonic, high-resolution) is used instead of time.time() so that
# this short duration is measured reliably.
start_time = time.perf_counter()
fileHandler = pingmachine.echosounders.kongsbergall.KongsbergAllFileHandler(files, index_paths=index_files)
end_time = time.perf_counter()
print(f"Execution time (from cached index): {end_time-start_time} seconds")

indexing files ⢀ 99% [00m:00s<00m:00s] [Found: 7463 datagrams in 8 files (258MB)]                                          
Initializing ping interface ⢀ 75% [00m:00s<00m:00s] [Done]                                              
Execution time (from cached index): 0.0710000991821289 seconds


### 3. Advanced access of Pings

In [8]:
# Remember: calling get_pings() on a file handler returns a Ping Container.
# It is stored in pingContainer and reused by the following cells.
pingContainer = fileHandler.get_pings()

In [9]:
# Access an individual ping in a Ping Container by indexing with a single integer.
# The result is one Ping object (a KongsbergAllPing here, as the displayed type shows).
ping42 = pingContainer[42]
type(ping42)

themachinethatgoesping.echosounders_cppy.kongsbergall.filetypes.KongsbergAllPing

In [10]:
# Indexing only returns a Ping object when you ask for a single ping.
# If you slice a Ping Container to select multiple pings, you get a (smaller) Ping Container object,
# not a list of Ping objects (see the displayed type).
pings0To42 = pingContainer[0:42]
type(pings0To42)

themachinethatgoesping.echosounders_cppy.kongsbergall.filedatacontainers.KongsbergAllPingContainer

In [11]:
# So, if you want a list of Ping objects, you need to index the Ping Container one element at a time.
# A list comprehension does this in one step; range(42) covers indices 0 to 41.
pings0To42 = [pingContainer[i] for i in range(42)]
print(f"pings0To42 is now a {type(pings0To42)} where each element is a {type(pings0To42[0])}")

pings0To42 is now a <class 'list'> where each element is a <class 'themachinethatgoesping.echosounders_cppy.kongsbergall.filetypes.KongsbergAllPing'>


In [12]:
# But the real value of a Ping Container is for fast selection of pings based on some criteria

# For example, keep only the pings that contain watercolumn data
required_features = ['watercolumn.amplitudes']
pingsWithWC = pingmachine.pingprocessing.filter_pings.by_features(
    pingContainer,
    required_features,
)
print(f"pingsWithWC is a {type(pingsWithWC)} where each element is a {type(pingsWithWC[0])}")

pingsWithWC is a <class 'list'> where each element is a <class 'themachinethatgoesping.echosounders_cppy.kongsbergall.filetypes.KongsbergAllPing'>


### 4. Access data in datagrams

In [13]:
# The datagram_interface of a file handler gives access to the datagrams in the files;
# printing the returned container shows a summary of its contents
datagram_container = fileHandler.datagram_interface.datagrams()
print(datagram_container)

DatagramContainer
#################
-
Time info (Datagrams) 
------------------------ 
- Start time: 27/04/2024 08:35:30.79 
- End time:   30/04/2024 14:05:32.41 
- Sorted:     no                     

 Contained datagrams 
---------------------        
- Total:                                       7463 
- Datagrams [PUIDOutput]:                      8    [30]
- Datagrams [PUStatusOutput]:                  40   [31]
- Datagrams [AttitudeDatagram]:                39   [41]
- Datagrams [ClockDatagram]:                   42   [43]
- Datagrams [SurfaceSoundSpeedDatagram]:       1    [47]
- Datagrams [InstallationParametersStart]:     8    [49]
- Datagrams [RawRangeAndAngle]:                836  [4e]
- Datagrams [PositionDatagram]:                39   [50]
- Datagrams [RuntimeParameters]:               14   [52]
- Datagrams [SoundSpeedProfileDatagram]:       8    [55]
- Datagrams [XYZDatagram]:                     836  [58]
- Datagrams [SeabedImageData]:                 836  [59]
- Datagra

In [14]:
# The container returned by datagram_interface.datagrams() is iterable,
# so a plain for-loop visits every datagram:
datagram_interface = fileHandler.datagram_interface
for d in tqdm(datagram_interface.datagrams()):
    pass

# Passing a datagram type name restricts the loop to datagrams of that type:
for d in tqdm(datagram_interface.datagrams("RuntimeParameters")):
    pass

  0%|          | 0/7463 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

In [15]:
# Index the container to access a specific datagram, here the first RuntimeParameters datagram
runtime_parameters_datagrams = fileHandler.datagram_interface.datagrams("RuntimeParameters")
d = runtime_parameters_datagrams[0]
print(d)

RuntimeParameters
#################
- bytes:               52       
- stx:                 0x02     
- datagram_identifier: 0x52     [RuntimeParameters]
- model_number:        EM2040   [2040]
- date:                20240427 [YYYYMMDD]
- time_since_midnight: 30931461 [ms]

 date/time 
-----------  
- timestamp: 1714.207e⁶   [s]
- date:      27/04/2024   [MM/DD/YYYY]
- time:      08:35:31.461 [HH:MM:SS]

 datagram content 
------------------             
- ping_counter:                         57744      
- system_serial_number:                 333        
- operator_station_status:              0          
- processing_unit_status:               0          
- bsp_status:                           0          
- sonar_head_or_transceiver_status:     0          
- mode:                                 0b10000001 
- filter_identifier:                    0b10100110 
- minimum_depth:                        10         [m]
- maximum_depth:                        150        [m]
- absorption_coe

In [16]:
# Use the "get_" methods of a datagram to read individual fields of interest.
# d is the RuntimeParameters datagram selected in the previous cell.
print(f"Model number: {d.get_model_number()}")
# get_mode() prints as a plain integer (129 is the decimal form of the 0b10000001 shown by print(d))
print(f"Mode: {d.get_mode()}")
print(f"System serial number: {d.get_system_serial_number()}")
# The raw pulse length and the "_in_seconds" variant report the same quantity in different units
# (200 vs ~0.0002 s in the output, so the raw value is presumably in microseconds - TODO confirm)
print(f"Transmit pulse length (raw): {d.get_transmit_pulse_length()}")
print(f"Transmit pulse length (in sec): {d.get_transmit_pulse_length_in_seconds()}")

Model number: 2040
Mode: 129
System serial number: 333
Transmit pulse length (raw): 200
Transmit pulse length (in sec): 0.00019999999494757503


In [17]:
# Note you can also access the runtime parameters that apply to a given ping
ping = fileHandler.get_pings()[0]
rp = ping.file_data.get_runtime_parameters()
# Read the value from rp (the runtime parameters of this ping), not from the datagram d
# selected in an earlier cell, which is unrelated to this ping.
# NOTE(review): assumes rp exposes the same getters as a RuntimeParameters datagram - confirm.
print(f"Transmit pulse length (in sec) for this ping: {rp.get_transmit_pulse_length_in_seconds()}")

Transmit pulse length (in sec) for this ping: 0.00019999999494757503
