### SBD Stitcher Program for Popup Buoys

Allows the retrieval of a specific file, but is not a full implementation of wget.

__pyversion__==3.7 ***tested***  
__author__==S.Bell

In [2]:
source_url = 'http://eclipse.pmel.noaa.gov/sbd/data/ecofoci.popupsbd/'

***Unix command line for cronjobs***

`wget http://eclipse.pmel.noaa.gov/sbd/data/ecofoci.popupsbd/ -np -r -nc -R "index.html*"`

### Test reassembly of files

In [3]:
import os
import glob #python >= 3.7

import datetime

import pandas as pd

In [4]:
# read hex-encoded binary
def HexView(file):
    """Return the whole content of ``file`` as an upper-case hex string.

    The file is opened in binary mode and read in one go.  An empty file
    yields ``None`` rather than an empty string.
    """
    with open(file, 'rb') as in_file:
        raw = in_file.read()

    if not raw:
        # Nothing was read: keep signalling that with None.
        return None

    return raw.hex().upper()

# convert identifying bytes for processing
def HexIdent(hexstr):
    """Name the message source encoded in the first four hex digits.

    Known identifiers map to a fixed file name; any other identifier is
    read as a hexadecimal number and reported as ``image<number>.txt``.
    The comparison is case-sensitive, so callers pass upper-case hex.
    """
    ident = hexstr[0:4]

    known_sources = {
        '0001': 'summary.txt',
        '0002': 'prodat.txt',
        '0003': 'icedat.txt',
        '0004': 'botdat.txt',
        'FFFE': 'sstdat.txt',
    }

    if ident in known_sources:
        return known_sources[ident]

    return 'image{}.txt'.format(int(ident, 16))
        
def Summary(hexstr):
    """Decode a SUMMARY.TXT message into a dict of integers.

    After the four-digit identifier come four 8-hex-digit fields
    (position in file, profile / ice / bottom data sizes) followed by a
    4-hex-digit image count.
    """

    def as_int(start, stop):
        # Interpret hex digits [start:stop) as an unsigned integer.
        return int(hexstr[start:stop], 16)

    decoded = {}
    decoded['PositionInFile'] = as_int(4, 12)
    decoded['ProfileDataSize'] = as_int(12, 20)
    decoded['IceDataSize'] = as_int(20, 28)
    decoded['BottomDataSize'] = as_int(28, 36)
    decoded['NumberofImages'] = as_int(36, 40)
    return decoded

def Data(hexstr):
    """Split a data message into its file offset and hex payload.

    Used for PRODAT.TXT, ICEDAT.TXT, SSTDAT.TXT and BOTDAT.TXT messages.
    Hex digits 4-11 hold the position in the destination file; everything
    from digit 12 onward is returned untouched as the payload.
    """
    offset = int(hexstr[4:12], 16)
    payload = hexstr[12:]
    return (offset, payload)

def Image(file):
    """Read one image message and split it into header and payload.

    Returns ``(header, payload)`` where ``header`` is the first 8 bytes as
    a lower-case hex string and ``payload`` is the rest of the file as raw
    bytes.  When there is no payload after the header the result is
    ``None``.
    """
    with open(file, 'rb') as in_file:
        header = in_file.read(8).hex()
        payload = in_file.read()

    if len(payload) == 0:
        # Header only (or empty file): nothing to hand back.
        return None

    return (header, payload)
    



## A list of pop-up units that transmitted data from the field
#### *year deployed* - *imei number* - *site: SBD Active/Inactive* 
 - 2018 - 300434063921240 - C2: Inactive  <br>
 - 2018 - 300434063823800 - C10/C11: Inactive  <br>
 - 2018 - 300434063928220 - C12: Inactive  <br>
 - 2018 - 300434063925210 - M5: Inactive  <br>
 <br>
 - 2019 - 300434063470010 - S.W. of M5: Inactive  <br>
 - 2019 - 300434063477010 - N.W. of M5: Inactive  <br>
 - 2019 - 300434063861360 - N.E. of Saint Lawrence: Inactive  <br>
 - 2019 - 300434063474010 - M8 PopTop: Inactive  <br>
 <br>
 - 2020 - 300434063479200 - C2: Active  <br>
 - 2020 - 300434063924230 - C12: Active  <br>

In [5]:
# List the IMEI folders found in the download path.
# os.listdir() returns entries in arbitrary order, so sort them to keep the
# printed list stable and easy to copy an IMEI from.
root_path = os.path.join("..", "rawdata", "ecofoci.popupsbd")

id_dir = sorted(os.listdir(root_path))
print(id_dir)

['300434063470010', '300434063470020', '300434063471040', '300434063471670', '300434063471680', '300434063472010', '300434063472030', '300434063472040', '300434063473020', '300434063474010', '300434063477010', '300434063477030', '300434063479010', '300434063479200', '300434063663610', '300434063682460', '300434063684440', '300434063684770', '300434063764860', '300434063823800', '300434063826810', '300434063861360', '300434063862560', '300434063863000', '300434063863350', '300434063863550', '300434063921240', '300434063924230', '300434063925210', '300434063928220']


<div class="alert alert-block alert-success">
<b>INPUT:</b> COPY THE IMEI OF INTEREST FROM ABOVE AND PASTE IT IN THE id_dir=[' imei# '] BELOW. INPUT THE YEAR THE DATA WAS TRANSMITTED AND THE YEAR THE FLOAT WAS DEPLOYED. 
</div>

# *INPUT*

In [6]:
# INPUT: IMEI number(s) of the unit(s) to process.
# Several may be given, comma separated, provided they share the same
# transmission year and deployment year.
id_dir = ['300434063473020']

# INPUT: year in which the data of interest was transmitted
tx = '2021'

# INPUT: year in which the float was deployed
year = '2020'

# Output folder for everything produced below: ../results/<deployment year>
results = os.path.join('..', 'results', year)
results

'..\\results\\2020'

# *RUN SCRIPT*

In [7]:
# Reassemble the transmitted files for every IMEI listed in id_dir.
#
#  Within each ID is a sequential transmission number.  Within the transmissions there may be multiple
#   sample periods.  To get the most recent (and therefore most likely desired) sample period, the
#   transmissions are scanned newest-first until a summary message is found; reassembly then starts
#   at that transmission and runs forward in time.
#
# The file list is sorted so that transmissions are processed in sequential order.


begin_index = {}
verbose = True

# Message sources that are rebuilt, chunk by chunk, into a single text file each.
data_outputs = {
    'prodat.txt': 'PRODAT.TXT',
    'sstdat.txt': 'SSTDAT.TXT',
    'icedat.txt': 'ICEDAT.TXT',
    'botdat.txt': 'BOTDAT.TXT',
}

for msg_id_dir in id_dir:

    # List the transmissions once, oldest first; both passes below reuse the list.
    sbd_files = sorted(glob.iglob(os.path.join(root_path, msg_id_dir, tx, '*.sbd*'), recursive=True))

    # Pass 1: walk newest-first and stop at the most recent summary message.
    for filename in reversed(sbd_files):
        hexstr = HexView(filename)

        print(filename)

        if hexstr is None:
            # HexView returns None for an empty file; there is nothing to identify.
            continue

        if HexIdent(hexstr) == 'summary.txt':
            begin_index[msg_id_dir] = filename.split('.sbd')[0]
            break

    # Start from an empty output directory so pieces from an earlier run are not mixed in.
    # The directory is created even without a summary because later cells write into it.
    out_dir = os.path.join(results, msg_id_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    else:
        for f in glob.glob(os.path.join(out_dir, '*')):
            os.remove(f)

    if msg_id_dir not in begin_index:
        # Without a summary there is no starting point; report it once and move on.
        print("Directory {} does not have a summary file".format(msg_id_dir))
        continue

    # Pass 2: process every transmission from the summary onward, oldest first.
    read_datafiles = False

    for filename in sbd_files:

        if filename.split('.sbd')[0] == begin_index[msg_id_dir]:
            read_datafiles = True

        if not read_datafiles:
            continue

        print(filename.split('ecofoci.popupsbd')[-1])

        hexstr = HexView(filename)

        if hexstr is None:
            print("empty file - skipped")
            continue

        message_source = HexIdent(hexstr)

        if verbose:
            print(message_source)

        if message_source == 'summary.txt':
            print(Summary(hexstr))

        elif message_source in data_outputs:
            position, outstr = Data(hexstr)

            # Open, seek, truncate, write.  Truncating at the chunk offset discards anything
            # previously stored at or after it; in append mode the write then lands at the
            # new end of the file.
            with open(os.path.join(out_dir, data_outputs[message_source]), 'ab+') as out_file:
                out_file.seek(position, 0)
                out_file.truncate()
                out_file.write(bytes.fromhex(outstr))

        else:

            print("image file")
            image = Image(filename)

            if image is None:
                # Image returns None when there is no payload after the 8-byte header.
                print("image file has no payload - skipped")
                continue

            header, hexdata = image

            # Header hex digits 4-11 give the byte offset, digits 12-15 the image number.
            image_path = os.path.join(out_dir, '{:05d}.jpg'.format(int(header[12:16], 16)))
            image_offset = int(header[4:12], 16)

            # Update the image in place when it already exists, otherwise create it.
            try:
                out_file = open(image_path, 'rb+')
            except FileNotFoundError:
                out_file = open(image_path, 'wb+')

            with out_file:
                out_file.seek(image_offset, 0)
                out_file.truncate()
                out_file.write(hexdata)
                

..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000565.sbd.211020_110238
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000564.sbd.211020_110135
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000563.sbd.211020_035520
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000562.sbd.211019_015737
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000561.sbd.211018_160812
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000560.sbd.211018_152312
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000559.sbd.211017_150917
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000558.sbd.211017_042114
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000557.sbd.211016_155107
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000556.sbd.211015_193902
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000555.sbd.211015_173407
..\rawdata

..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000466.sbd.211006_005309
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000465.sbd.211005_225101
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000464.sbd.211005_224953
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000463.sbd.211005_210527
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000462.sbd.211005_200217
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000461.sbd.211005_200159
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000460.sbd.211005_171730
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000459.sbd.211005_151201
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000458.sbd.211005_142646
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000457.sbd.211005_100034
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000456.sbd.211005_055553
..\rawdata

..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000366.sbd.210924_083624
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000365.sbd.210924_063404
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000364.sbd.210924_041100
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000363.sbd.210924_031004
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000362.sbd.210924_022728
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000361.sbd.210923_182041
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000360.sbd.210923_171505
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000359.sbd.210923_125126
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000358.sbd.210923_102703
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000357.sbd.210923_052125
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000356.sbd.210923_023909
..\rawdata

..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000266.sbd.210918_143334
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000265.sbd.210918_143149
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000264.sbd.210918_143139
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000263.sbd.210918_142819
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000262.sbd.210918_142732
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000261.sbd.210918_142511
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000260.sbd.210918_142459
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000259.sbd.210918_142450
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000258.sbd.210918_142439
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000257.sbd.210918_142428
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000256.sbd.210918_142419
..\rawdata

..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000168.sbd.210918_103835
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000167.sbd.210918_103823
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000166.sbd.210918_103754
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000165.sbd.210918_103730
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000164.sbd.210918_103708
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000163.sbd.210918_103214
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000162.sbd.210918_103116
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000161.sbd.210918_103041
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000160.sbd.210918_103026
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000159.sbd.210918_100935
..\rawdata\ecofoci.popupsbd\300434063473020\2021\300434063473020_000158.sbd.210918_100831
..\rawdata

\300434063473020\2021\300434063473020_000151.sbd.210918_095036
botdat.txt
\300434063473020\2021\300434063473020_000152.sbd.210918_095506
botdat.txt
\300434063473020\2021\300434063473020_000153.sbd.210918_095516
botdat.txt
\300434063473020\2021\300434063473020_000154.sbd.210918_095900
botdat.txt
\300434063473020\2021\300434063473020_000155.sbd.210918_095910
botdat.txt
\300434063473020\2021\300434063473020_000156.sbd.210918_100106
botdat.txt
\300434063473020\2021\300434063473020_000157.sbd.210918_100357
botdat.txt
\300434063473020\2021\300434063473020_000158.sbd.210918_100831
botdat.txt
\300434063473020\2021\300434063473020_000159.sbd.210918_100935
botdat.txt
\300434063473020\2021\300434063473020_000160.sbd.210918_103026
image65535.txt
image file
\300434063473020\2021\300434063473020_000161.sbd.210918_103041
botdat.txt
\300434063473020\2021\300434063473020_000162.sbd.210918_103116
botdat.txt
\300434063473020\2021\300434063473020_000163.sbd.210918_103214
botdat.txt
\300434063473020\2021\3

\300434063473020\2021\300434063473020_000321.sbd.210920_113405
image65535.txt
image file
\300434063473020\2021\300434063473020_000322.sbd.210920_143954
image65535.txt
image file
\300434063473020\2021\300434063473020_000323.sbd.210920_212633
image65535.txt
image file
\300434063473020\2021\300434063473020_000324.sbd.210920_222829
image65535.txt
image file
\300434063473020\2021\300434063473020_000325.sbd.210921_021413
image65535.txt
image file
\300434063473020\2021\300434063473020_000326.sbd.210921_065913
image65535.txt
image file
\300434063473020\2021\300434063473020_000327.sbd.210921_065951
botdat.txt
\300434063473020\2021\300434063473020_000328.sbd.210921_084545
image65535.txt
image file
\300434063473020\2021\300434063473020_000329.sbd.210921_101135
image65535.txt
image file
\300434063473020\2021\300434063473020_000330.sbd.210921_111523
image65535.txt
image file
\300434063473020\2021\300434063473020_000331.sbd.210921_111533
botdat.txt
\300434063473020\2021\300434063473020_000332.sbd.21

\300434063473020\2021\300434063473020_000470.sbd.211006_050258
image65535.txt
image file
\300434063473020\2021\300434063473020_000471.sbd.211006_060545
image65535.txt
image file
\300434063473020\2021\300434063473020_000472.sbd.211006_060625
botdat.txt
\300434063473020\2021\300434063473020_000473.sbd.211006_145606
image65535.txt
image file
\300434063473020\2021\300434063473020_000474.sbd.211006_202059
image65535.txt
image file
\300434063473020\2021\300434063473020_000475.sbd.211006_204250
image65535.txt
image file
\300434063473020\2021\300434063473020_000476.sbd.211006_212622
image65535.txt
image file
\300434063473020\2021\300434063473020_000477.sbd.211007_081425
image65535.txt
image file
\300434063473020\2021\300434063473020_000478.sbd.211007_083512
image65535.txt
image file
\300434063473020\2021\300434063473020_000479.sbd.211007_093804
image65535.txt
image file
\300434063473020\2021\300434063473020_000480.sbd.211007_094008
botdat.txt
\300434063473020\2021\300434063473020_000481.sbd.21

In [8]:
# Remove the placeholder image left over from the Arduino code.
# After reassembly it sits in each unit's results folder as "65535.jpg".
# Every IMEI in id_dir is cleaned, not only the last one processed.
for msg_id_dir in id_dir:
    try:
        os.remove(os.path.join(results, msg_id_dir, "65535.jpg"))
    except FileNotFoundError:
        # No placeholder was transmitted for this unit; nothing to clean up.
        pass
    except OSError as err:
        # Best effort: report the problem but keep going with the other units.
        print("Could not remove placeholder image for {}: {}".format(msg_id_dir, err))

### Build MSG summary File - latlons and status messages

example

    MOMSN: 49
    MTMSN: 0
    Time of Session (UTC): Sun Mar  3 00:13:51 2019
    Session Status: 13 - Incomplete Transfer
    Message Size (bytes): 0

    Unit Location: Lat = 59.89769 Long = -171.24297
    CEPradius = 2

In [9]:

# position_only: write positions to message_position.csv instead of printing every field.
# good_gps:      when writing positions, keep only fixes whose CEPradius is below 10.
position_only = True
good_gps = True


def _parse_msg_file(path):
    """Collect the fields of one ``.msg`` session report into a dict.

    Only fields whose line is present in the file are set, so a value can
    never be carried over from a previously parsed file.  Possible keys:
    ``MOMSN``, ``MTMSN``, ``datetimeutc`` (a ``datetime``), ``Status``,
    ``MSize``, ``lat``, ``lon`` and ``rad`` (all others are strings).
    """
    fields = {}
    with open(path, 'r') as in_file:
        for line in in_file:
            if 'MOMSN:' in line:
                fields['MOMSN'] = line.split()[-1]
            elif 'MTMSN:' in line:
                fields['MTMSN'] = line.split()[-1]
            elif 'Time' in line:
                TimeUTC = line.split(': ')[-1].strip()
                fields['datetimeutc'] = datetime.datetime.strptime(TimeUTC, '%a %b  %d %H:%M:%S %Y')
            elif 'Status:' in line:
                fields['Status'] = line.split(': ')[-1].strip()
            elif 'Message Size' in line:
                fields['MSize'] = line.split()[-1]
            elif 'Unit Location:' in line:
                # e.g. "Unit Location: Lat = 59.89769 Long = -171.24297"
                parts = line.split()
                fields['lat'] = parts[4]
                fields['lon'] = parts[7]
            elif 'CEPradius' in line:
                fields['rad'] = line.split()[-1]
    return fields


for msg_id_dir in id_dir:
    print(msg_id_dir)

    csv_path = os.path.join(results, msg_id_dir, 'message_position.csv')

    # Rows are appended, but the header is written only when the file is new or empty
    # so that re-running this cell does not scatter header lines through the data.
    needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0

    with open(csv_path, 'a') as out_file:
        if needs_header:
            out_file.write("IMEI_message_id,datetime,latitude,longitude,CEPradius\n")

        for filename in sorted(glob.iglob(os.path.join(root_path, msg_id_dir, tx, '*.msg*'), recursive=True), reverse=False):
            msg = _parse_msg_file(filename)

            if not position_only:
                print(msg.get('MOMSN'), msg.get('MTMSN'), msg.get('datetimeutc'), msg.get('Status'),
                      msg.get('MSize'), msg.get('lat'), msg.get('lon'), msg.get('rad'))
                continue

            missing = [key for key in ('datetimeutc', 'lat', 'lon', 'rad') if key not in msg]
            if missing:
                # Do not reuse values from another message; skip incomplete reports.
                print("Skipping {}: missing {}".format(filename, ', '.join(missing)))
                continue

            if good_gps and int(msg['rad']) >= 10:
                continue

            out_file.write("{},{},{},{},{}\n".format(
                msg_id_dir, msg['datetimeutc'], msg['lat'], msg['lon'], msg['rad']))
                

300434063473020


#  End Script... Continue in 03_PopUpFloat_Translater.ipynb