In [1]:
import os

# your paths may vary
notes_file = r'C:\Users\joefutrelle\Desktop\EN627_FLRTD492_logging_notes.txt'
#start_stop_file = r'C:\Users\joefutrelle\Desktop\en627_flrtd_start_stop_times.csv'
assert os.path.exists(notes_file)

In [2]:
import re

def lines_that_match(filename, regex):
    with open(filename) as fin:
        lines = fin.readlines()
    return [l.rstrip() for l in lines if re.match(regex, l)]

Read the logging notes to get start and stop times for FLRTD

In [3]:
import pandas as pd

lines = lines_that_match(notes_file, r'.*recording')

datetimes = []
filenames = []
sss = []

for line in lines:
    try:
        datetime, _, filename, ss = re.match(r'(\d{8} \d\d:\d\d(:\d\d)?) \(\w+\) - (\w+),? (.*) recording', line).groups()
        datetimes.append(pd.Timestamp(datetime, tz='UTC'))
        filenames.append(filename)
        sss.append(ss)
    except AttributeError:
        print(line)
        
ss = pd.DataFrame({
    'datetime': datetimes,
    'filename': filenames,
    'start_stop': sss
})
ss.head()

Unnamed: 0,datetime,filename,start_stop
0,2019-02-02 21:45:00+00:00,en627_20190202_214500,start
1,2019-02-02 23:16:00+00:00,en627_20190202_214500,stop
2,2019-02-02 23:17:30+00:00,en627_20190202_231730,start
3,2019-02-03 03:38:00+00:00,en627_20190202_231730,stop
4,2019-02-03 03:40:00+00:00,en627_20190203_034000,start


In [4]:
start_times = ss[ss['start_stop'] == 'start']

def start_time(filename):
    rows = start_times[start_times['filename'] == filename]
    if len(rows) == 1:
        return rows.iloc[0]['datetime']

start_time('en627_20190202_214500')

Timestamp('2019-02-02 21:45:00+0000', tz='UTC')

Parse the FLRTD log files to get the fluorometer data (var2)

In [5]:
from glob import glob
from io import StringIO

# Your paths may vary
FLRTD_DIR = r'C:\Users\joefutrelle\Desktop\EN627_FLRTD492'
OUT_DIR = r'C:\Users\joefutrelle\Desktop\en627_flrtd_cleaned'

dfs = {}

for flrtd_path in glob(os.path.join(FLRTD_DIR,'*.raw')):
    filename_with_ext = os.path.basename(flrtd_path)
    filename, _ = os.path.splitext(filename_with_ext)
    lines = lines_that_match(flrtd_path, r'\d\d/\d\d/\d\d\s+\d\d:\d\d:\d\d\s+\d+\s\d+\s')
    tsv = '\n'.join(lines)
    df = pd.read_csv(StringIO(tsv), delimiter='\t')
    df.columns = ['bad_date','bad_time','var1','var2','var3']
    dfs[filename] = df

def filename_start_time(filename):
    y, m, d, H, M, S = re.match(r'\w+_(\d{4})(\d\d)(\d\d)_(\d\d)(\d\d)(\d\d).*', filename).groups()
    return pd.Timestamp('{}/{}/{} {}:{}:{}'.format(y,m,d,H,M,S), tz='UTC')

start_times = {}

for filename, df in dfs.items():
    ts = filename_start_time(filename)
    start_times[filename] = ts

Generate ~1hz timestamps for each FLRTD log starting from the start time

In [6]:
def ticks(start_time, frequency, n):
    return [start_time + pd.Timedelta(frequency) * i for i in range(n)]

# 1.18hz = 847ms
# rate appears to be 1.046hz = 965ms

for filename, df in dfs.items():
    ts = start_times[filename]
    timestamps = ticks(ts, '965ms', len(df))
    df['datetime'] = timestamps
    out_path = os.path.join(OUT_DIR, '{}.csv'.format(filename))
    try:
        df.pop('bad_date')
        df.pop('bad_time')
    except KeyError:
        pass
    df.to_csv(out_path, index=None)

Now compare the end of those timestamps with the logged stop time to determine whether we have the right sampling rate

In [7]:
fns = []
end_times = []

for fn, df in dfs.items():
    fns.append(fn)
    end_times.append(pd.Timestamp(df['datetime'].values[-1], tz='UTC'))
    
merged = pd.DataFrame(dict(filename=fns,end_time=end_times)).merge(ss, on='filename')
merged = merged[merged['start_stop'] == 'stop']
merged.pop('start_stop')
merged['time_difference'] = merged['datetime'] - merged['end_time']
merged.columns = ['filename','estimated_end_time','logged_end_time','difference']
merged.difference.median()

Timedelta('0 days 00:00:02.730000')