### Functions used by RDT_to_JSON

In [None]:
using CSV, CurveFit
using Dates, DataFrames, DSP
using NativeFileDialog
using Plots, Printf
using Statistics

# Function to apply polynomial fit to WSE's affected by GPS errors
#
# For every index where gps_flag == 1 (handled from the last flagged index to the
# first) a 2nd-order polynomial is fitted separately to the samples just before and
# just after the error, and each fitted curve is subtracted from the samples it was
# fitted to. The sample at the flagged index itself is then set to 0.0.
#
# Arguments:
#   heave    - vector of WSE samples; it is MODIFIED IN PLACE and also returned
#   date     - timestamp of the record, only used in the log message
#   gps_flag - per-sample flags; a value of 1 marks a GPS error
#   offset   - (keyword) maximum number of samples used on each side of an error;
#              defaults to 120, the value that used to be hard-coded
#
# Returns the (mutated) heave vector.
function fix_gps_errors(heave, date, gps_flag; offset=120)
##########################################################

    flagged = findall(==(1), gps_flag)

    # Nothing flagged: hand the input back untouched
    isempty(flagged) && return(heave)

    n_samples = length(heave)

    println(length(flagged), " GPS errors at ", Dates.format.(date, "yyyy-mm-dd HH:MM"))
    flush(stdout)

    for ii in reverse(flagged)

        # Keep the fit centre at least 3 samples away from either end of the record
        center = min(max(ii, 3), n_samples - 3)

        # Shrink the window where it would run off the record, but never below 2
        # so that each side still has at least 3 points for the quadratic fit
        lower = max(min(offset, center - 1), 2)
        upper = max(min(offset, n_samples - center), 2)

        left = max(1, center - lower):center
        right = center:min(n_samples, center + upper)

        # Both fits are computed from the data as it is BEFORE either correction
        # is applied (the two windows share the centre sample)
        left_fit = curve_fit(Polynomial, left, heave[left], 2)
        left_trend = left_fit.(left)
        left_trend[end] = 0.0      # leave the shared centre sample unchanged

        right_fit = curve_fit(Polynomial, right, heave[right], 2)
        right_trend = right_fit.(right)
        right_trend[1] = 0.0       # leave the shared centre sample unchanged

        # Remove the fitted trends on both sides of the GPS error
        heave[left] .= heave[left] .- left_trend
        heave[right] .= heave[right] .- right_trend
        heave[ii] = 0.0            # set WSE at GPS error location to 0

    end

    return(heave)

end  # fix_gps_errors()


# Band-average a spectral density.
#
# The input bins are taken to be evenly spaced from 0 to sample_frequency/2.
#   - first output value : mean of the first 8 bins, reported at frequency 0.0
#   - middle output values: mean of the 17 bins first_bin:first_bin+16 for
#     first_bin = 9, 25, 41, ..., reported at the frequency of bin first_bin+8
#   - last output value  : mean of the final 9 bins
#
# Returns the tuple (freq_out, Pden_smoothed).
#
# NOTE(review): the indexing lines up when length(Pden_in) is 16k+1 (e.g. 129);
# for other lengths freq_in[first_bin+8] can run past the end of the input or the
# two outputs can end up with different lengths - confirm against the callers.
function smooth_spectra(Pden_in, sample_frequency)
##################################################

    n_bins = length(Pden_in)
    freq_in = range(0, stop=sample_frequency/2, length=n_bins)

    freq_out = [0.0]
    Pden_smoothed = [mean(Pden_in[1:8])]

    for first_bin in 9:16:n_bins

        push!(freq_out, freq_in[first_bin+8])

        # the final band is handled separately after the loop
        if first_bin < n_bins - 16
            push!(Pden_smoothed, mean(Pden_in[first_bin:first_bin+16]))
        end

    end

    push!(Pden_smoothed, mean(Pden_in[end-8:end]))

    return(freq_out, Pden_smoothed)

end    # smooth_spectra()


# Estimate the spectrum of a heave record with Welch's method (used to better
# define bimodal events).
#
# The record is converted to Float64 and passed to welch_pgram with 256-sample
# segments, an overlap of 128 samples, a hanning window, nfft = 256 and a
# one-sided result. Returns the tuple (frequencies, power) of that periodogram.
function calc_f2_Pden2(heave, sample_frequency)
###############################################

    samples = Float64.(heave)
    periodogram = welch_pgram(samples, 256, 128;
        onesided=true, nfft=256, fs=sample_frequency, window=hanning)

    return(freq(periodogram), power(periodogram))

end    # calc_f2_Pden2()
    

# Read the whole of `filename` and return its contents as a vector of bytes.
# The file is opened read-only and closed again before returning.
function read_binary_file(filename)
###################################

    return(open(read, filename, "r"))

end    # read_binary_file()


# Function to find all header indices
#
# Returns the 1-based start index of every occurrence of `header` inside `data`
# (overlapping occurrences included), in ascending order. The result is empty when
# `data` is shorter than `header`.
function find_headers(data, header)
###################################

    header_length = length(header)
    last_start = length(data) - header_length + 1

    # Compare through a view so that no temporary copy of `data` is allocated for
    # each candidate position (the input can be a whole file)
    return([i for i in 1:last_start if view(data, i:(i + header_length - 1)) == header])

end    # find_headers()


# Decode the sample frequency stored in the record that starts at index `ii`.
#
# The four bytes RDT_data[ii+11] .. RDT_data[ii+14] are combined, most significant
# byte first, into a UInt32 whose bit pattern is reinterpreted as a Float32.
function get_sample_frequency(ii, RDT_data)
######################################

    bits = zero(UInt32)
    for byte_offset in 11:14
        bits = (bits << 8) | UInt32(RDT_data[ii + byte_offset])
    end

    return(reinterpret(Float32, bits))

end    # get_sample_frequency()


# Coarse displacement filter: every element whose absolute value is 20 or more
# is replaced by 0.0; all other elements are passed through unchanged.
# Returns a new array; the input is not modified.
function filter_large_values(arr)
#################################

    clip(value) = abs(value) >= 20 ? 0.0 : value

    return([clip(value) for value in arr])

end    # filter_large_values()


# Function to parse records and update DataFrame
#
# Decodes the record that starts at index `ii` of the byte vector `RDT_data` and
# appends one row (timestamp, heave, north, west, f2, Pden2) to `displacement_df`.
#
# Record layout as read by this function (offsets relative to ii):
#   +5,+6   year (two bytes, most significant first)
#   +7..+10 month, day, hour, minute (one byte each)
#   +15...  repeating 6-byte groups: heave, north, west, each a signed 16-bit
#           value (most significant byte first) that is divided by 100
# The least significant bit of the raw north word is used as the GPS-error flag.
#
# Returns `nothing` without touching `displacement_df` when the timestamp fields
# do not form a valid DateTime; otherwise returns the result of push!.
#
# All working variables are local: the function no longer writes heave, north,
# west, f2 and Pden2 into global scope.
function parse_record(RDT_data, ii, displacement_df, message_length, sample_frequency)
########################################################################

    # Two consecutive bytes, most significant first, as a UInt16
    word_at(pos) = (UInt16(RDT_data[pos]) << 8) | UInt16(RDT_data[pos+1])

    # Raw 16-bit word -> signed value scaled by 1/100
    to_displacement(word) = Float64(reinterpret(Int16, word) / 100)

    utc = try
        DateTime(Int(word_at(ii+5)), Int(RDT_data[ii+7]), Int(RDT_data[ii+8]),
            Int(RDT_data[ii+9]), Int(RDT_data[ii+10]))
    catch err
        # An invalid date/time field marks a record to skip; anything else is a
        # real error and is passed on to the caller
        err isa ArgumentError || rethrow()
        return(nothing)
    end

    heave = Float64[]; north = Float64[]; west = Float64[]
    gps_flag = Int[]

    # NOTE(review): the last group may start at ii+message_length-1 (it does for
    # message_length = 13840) and then reads 5 bytes beyond the record - confirm
    # whether that final sample is intended.
    for jj in 15:6:message_length

        group_start = ii + jj
        north_word = word_at(group_start + 2)

        push!(heave, to_displacement(word_at(group_start)))
        push!(north, to_displacement(north_word))
        push!(west, to_displacement(word_at(group_start + 4)))
        push!(gps_flag, Int(north_word & 0x0001))

    end

    # Set displacement values to 0 if their absolute value is greater than or equal to 20
    heave = filter_large_values(heave)
    north = filter_large_values(north)
    west = filter_large_values(west)

    if any(==(1), gps_flag)
        heave = fix_gps_errors(heave, utc, gps_flag)
    end

    f2, Pden2 = calc_f2_Pden2(heave, sample_frequency)

    return(push!(displacement_df, (utc, heave, north, west, f2, Pden2)))

end    # parse_record()


# Read one .RDT file and return a DataFrame with one row per decoded record.
#
# Record starts are located by searching the file for the 5-byte header
# 0x2a 0x30 0x36 0x0a 0x26. Only headers that are followed by the next header
# exactly 13840 bytes later are processed. Each such record is decoded by
# parse_record using the sample frequency read from that same record.
#
# Columns of the result: Date, Heave, North, West, f2, Pden2.
function process_RDT_file(infil)
################################
    reclen = 13840
    header = UInt8[0x2a, 0x30, 0x36, 0x0a, 0x26]

    RDT_data = read_binary_file(infil)
    header_indices = find_headers(RDT_data, header)

    # distance from each header to the one after it
    spacing = diff(header_indices)
    record_starts = [header_indices[k] for k in eachindex(spacing) if spacing[k] == reclen]

    displacement_df = DataFrame(Date = DateTime[], Heave = Vector{Float64}[], North = Vector{Float64}[],
        West = Vector{Float64}[], f2 = Vector{Float64}[], Pden2 = Vector{Float64}[])

    for record_start in record_starts
        record_frequency = get_sample_frequency(record_start, RDT_data)
        parse_record(RDT_data, record_start, displacement_df, reclen, record_frequency)
    end

    return(displacement_df)

end    # process_RDT_file()

### Read a single .RDT file

In [None]:
#######################################################################################################
#######################################################################################################
#######################################################################################################

# Ask the user to choose the input file with a file dialog. The dialog is given
# "F:\Card Data\" as its path argument and "*RDT" as its filter list; the chosen
# path is stored in the global `infil`, which the next cell reads.
infil = pick_file("F:\\Card Data\\", filterlist="*RDT")

In [None]:
# Report the selected file, parse it with process_RDT_file and time the whole
# step. The resulting DataFrame is stored in the global `displacement_df`.
@time begin
println("Selected ",infil)
    
displacement_df = process_RDT_file(infil)
    
end

### Read an entire directory of .RDT files

In [None]:
using Glob

#######################################################################################################
#######################################################################################################
#######################################################################################################

# Widen screen for better viewing
display(HTML("<style>.jp-Cell { width: 120% !important; }</style>"))

# Main code to process all .RDT files in a selected directory
directory_path = pick_folder()
rdt_files = glob(".//*.RDT", directory_path)

# Remove the file named TMP.RDT. It is excluded by name rather than by position,
# so that no real data file is dropped when TMP.RDT is absent or does not sort last.
rdt_files = filter(f -> uppercase(basename(f)) != "TMP.RDT", rdt_files)

isempty(rdt_files) && error("No .RDT files to process in ", directory_path)

@time begin

    println("Processing files in directory: ", directory_path)

    all_displacement_dfs = DataFrame[]

    for rdt_path in rdt_files
        println("Processing ", rdt_path)
        push!(all_displacement_dfs, process_RDT_file(rdt_path))
    end

    # Combine all displacement DataFrames (reduce avoids splatting one argument
    # per file into vcat)
    displacement_df = reduce(vcat, all_displacement_dfs)

    # Release the per-file DataFrames
    all_displacement_dfs = nothing

    # sort the displacement_df on Date ascending
    sort!(displacement_df, :Date)

    # Remove rows where the year is less than 1990
    filter!(row -> year(row.Date) >= 1990, displacement_df)

end



### Save the serialized output df to a gzip-compressed .bin file

In [None]:
using CodecZlib, Serialization

# Serialize the DataFrame and write it, gzip-compressed, to a file under .\Data\.
# The file name is built from the second-to-last "\"-separated component of
# directory_path and the dates of the first and last rows of displacement_df.
outfil = string(
    ".\\Data\\",
    split(directory_path, "\\")[end-1],
    "_",
    Dates.format(Date(displacement_df.Date[1]), "yyyy-mm-dd"),
    "_to_",
    Dates.format(Date(displacement_df.Date[end]), "yyyy-mm-dd"),
    ".bin",
)

open(outfil, "w") do raw_io
    compressed = GzipCompressorStream(raw_io)   # wrap the file in a gzip compressor
    serialize(compressed, displacement_df)      # write the serialized DataFrame through it
    close(compressed)                           # closing flushes the remaining compressed data
end

### Write the RDT data to a CSV file with JSON-encoded array columns

In [None]:
using JSON

# Output path: .\Data\<name>_<first date>_to_<last date>.csv, where <name> is the
# second-to-last "\"-separated component of directory_path.
JSon_file = ".\\Data\\" * split(directory_path,"\\")[end-1]*"_"*Dates.format(Date(displacement_df.Date[1]), "yyyy-mm-dd")*
    "_to_"*Dates.format(Date(displacement_df.Date[end]), "yyyy-mm-dd")*".csv"

println("Writing JSON-formatted data to ",JSon_file)
flush(stdout)

@time begin

    # Remove rows where the year is less than 1990
    filter!(row -> year(row.Date) >= 1990, displacement_df)

    # Convert array columns to JSON strings
    json_displacement_df = copy(displacement_df)

    for col in names(json_displacement_df)
        # names() yields column names as strings, so comparing against the Symbol
        # :Date never matched and the Date column was JSON-encoded as well.
        # Converting to Symbol first works for both string and Symbol names.
        Symbol(col) === :Date && continue
        json_displacement_df[!, col] = JSON.json.(displacement_df[!, col])
    end

    # Write the JSON-encoded DataFrame to a CSV
    CSV.write(JSon_file, json_displacement_df);

end

In [None]:
using Distributed

# Time a plain serial loop
@time begin
    for i in 1:1000
        print(i)
    end
end

addprocs(4)  # Adjust the number of processors

# Time the same kind of loop spread over the worker processes.
# @sync makes the block wait for the distributed loop to finish; without it the
# loop is only launched and @time would not cover the actual work.
@time begin
    @sync @distributed for i in 1:10
        print(i)
    end
end

### Plot spectra for each month in displacement_df

### Write data to HDF5 file

In [None]:
using DataFrames, HDF5, Serialization, Dates

# Function to serialize a vector of arrays
# Returns the serialized form of `col` as a vector of bytes.
function serialize_column(col)
    io = IOBuffer()
    serialize(io, col)
    return take!(io)
end

# Convert DateTime to Unix timestamp (in seconds)
dates_unix = [Dates.datetime2unix(d) for d in displacement_df.Date]

# Serialize the columns with arrays
heave_serialized = serialize_column(displacement_df.Heave)
north_serialized = serialize_column(displacement_df.North)
west_serialized = serialize_column(displacement_df.West)
f2_serialized = serialize_column(displacement_df.f2)
pden2_serialized = serialize_column(displacement_df.Pden2)

# Write to HDF5 file with chunked storage and compression.
# Every dataset is written as a single chunk covering the whole vector.
# NOTE(review): `compress=true` is passed together with `deflate=9`, exactly as
# before - check against the installed HDF5.jl version whether both are wanted.
h5open("displacement_df.h5", "w") do file
    for (dataset_name, data) in (
        "Date" => dates_unix,
        "Heave" => heave_serialized,
        "North" => north_serialized,
        "West" => west_serialized,
        "f2" => f2_serialized,
        "Pden2" => pden2_serialized,
    )
        dset = create_dataset(file, dataset_name, datatype(data), dataspace(data);
            chunk=length(data), compress=true, deflate=9)
        write(dset, data)
    end
end

println("Done!")

### Use Serialize to save df to .bin file

In [None]:
# Serialize the DataFrame (uncompressed) to a file under .\Data\.
# The file name is built from the second-to-last "\"-separated component of
# directory_path and the dates of the first and last rows of displacement_df.
# NOTE(review): this is the same path that the gzip-compressed save cell builds,
# so running both cells leaves only the last-written version - confirm this is intended.
outfil = ".\\Data\\" * split(directory_path,"\\")[end-1]*"_"*Dates.format(Date(displacement_df.Date[1]), "yyyy-mm-dd")*
    "_to_"*Dates.format(Date(displacement_df.Date[end]), "yyyy-mm-dd")*".bin"

println("Writing binary-formatted data to ",outfil)
flush(stdout)

open(outfil, "w") do io
    serialize(io, displacement_df)
end

### Use Serialize to read gzipped .bin file to df

In [None]:
using CodecZlib, Serialization, DataFrames
using NativeFileDialog

# Read one serialized object from the gzip-compressed stream `io` and return it.
#
# The decompressor stream wrapped around `io` is closed before returning, also
# when deserialization fails.
#
# Only use this on files from a trusted source: deserialize does not validate
# the data it reads.
function read_gzip_file(io)

    gz = GzipDecompressorStream(io)    # Create a Gzip decompressor stream

    try
        return(deserialize(gz))        # Deserialize the object from the decompressed stream
    finally
        close(gz)                      # Always close the decompressor stream
    end

end    # read_gzip_file()


#######################################################################################################
#######################################################################################################
#######################################################################################################


# Select the binary file
# The dialog is given <current working directory>\Data\ as its path argument and
# "*bin" as its filter list.
infil = pick_file(pwd()*"\\Data\\", filterlist="*bin")

println("Selected ", infil)

@time begin
    # Deserialize the DataFrame from the file
    # (open passes the opened file to read_gzip_file and closes it afterwards)
    displacement_df2 = open(read_gzip_file, infil, "r")
end

# Verify the contents
println(displacement_df2)


In [None]:
# Record timestamps of the DataFrame that was read back from the gzipped file;
# used below to fill the selection list and to look up the chosen record.
dates = displacement_df2.Date

using Dates, DSP
using NativeFileDialog, Plots
using Tk

# Plot a heave record together with its band-pass filtered ("long wave") component.
#
# Arguments:
#   start_date - DateTime of the record; used for the plot title
#   tt         - time stamps of the samples (x axis)
#   heave      - heave samples (y axis)
#
# The title is built from `start_date`, so the function no longer depends on the
# global `file_choice` being set by the caller.
function plot_long_wave(start_date,tt,heave)
############################################

    start_tt = first(tt)
    last_tt = last(tt)

    # 4th-order Butterworth band-pass
    # NOTE(review): the band edges .01 and .04 are passed without a sample rate;
    # confirm the units DSP expects here match the "> 25s" label below.
    responsetype = Bandpass(.01, .04)
    designmethod = Butterworth(4)
    long_heave = filt(digitalfilter(responsetype, designmethod), heave)

    # x-axis tick every 5 minutes, labelled MM:SS
    tm_tick = range(start_tt, last_tt, step=Minute(5))
    ticks = Dates.format.(tm_tick, "MM:SS")

    p1 = plot(tt, heave, label="")
    p1 = plot!(tt, long_heave, lw=3, lc=:red, label="Long waves > 25s")

    plot1 = plot(p1,
            xlabel="Time", xlim=(start_tt,last_tt), xticks=(tm_tick,ticks), xtickfontsize=7,
            ylabel="Heave (m)", tickfontsize=8,
            title=Dates.format(start_date, "yyyy-mm-ddTHH:MM:SS")*" Long waves", framestyle = :box,
            leftmargin = 15Plots.mm, bottommargin = 15Plots.mm, grid=true, size=(1200, 600), colorbar=false,
        gridlinewidth=0.5, gridstyle=:dot, gridalpha=1)

    display(plot1)

end    # plot_long_wave()


# Build a small Tk window that lists every record timestamp and plots the
# long-wave view of the record chosen by the user.

# Timestamps as text, one list entry per record
dates_array = Dates.format.(dates, "yyyy-mm-ddTHH:MM:SS")

# Top-level window holding one outer frame
w = Toplevel("Select Start Date", 235, 650)
tcl("pack", "propagate", w, false)
f = Frame(w)
pack(f, expand=true, fill="both")

# Inner frame with the list of timestamps and its scrollbars
f1 = Frame(f)
lb = Treeview(f1, dates_array)

scrollbars_add(f1, lb)
pack(f1,  expand=true, fill="both")

# "Ok" button below the list
tcl("ttk::style", "configure", "TButton", foreground="blue", font="arial 16 bold")
b = Tk.Button(f, "Ok")
pack(b)

println("Select a time from the menu!")
flush(stdout)

# Button callback: read the current selection, look up the matching record and plot it
bind(b, "command") do path
                    
    # the result of this first call is discarded
    get_value(lb);

    # stored as globals: plot_long_wave reads file_choice and the next cell reads start_date
    global file_choice = get_value(lb);
    global start_date = DateTime(file_choice[1])
    
    index = findfirst(==(start_date), dates)
    heave = displacement_df2.Heave[index]
    ll = length(heave)
    # NOTE(review): the time axis places the samples exactly 1 second apart
    # (1,000,000 microseconds per sample) - confirm against the recorded sample frequency
    tt = displacement_df2.Date[index] .+ Microsecond.(ceil.((0:ll-1) * 1000000))
    plot_long_wave(start_date,tt,heave)

end

In [None]:
using Wavelets, Plots

# Look up the record selected in the previous cell
index = findfirst(==(start_date), dates)
index === nothing && error("No record found for ", start_date)

heave = displacement_df2.Heave[index]
ll = length(heave)
tt = displacement_df2.Date[index] .+ Microsecond.(ceil.((0:ll-1) * 1000000))

# Perform the continuous wavelet transform using the Morlet wavelet
wavelet_coeffs = cwt(heave, wavelet(WT.morl))

# Get the dimensions of the wavelet coefficients matrix
# NOTE(review): the first dimension is treated as scales and the second as time;
# confirm this matches the layout returned by cwt.
num_scales, num_times = size(wavelet_coeffs)

# Generate a time vector (assuming a sampling rate of 1 for simplicity).
# Named time_index rather than `time` so that it does not clash with Base.time.
time_index = 1:num_times

# Generate a scale vector (for visualization purposes, these are just scale indices)
scales = 1:num_scales

# Plot the scalogram (magnitude of the wavelet coefficients)
heatmap(time_index, scales, abs.(wavelet_coeffs),
    xlabel = "Time",
    ylabel = "Scale",
    title = "Continuous Wavelet Transform",
    color = :viridis)

# Display the plot
plot!(size=(1000,600))

In [None]:
using DataFrames, HDF5, Dates

# Flatten nested arrays and store dimensions.
# Each array column is concatenated into one long vector; the matching *_dims
# vector holds the length of every row's array so the nesting can be rebuilt.
# reduce(vcat, col) is used instead of vcat(col...) to avoid splatting one
# argument per row.
flattened_heave = reduce(vcat, displacement_df.Heave)
heave_dims = Int32.(length.(displacement_df.Heave))

flattened_north = reduce(vcat, displacement_df.North)
north_dims = Int32.(length.(displacement_df.North))

flattened_west = reduce(vcat, displacement_df.West)
west_dims = Int32.(length.(displacement_df.West))

flattened_f2 = reduce(vcat, displacement_df.f2)
f2_dims = Int32.(length.(displacement_df.f2))

flattened_pden2 = reduce(vcat, displacement_df.Pden2)
pden2_dims = Int32.(length.(displacement_df.Pden2))

# Timestamps as Unix time
dates_unix = [Dates.datetime2unix(d) for d in displacement_df.Date]

# Write data to HDF5 (one dataset per vector, same names and order as before)
for (dataset_name, data) in (
    "Date" => dates_unix,
    "Heave" => flattened_heave,
    "Heave_dims" => heave_dims,
    "North" => flattened_north,
    "North_dims" => north_dims,
    "West" => flattened_west,
    "West_dims" => west_dims,
    "f2" => flattened_f2,
    "f2_dims" => f2_dims,
    "Pden2" => flattened_pden2,
    "Pden2_dims" => pden2_dims,
)
    h5write("dataframe2.h5", dataset_name, data)
end


In [None]:
using DataFrames, HDF5, Dates

# Read data from HDF5
# Each array column was stored as two datasets: the concatenated values and a
# "<name>_dims" vector with the length of every row's array.
dates_unix = h5read("dataframe2.h5", "Date")
flattened_heave = h5read("dataframe2.h5", "Heave")
heave_dims = h5read("dataframe2.h5", "Heave_dims")

flattened_north = h5read("dataframe2.h5", "North")
north_dims = h5read("dataframe2.h5", "North_dims")

flattened_west = h5read("dataframe2.h5", "West")
west_dims = h5read("dataframe2.h5", "West_dims")

flattened_f2 = h5read("dataframe2.h5", "f2")
f2_dims = h5read("dataframe2.h5", "f2_dims")

flattened_pden2 = h5read("dataframe2.h5", "Pden2")
pden2_dims = h5read("dataframe2.h5", "Pden2_dims")

# Reconstruct nested arrays
#
# Splits `flattened` into consecutive pieces whose lengths are given, in order,
# by `dims` and returns them as a vector of arrays. The result is typed after
# the pieces (e.g. Vector{Vector{Float64}}) instead of being a Vector{Any}, so
# columns built from it keep their element type.
function reconstruct_nested_array(flattened, dims)
    arrs = Vector{typeof(flattened[1:0])}()
    sizehint!(arrs, length(dims))
    start_idx = 1
    for dim in dims
        end_idx = start_idx + dim - 1
        push!(arrs, flattened[start_idx:end_idx])
        start_idx = end_idx + 1
    end
    return arrs
end

# Rebuild the per-record arrays from the flattened vectors and their lengths
heave = reconstruct_nested_array(flattened_heave, heave_dims)
north = reconstruct_nested_array(flattened_north, north_dims)
west = reconstruct_nested_array(flattened_west, west_dims)
f2 = reconstruct_nested_array(flattened_f2, f2_dims)
pden2 = reconstruct_nested_array(flattened_pden2, pden2_dims)

# Convert the stored Unix timestamps back to DateTime
dates = [Dates.unix2datetime(unix) for unix in dates_unix]

# Assemble the DataFrame with the same column names used when writing
df2 = DataFrame(Date=dates, Heave=heave, North=north, West=west, f2=f2, Pden2=pden2)

### Read data from HDF5 file

In [None]:
using DataFrames, HDF5, Serialization, Dates

# Function to deserialize a vector of arrays
# `data` is the byte vector produced by serializing a column; the deserialized
# object is returned.
# Only use this on files from a trusted source: deserialize does not validate
# the data it reads.
function deserialize_column(data)
    io = IOBuffer(data)
    return deserialize(io)
end

# Read from HDF5 file
dates_unix = h5read("displacement_df.h5", "Date")
heave_serialized = h5read("displacement_df.h5", "Heave")
north_serialized = h5read("displacement_df.h5", "North")
west_serialized = h5read("displacement_df.h5", "West")
f2_serialized = h5read("displacement_df.h5", "f2")
pden2_serialized = h5read("displacement_df.h5", "Pden2")

# Convert Unix timestamp back to DateTime
dates = [Dates.unix2datetime(d) for d in dates_unix]

# Deserialize the columns
heave = deserialize_column(heave_serialized)
north = deserialize_column(north_serialized)
west = deserialize_column(west_serialized)
f2 = deserialize_column(f2_serialized)
pden2 = deserialize_column(pden2_serialized)

# Reconstruct DataFrame
displacement_df = DataFrame(Date = dates, Heave = heave, North = north, West = west, f2 = f2, Pden2 = pden2)
