### Main Program

In [None]:
## Julia program to read a selected .BVA file and display 30-minute time series plots
## JW October 2022
#using ContinuousWavelets 
using Dates, DataFrames, Distributions, DSP
using Gtk
using LaTeXStrings
using NativeFileDialog
using Plots, Printf
using Statistics #, StatsPlots
#using Suppressor: @suppress
#using Wavelets

##import Pkg; Pkg.add("Suppressor")
## See https://github.com/JuliaIO/Suppressor.jl
##using Suppressor: @suppress

include("./read_BVA_processing_tools.jl")
include("./read_BVA_plotting_tools.jl")

"""
    get_matches(Data, f23_df)

Look up every entry of `f23_df.Match_vector` in `Data` and record the position of its
first occurrence in a new `Data_vector` column of `f23_df`.

# Arguments
- `Data`: iterable of hex strings; positions are counted from 1 with `enumerate`.
- `f23_df`: table with a `Match_vector` column. It is modified in place.

# Returns
The same `f23_df`, now carrying `Data_vector`; entries with no counterpart in `Data`
hold `nothing`.
"""
function get_matches(Data, f23_df)
##################################

    # Only the first occurrence of each hex string is ever consulted, so one index per
    # key is enough (no need to collect every position in a vector).
    first_index = Dict{String, Int}()
    for (i, hex_str) in enumerate(Data)
        get!(first_index, hex_str, i)    # leaves an existing (earlier) index untouched
    end

    # `nothing` marks a match vector that does not occur in Data.
    # The column stays `Any`-typed, exactly as before, so later code may still store
    # other kinds of value in it.
    matching_indices = Any[get(first_index, hex_str, nothing) for hex_str in f23_df.Match_vector]

    f23_df[!,"Data_vector"] = matching_indices

    return(f23_df)

end    # get_matches()


"""
    f23_first_row_check(f23_df)

Drop the first row of `f23_df` when it is stamped 23:00:00 and its `Data_vector` entry
is unusable (`missing`, `nothing` or `NaN`). In every other case `f23_df` is returned
unchanged.

An empty table is returned as is rather than raising an error.
"""
function f23_first_row_check(f23_df)
################################

    # Nothing to inspect - `first` would throw on an empty table
    size(f23_df, 1) == 0 && return(f23_df)

    first_row = first(f23_df)

    # Only a leading record stamped exactly 23:00:00 is a candidate for removal
    Time(first_row.Date) == Time(23, 0, 0) || return(f23_df)

    idx = first_row.Data_vector

    # `isnan` is only meaningful for numbers, so check the type before calling it
    unmatched = ismissing(idx) || isnothing(idx) || (idx isa Real && isnan(idx))

    return(unmatched ? f23_df[2:end, :] : f23_df)

end    # f23_first_row_check()


################################################
################################################
##           START OF MAIN PROGRAM
################################################
################################################

# Widen screen for better viewing
display("text/html", "<style>.container { width:100% !important; }</style>")

# Select a daily .BVA file
infil = pick_file("C:\\QGHL\\Wave_data\\Bris\\BVA\\", filterlist="*BVA");
println("Selected ",infil)

# Read the whole file and view it as unsigned 8-bit integers
println("Reading BINARY data from ",infil)
data = reinterpret(UInt8, read(infil));

# Turn the data vector into a matrix holding one 12-byte record per column - see DWTP 2.1 p.18
# Integer division keeps whole records only, so a file that ends part-way through a
# record no longer fails in `Int(...)` or `reshape`.
cols = 12
rows = length(data) ÷ cols
mat = reshape(view(data, 1:rows*cols), cols, :);

# Interleave bytes 10, 11 and 12 of every record to form the packet vector
## based on mschauer @ https://discourse.julialang.org/t/combining-two-arrays-with-alternating-elements/15498/2
packet = collect(Iterators.flatten(zip(mat[10,:],mat[11,:],mat[12,:])));

# find all occurrences of 0x7e in packet vector
aa = findall(==(0x7e), packet);

# Accumulators for the decoded bytes of each message type (filled by the decode loop)
f20_vals = []; f21_vals = []; f23_vals = []; f25_vals = []
f26_vals = []; f28_vals = []; f29_vals = []
f80_vals = []; f81_vals = []; f82_vals = []
fc1_vals = []; fc3_vals = []

# One table per message type. Every column starts out as an empty, untyped vector.

# 0x20 table: fixed columns followed by S0 .. S99
f20_df = DataFrame(Date = [], Segments = [], Smax = [])
for i in 0:99
    f20_df[!, "S$i"] = []
end

# 0x21 table: Dir0 .. Dir99 followed by Spread0 .. Spread99
f21_df = DataFrame(Date = [], Segments = [])
for prefix in ("Dir", "Spread"), i in 0:99
    f21_df[!, "$prefix$i"] = []
end

# 0x23 table
f23_df = DataFrame(Date = [], Segments = [], Match_vector = [], Sample_number = [])

# 0x25 table
f25_df = DataFrame(Date = [], Segments = [], Hs = [], Ti = [], Te = [], T1 = [], Tz = [],
    T3 = [], Tc = [], Rp = [], Tp = [], Smax = [], Theta_p = [], Sigma_p = [])

# 0x26 table
f26_df = DataFrame(Date = [], Hmax = [], Thmax = [], Tmax = [], Htmax = [], Havg = [],
    Tavg = [], Hsrms = [], Nw = [], Nc = [], Epsilon = [], Coverage = [])

# 0x28 table: m2_0 .. m2_99, then n2_0 .. n2_99, then k0 .. k99
f28_df = DataFrame(Date = [], Segments = [])
for prefix in ("m2_", "n2_", "k"), i in 0:99
    f28_df[!, "$prefix$i"] = []
end

# 0x29 table: fixed columns followed by Hq0 .. Hq22
f29_df = DataFrame(Date = [], Coverage = [], Nw = [], Epsilon = [], Hmax = [], THmax = [],
    H10 = [], TH10 = [], H3 = [], TH3 = [], Havg = [], Tavg = [])
for i in 0:22
    f29_df[!, "Hq$i"] = []
end

# 0x80, 0x81 and 0x82 tables
f80_df = DataFrame(Date = [], Latitude = [], Longitude = [])
f81_df = DataFrame(Date = [], SST = [])
f82_df = DataFrame(Date = [], Firmware = [], Speed = [], Direction = [], SST = [])

# 0xc1 table
fc1_df = DataFrame(Date = [], Firmware = [], Hatch_uid = [], Hull_uid = [], Uptime = [],
    Battery_energy = [], Boostcaps_energy = [], Hatch_temp = [], Battery_voltage = [],
    Batteries_per_section = [], Battery_section_number = [], Initial_battery_energy = [],
    Ov = [], Cv = [], Ox = [], Oy = [], Cx = [], Cy = [], Mu0 = [], Sigma0 = [], Mui = [],
    Sigmai = [], Muh = [], Sigmah = [], Cpitch = [], Croll = [], Tensor = [])

# 0xc3 table
fc3_df = DataFrame(Date = [], Battery_life = [])

# determine number of records
max_val = length(aa)-1

# Decode the packet data to messages
# refer to Section 2.1.2 Decoding the packet data to messages p. 20
messages = []

println("Processing the Packet vectors - this takes time!")
flush(stdout)
for i in 1:max_val

    # A packet is the run of bytes between two consecutive 0x7e markers.
    # (Named pkt_start/pkt_stop so that Base.first / Base.last are not shadowed.)
    pkt_start = aa[i]+1
    pkt_stop = aa[i+1]

    # ignore packets shorter than two bytes
    pkt_stop-pkt_start > 1 || continue

    decoded = packet[pkt_start:pkt_stop-1]

    # positions of the 0x7d escape marker
    bb = findall(==(0x7d), decoded)

    if !isempty(bb)

        # xor the byte following each marker with 0x20 - vide 2.1.2 p.20
        for ii in bb
            # a marker in the very last position has no following byte to restore
            ii < length(decoded) || continue
            decoded[ii+1] = decoded[ii+1] ⊻ 0x20
        end

        # remove the 0x7d markers themselves
        deleteat!(decoded, bb)

    end

    # removing the markers may leave too little to identify the message
    length(decoded) >= 2 || continue

    # the second byte selects the message type
    msg_id = decoded[2]

    if msg_id == 0x20

        # heave spectrum -> f20_df
        heave_spectrum = []
        append!(f20_vals,decoded)
        timestamp,segments,smax,heave_spectrum = process_f20(decoded,heave_spectrum)
        list_1 = [timestamp,segments,smax]
        push!(f20_df, [list_1; heave_spectrum])

    elseif msg_id == 0x21

        # direction and spread -> f21_df
        direction = []
        spread = []
        append!(f21_vals,decoded)
        timestamp,segments,direction,spread = process_f21(decoded,direction,spread)

        list1 = [timestamp,segments]
        list2 = [direction; spread]

        push!(f21_df, [list1; list2])

    elseif msg_id == 0x23

        # match vector and sample number -> f23_df
        append!(f23_vals,decoded)
        timestamp,segments_used,match_vector,sample_number = process_f23(decoded)
        push!(f23_df, [timestamp,segments_used,match_vector,sample_number])

    elseif msg_id == 0x25

        append!(f25_vals,decoded)
        timestamp,segments,hs,ti,te,t1,tz,t3,tc,rp,tp,smax,theta_p,sigma_p = process_f25(decoded)
        push!(f25_df, [timestamp,segments,hs,ti,te,t1,tz,t3,tc,rp,tp,smax,theta_p,sigma_p])

    elseif msg_id == 0x26

        append!(f26_vals,decoded)
        timestamp,hmax,thmax,tmax,htmax,havg,tavg,hsrms,nw,nc,epsilon,coverage = process_f26(decoded)
        push!(f26_df, [timestamp,hmax,thmax,tmax,htmax,havg,tavg,hsrms,nw,nc,epsilon,coverage])

    elseif msg_id == 0x28

        m2 = []
        n2 = []
        k = []
        append!(f28_vals,decoded)
        timestamp,segments,m2,n2,k = process_f28(decoded,m2,n2,k)

        list1 = [timestamp,segments]
        list2 = [m2; n2; k]

        push!(f28_df, [list1; list2])

    elseif msg_id == 0x29

        hq = []
        append!(f29_vals,decoded)
        timestamp,coverage,nw,epsilon,hmax,thmax,h10,th10,h3,th3,havg,tavg,hq = process_f29(decoded,hq)
        list_1 = [timestamp,coverage,nw,epsilon,hmax,thmax,h10,th10,h3,th3,havg,tavg]
        push!(f29_df, [list_1; hq])

    elseif msg_id == 0x80

        append!(f80_vals,decoded)
        timestamp,latitude,longitude = process_f80(decoded)
        push!(f80_df, [timestamp,latitude,longitude])

    elseif msg_id == 0x81

        append!(f81_vals,decoded)
        timestamp,sst = process_f81(decoded)
        push!(f81_df, [timestamp,sst])

    elseif msg_id == 0x82

        append!(f82_vals,decoded)
        timestamp,firmware,speed,direction,sst = process_f82(decoded)
        push!(f82_df, [timestamp,firmware,speed,direction,sst])

    elseif msg_id == 0xc1

        append!(fc1_vals,decoded)
        timestamp,firmware,hatch_uid,hull_uid,uptime,battery_energy,boostcaps_energy,hatch_temp,battery_voltage,
        batteries_per_section,battery_section_number,initial_battery_energy,ov,cv,ox,oy,cx,cy,mu0,sigma0,mui,sigmai,muh,
        sigmah,cpitch,croll,tensor = process_fc1(decoded)

        push!(fc1_df, [timestamp,firmware,hatch_uid,hull_uid,uptime,battery_energy,boostcaps_energy,hatch_temp,
            battery_voltage,batteries_per_section,battery_section_number,initial_battery_energy,ov,cv,ox,oy,cx,cy,mu0,
            sigma0,mui,sigmai,muh,sigmah,cpitch,croll,tensor])

    elseif msg_id == 0xc3

        append!(fc3_vals,decoded)
        timestamp,ble = process_fc3(decoded)
        push!(fc3_df, [timestamp,ble])

    end

end
    
# Drop duplicate rows from every message table
f20_df, f21_df, f23_df, f25_df, f26_df, f28_df, f29_df, f80_df, f81_df, f82_df, fc1_df, fc3_df =
    map(unique, (f20_df, f21_df, f23_df, f25_df, f26_df, f28_df, f29_df, f80_df, f81_df, f82_df, fc1_df, fc3_df))

## Render the first 9 bytes of every record as one 18-character hex string.
## These strings are what the match vectors of the 0x23 messages are looked up in.
hex_matrix = string.(mat'[:,1:9], base=16, pad=2)
Data = map(join, eachrow(hex_matrix));

println("All file data read!")
println("Preparing to plot data")
flush(stdout)

# Attach to each 0x23 row the position of its match vector within Data
f23_df = get_matches(Data, f23_df)

# Drop a leading 23:00 row that has no counterpart in Data
f23_df = f23_first_row_check(f23_df)

# Do time-series plot of available data
plot_f29(f29_df)

# Plot current speed and direction
##plot_f82(f82_df)

println("Select date from menu for more plots")

################################################
################################################
##           END OF MAIN PROGRAM
################################################
################################################

### get heave data for all available dates

In [None]:
"""
    pad_or_truncate(record, target_length=4608)

Return `record` brought to exactly `target_length` elements: a shorter record is
extended with `Float32` zeros, any other record is cut to its first `target_length`
elements.
"""
function pad_or_truncate(record, target_length=4608)
####################################################

    shortfall = target_length - length(record)

    if shortfall > 0
        return vcat(record, zeros(Float32, shortfall))
    end

    return record[1:target_length]

end    # pad_or_truncate()


"""
    get_heave(Data, f23_df)

Collect one heave record for every row of `f23_df` whose `Data_vector` entry is set and
for which `get_start_end_dates` reports a positive start position.

# Returns
`(X, dates)`: `X` holds one 4608-sample record per column (each record is passed through
`pad_or_truncate`) and `dates` holds the matching start dates. When no record qualifies,
`X` is an empty `4608×0` matrix.
"""
function get_heave(Data, f23_df)
################################

    heave_array = []
    X_date = []

    for idx in 1:nrow(f23_df)

        # rows whose match vector was not found in Data carry `nothing`
        isnothing(f23_df.Data_vector[idx]) && continue

        start_date, start_val, end_val = get_start_end_dates(f23_df,idx)
        start_val > 0 || continue

        print(".")    # progress marker, one per extracted record
        heave, north, west = get_hnw(Data,start_val,end_val)

        # ensure we have 4608 data points
        push!(heave_array,pad_or_truncate(heave, 4608))
        push!(X_date,start_date)

    end

    # `hcat()` without arguments yields an empty Vector rather than a matrix, so hand
    # column-indexing callers a well-formed empty matrix instead.
    isempty(heave_array) && return(zeros(Float32, 4608, 0), X_date)

    return(hcat(heave_array...), X_date)

end    # get_heave()


# Build the heave matrix (one record per column) together with the record start dates
X_train, X_date = get_heave(Data, f23_df);

In [None]:
using Statistics, Plots, Dates, Printf

# Select the Plotly backend for the plots drawn below
plotly()

# Widen screen for better viewing (styles the notebook's .jp-Cell elements)
display(HTML("<style>.jp-Cell { width: 120% !important; }</style>"))

"""
    calc_confidence_limits(data, confidence_interval)

Return `(lower, upper)`: the mean of `data` minus / plus `confidence_interval` times its
standard deviation.
"""
function calc_confidence_limits(data, confidence_interval)
####################################################################################

    centre = mean(data)
    half_width = confidence_interval * std(data)

    return(centre - half_width, centre + half_width)

end    # calc_confidence_limits()


"""
    modified_z_score(data, threshold)

Compute the modified z-score `0.6745 * (x - median) / MAD` of every element of `data`,
where MAD is the median absolute deviation from the median.

# Returns
`(outlier_indices, mod_z_scores)`: the indices whose absolute score exceeds `threshold`,
and the full vector of scores.
"""
function modified_z_score(data, threshold)
##########################################

    centre = median(data)
    deviations = data .- centre
    mad = median(abs.(deviations))

    mod_z_scores = 0.6745 * deviations ./ mad

    # indices where |score| is strictly greater than the threshold
    outlier_indices = findall(abs.(mod_z_scores) .> threshold)

    return(outlier_indices, mod_z_scores)

end    # modified_z_score()

"""
    dynamic_z_score_threshold(heave, base_threshold=3.0, k=0.5)

Scale `base_threshold` by `1 + k * (median(heave) / std(heave))` and return the result.
"""
function dynamic_z_score_threshold(heave, base_threshold=3.0, k=0.5)
####################################################################

    # ratio of the record's median to its standard deviation
    ratio = median(heave) / std(heave)

    return(base_threshold * (1 + k * ratio))

end    # dynamic_z_score_threshold()


# Plot the first ten heave records (fewer when the file holds fewer), marking possible
# outliers and drawing the confidence limits.
for ii in 1:min(10, length(X_date)) #length(X_date)

    # Initialize the plot
    start_time = X_date[ii]
    heave = X_train[:,ii]
    end_time = start_time + Minute(30)
    # Sample times: 4608 samples at 2.56 Hz, offsets in whole microseconds.
    # Rounding guards against a floating-point offset that is not exactly integral.
    xvals = start_time + Microsecond.(round.(Int, (0:4608-1) / 2.56 * 1000000))
    max_heave = maximum(heave)

    p1 = plot(size=(1200,300), framestyle = :box, fg_legend=:transparent, bg_legend=:transparent, legend=:topright,
        xtickfont=font(8), ytickfont=font(8),
        grid=true, gridlinewidth=0.125, gridstyle=:dot, gridalpha=1)

    # one tick per minute, labelled with the minute only
    tm_tick = range(start_time, end_time, step=Minute(1))
    ticks = Dates.format.(tm_tick,"MM")

#########################################################################################################################
##    confidence_interval = 2.576  # corresponds to a 99% confidence interval (for a normal distribution)
##    confidence_interval = 3.0  # corresponds to a 99.73% confidence interval (for a normal distribution)    
##    confidence_interval = 3.29  # corresponds to a 99.9% confidence interval (for a normal distribution)

    # Use dynamic threshold for modified z-score
    confidence_interval = dynamic_z_score_threshold(heave)
#########################################################################################################################    

    outlier_indices, mod_z_scores = modified_z_score(heave, confidence_interval)
    # NOTE(review): markers are drawn only when at least one flagged score is positive,
    # while the annotation below counts every flagged point - confirm this is intended.
    if !isempty(outlier_indices) && any(x -> x > 0, mod_z_scores[outlier_indices])
        scatter!(p1, xvals[outlier_indices], heave[outlier_indices], 
            markersize=4, markerstrokecolor=:red, markerstrokewidth=1, 
            markercolor=:white, markershape=:circle,
            label="") #"Possible Outliers")
    end

    confidence_limits = calc_confidence_limits(heave, confidence_interval)

    hline!(p1, [confidence_limits[1]], color=:red, lw=0.5, linestyle=:dash, label="") #"99% Confidence Limits")
    hline!(p1, [confidence_limits[2]], color=:red, lw=0.5, linestyle=:dash, label="")

    plot!(p1, xvals, heave, xlims=(xvals[1],xvals[end]), lw=0.5, lc=:blue, alpha=0.5, 
            xticks=(tm_tick, ticks), label=Dates.format(start_time, "yyyy-mm-dd HH:MM"))

    # Count the number of possible outliers
    num_outliers = length(outlier_indices)
    suspect_string = string("  ", num_outliers, " Possible outliers using Confidence Interval of ", @sprintf("%.2f", confidence_interval))

    # Annotate plot with the number of outliers
    annotate!(p1, xvals[1], max_heave * 0.9, text(suspect_string, :left, 10))

    display(p1)

end


In [None]:
using Statistics, Plots, Dates, Printf

# Select the Plotly backend for the plots drawn in this cell
plotly()

"""
    calc_confidence_limits(data, confidence_interval)

Return the pair `(lower, upper)` lying `confidence_interval` standard deviations below
and above the mean of `data`.
"""
function calc_confidence_limits(data, confidence_interval)
    mu = mean(data)
    margin = confidence_interval * std(data)
    lower_limit = mu - margin
    upper_limit = mu + margin
    return (lower_limit, upper_limit)
end

"""
    modified_z_score(data, threshold)

Return `(outlier_indices, mod_z_scores)`, where each score is
`0.6745 * (x - median(data)) / MAD` and the indices are those whose absolute score
exceeds `threshold`.
"""
function modified_z_score(data, threshold)
    centre = median(data)
    offsets = data .- centre
    mad = median(abs.(offsets))    # median absolute deviation from the median
    mod_z_scores = 0.6745 * offsets ./ mad
    flagged = abs.(mod_z_scores) .> threshold
    return findall(flagged), mod_z_scores
end

"""
    dynamic_z_score_threshold(heave, base_threshold=3.0, k=0.5)

Return `base_threshold * (1 + k * (mean(heave) / std(heave)))`.
"""
function dynamic_z_score_threshold(heave, base_threshold=3.0, k=0.5)
    # ratio of the record's mean to its standard deviation
    ratio = mean(heave) / std(heave)
    scale = 1 + k * ratio
    return base_threshold * scale
end

# Loop through the first ten wave records (fewer when the file holds fewer)
for ii in 1:min(10, length(X_date)) #length(X_date)
    # Initialize variables
    start_time = X_date[ii]
    heave = X_train[:, ii]
    end_time = start_time + Minute(30)
    # Sample times for all 4608 samples at 2.56 Hz, starting at offset 0.
    # The range must be parenthesised as (0:4608-1): `1:4608 .- 1` parses as `1:4607`,
    # which is one sample short and starts one sample late.
    xvals = start_time + Microsecond.(round.(Int, (0:4608-1) / 2.56 * 1000000))

    # Plot initialization
    p1 = plot(size=(1200, 300), framestyle=:box, fg_legend=:transparent, bg_legend=:transparent, 
        legend=:topright, xtickfont=font(8), ytickfont=font(8),
        grid=true, gridlinewidth=0.125, gridstyle=:dot, gridalpha=1)

    # one tick per minute, labelled with the minute only
    tm_tick = range(start_time, end_time, step=Minute(1))
    ticks = Dates.format.(tm_tick, "MM")

    # Calculate dynamic confidence interval
    confidence_interval = dynamic_z_score_threshold(heave)

    # Identify outliers using modified z-score
    outlier_indices, mod_z_scores = modified_z_score(heave, confidence_interval)
    if !isempty(outlier_indices)
        scatter!(p1, xvals[outlier_indices], heave[outlier_indices], 
            markersize=4, markerstrokecolor=:red, markerstrokewidth=1, 
            markercolor=:white, markershape=:circle, label="")
    end

    # Plot confidence limits
    confidence_limits = calc_confidence_limits(heave, confidence_interval)
    hline!(p1, [confidence_limits[1], confidence_limits[2]], color=:red, lw=0.5, linestyle=:dash, label="")

    # Plot heave data
    plot!(p1, xvals, heave, xlims=(xvals[1], xvals[end]), lw=0.5, lc=:blue, alpha=0.5, 
        xticks=(tm_tick, ticks), label=Dates.format(start_time, "yyyy-mm-dd HH:MM"))

    # Annotate plot with the number of outliers and confidence interval
    num_outliers = length(outlier_indices)
    suspect_string = string("  ", num_outliers, " Possible outliers using Confidence Interval of ", @sprintf("%.2f", confidence_interval))
    annotate!(p1, xvals[1], maximum(heave) * 0.9, text(suspect_string, :left, 10))

    display(p1)
end


In [None]:
# Show the start time of every extracted record as "yyyy-mm-dd HH:MM" text
Dates.format.(X_date, "yyyy-mm-dd HH:MM")

In [None]:
using Tk

# Build a Tk window (235 x 400) that lists every record start time
w = Toplevel("Select Date", 235, 400)
tcl("pack", "propagate", w, false)
f = Frame(w)
pack(f, expand=true, fill="both")

# Scrollable list holding the formatted start times
f1 = Frame(f)
lb = Treeview(f1, Dates.format.(X_date, "yyyy-mm-dd HH:MM"))
scrollbars_add(f1, lb)
pack(f1,  expand=true, fill="both")

# Style and add the "Ok" button
tcl("ttk::style", "configure", "TButton", foreground="blue", font="arial 16 bold")
b = Button(f, "Ok")
pack(b)

# Start times flagged by the user, in the order they were picked
bad_array = []

# Each press of "Ok" appends the first element of the current selection to bad_array.
# NOTE(review): presumably get_value returns the selected entries - confirm what it
# yields when nothing is selected, since file_choice[1] is indexed unconditionally.
bind(b, "command") do path
    
    global file_choice = get_value(lb);
    push!(bad_array,file_choice[1])


end

In [None]:
# Inspect the start times flagged so far
bad_array

In [None]:
# Inspect the heave matrix
X_train

In [None]:
# Preview the flagged start times parsed back into DateTime values
DateTime.(bad_array, "yyyy-mm-dd HH:MM")

In [None]:
# Convert bad_array to DateTime format
bad_dates = DateTime.(bad_array, "yyyy-mm-dd HH:MM")

# Find the records whose start time was flagged. The comparison uses the same
# "yyyy-mm-dd HH:MM" text the user picked from, so a start time carrying seconds still
# matches its flagged entry.
bad_cols = findall(d -> Dates.format(d, "yyyy-mm-dd HH:MM") in bad_array, X_date)

# Columns (and dates) to keep
keep_cols = setdiff(1:size(X_train, 2), bad_cols)

# Remove the flagged records from X_train AND from X_date, so that column ii of X_train
# still belongs to X_date[ii] and re-running this cell cannot delete further columns.
X_train = X_train[:, keep_cols]
X_date = X_date[keep_cols]

In [12]:
using DataFrames: DataFrame, ncol, nrow
using Dates: Dates, DateTime, unix2datetime, datetime2unix, Hour, Minute, Microsecond
using NativeFileDialog: pick_folder
using Statistics: median, std
using Tk: bind, Button, destroy, Frame, get_value, pack, scrollbars_add, tcl, Toplevel, Treeview
using Plots: Plots, plot, plot!, annotate!, vline!, @layout, text, plotly
using Printf: @sprintf

# Select the Plotly backend for the plots drawn by handle_selection()
plotly()

# Record geometry. The functions in this cell use the literals 4608 and 2.56 directly
# rather than reading these values.
rec_len = 4608
sample_frequency = 2.56 # sample frequency in Hertz
sample_length = 1800 # record length in seconds
sample_rate = Float64(1/sample_frequency) # sample spacing in seconds

#using Logging: NullLogger, with_logger

# Widen screen for better viewing
display("text/html", "<style>.container { width:100% !important; }</style>")

# Ask the user for the directory holding the .BVA files
bva_directory = pick_folder()

# build list of all bva files in selected directory
#bva_files = filter(x->occursin(".bva",x), readdir(bva_directory));
#bva_files = bva_files[findall(x->endswith(uppercase(x), ".bva"), bva_files)];

# Build list of all bva files (case-insensitive: handles both ".bva" and ".BVA")
bva_files = filter(x -> endswith(lowercase(x), ".bva"), readdir(bva_directory));

# Build a Tk window (235 x 800) that lists the files found
w = Toplevel("Select File", 235, 800)
tcl("pack", "propagate", w, false)
f = Frame(w)
pack(f, expand=true, fill="both")

# Scrollable list of file names
f1 = Frame(f)
lb = Treeview(f1, bva_files)
scrollbars_add(f1, lb)
pack(f1, expand=true, fill="both")

# Style and add the "Exit" button
tcl("ttk::style", "configure", "TButton", foreground="blue", font="arial 16 bold")
b = Button(f, "Exit")
pack(b)

# Holds the full path of the most recently selected file (written by handle_selection)
infil_ref = Ref("")


"""
    get_matches(Data, f23_df)

Look up every entry of `f23_df.Match_vector` in `Data` and record the position of its
first occurrence in a new `Data_vector` column of `f23_df`.

# Arguments
- `Data`: iterable of hex strings; positions are counted from 1 with `enumerate`.
- `f23_df`: table with a `Match_vector` column. It is modified in place.

# Returns
The same `f23_df`, now carrying `Data_vector`; entries with no counterpart in `Data`
hold `nothing`.
"""
function get_matches(Data, f23_df)
##################################

    # Only the first occurrence of each hex string is ever consulted, so one index per
    # key is enough (no need to collect every position in a vector).
    first_index = Dict{String, Int}()
    for (i, hex_str) in enumerate(Data)
        get!(first_index, hex_str, i)    # leaves an existing (earlier) index untouched
    end

    # `nothing` marks a match vector that does not occur in Data.
    # The column stays `Any`-typed, exactly as before, so later code may still store
    # other kinds of value in it.
    matching_indices = Any[get(first_index, hex_str, nothing) for hex_str in f23_df.Match_vector]

    f23_df[!,"Data_vector"] = matching_indices

    return(f23_df)

end    # get_matches()

"""
    process_f23(f23_vals)

Decode selected fields of a Spectrum synchronisation message (0xF23); refer to DWTP
(Ver. 16 January2019) Section 4.3 pp.43-44.

Every numeric field is formed by concatenating the bit patterns of consecutive elements
of `f23_vals` and reading the result as one base-2 integer.

# Returns
`(timestamp, segments_used, match_vector, sample_number)`:
- `timestamp`: elements 3-6 read as Unix seconds and converted with `unix2datetime`;
- `segments_used`: elements 9-11;
- `match_vector`: elements 14-22 as two-digit lower-case hex, joined into one string;
- `sample_number`: elements 12-13.
"""
function process_f23(f23_vals)
#######################################

    # Join the bit strings of the elements at `idxs` and parse them as a binary integer
    bits_to_int(idxs) = parse(Int, join(bitstring.(f23_vals[idxs])); base=2)

    # Timestamp in UTC. The Hour(0) offset is a placeholder for a time-zone shift
    # (e.g. to Australian Eastern Standard Time); as written it changes nothing.
    timestamp = unix2datetime(bits_to_int(3:6)) + Hour(0)  # Adjust this for the correct time zone

    # Data Stamp - decoded but not part of the returned values
    data_stamp = bits_to_int(7:8)

    segments_used = bits_to_int(9:11)
    sample_number = bits_to_int(12:13)

    # Match Vector: elements 14..22, each rendered as two hex digits
    match_vector = join(lpad(string(f23_vals[i], base=16), 2, "0") for i in 14:22)

    return(timestamp, segments_used, match_vector, sample_number)

end    #  process_f23()


"""
    get_hex_array(infil)

Read the binary file `infil`, split it into 12-byte records and return `(f23_df, Data)`.

- `Data` holds, for every record, its first 9 bytes rendered as an 18-character
  lower-case hex string.
- `f23_df` holds one row per distinct 0x23 message decoded from the packet stream formed
  by bytes 10-12 of every record (columns `Date`, `Segments`, `Match_vector`,
  `Sample_number`).

A trailing partial record (file length not a multiple of 12) is ignored.
"""
function get_hex_array(infil)
#############################

    # Read binary data from the input file
    println("Reading BINARY data from ", infil)
    data = reinterpret(UInt8, read(infil))

    # One 12-byte record per matrix column. Integer division keeps whole records only,
    # so a file that ends part-way through a record no longer fails in `reshape`.
    cols = 12
    n_records = length(data) ÷ cols
    mat = reshape(view(data, 1:n_records*cols), cols, :)

    # Hex text of the first 9 bytes of every record
    hex_matrix = string.(mat'[:,1:9], base=16, pad=2)
    Data = [join(row) for row in eachrow(hex_matrix)]

    println("All file data read!")

    # Interleave the last 3 matrix rows (bytes 10, 11, 12 of each record) to form the packet vector
    packet = collect(Iterators.flatten(zip(mat[10,:], mat[11,:], mat[12,:])))

    # Find all occurrences of 0x7e in the packet vector
    aa = findall(==(0x7e), packet)

    # Create DataFrame to hold the processed data
    f23_df = DataFrame(Date = DateTime[], Segments = Int[], Match_vector = String[], Sample_number = Int[])

    # Decode the packet data into messages
    for i in 1:length(aa) - 1

        # A packet is the run of bytes between two consecutive 0x7e markers.
        # (Named pkt_start/pkt_stop so that Base.first / Base.last are not shadowed.)
        pkt_start = aa[i] + 1
        pkt_stop = aa[i + 1]
        pkt_stop - pkt_start > 1 || continue

        decoded = packet[pkt_start:pkt_stop-1]

        # Handle the 0x7d escape sequences (XOR the following byte with 0x20)
        bb = findall(==(0x7d), decoded)
        for ii in bb
            # a marker in the very last position has no following byte to restore
            ii < length(decoded) || continue
            decoded[ii + 1] = decoded[ii + 1] ⊻ 0x20
        end
        deleteat!(decoded, bb)

        # removing the markers may leave too little to identify the message
        length(decoded) >= 2 || continue

        # If the message is F23 (0x23)
        if decoded[2] == 0x23
            timestamp, segments_used, match_vector, sample_number = process_f23(decoded)
            push!(f23_df, [timestamp, segments_used, match_vector, sample_number])
        end
    end

    # Remove duplicates from f23_df
    f23_df = unique(f23_df);

    return(f23_df, Data)

end    # get_hex_array()


"""
    calc_confidence_limits(data, confidence_interval)

Return `(lower, upper)`, the values `confidence_interval` standard deviations either
side of the mean of `data`.
"""
function calc_confidence_limits(data, confidence_interval)
##########################################################

    mu = mean(data)
    sigma = std(data)
    margin = confidence_interval * sigma

    return (mu - margin, mu + margin)

end    # calc_confidence_limits()


"""
    modified_z_score(data, threshold)

Compute `0.6745 * (x - median(data)) / MAD` for every element of `data` (MAD being the
median absolute deviation from the median) and return `(outlier_indices, mod_z_scores)`,
where the indices are those whose absolute score exceeds `threshold`.
"""
function modified_z_score(data, threshold)
##########################################

    centre = median(data)
    deviations = data .- centre
    mad = median(abs.(deviations))

    mod_z_scores = 0.6745 * deviations ./ mad
    outlier_indices = findall(abs.(mod_z_scores) .> threshold)

    return(outlier_indices, mod_z_scores)

end    # modified_z_score()


"""
    dynamic_z_score_threshold(heave, base_threshold=3.0, k=0.5)

Return `base_threshold * (1 + k * (mean(heave) / std(heave)))`.
"""
function dynamic_z_score_threshold(heave, base_threshold=3.0, k=0.5)

    # ratio of the record's mean to its standard deviation
    ratio = mean(heave) / std(heave)

    return(base_threshold * (1 + k * ratio))

end    # dynamic_z_score_threshold()


"""
    pad_or_truncate(record, target_length=4608)

Return `record` brought to exactly `target_length` elements: a shorter record is
extended with `Float32` zeros, any other record is cut to its first `target_length`
elements.
"""
function pad_or_truncate(record, target_length=4608)
####################################################

    missing_count = target_length - length(record)

    # long enough already: keep the leading target_length elements
    missing_count > 0 || return record[1:target_length]

    return vcat(record, zeros(Float32, missing_count))

end    # pad_or_truncate()


"""
    get_heave(Data, f23_df)

Collect one heave record for every row of `f23_df` whose `Data_vector` entry is set and
for which `get_start_end_dates` reports a positive start position.

# Returns
`(X, dates)`: `X` holds one 4608-sample record per column (each record is passed through
`pad_or_truncate`) and `dates` holds the matching start dates. When no record qualifies,
`X` is an empty `4608×0` matrix.
"""
function get_heave(Data, f23_df)
################################

    heave_array = []
    X_date = []

    for idx in 1:nrow(f23_df)

        # rows whose match vector was not found in Data carry `nothing`
        isnothing(f23_df.Data_vector[idx]) && continue

        start_date, start_val, end_val = get_start_end_dates(f23_df,idx)
        start_val > 0 || continue

        print(".")    # progress marker, one per extracted record
        heave, north, west = get_hnw(Data,start_val,end_val)

        # ensure we have 4608 data points
        push!(heave_array,pad_or_truncate(heave, 4608))
        push!(X_date,start_date)

    end

    # `hcat()` without arguments yields an empty Vector rather than a matrix, so hand
    # column-indexing callers a well-formed empty matrix instead.
    isempty(heave_array) && return(zeros(Float32, 4608, 0), X_date)

    return(hcat(heave_array...), X_date)

end    # get_heave()


"""
    handle_selection(infil_ref)

Callback for a double-click in the file list: store the chosen file's full path in
`infil_ref`, decode the file, and display a time-series plot (with possible outliers and
confidence limits) for up to the first ten heave records.

Reads the globals `lb` (the Treeview) and `bva_directory`. Returns `nothing`.
"""
function handle_selection(infil_ref)
####################################

    file_choice = get_value(lb)

    # Nothing selected (e.g. a double-click on empty space): nothing to open
    (file_choice === nothing || isempty(file_choice)) && return nothing

    # joinpath inserts the platform's separator and copes with a trailing one
    infil_ref[] = joinpath(bva_directory, file_choice[1])
    println("Selected ", infil_ref[])
    flush(stdout)

    infil = infil_ref[]

    f23_df, Data = get_hex_array(infil)

    f23_df = get_matches(Data, f23_df)

    # drop a leading 23:00 row that has no counterpart in Data
    f23_df = f23_first_row_check(f23_df)

    X_train, X_date = get_heave(Data, f23_df);

    # Loop through the first ten wave records (fewer when the file holds fewer)
    for ii in 1:min(10, length(X_date)) #length(X_date)

        # Initialize variables
        start_time = X_date[ii]
        heave = X_train[:, ii]
        end_time = start_time + Minute(30)
        # Sample times for all 4608 samples at 2.56 Hz, starting at offset 0.
        # The range must be parenthesised as (0:4608-1): `1:4608 .- 1` parses as
        # `1:4607`, which is one sample short and starts one sample late.
        xvals = start_time + Microsecond.(round.(Int, (0:4608-1) / 2.56 * 1000000))

        # Plot initialization
        p1 = plot(size=(1200, 300), framestyle=:box, fg_legend=:transparent, bg_legend=:transparent, 
            legend=:topright, xtickfont=font(8), ytickfont=font(8),
            grid=true, gridlinewidth=0.125, gridstyle=:dot, gridalpha=1)

        # one tick per minute, labelled with the minute only
        tm_tick = range(start_time, end_time, step=Minute(1))
        ticks = Dates.format.(tm_tick, "MM")

        # Calculate dynamic confidence interval
        confidence_interval = dynamic_z_score_threshold(heave)

        # Identify outliers using modified z-score
        outlier_indices, mod_z_scores = modified_z_score(heave, confidence_interval)
        if !isempty(outlier_indices)
            scatter!(p1, xvals[outlier_indices], heave[outlier_indices], 
                markersize=4, markerstrokecolor=:red, markerstrokewidth=1, 
                markercolor=:white, markershape=:circle, label="")
        end

        # Plot confidence limits
        confidence_limits = calc_confidence_limits(heave, confidence_interval)
        hline!(p1, [confidence_limits[1], confidence_limits[2]], color=:red, lw=0.5, linestyle=:dash, label="")

        # Plot heave data
        plot!(p1, xvals, heave, xlims=(xvals[1], xvals[end]), lw=0.5, lc=:blue, alpha=0.5, 
            xticks=(tm_tick, ticks), label=Dates.format(start_time, "yyyy-mm-dd HH:MM"))

        # Annotate plot with the number of outliers and confidence interval
        num_outliers = length(outlier_indices)
        suspect_string = string("  ", num_outliers, " Possible outliers using Confidence Interval of ", @sprintf("%.2f", confidence_interval))
        annotate!(p1, xvals[1], maximum(heave) * 0.9, text(suspect_string, :left, 10))

        display(p1)

    end

    return nothing

end    # handle_selection()


# Close the selection window `w`; invoked when the Exit button is pressed
exit_callback() = destroy(w)

###############################################################################
###############################################################################
###############################################################################

# Exit button: close the selection window
bind(path -> exit_callback(), b, "command")

# Double-click on a Treeview row: decode and plot the chosen file
bind(event -> handle_selection(infil_ref), lb, "<Double-1>")


""

Selected F:\Card Data\Brisbane\Brisbane_2020-2021\20210101.BVA
Reading BINARY data from F:\Card Data\Brisbane\Brisbane_2020-2021\20210101.BVA
All file data read!
error during Tk callback: 


[91m[1mERROR: [22m[39mArgumentError: column name :Data_vector not found in the data frame
Stacktrace:
  [1] [0m[1mlookupname[22m
[90m    @[39m [90mC:\Users\Jim\.julia\packages\DataFrames\kcA9R\src\other\[39m[90m[4mindex.jl:431[24m[39m[90m [inlined][39m
  [2] [0m[1mgetindex[22m
[90m    @[39m [90mC:\Users\Jim\.julia\packages\DataFrames\kcA9R\src\other\[39m[90m[4mindex.jl:440[24m[39m[90m [inlined][39m
  [3] [0m[1mgetindex[22m[0m[1m([22m[90mdf[39m::[0mDataFrame, ::[0mtypeof(!), [90mcol_ind[39m::[0mSymbol[0m[1m)[22m
[90m    @[39m [32mDataFrames[39m [90mC:\Users\Jim\.julia\packages\DataFrames\kcA9R\src\dataframe\[39m[90m[4mdataframe.jl:557[24m[39m
  [4] [0m[1mgetproperty[22m
[90m    @[39m [90mC:\Users\Jim\.julia\packages\DataFrames\kcA9R\src\abstractdataframe\[39m[90m[4mabstractdataframe.jl:448[24m[39m[90m [inlined][39m
  [5] [0m[1mget_heave[22m[0m[1m([22m[90mData[39m::[0mVector[90m{String}[39m, [90mf23_df[39m:

In [None]:
using Statistics, DataFrames, Dates

# convert binary data into F23_df and Hex array
function get_hex_array(infil)
#############################
    # Read the binary file `infil` as consecutive 12-byte records and return `(f23_df, Data)`:
    #   Data   - one 18-character hex string per record, built from bytes 1-9 of the record
    #   f23_df - one row per unique 0x23 message found in the byte stream formed by
    #            bytes 10-12 of every record (messages are delimited by 0x7e bytes)

    # Read binary data from the input file
    println("Reading BINARY data from ", infil)
    data = reinterpret(UInt8, read(infil))

    # Turn the data vector into a 12 x N matrix (one column per record).
    # A partial trailing record cannot be reshaped, so it is dropped with a warning.
    cols = 12
    usable = length(data) - rem(length(data), cols)
    if usable != length(data)
        @warn "Ignoring incomplete trailing record" bytes_dropped = length(data) - usable
    end
    mat = reshape(view(data, 1:usable), cols, :)

    # Hex representation of bytes 1-9 of every record
    hex_matrix = string.(mat'[:,1:9], base=16, pad=2)
    Data = [join(row) for row in eachrow(hex_matrix)]

    println("All file data read!")

    # Interleave matrix rows 10, 11 and 12 to form the packet vector
    packet = collect(Iterators.flatten(zip(mat[10,:], mat[11,:], mat[12,:])))

    # Find all occurrences of the 0x7e delimiter in the packet vector
    aa = findall(==(0x7e), packet)

    # Create DataFrame to hold the processed data
    f23_df = DataFrame(Date = DateTime[], Segments = Int[], Match_vector = String[], Sample_number = Int[])

    # Decode the bytes between each pair of consecutive delimiters
    for i in 1:length(aa)-1
        msg_start = aa[i] + 1
        msg_end = aa[i + 1]

        if msg_end - msg_start > 1
            decoded = packet[msg_start:msg_end-1]

            # Handle the 0x7d escape sequences (XOR the following byte with 0x20).
            # An escape byte in the last position has nothing to modify and is only removed.
            bb = findall(==(0x7d), decoded)
            for ii in bb
                if ii < length(decoded)
                    decoded[ii + 1] = decoded[ii + 1] ⊻ 0x20
                end
            end
            deleteat!(decoded, bb)

            # If the message is F23 (0x23); removing escapes may leave fewer than 2 bytes
            if length(decoded) >= 2 && decoded[2] == 0x23
                timestamp, segments_used, match_vector, sample_number = process_f23(decoded)
                push!(f23_df, [timestamp, segments_used, match_vector, sample_number])
            end
        end
    end

    # Remove duplicates from f23_df
    f23_df = unique(f23_df);

    return(f23_df, Data)

    end    # get_hex_array()

f23_df, Data = get_hex_array(infil);

In [None]:
using Dates, DataFrames, DSP, NativeFileDialog, Plots, Statistics, Printf

# Include necessary functions
include("./read_BVA_processing_tools.jl")
include("./read_BVA_plotting_tools.jl")

# Optimized function to get matches
# Adds a "Data_vector" column to f23_df: for every entry of f23_df.Match_vector it holds the
# index of the FIRST element of Data equal to it (the rule get_matches() applies), or
# `nothing` when Data does not contain it. Returns the same (mutated) f23_df.
function get_matches_optimized(Data, f23_df)
    index_dict = Dict{String, Int}()
    for (i, hex_str) in enumerate(Data)
        # get! only inserts when the key is absent, so repeated strings keep their first index
        get!(index_dict, hex_str, i)
    end

    f23_df[!,"Data_vector"] = [get(index_dict, hex_str, nothing) for hex_str in f23_df.Match_vector]
    return f23_df
end

# Optimized function to check the first row of f23_df
# Returns f23_df without its first row when that row is stamped 23:00:00 and its
# Data_vector entry is `missing` or `nothing`; otherwise returns f23_df unchanged.
# An empty frame has no first row to inspect and is returned as-is.
function f23_first_row_check_optimized(f23_df)
    isempty(f23_df) && return f23_df

    first_row = first(f23_df)
    # Data_vector is only inspected when the time matches (same evaluation order as before)
    if Time(first_row.Date) == Time(23, 0, 0) &&
       (ismissing(first_row.Data_vector) || isnothing(first_row.Data_vector))
        f23_df = f23_df[2:end, :]
    end
    return f23_df
end

# Split `packet` into messages using the delimiter positions in `aa`, undo the 0x7d escape
# sequences, and hand each message to the process_fXX! routine selected by its second byte.
# Each fXX_df argument is passed straight through to the matching routine; messages with
# any other second byte are ignored. Returns `nothing`.
function process_packets(packet, aa, f20_df, f21_df, f23_df, f25_df, f26_df, f28_df, f29_df, f80_df, f81_df, f82_df, fc1_df, fc3_df)
    # No @inbounds here: the indices below depend on the data and are not provably in range
    for i in 1:length(aa)-1
        msg_start, msg_end = aa[i]+1, aa[i+1]
        msg_end - msg_start > 1 || continue

        decoded = packet[msg_start:msg_end-1]

        # Escape handling: the byte after each 0x7d is XORed with 0x20, then the 0x7d bytes
        # are removed. An escape byte in the final position has no following byte to modify.
        bb = findall(==(0x7d), decoded)
        for ii in bb
            if ii < length(decoded)
                decoded[ii+1] = decoded[ii+1] ⊻ 0x20
            end
        end
        deleteat!(decoded, bb)

        # Removing escape bytes can leave a message too short to carry a header byte
        length(decoded) >= 2 || continue

        header = decoded[2]
        if header == 0x20
            process_f20!(decoded, f20_df)
        elseif header == 0x21
            process_f21!(decoded, f21_df)
        elseif header == 0x23
            process_f23!(decoded, f23_df)
        elseif header == 0x25
            process_f25!(decoded, f25_df)
        elseif header == 0x26
            process_f26!(decoded, f26_df)
        elseif header == 0x28
            process_f28!(decoded, f28_df)
        elseif header == 0x29
            process_f29!(decoded, f29_df)
        elseif header == 0x80
            process_f80!(decoded, f80_df)
        elseif header == 0x81
            process_f81!(decoded, f81_df)
        elseif header == 0x82
            process_f82!(decoded, f82_df)
        elseif header == 0xc1
            process_fc1!(decoded, fc1_df)
        elseif header == 0xc3
            process_fc3!(decoded, fc3_df)
        end
    end
    return nothing
end

# Main Program
# Let the user pick a .BVA file, decode every message it contains into one DataFrame per
# message type, attach the Data index of each F23 row, and plot the F29 results.
function main()

    infil = pick_file("C:\\QGHL\\Wave_data\\Bris\\BVA\\", filterlist="*BVA")
    # NOTE(review): assumes pick_file returns an empty string when the dialog is cancelled - confirm
    isempty(infil) && return nothing

    data = reinterpret(UInt8, read(infil))

    # 12 x N matrix: one column per 12-byte record
    cols = 12
    mat = reshape(view(data, :), cols, :)
    packet = collect(Iterators.flatten(zip(mat[10,:], mat[11,:], mat[12,:])))
    aa = findall(==(0x7e), packet)

    f20_df = DataFrame(Date = [], Segments = [], Smax = [])
    f21_df = DataFrame(Date = [], Segments = [])
    f23_df = DataFrame(Date = [], Segments = [], Match_vector = [], Sample_number = [])
    f25_df = DataFrame(Date = [], Segments = [], Hs = [], Ti = [], Te = [], T1 = [], Tz = [], T3 = [], Tc = [], Rp = [], Tp = [], Smax = [], Theta_p = [], Sigma_p = [])
    f26_df = DataFrame(Date = [], Hmax = [], Thmax = [], Tmax = [], Htmax = [], Havg = [], Tavg = [], Hsrms = [], Nw = [], Nc = [], Epsilon = [], Coverage = [])
    f28_df = DataFrame(Date = [], Segments = [])
    f29_df = DataFrame(Date = [], Coverage = [], Nw = [], Epsilon = [], Hmax = [], THmax = [], H10 = [], TH10 = [], H3 = [], TH3 = [], Havg = [], Tavg = [])
    f80_df = DataFrame(Date = [], Latitude = [], Longitude = [])
    f81_df = DataFrame(Date = [], SST = [])
    f82_df = DataFrame(Date = [], Firmware = [], Speed = [], Direction = [], SST = [])
    fc1_df = DataFrame(Date = [], Firmware = [], Hatch_uid = [], Hull_uid = [], Uptime = [], Battery_energy = [], Boostcaps_energy = [], Hatch_temp = [], Battery_voltage = [], Batteries_per_section = [], Battery_section_number = [], Initial_battery_energy = [], Ov = [], Cv = [], Ox = [], Oy = [], Cx = [], Cy = [], Mu0 = [], Sigma0 = [], Mui = [], Sigmai = [], Muh = [], Sigmah = [], Cpitch = [], Croll = [], Tensor = [])
    fc3_df = DataFrame(Date = [], Battery_life = [])

    process_packets(packet, aa, f20_df, f21_df, f23_df, f25_df, f26_df, f28_df, f29_df, f80_df, f81_df, f82_df, fc1_df, fc3_df)

    f23_df = unique!(f23_df)

    # One hex string per RECORD, built from bytes 1-9 of that record. Records are the
    # columns of `mat`, so iterate columns of rows 1:9 (mat[:,1:9] would instead take all
    # 12 bytes of the first nine records).
    hex_data = [join(string.(col, base=16, pad=2)) for col in eachcol(view(mat, 1:9, :))]
    f23_df = get_matches_optimized(hex_data, f23_df)
    f23_df = f23_first_row_check_optimized(f23_df)

    return plot_f29(f29_df)
end

# Run the main program
main()


In [None]:
using DataFrames

function process_f20!(f20_vals, heave_spectrum_df)
    #######################################
    # Decode one 0xF20 message (`f20_vals`, a vector of bytes) into a timestamp, the number
    # of segments used, Smax and 100 heave-spectrum values, and push a
    # (Date, Segments, Smax) row onto `heave_spectrum_df`.
    # Refer to DWTP (Ver. 16 January 2019) Section 4.8 pp.56-61
    #
    # Returns (timestamp, segments, smax, heave_spectrum). heave_spectrum always has 100
    # entries; entries whose bytes lie beyond the end of the message are `missing`.

    # Get Timestamp in UTC - refer Section 3.2 HF link header pp. 25-26
    # Bytes 3-6 form a 32-bit big-endian count of seconds since the Unix epoch
    unix_seconds = (Int(f20_vals[3]) << 24) | (Int(f20_vals[4]) << 16) | (Int(f20_vals[5]) << 8) | Int(f20_vals[6])
    timestamp = unix2datetime(unix_seconds)

    # Time-zone offset; it is currently zero hours, so the timestamp is left unchanged
    timestamp = timestamp + Hour(0)
    println(timestamp)

    # Get Number of Segments Used
    segments = Int(f20_vals[9])

    # Common denominator of the exponential scaling used for Smax and the spectrum
    denominator = exp(4094/200) - 1

    # Get Smax: 12-bit value = byte 10 followed by the upper 4 bits of byte 11
    smax_raw = (Int(f20_vals[10]) << 4) | (Int(f20_vals[11]) >> 4)
    smax = 5000 * (exp(smax_raw / 200) - 1) / denominator
    println(smax)

    # Obtain the heave_spectrum from s0 to s99: every 3 bytes hold two 12-bit values
    heave_spectrum = Union{Float64, Missing}[]
    for ii in 12:3:159
        if checkbounds(Bool, f20_vals, ii:ii+2)
            b1, b2, b3 = Int(f20_vals[ii]), Int(f20_vals[ii+1]), Int(f20_vals[ii+2])
            raw1 = (b1 << 4) | (b2 >> 4)        # byte ii + upper nibble of byte ii+1
            raw2 = ((b2 & 0x0f) << 8) | b3      # lower nibble of byte ii+1 + byte ii+2

            push!(heave_spectrum, (exp(raw1 / 200) - 1) / denominator)
            push!(heave_spectrum, (exp(raw2 / 200) - 1) / denominator)
        else
            # Message too short for this pair of values
            push!(heave_spectrum, missing)
            push!(heave_spectrum, missing)
        end
    end

    # Push a row with timestamp, segments, and Smax to the DataFrame
    push!(heave_spectrum_df, (Date=timestamp, Segments=segments, Smax=smax))

    return (timestamp, segments, smax, heave_spectrum)
end


# Pick a .BVA file and decode only its 0x20 messages into f20_df
infil = pick_file("C:\\QGHL\\Wave_data\\Bris\\BVA\\", filterlist="*BVA")
data = reinterpret(UInt8, read(infil))

# 12 x N matrix: one column per 12-byte record; rows 10-12 carry the message byte stream
cols = 12
mat = reshape(view(data, :), cols, :)
packet = collect(Iterators.flatten(zip(mat[10,:], mat[11,:], mat[12,:])))
aa = findall(==(0x7e), packet)

f20_df = DataFrame(Date = DateTime[], Segments = Int[], Smax = Float64[])

max_val = length(aa) - 1
# No @inbounds: the indices below depend on the data. The loop-local names also avoid
# reusing `first`/`last`, which other cells call as functions.
for i in 1:max_val
    msg_start, msg_end = aa[i]+1, aa[i+1]
    if msg_end - msg_start > 1
        decoded = packet[msg_start:msg_end-1]

        # Undo 0x7d escapes; an escape byte in the last position has no byte to modify
        bb = findall(==(0x7d), decoded)
        for ii in bb
            if ii < length(decoded)
                decoded[ii+1] = decoded[ii+1] ⊻ 0x20
            end
        end
        deleteat!(decoded, bb)

        # Removing escapes may leave fewer than the 2 bytes needed to read the header
        if length(decoded) >= 2 && decoded[2] == 0x20
            println(decoded)
            process_f20!(decoded, f20_df)
        end
    end
end 

In [None]:
# De-duplicate the decoded F20 rows and plot Smax against time.
# NOTE(review): this rebinds the global `aa`, which other cells use for the 0x7e delimiter
# positions - confirm nothing is re-run afterwards that expects the old meaning.
aa = unique(f20_df)
plot(aa.Date,aa.Smax)

In [None]:
@time begin

    # Hex text of bytes 1-9 of every record, joined into one string per record
    hex_matrix = string.(mat'[:,1:9], base=16, pad=2)
    result_vector = map(join, eachrow(hex_matrix))

    # Each record string is cut in the middle and both halves are stored in order
    split_vector = String[]
    for hex_str in result_vector
        half_length = div(length(hex_str), 2)
        append!(split_vector, (hex_str[1:half_length], hex_str[half_length+1:end]))
    end

end

In [None]:
# Scratch cell: work out the Data index range belonging to row `val` of f23_df
val = 3
# Index in Data stored for this F23 row; used as the last sample of the record
end_val = f23_df.Data_vector[val]
# First sample of the record: half of Sample_number entries before end_val (inclusive range)
start_val = end_val - div(f23_df.Sample_number[val],2) + 1
# NOTE(review): this takes the whole Date column, not the entry for `val` - confirm intent
start_time = f23_df.Date

### Don't use this - but, keep it anyway

In [None]:
# Keep entries whose Data_vector is neither missing, nothing nor NaN.
# NOTE(review): the second argument is first(f23_df), i.e. only the first row rather than
# the whole frame - confirm this is intended before reusing the cell.
filter(:Data_vector => x -> !(ismissing(x) || isnothing(x) || isnan(x)), first(f23_df))

In [None]:
# Cut every string of result_vector at half the length of Data[1] and store both parts
split_vector = String[]

for hex_str in result_vector
    half_length = div(length(Data[1]), 2)
    append!(split_vector, (hex_str[1:half_length], hex_str[half_length+1:end]))
end

In [None]:
function get_matches(Data, f23_df)
##################################
    # Adds a "Data_vector" column to f23_df and returns f23_df. For every string in
    # f23_df.Match_vector the column holds the index of its first occurrence in Data,
    # or `nothing` when Data does not contain it.

    # First position at which each hex string occurs in Data
    first_index = Dict{String, Int}()
    for (position, hex_str) in enumerate(Data)
        get!(first_index, hex_str, position)    # later repeats leave the entry untouched
    end

    # Look up every match vector; the element type is left open so `nothing` can be stored
    matching_indices = Any[get(first_index, hex_str, nothing) for hex_str in f23_df.Match_vector]

    f23_df[!,"Data_vector"] = matching_indices

    return(f23_df)

end    # get_matches()

### Write contents of df's to .csv files

In [None]:
using CSV

# Output files are named after the input file itself: the last component of the
# backslash-separated path, up to its first "." (e.g. "...\\20210101.BVA" -> "20210101").
# A fixed component position would pick up a folder name for most paths.
outfil = first(split(last(split(infil, "\\")), "."))
df = [f20_df, f21_df, f23_df, f25_df, f26_df, f28_df, f29_df, f80_df, f81_df, f82_df, fc1_df, fc3_df]
fx = ["f20_df","f21_df","f23_df","f25_df","f26_df","f28_df","f29_df","f80_df","f81_df","f82_df","fc1_df","fc3_df"]

for (frame, label) in zip(df, fx)

    outfil_fx = ".\\"*outfil*"_"*label*".csv"
    println(outfil_fx)
    if label == "f23_df"
        # Write -99 where no match was found, but on a copy: the in-memory f23_df keeps
        # its `nothing` entries, which other cells test with isnothing()
        frame = copy(frame)
        frame[!, "Data_vector"] = replace(frame.Data_vector, nothing => -99)
    end
    CSV.write(outfil_fx, frame)

end

In [None]:
using Dates, DataFrames, Distributions, DSP
using Gtk
using LaTeXStrings
using NativeFileDialog
using Plots, Printf

# Widen screen for better viewing
display("text/html", "<style>.container { width:100% !important; }</style>")

csv_directory = pick_folder()

# build list of all csv files in selected directory
csv_files = filter(x->occursin(".csv",x), readdir(csv_directory));

# First file whose name ends in .CSV (any case) and contains "{0xF26}". The position
# returned by findfirst refers to the filtered candidate list, so index that same list.
f26_file = let candidates = filter(x -> endswith(uppercase(x), ".CSV"), csv_files)
    k = findfirst(contains("{0xF26}"), candidates)
    k === nothing ? error("No {0xF26} .csv file found in " * csv_directory) : candidates[k]
end;



# Overlay four F26 columns against time on a single set of axes
p1 = plot(f26_df.Date, f26_df.Hmax, lc=:green, lw=:4, label="Hmax")
p1 = plot!(f26_df.Date, f26_df.Thmax, lc=:yellow, lw=:4, label="THmax")    
p1 = plot!(f26_df.Date, f26_df.Tmax, lc=:blue, lw=:4, label="Tmax")    
p1 = plot!(f26_df.Date, f26_df.Htmax, lc=:pink, lw=:4, label="Htmax") 

plot(p1,layout=(4,1),size=(1800,800))

In [None]:
function get_hnw(Data,start_val,end_val,record_length=4608)
######################################## 
    # Return (heave, north, west) for Data[start_val:end_val], each forced to exactly
    # `record_length` samples (default 4608): longer series are truncated, shorter ones
    # are padded with zeros at the end.

    # get WSEs for desired 30-minute record
    segment = Data[start_val:end_val,:]
    heave = get_displacement(segment, 1, 3)
    north = get_displacement(segment, 4, 6)
    west = get_displacement(segment, 7, 9)

    # Check for missing or extra points in data.
    # Both branches modify the vectors in place; assigning a slice to the loop variable
    # would only rebind that variable and leave heave/north/west at their original length.
    for wse in (heave, north, west)

        wse_length = length(wse)

        if wse_length > record_length

            # truncate if too long
            resize!(wse, record_length)

        elseif wse_length < record_length

            # zero pad if too short
            append!(wse, zeros(record_length - wse_length))

        end

    end

    return (heave, north, west)

end    # get_hnw()


function get_start_end_dates(df1,idx)
    # For row idx[1] of df1 return (start_date, start_val, end_val):
    #   start_date - the row's Date
    #   end_val    - the row's Data_vector entry
    #   start_val  - end_val minus half of Sample_number (integer division), plus 1
    # Throws ArgumentError when the row has no Data_vector entry.
    # NOTE(review): an earlier comment mentioned subtracting 30 minutes from start_date to
    # match Waves4 results; no subtraction is performed here - confirm which is intended.
    row = df1[idx[1],:]
    start_date = row.Date
    sample_nos = row.Sample_number
    data_vector = row.Data_vector

    if data_vector === nothing || ismissing(data_vector)
        throw(ArgumentError("row $(idx[1]) has no Data_vector entry, so its record cannot be located"))
    end

    # Integer division also copes with an odd sample count, where Int(n/2) would throw
    start_val = data_vector - Int(div(sample_nos, 2)) + 1
    end_val = data_vector

    return(start_date,start_val, end_val)

end    #(get_start_end_dates)


function plot_hnw(df1,df2,Data,idx)
######################################## 
    # Plot the heave, north and west time series of the record selected by `idx` as three
    # stacked panels and display the figure.
    #   df1  - frame read through get_start_end_dates() (Date, Sample_number, Data_vector)
    #   df2  - frame whose first row supplies the Firmware, Hatch_uid and Hull_uid labels
    #   Data - per-sample data passed on to get_hnw()
    # Samples whose magnitude reaches median + 3 standard deviations are ringed in red.

    # Spike threshold for one series
    spike_value(wse) = median(wse) + 3*std(wse)

    # Positions of the samples at or beyond the spike threshold
    spike_indices(wse) = findall(>=(spike_value(wse)), abs.(wse))

    println("Preparing to plot heave, north, and west time series")
    # Extract parameters from F23 df
    start_date, start_val, end_val = get_start_end_dates(df1,idx)
    println(idx," ",start_date, " ", start_val, " ", end_val)

    # get WSEs for desired 30-minute record
    heave, north, west = get_hnw(Data,start_val,end_val)

    heave_spikes = spike_indices(heave)
    north_spikes = spike_indices(north)
    west_spikes = spike_indices(west)

    # time stamp each WSE: samples are spaced 1/2.56 s apart starting at start_date
    points = collect(0:1:length(heave)-1)/2.56
    times = [unix2datetime(datetime2unix(start_date) + p) for p in points]

    # create plots of heave, north, and west
    title_string = Dates.format(start_date, "dd/mm/yyyy HH:MM")
    p1_hnw = Plots.scatter(times[heave_spikes], heave[heave_spikes], label="", markershape=:circle, ms=4, mc=:white, ma=1, msc=:red, msa=0.25, msw=0.5)
    p1_hnw = plot!(times,heave, label="", c="#4a536b", lw=0.5, title=title_string, titlefontsize=12) ##last(split(infil,"\\")))

    # get plotting limits, used to place the annotations just above the heave panel
    x_lim1, x_lim2 = xlims(p1_hnw)
    y_lim2 = ylims(p1_hnw)[2]

    x_pos = x_lim1 + abs(x_lim2-x_lim1)*0.02
    p1_hnw = annotate!(x_pos, y_lim2*1.1, Plots.text("Firmware ver. = " * string(df2.Firmware[1]), :grey, :left, 7))
    x_pos = x_lim1 + abs(x_lim2-x_lim1)*0.13
    p1_hnw = annotate!(x_pos, y_lim2*1.1, Plots.text("Hatch UID = " * string(df2.Hatch_uid[1]), :grey, :left, 7))
    x_pos = x_lim1 + abs(x_lim2-x_lim1)*0.26
    p1_hnw = annotate!(x_pos, y_lim2*1.1, Plots.text("Hull UID = " * string(df2.Hull_uid[1]), :grey, :left, 7))

    p2_hnw = Plots.scatter(times[north_spikes], north[north_spikes], label="", markershape=:circle, ms=4, mc=:white, ma=1, msc=:red, msa=0.25, msw=0.5)
    p2_hnw = plot!(times,north, label="", c="#aed6dc", lw=0.5)
    p3_hnw = Plots.scatter(times[west_spikes], west[west_spikes], label="", markershape=:circle, ms=4, mc=:white, ma=1, msc=:red, msa=0.25, msw=0.5)
    p3_hnw = plot!(times,west, label="", c="#ff9a8d", lw=0.5)

    # zero line on every panel
    hline!(p1_hnw, [0], lw=1, label="")
    hline!(p2_hnw, [0], lw=1, label="")
    hline!(p3_hnw, [0], lw=1, label="")

    # x ticks every 5 minutes, labelled with the minute value
    tm_tick = range(times[1],times[end],step=Minute(5))
    ticks = Dates.format.(tm_tick,"MM")

    # display plots to screen
    plot_wse = Plots.plot(p1_hnw, p2_hnw, p3_hnw, layout = (3, 1), size = (1400, 900), xticks=(tm_tick,ticks),
        xlim=(first(times),last(times)),  xtickfontsize=7, ytickfontsize=8,
        framestyle = :box,fg_legend=:transparent, legend=:bottomleft,
        margin = 1Plots.mm, grid=true, gridlinewidth=0.5, gridstyle=:dot, gridalpha=1)            

    # create a plot file to be saved as a .PNG
##    plt_file = first(infil, length(infil)-4)*"_plot_hnw_"*Dates.format(start_date, "yyyy_mm_dd_HHMM")*".png"

    # Save plot to file
##    savefig(plt_file)
##    println("Plot file saved as ",plt_file)

    display(plot_wse)

end    # plot_hnw()




## Select individual records to plot

In [None]:
using Tk

# Build a Tk window listing the F23 record times; pressing "Ok" plots the selected record.
#f23_df_copy = deepcopy(f23_df)
# Work on copies so the rows dropped below do not alter f23_df / fc1_df
df1 = deepcopy(f23_df)
df2 = deepcopy(fc1_df)
first_row = first(df1)


# Drop the first record when it has no usable Data_vector entry.
# NOTE(review): the first row of df2 is dropped as well, which presumes df1 and df2 are
# aligned row-for-row - confirm.
if ismissing(first_row.Data_vector) || isnothing(first_row.Data_vector) || isnan(first_row.Data_vector)
    println("Dropping " * string(df1.Date[1]))
    df1 = df1[2:end, :]  # Drop the first row
    df2 = df2[2:end,:]
end

## Plot 30-minute records
# create a vector of dates from the F23 df
date_vector = Dates.format.(df1.Date, "yyyy-mm-dd HH:MM:SS");

# add last datetime - it is only a part record
#last(f23_df.Date) + Minute(30)
#push!(date_vector, (Dates.format.(last(f23_df.Date) + Minute(30), "yyyy-mm-dd HH:MM:SS")))

# Window holding a scrollable list of the formatted dates with a button underneath
w = Toplevel("Select Date", 235, 650)
tcl("pack", "propagate", w, false)
f = Frame(w)
pack(f, expand=true, fill="both")

f1 = Frame(f)
lb = Treeview(f1, date_vector)

scrollbars_add(f1, lb)
pack(f1,  expand=true, fill="both")

tcl("ttk::style", "configure", "TButton", foreground="blue", font="arial 16 bold")
b = Tk.Button(f, "Ok")
pack(b)

println("Select a time from the menu!")
flush(stdout)

# Button handler: look up the position of the chosen date in date_vector (stored in the
# global `idx`) and plot that record
bind(b, "command") do path

    file_choice = get_value(lb)
    global idx = Int(findall(x -> x==file_choice[1], date_vector)[1])
    println(idx," ",date_vector[idx] * " selected")
    plot_hnw(df1,df2,Data,idx)
##    plot_spectra(f23_df,df2,Data,idx)
##    plot_2d(f23_df,Data,idx)
##    plot_hnw_2d(f23_df,Data,idx)
##    plot_3d(f23_df,Data,idx)

end

In [None]:
f23_df

In [None]:
# Scratch cell: time 30 minutes after the last F23 date
last(f23_df.Date) + Minute(30)

# Append that time, formatted like the other entries, to date_vector
push!(date_vector, (Dates.format.(last(f23_df.Date) + Minute(30), "yyyy-mm-dd HH:MM:SS")))

## Show individual WSE's and zero-crossing points

In [None]:
# Find the zero up-crossing samples of one heave record and plot its first 200 samples
found_list = 44 # <<<=== For testing only

start_date, start_val, end_val = get_start_end_dates(f23_df,found_list)
    
# get WSEs for desired 30-minute record
heave, north, west = get_hnw(Data,start_val,end_val);

zero_up = []; valid_zero_up = []

# Record index i when heave goes from negative at i to positive at i+1, or when heave[i]
# is exactly zero with a negative sample before it and a positive sample after it
for i in 2:length(heave)-2
    if (heave[i]*heave[i+1] < 0 && heave[i+1] > 0) || (heave[i] == 0 && heave[i-1] < 0 && heave[i+1] > 0)
        push!(zero_up,i)
    end
end

# median + 3 standard deviations of the heave; only used by the commented-out lines below
med = median(heave)
stdev = std(heave)
stdev_3 = med + stdev*3

wse_point = 1:1:length(heave)
wse_1 = plot(wse_point, heave[wse_point], c=:blue, alpha=.5, label = "WSE's")
wse_1 = scatter!(wse_point, heave[wse_point], c=:white, ms=3, 
    markerstrokecolor=:blue, alpha=0.5, markerstrokewidth=0.5, label="WSE points")
wse_1 = scatter!(zero_up, heave[zero_up], ms=4, c=:lightgreen, 
    markerstrokecolor=:green, alpha=0.5, series_annotations = text.(zero_up, :bottom, :red, :size, 10), 
    annotationhalign = :hcenter, label="Zero up-cross points")

# Heave Threshold set at 10mm. Refer to Section 9 Wave statistics pp. 9-10 in Datawell Library Manual
# NOTE(review): the value below is 0.1 while the comment says 10mm - confirm the units of
# `heave` and which of the two is intended.
threshold = 0.1

wse_1 = hline!([threshold; threshold], lw=0.2, ls =:dot, c=:red, label="Threshold\n")
wse_1 = hline!([-threshold; -threshold], lw=0.2, ls =:dot, c=:red, label="")

##wse_1 = hline!([stdev_3; stdev_3], lw=0.2, ls =:dot, c=:green, label="3 sigma")
##wse_1 = hline!([-stdev_3; -stdev_3], lw=0.2, ls =:dot,  c=:green, label="")

wse_plot = plot(wse_1, size = (1400, 800),xlim=(0,200), ylim=(-1.5,1.5), framestyle = :box, 
    fg_legend=:transparent, bg_legend=:transparent, legend=:topright,
    margin = 1Plots.mm, grid=true, gridlinewidth=0.5, gridstyle=:dot, gridalpha=1, show=true)

display(wse_plot)

## Identify individual waves and calculate time-domain heights

In [None]:
using Printf

# Group the zero up-crossings in `zero_up` into waves and compute height statistics.
# valid_zero_up collects (start, end) pairs of up-crossing indices, one pair per accepted wave.
valid_zero_up = []
# i = up-crossing that opens the wave being built, j = candidate closing up-crossing
i = 1; j = 2

while j < length(zero_up)-1
    
    # Highest sample between the two up-crossings, and the lowest sample after that crest
    crest = maximum(heave[zero_up[i]:zero_up[j]])
    crest_point = zero_up[i] + argmax(heave[zero_up[i]:zero_up[j]]) - 1
    trough = minimum(heave[crest_point:zero_up[j]])

    # Check that crest higher than threshold AND trough less than threshold - Possible Valid Wave!!
    if (crest > threshold) & (trough < -threshold)
        crest_point = zero_up[i] + argmax(heave[zero_up[i]:zero_up[j]]) - 1
        trough_point = crest_point + argmin(heave[crest_point:zero_up[j]]) - 1
        
        next_crest = maximum(heave[zero_up[j]:zero_up[j+1]])
        
        # Check that NEXT crest also exceeds threshold (if so then Valid Wave)
        if (next_crest > threshold)
##            println("Crest found at ",crest_point," Trough at ",trough_point)
            push!(valid_zero_up,(zero_up[i],zero_up[j]));
            # the closing up-crossing opens the next wave
            i = j
        end
        
        j = j+1
        
    else 
        
        # not a valid wave yet: extend the candidate to the next up-crossing
        j = j+1
        
    end

end

# Process last recorded wave
# NOTE(review): this indexes zero_up[j+1] relative to the value of j left by the loop, so it
# presumes zero_up holds at least 3 entries - confirm for short or flat records.
i = j
j = j+1

crest = maximum(heave[zero_up[i]:zero_up[j]])
trough = minimum(heave[zero_up[i]:zero_up[j]])

if (crest > threshold) & (trough < -threshold)

    crest_point = zero_up[i] + argmax(heave[zero_up[i]:zero_up[j]]) - 1
    trough_point = crest_point + argmin(heave[crest_point:zero_up[j]]) - 1
    push!(valid_zero_up,(zero_up[i],zero_up[j]));

end

# Height of each accepted wave = highest minus lowest sample between its two up-crossings
heights = []

for i in 1:length(valid_zero_up)
    
    crest = maximum(heave[valid_zero_up[i][1]:valid_zero_up[i][2]]);
    trough = minimum(heave[valid_zero_up[i][1]:valid_zero_up[i][2]]);
    push!(heights,crest - trough)
##    @printf("Wave %d = %2.3f\n",i,crest - trough)

end 

# Get time-domain height parameters
sorted_heights = sort(heights, rev=true) # sort heights in descending order, highest wave first
hmax = maximum(sorted_heights)
# hs / h10: mean of the highest third / highest tenth of the waves (counts rounded up)
hs = mean(sorted_heights[1:Int(ceil(length(sorted_heights)/3))])
h10 = mean(sorted_heights[1:Int(ceil(length(sorted_heights) / 10))])
hmean = mean(sorted_heights)

@printf("%s; Waves = %3d; Hmean = %4.2fm; Hs = %4.2fm; H10 = %4.2fm; Hmax = %4.2fm\n",Dates.format(start_date, "yyyy-mm-dd HH:MM"),length(heights), hmean, hs, h10, hmax)

## Locate the zero-crossing points

In [None]:
# Estimate where each accepted wave crosses zero, as a fractional sample position:
# start index + |heave at start| / (rise to the next sample), i.e. linear interpolation
# between the sample at the up-crossing index and the one after it.
x_point = []
for i in 1:length(valid_zero_up)
    push!(x_point,valid_zero_up[i][1] + abs(heave[valid_zero_up[i][1]]) / (heave[valid_zero_up[i][1]+1] - heave[valid_zero_up[i][1]]))
end

# Process final zero-crossing point
# (same interpolation applied to the closing up-crossing of the last wave;
#  requires valid_zero_up to be non-empty)
i = length(valid_zero_up)
push!(x_point,valid_zero_up[i][2] + abs(heave[valid_zero_up[i][2]]) / (heave[valid_zero_up[i][2]+1] - heave[valid_zero_up[i][2]]))

# Do plots
wse_1 = plot(wse_point, heave[wse_point], c=:blue, alpha=0.5, label = "WSE's")
wse_1 = scatter!(wse_point, heave[wse_point], c=:white, ms=3, 
    markerstrokecolor=:blue, alpha=0.5, markerstrokewidth=0.5,label="WSE points")
wse_1 = scatter!(zero_up, heave[zero_up], ms=3, c=:lightgreen, 
    markerstrokecolor=:lightgreen, series_annotations = text.(zero_up, :bottom, :red, :size, 10), 
    annotationhalign = :hcenter, label="Zero up-cross points\n")
wse_1 = scatter!(x_point, zeros(length(x_point)), c=:yellow, ms=5, 
    markerstrokecolor=:yellow, markershape=:diamond, label="Zero-crossing points")

# Heave Threshold set at 10mm. Refer to Section 9 Wave statistics pp. 9-10 in Datawell Library Manual
# NOTE(review): the value below is 0.1 while the comment says 10mm - confirm the units.
threshold = 0.1 

wse_1 = hline!([threshold; threshold], lw=0.2, ls =:dot, c=:red, label="Threshold ("*string(threshold)*")\n")
wse_1 = hline!([-threshold; -threshold], lw=0.2, ls =:dot, c=:red, label="")

wse_plot = plot(wse_1, size = (1400, 800),xlim=(0,200), ylim=(-1.5,1.5), framestyle = :box, 
    fg_legend=:transparent, bg_legend=:transparent, legend=:topright,
    margin = 1Plots.mm, grid=true, gridlinewidth=0.5, gridstyle=:dot, gridalpha=1, show=true)

display(wse_plot)

## Calculate time-domain periods

In [None]:
sample_frequency = 2.56 # Hertz Mk4

# Wave period in seconds = distance between successive zero-crossing positions / sample rate
periods = Float64[(x_point[k+1] - x_point[k]) / sample_frequency for k in 1:length(x_point)-1]

sorted_periods = sort(periods, rev=true) # sort periods in reverse order longest period to shortest period
tmean = mean(sorted_periods)
# Period of the wave whose height is closest to Hs / H10, and of the highest wave
ths = periods[argmin(abs.(heights .- hs))] 
th10 = periods[argmin(abs.(heights .- h10))]
thmax = periods[argmax(heights)]
tmax = maximum(sorted_periods)

# get Datawell parameters from f29_df (the row dated 30 minutes after the record start)
row = f29_df[f29_df.Date .== start_date + Minute(30), :]

# Print results
@printf("\nQGHL values:     ")
@printf("%s; Waves = %3d; Hmean = %5.2fm; Hs = %5.2fm; H10 = %5.2fm; Hmax = %5.2fm;",Dates.format(start_date, "yyyy-mm-dd HH:MM"),length(heights), hmean, hs, h10, hmax)
@printf(" Tmean = %5.2fs; THs = %5.2fs; TH10 = %5.2fs; THmax = %5.2fs; Tmax = %5.2fs",tmean,ths,th10,thmax,tmax)

@printf("\nDatawell values: ")
if size(row, 1) == 0
    # No F29 row carries the expected time stamp, so there is nothing to compare against
    @printf("no F29 record found for %s\n", Dates.format(start_date + Minute(30), "yyyy-mm-dd HH:MM"))
else
    @printf("%s; Waves = %3d; Hmean = %5.2fm; Hs = %5.2fm; H10 = %5.2fm; Hmax = %5.2fm;",Dates.format(row.Date[1], "yyyy-mm-dd HH:MM"),row.Nw[1], row.Havg[1], row.H3[1], row.H10[1], row.Hmax[1])
    @printf(" Tmean = %5.2fs; THs = %5.2fs; TH10 = %5.2fs; THmax = %5.2fs\n",row.Tavg[1],row.TH3[1],row.TH10[1],row.THmax[1])
end


## Plot sorted wave heights

In [None]:
# Plot the wave heights in descending order with horizontal reference lines for
# Hmax, H10, Hsig and Hmean, then add the sorted periods in red.
title_string = Dates.format(start_date, "dd/mm/yyyy HH:MM")

wave_heights = scatter(sorted_heights, label="Sorted wave heights " * string(length(heights)))

wave_heights = hline!([hmax; hmax], lw=2, ls =:dot, c=:red, fillrange = 0, fillalpha = 0.015, fillcolor = :red, label="Hmax " * string(round(hmax, digits=2)) * "m\n")
wave_heights = hline!([h10; h10], lw=2, ls =:dot, c=:orange, fillrange = 0, fillalpha = 0.02, fillcolor = :orange,label="H10 " * string(round(h10, digits=2)) * "m\n")
wave_heights = hline!([hs; hs], lw=2, ls =:dot, c=:yellow, fillrange = 0, fillalpha = 0.04, fillcolor = :yellow, label="Hsig " * string(round(hs, digits=2)) * "m\n")
wave_heights = hline!([hmean; hmean], lw=2, ls =:dot, c=:green, fillrange = 0, fillalpha = 0.05, fillcolor = :green, label="Hmean " * string(round(hmean, digits=2)) * "m\n")

# NOTE(review): `subplot` is created but not passed to the scatter! call below - confirm
# whether the periods were meant to go on the twin axis.
subplot = Plots.twinx()

wave_heights = scatter!(sort(periods),c=:red)

wave_heights_plot = plot(wave_heights, layout = (1, 1), size = (1400, 600),xlim=(0,length(heights)*1.015), ylim=(0,hmax*1.01), xlabel="Wave number", ylabel="Wave height (m)",
    framestyle = :box, title=title_string, titlefontsize=12, fg_legend=:transparent, bg_legend=:transparent, legend=:bottomleft,
    margin = 10Plots.mm, grid=true, gridlinewidth=0.5, gridstyle=:dot, gridalpha=1, show=true)

display(wave_heights_plot)

In [None]:
# Sorted wave heights in blue, styled for the left-hand axis
wave_heights = scatter(sorted_heights, c=:blue, yguidefontcolor=:blue, 
    ylim=(0,hmax*1.01), ylabel="Wave heights (m)", label="Sorted wave heights " * string(length(heights)), 
    gridcolor=:blue, legend=:bottomleft, foreground_color_grid="blue", yforeground_color_text="blue", ytickfonthalign=:left)

# Twin axis that receives the sorted wave periods in red
subplot = Plots.twinx()

wave_heights = scatter!(subplot,sort(periods), c=:red, yguidefontcolor=:red, 
    ylim=(0,maximum(periods)*1.01), ylabel="Wave periods (s)", label="Sorted wave periods " * string(length(periods)),
    gridcolor=:red, legend=:bottomright, foreground_color_grid="red", yforeground_color_text="red", ytickfonthalign=:right)

# Final figure: shared x axis is the wave number; margins leave room for both y labels
wave_heights_plot = plot(wave_heights, layout = (1, 1), size = (1400, 600),
    xlim=(0,length(heights)*1.015), xlabel="Wave number",
    framestyle = :box, title=title_string, titlefontsize=12, fg_legend=:transparent, bg_legend=:transparent,
    bottommargin = 10Plots.mm, leftmargin = 10Plots.mm, rightmargin = 18Plots.mm, 
    grid=true, gridlinewidth=0.5, gridstyle=:dot, gridalpha=1, show=true)

In [None]:
## Julia program to read a selected .BVA file and display 30-minute time series plots
## JW October 2022
#using ContinuousWavelets 
using Dates, DataFrames, Distributions, DSP
using NativeFileDialog
using Printf
#using Suppressor: @suppress
#using Wavelets

##import Pkg; Pkg.add("Suppressor")
## See https://github.com/JuliaIO/Suppressor.jl
##using Suppressor: @suppress

#include("./read_BVA_processing_tools.jl")
#include("./read_BVA_plotting_tools.jl")

################################################
################################################
##           START OF MAIN PROGRAM
################################################
################################################

# Widen screen for better viewing
display("text/html", "<style>.container { width:100% !important; }</style>")

# Select a .BVA file: the dialog opens in the BVA data folder and filters on *BVA
infil = pick_file("C:\\QGHL\\Wave_data\\Bris\\BVA\\", filterlist="*BVA");
println("Selected ",infil)

#readdir(path)

In [None]:
# Set path where data cards are located
path = "Z:/Monitoring/Instruments/Wave/Deployment Data/Card Data"
aa = readdir(path, join=true);

# get the names of the sites in the directory: the last path component of every entry
sites = Any[last(splitpath(entry)) for entry in aa];

In [None]:
aa

In [None]:
sites

In [None]:
using Gtk


## Allow user to pick an entry of `sites` from a drop-down list
cb = GtkComboBoxText()
choices = sites

for choice in choices
    push!(cb,choice)
end

set_gtk_property!(cb,:active,1)

signal_connect(cb, "changed") do widget, others...

    # get the active index: the "active" property counts entries from 0 (-1 when nothing
    # is selected), so add 1 to obtain the matching position in `choices`
    idx = get_gtk_property(cb, "active", Int) + 1
    idx >= 1 || return nothing

    # get the active string 
    str = Gtk.bytestring( GAccessor.active_text(cb) ) 

    println(idx)
    # `choices` is the list that filled the combo box; the loop variable `choice` above
    # does not exist inside this callback
    println(choices[idx])

end

win = GtkWindow("Select Date",200,200);
Gtk.GAccessor.position(win, Gtk.GtkWindowPosition.CENTER);
push!(win, cb);
showall(win);

In [None]:
f82_df

In [None]:
# Components of every f82_df row: "v" = sin(Direction) * Speed, "u" = cos(Direction) * Speed
# (Direction in degrees). Columns keep an open element type.
currents_df = DataFrame(
    [Any[sin(deg2rad(f82_df.Direction[k])) * f82_df.Speed[k] for k in 1:nrow(f82_df)],
     Any[cos(deg2rad(f82_df.Direction[k])) * f82_df.Speed[k] for k in 1:nrow(f82_df)]],
    ["v", "u"]);

In [None]:
# Scatter of the sine component (x axis) against the cosine component (y axis)
scatter(currents_df.v,currents_df.u)

In [None]:
# calculate vectors from speed and direction for x-y plotting
# (Julia version of what was a Python/numpy snippet, which cannot run in this notebook)
distance = f82_df.Speed
# Compass direction -> mathematical angle, kept within (-180, 180] degrees
angle = 90 .- f82_df.Direction
angle[angle .< -180] .+= 360
angle = deg2rad.(angle)

u, v = (distance .* cos.(angle), distance .* sin.(angle))
# NOTE(review): GPS_Currents_df is not created anywhere in the visible notebook - confirm
# which frame should receive these columns.
GPS_Currents_df[!, "u"] = u; GPS_Currents_df[!, "v"] = v
# Magnitude of each (u, v) vector; named to avoid rebinding Base.values
current_magnitude = hypot.(u, v)

In [None]:
# Direction (degrees, as used with deg2rad in the next cell) and speed columns of f82_df
angle = f82_df.Direction
dist = f82_df.Speed

In [None]:
# x = sin(direction) * speed, y = cos(direction) * speed for every F82 row
scatter(sin.(deg2rad.(angle)).*dist,cos.(deg2rad.(angle)).*dist)