### Development code

In [None]:
using CSV: CSV
using CurveFit: curve_fit
using DataFrames: DataFrame, ncol, nrow
using Dates: Dates, DateTime, unix2datetime, datetime2unix, Minute
using DSP: welch_pgram, freq, power, hanning
using NativeFileDialog: pick_folder
using Serialization: deserialize
using Statistics: median
using Tk: bind, Button, Frame, get_value, pack, scrollbars_add, tcl, Toplevel, Treeview
using Plots: Plots, plot, plot!, annotate!, vline!, @layout, text
using Polynomials: Polynomial

"""
    fix_gps_errors(heave_bad, date, gps_flag; offset=120)

Return a corrected copy of `heave_bad`; the input vector itself is not modified.

Every index at which `gps_flag` equals `1` is treated as a GPS error. The errors are
processed from the last one to the first. For each error a polynomial is fitted
(`curve_fit(Polynomial, x, y, 2)`) to the samples before the error and another to the
samples after it, the fitted curves are subtracted from the heave on each side, and the
sample at the error location is set to `0.0`.

# Arguments
- `heave_bad`: vector of water surface elevations (WSEs).
- `date`: time stamp used only in the progress message printed to `stdout`.
- `gps_flag`: vector of flags; entries equal to `1` mark GPS errors.

# Keywords
- `offset=120`: number of samples used on either side of each GPS error. The window is
  shortened automatically where it would run past the start or end of the record.

# Returns
The corrected heave vector. When no GPS error is flagged this is an unchanged copy of
`heave_bad` and nothing is printed.
"""
function fix_gps_errors(heave_bad, date, gps_flag; offset=120)
##################################################

    heave = copy(heave_bad)

    gps_errors = findall(==(1), gps_flag)

    # Nothing flagged: hand back the untouched copy
    isempty(gps_errors) && return heave

    heave_length = length(heave)

    println(length(gps_errors), " GPS errors at ", Dates.format.(date, "yyyy-mm-dd HH:MM"))
    flush(stdout)

    for ii in reverse(gps_errors)

        # Keep the centre of the fit at least 3 samples away from both ends of the record
        error_center = min(max(ii, 3), heave_length - 3)

        # Requested offset either side of the GPS error, shortened near the record ends
        lower_offset = error_center <= offset ? error_center - 1 : offset
        upper_offset = error_center + offset > heave_length ? heave_length - error_center : offset

        # Ensure there are at least 3 points for fitting
        lower_offset = max(lower_offset, 2)
        upper_offset = max(upper_offset, 2)

        # Handle edge cases
        left_side_points = max(1, error_center - lower_offset):error_center
        right_side_points = error_center:min(heave_length, error_center + upper_offset)

        # Fit curve to subset of heave before GPS error
        fit1 = curve_fit(Polynomial, left_side_points, heave[left_side_points], 2)
        yfit1 = fit1.(left_side_points)
        yfit1[end] = 0.0  # leave the sample at the fit centre untouched by the left fit

        # Fit curve to subset of heave after GPS error
        fit2 = curve_fit(Polynomial, right_side_points, heave[right_side_points], 2)
        yfit2 = fit2.(right_side_points)
        yfit2[1] = 0.0  # leave the sample at the fit centre untouched by the right fit

        # Apply polynomial results to WSEs on both sides of GPS error
        heave[left_side_points] .-= yfit1
        heave[right_side_points] .-= yfit2
        heave[ii] = 0.0  # set WSE at GPS error location to 0

    end

    return heave

end  # fix_gps_errors()


"""
    get_displacements(arry)

Convert a collection of hexadecimal strings into a `Vector{Float64}` of displacements.

The length of the first element decides how many leading characters of every element
are parsed: 3 when the first element has length 3, otherwise 2. Each parsed value `v`
that is `>= 2048` is replaced by `2048 - v` (i.e. values with the top bit of a 12-bit
word set become negative), and every value is then divided by 100
(presumably a centimetre-to-metre conversion — confirm against the buoy format).

An empty `arry` yields an empty `Float64` vector. A word containing a non-hexadecimal
character raises an `ArgumentError`.
"""
function get_displacements(arry)
#####################################

    # Nothing to decode; also avoids indexing the first element of an empty input
    isempty(arry) && return Float64[]

    # Number of hex characters to decode per word, chosen from the first word
    width = length(first(arry)) == 3 ? 3 : 2

    return map(arry) do word
        hex = SubString(word, 1, width)
        all(isxdigit, hex) ||
            throw(ArgumentError("invalid hexadecimal word: $(repr(word))"))

        raw = parse(Int, hex; base=16)

        # Values of 2048 and above are mapped onto the negative range
        (raw >= 2048 ? 2048 - raw : raw) / 100
    end

end     # get_displacements()


"""
    convert_frequency_to_period(frequencies)

Return the element-wise reciprocal `1.0 / f` of `frequencies`, i.e. the period in
seconds for each frequency given in Hertz. A frequency of zero maps to `Inf`.
"""
function convert_frequency_to_period(frequencies)
#################################################

    periods = @. 1.0 / frequencies

    return periods

end    # convert_frequency_to_period()


"""
    read_noise_floor_file(io)

Deserialize one value from the stream `io`, close `io`, and return the value.

The stream is closed even when deserialization throws, so the function is safe to call
with a stream the caller opened directly as well as through `open(f, path)`.

NOTE: `deserialize` does not validate what it reads — only use this on files from a
trusted source.
"""
function read_noise_floor_file(io)
###########################

    try
        return deserialize(io)    # read the serialized object (the noise floor DataFrame)
    finally
        close(io)                 # always release the stream, including on failure
    end

end    # read_noise_floor_file()


################################################
################################################
##           START OF MAIN PROGRAM
################################################
################################################

# Widen screen for better viewing
display(HTML("<style>.jp-Cell { width: 120% !important; }</style>"))

rec_len = 2304                            # samples per record (sample_length * sample_frequency)
sample_frequency = 1.28                   # sample frequency in Hertz
sample_length = 1800                      # record length in seconds
sample_rate = Float64(1/sample_frequency) # sample spacing in seconds

# Widen screen for better viewing
display("text/html", "<style>.container { width:100% !important; }</style>")

#infil = "C:\\Users\\PC1\\Julia_programs\\Datawell\\RDT_vector\\Data\\Noise_floor.bin"
noise_floor_file = "C:\\Users\\Jim\\Julia_programs\\Datawell\\RDT_vector\\Data\\Noise_floor.bin"
println("Reading Noise Floor data from ",noise_floor_file)
flush(stdout)

# Deserialize the DataFrame from the file
noise_floors_df = open(read_noise_floor_file, noise_floor_file, "r")

# Extract all spectral arrays from the DataFrame
spectral_values = noise_floors_df.Pden2

# Convert the list of arrays into a matrix where each row is a spectrum
spectral_matrix = hcat(spectral_values...)'

# Calculate the median spectra (median of each column)
median_spectra = median(spectral_matrix, dims=1)

# Convert the results to vectors
median_spectra_vector = vec(median_spectra)

hxv_directory = pick_folder()

# build list of all hxv files in selected directory
# (the extension test is case-insensitive so that both *.hxv and *.HXV are listed)
hxv_files = filter(name -> endswith(lowercase(name), ".hxv"), readdir(hxv_directory));

# Build the file-selection window: a scrollable list of HXV files above an "Ok" button
w = Toplevel("Select Date", 235, 800)
tcl("pack", "propagate", w, false)
f = Frame(w)
pack(f, expand=true, fill="both")

f1 = Frame(f)
lb = Treeview(f1, hxv_files)
scrollbars_add(f1, lb)
pack(f1,  expand=true, fill="both")

tcl("ttk::style", "configure", "TButton", foreground="blue", font="arial 16 bold")
b = Button(f, "Ok")
pack(b)

# Callback for the "Ok" button: read the HXV file selected in the list, decode the
# heave/north/west displacements and the GPS flag into `wse_df`, repair the heave around
# flagged GPS errors, and display the time series together with the spectra before and
# after the repair. Several results are stored in globals so they can be inspected from
# later notebook cells.
bind(b, "command") do path

    file_choice = get_value(lb);

    # Select a HXV file
    infil = joinpath(hxv_directory, file_choice[1])
    println("Selected ",infil)

    # extract the datetime from the file name: the 16 characters that precede the final
    # character of the name stem (presumably the trailing "Z" — confirm against file naming).
    # Only the file name is inspected, so dots in the directory path cannot interfere.
    file_stem = first(splitext(file_choice[1]))
    start_date = DateTime(String(chop(last(file_stem, 17))), "yyyy-mm-ddTHHhMMZ")

    # Create df of dates and NaN's
    global wse_df = DataFrame(
        Date = unix2datetime.(datetime2unix.(start_date) .+ (0:sample_rate:sample_length - sample_rate)), 
        Heave = fill(NaN, rec_len),
        North = fill(NaN, rec_len),
        West = fill(NaN, rec_len),  
        GPS_flag = fill(0, rec_len)  
    )
    
    # read HXV file to df
    df = DataFrame(CSV.File(infil, header=0, delim=",", types=String))
    
    # remove df rows where string length != 4
    filter!(row -> all(length(row[i]) == 4 for i in 1:ncol(df)), df)
    println(nrow(df)," rows available for processing")

    # nothing left to decode - leave wse_df filled with NaN's and stop here
    if nrow(df) == 0
        println("No valid rows found in ", infil)
        return
    end
    
    # determine if buoy is DWR-G
    is_gps = false
    
    sync_word_location = findall(==("7FFF"), df.Column2)
    
    if !isempty(sync_word_location)  # Proceed only if we found any "7FFF"
        # the word following each sync word is inspected, so a sync word must have a
        # following row both within the record and within the rows actually read
        last_usable = min(rec_len - 1, nrow(df))
        next_row_data = df.Column2[filter(<(last_usable), sync_word_location) .+ 1]
        global word_numbers = parse.(Int, SubString.(next_row_data, 1, 1), base=16)
        global words = parse.(Int, SubString.(next_row_data, 2, 4), base=16)

        if any((word_numbers .== 7) .& (words .== 0))
            is_gps = true
        end
        
    end
    
    is_gps ? println("GPS buoy") : println("MkIII buoy")
        
    # The last two hex characters of Column1 are used as a counter that wraps at 256;
    # accumulating its differences gives each row's position within the record, so rows
    # lost to the length filter above leave NaN gaps instead of shifting later samples.
    hex_arr = SubString.(df.Column1, 3, 4)
    arr = parse.(Int, hex_arr, base=16)
    diffs = diff(arr)
    diffs[diffs .< 0] .+= 256
    cumulative_values = cumsum([1; diffs])
    valid_indices = findall(<=(rec_len), cumulative_values)
    valid_rows = cumulative_values[valid_indices]
    
    # keep exactly the rows that map into the record, so the decoded vectors below
    # always have the same length as valid_rows
    df = df[valid_indices, :]
    
    # Calculate heave, north, and west WSEs
    wse_df[valid_rows, 2] .= get_displacements(SubString.(df.Column3, 1, 3))
    north_hex = SubString.(df.Column3, 4, ) .* SubString.(df.Column4, 1, 2)
    wse_df[valid_rows, 3] .= get_displacements(north_hex)
    wse_df[valid_rows, 4] .= get_displacements(SubString.(df.Column4, 3, 4) .* SubString.(df.Column5, 1, 1))
    
    
    # Function to check the LSB of a hexadecimal value
    check_lsb(hex_str) = parse(Int, hex_str, base=16) & 1
    
    # The GPS flag is taken from the LSB of each north word
    wse_df[valid_rows, 5] = Int16.(map(check_lsb, north_hex))

    # need to replace any NaN's with 0's in order to calculate spectra
    global heave = map(x -> isnan(x) ? zero(x) : x, wse_df.Heave)
    
    ps_w = welch_pgram(heave, 256, 128; onesided=true, nfft=256, fs=sample_frequency, window=hanning)
    global f2 = freq(ps_w)
    global Pden2 = power(ps_w)
    
    global tm_tick = range(first(wse_df.Date), last(wse_df.Date), step=Minute(5))
    global ticks = Dates.format.(tm_tick,"MM:SS")
    
    # Find indices of all values equal to 1 (represents Datawell GPS flag)
    gps_flag = findall(==(1), wse_df.GPS_flag)
    gps_errors_count = length(gps_flag)
    
    error_string = gps_errors_count > 0 ? string(gps_errors_count," GPS errors flagged") : "No GPS errors flagged"

    fixed_heave = fix_gps_errors(heave, wse_df.Date[1],  wse_df.GPS_flag)
    ps_w1 = welch_pgram(fixed_heave, 256, 128; onesided=true, nfft=256, fs=sample_frequency, window=hanning)
    f2_fixed = freq(ps_w1)
    Pden2_fixed = power(ps_w1)

    p1 = plot()
 
    # show GPS errors
    for jj in gps_flag
        p1 = vline!([wse_df.Date[jj]], lw=1, c=:red, label="")
    end
    
    global periods_sec = convert_frequency_to_period(f2)

    # Time series: original heave (yellow) overlaid with the repaired heave (blue)
    p1 = plot!(wse_df.Date, heave, lc=:yellow, lw=:0.5, alpha=:0.75, label="", ylabel="WSE (m)", xlims=(wse_df.Date[1],wse_df.Date[end]), 
        ylims=(minimum(heave), maximum(heave)), xticks=(tm_tick,ticks))  
    p1 = plot!(wse_df.Date, fixed_heave, lc=:blue, lw=:1, alpha=:0.75, label="")
    p1 = annotate!(wse_df.Date[50], maximum(heave)*0.9, text(error_string, :left, 12))
       
    # Spectra against frequency: original (yellow) and repaired (blue)
    p2 = plot(f2, Pden2, lc=:yellow, lw=:2, alpha=:1, xlim=(0,0.64), ylim=(0,Inf), label="", xlabel="Frequency (Hz)", 
        ylabel="S(f) (m²/Hz)", fg_legend=:transparent, bg_legend=:transparent)
    p2 = plot!(f2_fixed, Pden2_fixed, lc=:blue, lw=:0.75, alpha=0.75, fillrange=0, fillcolor=:blue, fillalpha=:0.1, label="")
#    p2 = plot!(f2, median_spectra_vector, lw=:2, lc=:red, fillrange = 0, fillalpha = 0.075, fillcolor = :red, label="Median Noise Floor")

    # Spectra against wave period on a log axis: original (yellow) and repaired (blue)
    p3 = plot(periods_sec, Pden2, lc=:yellow, lw=:2, label="", yaxis=:log, yminorticks=10, minorgrid=:true, xlabel= "Wave Period (s)", 
        ylabel="S(f) (m²/Hz)", xlims=(0,200), legend=:bottomright, fg_legend=:transparent, bg_legend=:transparent) # yticks=[50, 100, 150, 200])       
    p3 = plot!(periods_sec, Pden2_fixed, lc=:blue, lw=:0.75, label="")
#    p3 = plot!(periods_sec, median_spectra_vector, lw=:2, lc=:red, fillrange = 0.00001, fillalpha = 0.075, fillcolor = :red, label="Median Noise Floor")
        

    # Define the layout with varying sizes
    l = @layout [a{0.5h}; b{0.5w} c{0.5w} [ Plots.grid(1,1) ] ]
    
    title = Dates.format(first(wse_df.Date), "dd/mm/yyyy HH:MM")
    
    # Combine the three plots
    p1_p2_p3_plot = plot(p1, p2, p3, framestyle = :box, leftmargin = 10Plots.mm, layout=l, suptitle=title, size=(1200, 800))
    
    display(p1_p2_p3_plot)

end