## Select an MSQ Tide file

In [None]:
using CSV
using Dates, DataFrames, DSP
using LaTeXStrings
using NativeFileDialog
using Plots, Printf
using Statistics
using Tk

################################################
################################################
################################################
##           START OF MAIN PROGRAM
################################################
################################################
################################################

# Widen the notebook container so wide output uses the full browser width
display("text/html", "<style>.container { width:100% !important; }</style>")

# Ask the user for the directory that holds the BEF tide files
bef_directory = pick_folder()

# NOTE(review): pick_folder presumably returns an empty string when the dialog is
# cancelled - confirm against the NativeFileDialog documentation. Stop early with a
# clear message rather than failing later inside readdir.
isempty(bef_directory) && error("No directory selected")

# Build the list of BEF files in the selected directory. The extension test is
# case-insensitive, so both "X.BEF" and "x.bef" are accepted.
bef_files = filter(name -> endswith(uppercase(name), ".BEF"), readdir(bef_directory))

# Stop here when the directory holds no BEF files. error() aborts the running cell;
# the previous bare `exit;` only named the function without calling it, so execution
# carried on with an empty file list.
if isempty(bef_files)
    error("No bef files found in " * bef_directory)
else
    println(string(length(bef_files)) * " bef tide files found")
end

# Build the file-selection window: a list of the BEF files found above plus an "Ok" button.
# NOTE(review): the window title reads "Select Date" although the list holds file names -
# confirm whether "Select File" was intended.
# NOTE(review): 235 and 600 are presumably the window width and height - confirm
# against the Tk.jl Toplevel documentation.
w = Toplevel("Select Date", 235, 600)
# Turn off pack geometry propagation on the window (Tcl command "pack propagate w false")
tcl("pack", "propagate", w, false)
# Outer frame that fills the window and holds the list and the button
f = Frame(w)
pack(f, expand=true, fill="both")

# Inner frame holding the list of BEF file names with scrollbars attached
f1 = Frame(f)
lb = Treeview(f1, bef_files)
scrollbars_add(f1, lb)
pack(f1,  expand=true, fill="both")

# Configure the ttk "TButton" style (blue, bold 16-point Arial text), then add the "Ok" button
tcl("ttk::style", "configure", "TButton", foreground="blue", font="arial 16 bold")
b = Button(f, "Ok")
pack(b)

# Callback for the "Ok" button.
#
# Steps performed for the file currently selected in the list `lb`:
#   1. read the BEF file and reshape it into one row per reading (Date, WL in metres);
#   2. keep only the readings that fall exactly on the hour;
#   3. plot the water levels;
#   4. print month-by-month statistics and a yearly average whenever a monthly window
#      crosses into a new calendar year;
#   5. write the monthly table to "<file name>_monthly_averages.CSV" in the current
#      working directory.
#
# Results are stored in globals (file_choice, infil, title, bef_df, tide_df, month_df,
# year_df, total_possible, total_actual, gaps, total_gaps, total_good, start_date,
# monthly_averages_df, yearly_averages_df) so they remain available after the callback
# returns; a later cell in this file reads tide_df.
bind(b, "command") do path

    global file_choice = get_value(lb)

    # Nothing to do when "Ok" is pressed without a file highlighted in the list
    if isnothing(file_choice) || isempty(file_choice)
        println("No file selected")
        flush(stdout)
        return
    end

    # Full path of the selected file; joinpath uses the separator of the host OS
    global infil = joinpath(bef_directory, file_choice[1])

    # Plot/report title: the file name without its extension
    global title = splitext(basename(infil))[1]

    println("Selected ",infil)
    flush(stdout)

    # Read data from tide file to df - Note, this produces a single string column that needs stripping and splitting
    global bef_df = DataFrame(CSV.File(infil; header=false, skipto=2, delim='"', ignorerepeated=false));

    # Insert a space before -9999 values, and strip extra white space at start of row
    bef_df = replace.(bef_df, "-"=>" -")
    bef_df = replace.(bef_df, "     "=>' '; count=1);

    # Split string column into respective columns
    select!(bef_df, :Column1 =>
       ByRow(x -> get.( Ref(split(x, ' ')), 1:15, missing)) =>
       [:MO, :Date, :Time, :T1, :T2, :T3, :T4, :T5, :T6, :T7, :T8, :T9, :T10, :T11, :T12]);

    # Combine the Date and Time strings into a single DateTime column
    bef_df.Date = Date.(bef_df.Date, "yyyymmdd") + Time.(bef_df.Time, "HHMM")
    select!(bef_df, Not(:Time));

    # Convert the water-level strings (columns 3 onwards) to Float64, dividing by 1000
    for i in 3:ncol(bef_df)
        bef_df[!, i] = parse.(Float64, bef_df[!, i]) ./ 1000
    end

    # Produce a df of date and WL: column i of a row is (i-3)*10 minutes after the row's Date
    global tide_df = DataFrame(Date = DateTime[], WL = Float64[])
    for j in 1:nrow(bef_df), i in 3:ncol(bef_df)
        push!(tide_df, (bef_df[j, 2] + Dates.Minute((i - 3) * 10), bef_df[j, i]))
    end

    # Reduce the 10-minute data to hourly values (readings exactly on the hour)
    tide_df = filter(row -> minute(row.Date) == 0, tide_df);

    # Convert -9999 values (magnitude above 9 after the division by 1000) to NaN for plotting
    tide_df.WL[abs.(tide_df.WL) .> 9] .= NaN

    first_date = first(tide_df.Date)
    last_date = last(tide_df.Date)

    # One x-axis tick per year, labelled month-year
    tm_tick = range(first_date, last_date, step=Year(1))
    ticks = Dates.format.(tm_tick,"mm-YYYY")

    # plot the tides for selected file
    tides = plot(tide_df.Date,tide_df.WL, size = (1400, 600),
    xlim=(first_date,last_date), xtick=(tm_tick,ticks),
    label="", xlabel="Date", ylabel="WL (m)", title=title, titlefontsize=10,
    framestyle = :box, fg_legend=:transparent, bg_legend=:transparent, legend=:topright,
    margin = 15Plots.mm, grid=true, gridlinewidth=0.5, gridstyle=:dot, gridalpha=1, show=true)

    display(tides)

    println("Calculating monthly means - this takes time!")
    flush(stdout)

    println(title,"\n")
    println("Mth Year  Gaps     Good     Minimum     Maximum     Mean      Range    St Devn")

    # Running totals over the whole file, and the per-month counters
    global total_gaps = 0
    global total_good = 0
    global gaps = 0
    global total_actual = 0

    # Sum and count of the monthly means of the months (with data) in the current year
    yearly_mean = 0.0
    no_of_months = 0

    global yearly_averages_df = DataFrame(Year = Int[], Mean_WL = Float64[])
    global monthly_averages_df = DataFrame(Any[Int[], Int[], Int[], Int[], Float64[], Float64[], Float64[], Float64[], Float64[]], ["Month", "Year", "Gaps", "Good", "Minimum", "Maximum", "Mean", "Range", "St Devn"])

    # The data is hourly at this point, so records are counted per hour
    ms_per_hour = 3_600_000

    # Step through the data one month-long window at a time, starting at the first reading
    global start_date = first_date
    while start_date <= last_date

        end_date = start_date + Dates.Month(1)

        # Readings in [start_date, end_date) with the NaN (no-data) values removed
        in_month = start_date .<= tide_df.Date .< end_date
        global month_df = filter(:WL => !isnan, tide_df[in_month, :])

        # Number of hourly records possible in the window. Integer division is used so
        # floating-point rounding can never make the count one hour short.
        global total_possible = Dates.value(end_date - start_date) ÷ ms_per_hour

        # Good records and gaps are counted before any placeholder row is added below
        total_actual = nrow(month_df)
        gaps = total_possible - total_actual
        has_data = total_actual > 0

        # A window with no valid readings gets a single zero placeholder so the
        # statistics below are reported as 0.000 instead of failing on an empty column
        if !has_data
            push!(month_df, (start_date, 0.0))
        end

        # get totals of gaps and good data
        total_gaps = total_gaps + gaps
        total_good = total_good + total_actual

        # get monthly values (the window is labelled by the month and year it starts in)
        month_no = Dates.month(start_date)
        year_no = Dates.year(start_date)
        monthly_min, monthly_max = extrema(month_df.WL)
        monthly_mean = mean(month_df.WL)
        monthly_std = std(month_df.WL)
        # std of a single value is NaN; report it as zero
        if isnan(monthly_std)
            monthly_std = 0.0
        end
        monthly_range = monthly_max - monthly_min

        @printf("%3i %4i %5i %8i %10.3f %11.3f %10.3f %9.3f %9.3f\n", month_no, year_no, gaps, total_actual, monthly_min, monthly_max, monthly_mean, monthly_range, monthly_std)
        push!(monthly_averages_df, (month_no, year_no, gaps, total_actual, monthly_min, monthly_max, monthly_mean, monthly_range, monthly_std))

        # Add the monthly mean to the yearly total; windows without data are left out so
        # the zero placeholder does not bias the average
        if has_data
            yearly_mean = yearly_mean + monthly_mean
            no_of_months = no_of_months + 1
        end

        # print annual averages once the window crosses into the next calendar year
        if Dates.year(end_date) != year_no

            # All valid (non-NaN) water levels recorded in calendar year year_no
            global year_df = filter(!isnan, tide_df.WL[Dates.year.(tide_df.Date) .== year_no])
            yearly_average = mean(year_df)

            println("=============================================================================")
            @printf("%s %4i %s %5.3f\n","                         Yearly average for",year_no,"= ", yearly_average)
            @printf("%s%5.3f%s\n","                           From Monthly Averages = (",yearly_mean/no_of_months,")")
            println("=============================================================================\n")
            flush(stdout)

            # reset counters
            yearly_mean = 0.0
            no_of_months = 0

            # add yearly average to df
            push!(yearly_averages_df, (year_no, yearly_average))
        end

        # move to start of next month
        start_date = end_date

    end

    println("Total number of Gaps = ",total_gaps,"; Total number of Good values = ", total_good,"\n")
    println(title)
    println("    Year       Mean")
    for i in 1:nrow(yearly_averages_df)
        @printf("%8i %10.3f\n",yearly_averages_df.Year[i],yearly_averages_df.Mean_WL[i])
    end
    flush(stdout)

    # Save the monthly table next to the notebook (current working directory)
    CSV.write(splitext(basename(infil))[1] * "_monthly_averages.CSV", monthly_averages_df)
end

In [None]:
# Keep only the tide_df rows whose Date falls exactly on the hour
hourly_df = filter(:Date => (stamp -> minute(stamp) == 0), tide_df);

In [None]:
# Show hourly_df as the output of this cell
hourly_df

In [None]:
# NOTE(review): no global named end_of_year is defined in the visible code, so this
# cell presumably raises UndefVarError - confirm whether it is still needed.
end_of_year