In [1]:
###Rudimentary test suite to ensure that updates do not break the code 
using Ronin
using Missings 
using HDF5 
using NCDatasets
using BenchmarkTools 
using StatsBase
using Scratch

using JLD2 
include("../src/DecisionTree/DecisionTree.jl")
###Scratch directory obtained from Scratch.jl under the key "ronin_testing"; the
###temporary files created by the cells below are written here
global scratchspace = @get_scratch!("ronin_testing")


###Will undergo a basic training/QC pipeline. The model is not meant to output
###correct results, but rather simply show that it can produce data, train a model,
###and correctly apply the model to a scan.

###The below will be testing for a single-pass model

###Relative paths to the benchmark inputs
TRAINING_PATH = "../BENCHMARKING/benchmark_cfrads/"
config_file_path = "../BENCHMARKING/benchmark_setup/config.txt"
sample_model = "../BENCHMARKING/benchmark_setup/benchmark_model.joblib"

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPrecompiling Ronin [905eca60-9fa9-4fb3-8835-f5cd63a3719c] 
ERROR: Method overwriting is not permitted during Module precompilation. Use `__precompile__(false)` to opt-out of precompilation.
[36m[1m┌ [22m[39m[36m[1mInfo: [22m[39mSkipping precompilation due to precompilable error. Importing Ronin [905eca60-9fa9-4fb3-8835-f5cd63a3719c].
[36m[1m└ [22m[39m  exception = Error when precompiling module, potentially caused by a __precompile__(false) declaration in the module.


"../BENCHMARKING/benchmark_setup/benchmark_model.joblib"

In [2]:
###TODO: NEED TO ALLOW THIS TO IGNORE COMMENTS
tasks = Ronin.get_task_params(config_file_path)

###Weight given to the center gate of every spatial window
center_weight::Float64 = 0
###3x3 stand-in used in the slots of weight_matrixes that have no dedicated window
placeholder_matrix = allowmissing(ones(3, 3))

###Weight matrixes for calculating spatial parameters.
###Each window is all ones except for its center element.
iso_window::Tuple{Int64, Int64} = (7, 7)
iso_weights::Matrix{Union{Missing, Float64}} = allowmissing(ones(iso_window))
iso_weights[4, 4] = center_weight

avg_window::Tuple{Int64, Int64} = (5, 5)
avg_weights::Matrix{Union{Missing, Float64}} = allowmissing(ones(avg_window))
avg_weights[3, 3] = center_weight

std_window::Tuple{Int64, Int64} = (5, 5)
std_weights::Matrix{Union{Missing, Float64}} = allowmissing(ones(std_window))
std_weights[3, 3] = center_weight


###One weight matrix per slot, in order
weight_matrixes = [
    placeholder_matrix,
    placeholder_matrix,
    std_weights,
    placeholder_matrix,
    placeholder_matrix,
    iso_weights,
]

###Scratch HDF5 paths
path1 = joinpath(scratchspace, "_1.h5")
path2 = joinpath(scratchspace, "_2.h5")

        


"/Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/_2.h5"

In [3]:
##Create a toy sized cfradial file that we know the exact values of calculations for
ds_path = joinpath(scratchspace, "toy_set.nc")
###Remove any copy left over from a previous run so the file is created from scratch
isfile(ds_path) && rm(ds_path)

####start with 5x5
range_dim = 5
time_dim  = 5

###Coordinate values are derived from the dimension sizes so the two cannot disagree
times = collect(1:time_dim)
ranges = collect(1:range_dim)

sample_DBZ = Matrix{Union{Missing, Float32}}(reshape(sample(1:65, range_dim*time_dim),(range_dim, time_dim)))
sample_VEL = Matrix{Float32}(reshape(sample(-20:20, range_dim*time_dim), (range_dim, time_dim)))

###NCP is 1 everywhere except the first column, which is set to 0.1
sample_NCP = fill(1., (range_dim, time_dim))
sample_NCP[:,1] .= .1

###PGG is 0.1 everywhere except the first row, which is set to 1
sample_PGG = fill(.1, (range_dim, time_dim))
sample_PGG[1,:] .= 1

###VG is a copy of VEL with some gates removed
sample_VG = Matrix{Union{Missing, Float32}}(sample_VEL)
###Collocate with the low values of NCP
sample_VG[:,1] .= missing
###Also blank out the center row
sample_VG[3,:] .= missing

###Write everything inside a do-block so the dataset is closed even if one of the
###definitions below throws. The variable handles are only valid while the file is
###open, so they are kept local to the block.
NCDataset(ds_path, "c") do ds
    defDim(ds, "range", range_dim)
    defDim(ds, "time", time_dim)

    time_var = defVar(ds, "time", Float32, ("time",), attrib=Dict("units" => "s"))
    time_var[:] = times

    range_var = defVar(ds, "range", Float32, ("range",), attrib=Dict("units" => "m"))
    range_var[:] = ranges

    ncp_var = defVar(ds, "NCP", Float32, ("range", "time"), attrib=Dict("units" => "NCP units"))
    ncp_var[:,:] = sample_NCP

    vel_var = defVar(ds, "VEL", Float32, ("range", "time"), attrib=Dict("units" => "m/s"))
    vel_var[:,:] = sample_VEL

    defVar(ds, "DBZ", sample_DBZ, ("range", "time"), attrib=Dict("units" => "log"))
    defVar(ds, "PGG", sample_PGG, ("range", "time"), attrib=Dict("units" => "percent"))
    defVar(ds, "VG", sample_VG, ("range", "time"), attrib=Dict("units" => "m/s"))

    ###Do not hand a variable of the (about to be closed) dataset back to the notebook
    nothing
end

closed Dataset

In [4]:
###Task list used for every model pass
task_path = "./tasks.txt"


task_paths = [task_path, task_path]
input_path = ds_path
num_models = 2
initial_met_prob = (.1, .9)
final_met_prob = (.1,.9)

###Combine into vector for model configuration object
###It's important to note that length(met_probs) is enforced to be equal to num_models
met_probs = [initial_met_prob, final_met_prob]

###The following are default windows specified in RoninConstants.jl
###Standard 7x7 window
sw = Ronin.standard_window
###7x7 window with only nonzero weights in azimuth dimension
aw = Ronin.azi_window
###7x7 window with only nonzero weights in range dimension
rw = Ronin.range_window
###Placeholder window for tasks that do not require spatial context
pw = Ronin.placeholder_window

###Specify a weight matrix for each individual task in the configuration file
weight_vec = [pw, rw]
###Specify a weight vector for each model pass
###length(task_weights) is enforced to be equal to num_models (should have a set of weights for each pass)
task_weights = [weight_vec, weight_vec]

base_name = "raw_model"
base_name_features = "output_features"
###List of paths to output trained models to. Enforced to be same size as num_models
model_output_paths = [base_name * "_$(i-1).jld2" for i in 1:num_models ]
###List of paths to output calculated features to. Enforced to be same size as num_models
feature_output_paths = [base_name_features * "_$(i-1).h5" for i in 1:num_models]


###Options are "balanced" or "". If "balanced", the decision trees will be trained
###on a weighted version of the existing classes in order to combat class imbalance
class_weights = "balanced"

###Name of variable in cfradials that has already had interactive QC applied
QC_var = "VG"

###Name of a variable in input cfradials that has not had postprocessing applied.
###This variable is used to determine where MISSING gates exist in the scan, and so
###masks what gates are predicted upon: gates that are missing in it will be removed.
###(An earlier assignment of "VV" to this same name was immediately overwritten by
###the value below and has been dropped; the toy file contains no "VV" variable.)
remove_var = "VEL"

###Whether or not the input features for the model have already been calculated
file_preprocessed = [false, false]

###Where to write out the masks to in cfradial file.
mask_names = ["PASS_1_MASK", "PASS_2_MASK"]




2-element Vector{String}:
 "PASS_1_MASK"
 "PASS_2_MASK"

In [5]:
###Create model config object.
###file_preprocessed is taken from the variable defined alongside the other settings
###(rather than a second hard-coded [false, false]) so the two cannot disagree.
config = ModelConfig(
    num_models = num_models,
    model_output_paths = model_output_paths,
    met_probs = met_probs,
    feature_output_paths = feature_output_paths,
    input_path = input_path,
    task_mode = "nan",
    file_preprocessed = file_preprocessed,
    task_paths = task_paths,
    QC_var = QC_var,
    remove_var = remove_var,
    QC_mask = false,
    mask_names = mask_names,
    VARS_TO_QC = ["VEL"],
    class_weights = class_weights,
    HAS_INTERACTIVE_QC = true,
    task_weights = task_weights,
    REMOVE_HIGH_PGG = false,
    REMOVE_LOW_NCP = false,
)

ModelConfig(2, ["raw_model_0.jld2", "raw_model_1.jld2"], Tuple{Float32, Float32}[(0.1, 0.9), (0.1, 0.9)], ["output_features_0.h5", "output_features_1.h5"], "/Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc", "nan", Bool[0, 0], ["./tasks.txt", "./tasks.txt"], [""], Vector[Matrix{Union{Missing, Float32}}[[1.0 1.0 1.0; 1.0 1.0 1.0; 1.0 1.0 1.0], [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]], Matrix{Union{Missing, Float32}}[[1.0 1.0 1.0; 1.0 1.0 1.0; 1.0 1.0 1.0], [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0]]], true, false, false, true, "VG", "VEL", -32000.0f0, false, true, false, ["PASS_1_MASK", "PASS_2_MASK"], ["VEL"], "_QC", "balanced", 21, 14, false, 0.2f0, 1.0f0)

In [6]:
###Check that REMOVE_LOW_NCP controls how many gates end up in the pass-1 feature file
config.REMOVE_HIGH_PGG = false
config.REMOVE_LOW_NCP = false

###Gates whose NCP is above the configured threshold, and the total gate count
valid_NCP_gates = count(>(config.NCP_THRESHOLD), sample_NCP)
total_gates = length(sample_DBZ)

###First with low-NCP removal switched on, then off (the flag is left off afterwards)
for (remove_low_ncp, expected_gates) in ((true, valid_NCP_gates), (false, total_gates))
    config.REMOVE_LOW_NCP = remove_low_ncp

    ###Training is best-effort here: only the feature file written during pass 1 is
    ###inspected below. A failure is reported instead of being silently discarded so
    ###that an unexpected error is visible in the cell output.
    try
        train_multi_model(config)
    catch err
        @warn "train_multi_model raised an error; continuing to check the pass-1 feature file" exception = (err, catch_backtrace())
    end

    NCDataset(config.feature_output_paths[1]) do f
        @assert size(f["X"][:, :], 1) == expected_gates
    end
end

# valid_PGG_gates = sum(sample_PGG .< config.PGG_THRESHOLD)

# config.REMOVE_HIGH_PGG = true 

# try 
#     train_multi_model(config)
# catch 

# end 

# NCDataset(config.feature_output_paths[1]) do f
#     print(size(f["X"][:,:]))
#     @assert size(f["X"][:,:])[1] == valid_PGG_gates
# end 






[32mCALCULATING FEATURES FOR PASS: 1[39m
Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 1.2414519786834717 seconds
COMPLETED PROCESSING 1 FILES IN 1.37 SECONDS
OUTPUTTING DATA IN HDF5 FORMAT TO FILE: output_features_0.h5

WRITING DATA TO FILE OF SHAPE (20, 2)
X TYPE: Matrix{Float32}
[32mFINISHED CALCULATING FEATURES FOR PASS 1 in 2.41 seconds...[39m

[32mTRAINING MODEL FOR PASS: 1[39m

[32m...TRAINING FOR PASS: 1 ON 20 GATES...[39m

[34mOpening HDF5.File: (read-only) output_features_0.h5...[39m
FITTING MODEL
COMPLETED FITTING MODEL IN 0.7498788833618164 seconds

MODEL VERIFICATION:
ACCURACY ON TRAINING SET: 95.0%

[32mSAVING MODEL TO: raw_model_0.jld2 [39m
Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 0.0012240409851074219 seconds
COMPLETED PROCESSING 1 FILES IN 0.0 SECONDS
RESULTANT GATES: 16[31mDataset: /Users/ischluesche/.julia/scratc

In [7]:
# QC_scan(config)

# currs = NCDataset(ds_path)
# #currs["VEL_QC"].attrib["_FillValue"] == config.FILL_VAL
# #currs["PASS_2_MASK"].attrib["_FillValue"] == config.FILL_VAL 
# close(currs)

# config.FILL_VAL = 123.456 
# QC_scan(config)

# currs = NCDataset(ds_path)
# @assert currs["VEL_QC"].attrib["_FillValue"] == config.FILL_VAL
# #@assert currs["PASS_2_MASK"].attrib["_FillValue"] == config.FILL_VAL 
# close(currs)

In [8]:
###Test to ensure that QC_var is properly passed to the functions

###Boolean maps of where each field is non-missing; they must differ for the test to mean anything
VG_map = map(!ismissing, sample_VG)
DBZ_map = map(!ismissing, sample_DBZ)
@assert DBZ_map != VG_map

###Run the same feature calculation once per QC variable. Y, reshaped back onto the
###(range, time) grid, must equal the non-missing map of whichever variable was selected.
for (qc_name, expected_map) in (("VG", VG_map), ("DBZ", DBZ_map))
    ###Keep the results visible to later cells, as the straight-line version did
    global X, Y
    config.QC_var = qc_name
    X, Y = calculate_features(
        config.input_path, config.task_paths[1], config.feature_output_paths[1], config.HAS_INTERACTIVE_QC;
        verbose = config.verbose,
        REMOVE_LOW_NCP = config.REMOVE_LOW_NCP,
        NCP_THRESHOLD = config.NCP_THRESHOLD,
        REMOVE_HIGH_PGG = config.REMOVE_HIGH_PGG,
        PGG_THRESHOLD = config.PGG_THRESHOLD,
        QC_variable = config.QC_var,
        remove_variable = config.remove_var,
        replace_missing = config.replace_missing,
        write_out = config.write_out,
    )
    @assert reshape(Y, (range_dim, time_dim)) == expected_map
end

Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 0.005953073501586914 seconds
COMPLETED PROCESSING 1 FILES IN 0.01 SECONDS
OUTPUTTING DATA IN HDF5 FORMAT TO FILE: output_features_0.h5

WRITING DATA TO FILE OF SHAPE (25, 2)
X TYPE: Matrix{Float32}
Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 0.016927003860473633 seconds
COMPLETED PROCESSING 1 FILES IN 0.02 SECONDS
OUTPUTTING DATA IN HDF5 FORMAT TO FILE: output_features_0.h5

WRITING DATA TO FILE OF SHAPE (25, 2)
X TYPE: Matrix{Float32}


In [9]:
### Test to ensure remove_var is proper
###For each candidate remove variable, the number of feature rows returned must equal
###the number of gates that are non-missing in that variable.
for (remove_name, expected_map) in (("VG", VG_map), ("DBZ", DBZ_map))
    ###Keep the results visible to later cells, as the straight-line version did
    global X, Y
    config.remove_var = remove_name
    X, Y = calculate_features(
        config.input_path, config.task_paths[1], config.feature_output_paths[1], config.HAS_INTERACTIVE_QC;
        verbose = config.verbose,
        REMOVE_LOW_NCP = config.REMOVE_LOW_NCP,
        NCP_THRESHOLD = config.NCP_THRESHOLD,
        REMOVE_HIGH_PGG = config.REMOVE_HIGH_PGG,
        PGG_THRESHOLD = config.PGG_THRESHOLD,
        QC_variable = config.QC_var,
        remove_variable = config.remove_var,
        replace_missing = config.replace_missing,
        write_out = config.write_out,
    )
    @assert size(X)[1] == sum(expected_map)
end

Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 0.1264960765838623 seconds
COMPLETED PROCESSING 1 FILES IN 0.13 SECONDS
OUTPUTTING DATA IN HDF5 FORMAT TO FILE: output_features_0.h5

WRITING DATA TO FILE OF SHAPE (16, 2)
X TYPE: Matrix{Float32}
Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 0.001065969467163086 seconds
COMPLETED PROCESSING 1 FILES IN 0.0 SECONDS
OUTPUTTING DATA IN HDF5 FORMAT TO FILE: output_features_0.h5

WRITING DATA TO FILE OF SHAPE (25, 2)
X TYPE: Matrix{Float32}


In [10]:
"""
    train_multi_model_bench(config::ModelConfig)

Train one model per entry of `config.model_output_paths`.

For each pass `i` the function:
1. Obtains the features `X` and labels `Y`, either by reading them from
   `config.feature_output_paths[i]` (when `config.file_preprocessed[i]` is true) or by
   calling `calculate_features` on `config.input_path`.
2. Builds per-gate class weights when `config.class_weights` is `"balanced"`.
3. Calls `Ronin.train_model` to fit and save the model to `model_path`.
4. For every pass except the last, loads the saved model, predicts probabilities for each
   input file, and writes a mask named `config.mask_names[i + 1]` that marks the gates whose
   probability lies in `(minimum(met_probs[i]), maximum(met_probs[i])]`.

Throws an `AssertionError` when the per-pass vectors in `config` differ in length and a
`DomainError` when the model only predicts a single class.
"""
function train_multi_model_bench(config::ModelConfig)
    ##Quick input sanitation check
    @assert (length(config.model_output_paths) == length(config.feature_output_paths)
             == length(config.met_probs) == length(config.task_paths) == length(config.task_weights))

    full_start_time = time()
    ###Iteratively train models and apply QC_scan with the specified probabilites to train a multi-pass model
    ###pipeline
    for (i, model_path) in enumerate(config.model_output_paths)

        out = config.feature_output_paths[i]
        currt = config.task_paths[i]
        cw = config.task_weights[i]

        ##If execution proceeds past the first iteration, a composite model is being created, and
        ##so a further mask will be applied to the features
        QC_mask = i > 1 || config.QC_mask
        mask_name = QC_mask ? config.mask_names[i] : ""
        println("MASK NAME: $(mask_name)")
        starttime = time()

        if config.file_preprocessed[i]

            print("Reading input features from file $(out)...\n")
            ###Return the arrays from the do-block instead of assigning captured variables
            X, Y = h5open(out) do f
                (f["X"][:,:], f["Y"][:,:])
            end

        else
            printstyled("\nCALCULATING FEATURES FOR PASS: $(i)\n", color=:green)

            ###Check to see if the features file already exists, if so, delete it so
            ###that it may be overwritten
            if config.write_out && config.overwrite_output
                isfile(out) && rm(out)
            end

            X,Y = calculate_features(config.input_path, currt, out, config.HAS_INTERACTIVE_QC;
                                verbose = config.verbose, REMOVE_LOW_NCP = config.REMOVE_LOW_NCP,NCP_THRESHOLD=config.NCP_THRESHOLD,
                                REMOVE_HIGH_PGG=config.REMOVE_HIGH_PGG, PGG_THRESHOLD = config.PGG_THRESHOLD, QC_variable = config.QC_var,
                                remove_variable = config.remove_var, replace_missing = config.replace_missing,
                                write_out = config.write_out, QC_mask = QC_mask, mask_name = mask_name, weight_matrixes=cw)
            printstyled("FINISHED CALCULATING FEATURES FOR PASS $(i) in $(round(time() - starttime, digits = 3)) seconds...\n", color=:green)
        end

        printstyled("\nTRAINING MODEL FOR PASS: $(i)\n", color=:green)

        ###Default weights, used when no (or an unknown) class weighting is requested
        class_weights = Vector{Float32}([0.0,1.0])
        ##Train model based on these features
        if config.class_weights != ""

            if lowercase(config.class_weights) != "balanced"
                printstyled("ERROR: UNKNOWN CLASS WEIGHT $(config.class_weights)... \nContinuing with no weighting\n", color=:yellow)
            else
                ###Flatten the labels once rather than re-copying them for every class
                labels = Y[:,:][:]
                class_weights = zeros(Float32, length(labels))
                weight_dict = compute_balanced_class_weights(labels)
                for class in keys(weight_dict)
                    class_weights[labels .== class] .= weight_dict[class]
                end

            end
        end

        printstyled("\n...TRAINING FOR PASS: $(i) ON $(size(X)[1]) GATES...\n", color=:green)

        Ronin.train_model(out, model_path, n_trees = config.n_trees, max_depth = config.max_depth, class_weights = class_weights)


        ###If this was the last pass, we don't need to write out a mask, and we're done!
        ###Otherwise, we need to mask out the features we want to apply the model to on the next pass
        if i < config.num_models

            curr_model = load_object(model_path)
            curr_metprobs = config.met_probs[i]

            ###input_path may be a single file or a directory of files
            file_path = config.input_path
            paths = isdir(file_path) ? parse_directory(file_path) : [file_path]

            for path in paths

                dims = NCDataset(path) do f
                    (f.dim["range"], f.dim["time"])
                end

                ###TODO: NEED to update this if it's beyond two pass so we can pass it the correct mask
                X, Y, idxer = calculate_features(path, currt, out, true;
                                    verbose = config.verbose, REMOVE_LOW_NCP = config.REMOVE_LOW_NCP, NCP_THRESHOLD=config.NCP_THRESHOLD,
                                    REMOVE_HIGH_PGG=config.REMOVE_HIGH_PGG,PGG_THRESHOLD=config.PGG_THRESHOLD, QC_variable = config.QC_var,
                                    remove_variable = config.remove_var, replace_missing = config.replace_missing, return_idxer=true,
                                    write_out = false, QC_mask = QC_mask, mask_name = mask_name, weight_matrixes=cw)

                met_probs = DecisionTree.predict_proba(curr_model, X)
                if size(met_probs)[2] < 2
                    throw(DomainError(1, "ERROR: ONLY ONE CLASS IN INPUT DATASET"))
                end
                ###Keep only the second probability column
                met_probs = met_probs[:, 2]
                valid_idxs = (met_probs .> minimum(curr_metprobs)) .& (met_probs .<= maximum(curr_metprobs))
                ###println so that whatever is printed next starts on its own line
                println("RESULTANT GATES: $(sum(valid_idxs))")
                ##Create mask field, fill it, and then write out
                new_mask = Matrix{Union{Missing, Float32}}(missings(dims))[:]

                ##We only care about gates that have met the base QC thresholds, so first index
                ##by indexer returned from calculate_features, and then set the gates between
                ##the specified probability levels to valid in the mask. The next model pass will
                ##thus only be calculated upon these features.
                idxer = idxer[1][:]
                idxer[idxer] .= Vector{Bool}(valid_idxs)
                new_mask[idxer] .= 1.
                new_mask = reshape(new_mask, dims)

                write_field(path, config.mask_names[i+1], new_mask, attribs=Dict("Units" => "Bool", "Description" => "Gates between met prob theresholds"))

            end
        end
    end
    printstyled("\n COMPLETED TRAINING MODEL IN $(round(time() - full_start_time, digits = 3)) seconds...\n", color=:green)
end


train_multi_model_bench (generic function with 1 method)

In [11]:
###Turn on masking for the first pass (the following cell sets this flag again before training)
config.QC_mask = true

true

In [12]:
###Ensure that we have the full number of gates
@assert sum(DBZ_map) == (range_dim * time_dim)
###Then try and mask something out... first we need to write it to file though
###Can just use the QC'ed stuff

config.QC_mask = true
config.mask_names = ["VG", "OK"]

try
    train_multi_model(config)
catch err
    ###`catch DomainError` would bind *any* exception to a variable called DomainError
    ###rather than filter by type, so the type is checked explicitly and everything
    ###else is re-raised
    err isa DomainError || rethrow()
    println("OK")
    NCDataset(config.feature_output_paths[1]) do f1
        @assert size(f1["X"][:, :], 1) == count(!ismissing, sample_VG)
    end
    ###We should get a DomainError because we are removing the non-met
    ###gates in the first pass
else
    @assert false "expected train_multi_model to throw a DomainError when masking with VG"
end

###Now let's try it without the mask
config.QC_mask = false

try
    train_multi_model(config)
catch err
    ###A DomainError is tolerated here (possible we're just getting 100% accuracy);
    ###any other exception is a real failure
    err isa DomainError || rethrow()
    println("DOMAIN ERROR")
    NCDataset(config.feature_output_paths[1]) do f1
        println(size(f1["X"][:,:])[1] )
        @assert size(f1["X"][:, :], 1) == length(sample_DBZ)
    end
end
###Check to ensure that it's the full size 





[32mCALCULATING FEATURES FOR PASS: 1[39m
Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 0.0019328594207763672 seconds
COMPLETED PROCESSING 1 FILES IN 0.0 SECONDS
OUTPUTTING DATA IN HDF5 FORMAT TO FILE: output_features_0.h5

WRITING DATA TO FILE OF SHAPE (16, 2)
X TYPE: Matrix{Float32}
[32mFINISHED CALCULATING FEATURES FOR PASS 1 in 0.005 seconds...[39m

[32mTRAINING MODEL FOR PASS: 1[39m

[32m...TRAINING FOR PASS: 1 ON 16 GATES...[39m

[34mOpening HDF5.File: (read-only) output_features_0.h5...[39m
FITTING MODEL
COMPLETED FITTING MODEL IN 0.00040221214294433594 seconds

MODEL VERIFICATION:
ACCURACY ON TRAINING SET: 100.0%

[32mSAVING MODEL TO: raw_model_0.jld2 [39m
Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 0.0007100105285644531 seconds
COMPLETED PROCESSING 1 FILES IN 0.0 SECONDS
OK

[32mCALCULATING FEATURES FOR PASS: 1[39m
Processed 

In [13]:
###With two models configured, a single mask name must be rejected by both entry points
config.mask_names = ["OK"]
try
    train_multi_model(config)
catch err
    ###`catch AssertionError` would accept any exception (it only names the caught
    ###value), so check the type explicitly and re-raise anything else
    err isa AssertionError || rethrow()
else
    @assert false "expected train_multi_model to reject a mask_names vector of the wrong length"
end

try
    composite_prediction(config)
catch err
    err isa AssertionError || rethrow()
else
    @assert false "expected composite_prediction to reject a mask_names vector of the wrong length"
end

###Restore a mask name per model and switch first-pass masking back off
config.mask_names = ["OK", "MASK_2"]
config.QC_mask = false

false

In [8]:
###NOTE(review): `currs` is not assigned in any active cell above this one (only in commented-out code);
###this call presumably relies on a dataset handle left over from an earlier session - confirm before re-running
process_single_file(currs, "./tasks.txt"; NCP_THRESHOLD = Float32(.2), PGG_THRESHOLD=Float32(1.))

LoadError: NetCDF error: [31mVariable 'VV' not found in file ./toy_set.nc[39m (NetCDF error code: -49)


[32mCALCULATING FEATURES FOR PASS: 1[39m
Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 0.0018160343170166016 seconds
COMPLETED PROCESSING 1 FILES IN 0.01 SECONDS
OUTPUTTING DATA IN HDF5 FORMAT TO FILE: output_features_0.h5

WRITING DATA TO FILE OF SHAPE (25, 2)
X TYPE: Matrix{Float32}
[32mFINISHED CALCULATING FEATURES FOR PASS 1 in 0.012 seconds...[39m

[32mTRAINING MODEL FOR PASS: 1[39m

[32m...TRAINING FOR PASS: 1 ON 25 GATES...[39m

[34mOpening HDF5.File: (read-only) output_features_0.h5...[39m
FITTING MODEL
COMPLETED FITTING MODEL IN 0.0003218650817871094 seconds

MODEL VERIFICATION:
ACCURACY ON TRAINING SET: 96.0%

[32mSAVING MODEL TO: raw_model_0.jld2 [39m
Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 0.0008401870727539062 seconds
COMPLETED PROCESSING 1 FILES IN 0.0 SECONDS
RESULTANT GATES: 22[31mDataset: /Users/ischluesche/.julia

RandomForestClassifier
n_trees:             21
n_subfeatures:       -1
partial_sampling:    0.7
max_depth:           14
min_samples_leaf:    1
min_samples_split:   2
min_purity_increase: 0.0
classes:             [0, 1]
ensemble:            Ensemble of Decision Trees
Trees:      21
Avg Leaves: 6.904761904761905
Avg Depth:  4.666666666666667

In [18]:
####Test tree depth, n trees, etc.
###Train with non-default forest settings, then reload the saved pass-1 model and
###confirm that it carries the same settings
config.HAS_INTERACTIVE_QC = true
config.QC_var = "VG"
config.n_trees, config.max_depth = 40, 20

train_multi_model(config)

classifier = load_object(first(config.model_output_paths))
@assert classifier.n_trees == config.n_trees
@assert classifier.max_depth == config.max_depth




[32mCALCULATING FEATURES FOR PASS: 1[39m
Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 0.011059999465942383 seconds
COMPLETED PROCESSING 1 FILES IN 0.02 SECONDS
OUTPUTTING DATA IN HDF5 FORMAT TO FILE: output_features_0.h5

WRITING DATA TO FILE OF SHAPE (20, 2)
X TYPE: Matrix{Float32}
[32mFINISHED CALCULATING FEATURES FOR PASS 1 in 0.034 seconds...[39m

[32mTRAINING MODEL FOR PASS: 1[39m

[32m...TRAINING FOR PASS: 1 ON 20 GATES...[39m

[34mOpening HDF5.File: (read-only) output_features_0.h5...[39m
FITTING MODEL
COMPLETED FITTING MODEL IN 0.0016739368438720703 seconds

MODEL VERIFICATION:
ACCURACY ON TRAINING SET: 95.0%

[32mSAVING MODEL TO: raw_model_0.jld2 [39m
Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 0.0008320808410644531 seconds
COMPLETED PROCESSING 1 FILES IN 0.0 SECONDS
RESULTANT GATES: 15[31mDataset: /Users/ischluesche/.julia/

In [15]:
###Re-run training with low-NCP removal enabled (no assertions in this cell)
config.REMOVE_HIGH_PGG = false
config.REMOVE_LOW_NCP = false

###Reference gate counts: gates above the NCP threshold, and all gates
total_gates = length(sample_DBZ)
valid_NCP_gates = count(>(config.NCP_THRESHOLD), sample_NCP)

config.REMOVE_LOW_NCP = true

train_multi_model(config)


[32mCALCULATING FEATURES FOR PASS: 1[39m
Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 0.0016720294952392578 seconds
COMPLETED PROCESSING 1 FILES IN 0.0 SECONDS
OUTPUTTING DATA IN HDF5 FORMAT TO FILE: output_features_0.h5

WRITING DATA TO FILE OF SHAPE (20, 2)
X TYPE: Matrix{Float32}
[32mFINISHED CALCULATING FEATURES FOR PASS 1 in 0.007 seconds...[39m

[32mTRAINING MODEL FOR PASS: 1[39m

[32m...TRAINING FOR PASS: 1 ON 20 GATES...[39m

[34mOpening HDF5.File: (read-only) output_features_0.h5...[39m
FITTING MODEL
COMPLETED FITTING MODEL IN 0.00024819374084472656 seconds

MODEL VERIFICATION:
ACCURACY ON TRAINING SET: 95.0%

[32mSAVING MODEL TO: raw_model_0.jld2 [39m
Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 0.0009279251098632812 seconds
COMPLETED PROCESSING 1 FILES IN 0.0 SECONDS
RESULTANT GATES: 16[31mDataset: /Users/ischluesche/.julia

In [106]:
###Display the current contents of sample_DBZ
sample_DBZ

5×5 Matrix{Float32}:
 32.0  65.0  54.0  18.0  52.0
 44.0  22.0  12.0  35.0  63.0
 39.0  12.0  62.0   6.0  30.0
 10.0  24.0  55.0  59.0  49.0
 44.0  31.0  23.0  17.0  54.0

In [79]:
###Create model config object
config = ModelConfig(num_models = num_models,model_output_paths =  model_output_paths,met_probs =  met_probs,
                    feature_output_paths = feature_output_paths, input_path = input_path,task_mode="nan",file_preprocessed = [false, false],
                     task_paths = task_paths, QC_var = QC_var, remove_var = remove_var, QC_mask = false, mask_names = mask_names,
                     VARS_TO_QC = ["VEL"], class_weights = class_weights, HAS_INTERACTIVE_QC=true, task_weights = task_weights,
                     REMOVE_HIGH_PGG=false, REMOVE_LOW_NCP=false)

###Test that file_preprocessed decides whether the feature file is rewritten.
###Modification times are compared with each other instead of with the wall clock, so
###the result does not depend on how long training itself takes.
feature_path = config.feature_output_paths[1]

###mtime is 0.0 when the file does not exist yet
previous_mtime = mtime(feature_path)
###Make sure a fresh write gets a later timestamp than any earlier one
sleep(1)

config.file_preprocessed = [false, false]
train_multi_model(config)
fresh_mtime = mtime(feature_path)
###Features were recalculated, so the file must have been written again
@assert fresh_mtime > previous_mtime

sleep(2)
config.file_preprocessed = [true, true]
train_multi_model(config)
###Features were read back from disk, so the file must not have been touched
@assert mtime(feature_path) == fresh_mtime



[32mCALCULATING FEATURES FOR PASS: 1[39m
Processed ./toy_set.nc in 0.0027959346771240234 seconds
COMPLETED PROCESSING 1 FILES IN 0.0 SECONDS
OUTPUTTING DATA IN HDF5 FORMAT TO FILE: output_features_0.h5

WRITING DATA TO FILE OF SHAPE (25, 2)
X TYPE: Matrix{Float32}
[32mFINISHED CALCULATING FEATURES FOR PASS 1 in 0.007 seconds...[39m

[32mTRAINING MODEL FOR PASS: 1[39m

[32m...TRAINING FOR PASS: 1 ON 25 GATES...[39m

[34mOpening HDF5.File: (read-only) output_features_0.h5...[39m
FITTING MODEL
COMPLETED FITTING MODEL IN 0.0004858970642089844 seconds

MODEL VERIFICATION:
ACCURACY ON TRAINING SET: 100.0%

[32mSAVING MODEL TO: raw_model_0.jld2 [39m
Processed ./toy_set.nc in 0.001422882080078125 seconds
COMPLETED PROCESSING 1 FILES IN 0.0 SECONDS
RESULTANT GATES: 20[31mDataset: ./toy_set.nc[39m
Group: /

[31mDimensions[39m
   range = 5
   time = 5

[31mVariables[39m
[32m  time[39m   (5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  time
    Attributes:


In [83]:
###Always fails. NOTE(review): presumably a deliberate stop so that the scratch cells below
###are not executed when running the whole notebook - confirm
@assert false

LoadError: AssertionError: false

In [64]:
###Scratch 5x5 mask of ones with the third row set to missing.
###Note this defines a global named QC_mask, the same name as the config field.
QC_mask = Matrix{Union{Missing, Float32}}(fill(1.,(5,5)))
QC_mask[3,:] .= missing

5-element view(::Matrix{Union{Missing, Float32}}, 3, :) with eltype Union{Missing, Float32}:
 missing
 missing
 missing
 missing
 missing

524.8146052360535

LoadError: MethodError: objects of type Vector{Int64} are not callable
Use square brackets [] for indexing an Array.
The object of type `Vector{Int64}` exists, but no method is defined for this combination of argument types when trying to treat it as a callable object.

1.739560627353247e9

In [41]:
###Scratch check: [1 ;2] concatenates vertically into a 2-element vector
length([1 ;2])

2

In [96]:
###Print the number of rows of the X array stored in the first feature output file
NCDataset(config.feature_output_paths[1]) do f
    print(size(f["X"][:,:])[1])
end 

25

In [25]:
###Close the file handle currently held in `currs`
close(currs)

In [None]:
###test NCP and PGG thresholding 

In [4]:
###Run the full multi-pass training with the current config
train_multi_model(config)  


[32mCALCULATING FEATURES FOR PASS: 1[39m
Processed ./toy_set.nc in 0.7293710708618164 seconds
COMPLETED PROCESSING 1 FILES IN 0.84 SECONDS
OUTPUTTING DATA IN HDF5 FORMAT TO FILE: output_features_0.h5

WRITING DATA TO FILE OF SHAPE (25, 1)
X TYPE: Matrix{Float32}
[32mFINISHED CALCULATING FEATURES FOR PASS 1 in 1.591 seconds...[39m

[32mTRAINING MODEL FOR PASS: 1[39m

[32m...TRAINING FOR PASS: 1 ON 25 GATES...[39m

[34mOpening HDF5.File: (read-only) output_features_0.h5...[39m
FITTING MODEL
COMPLETED FITTING MODEL IN 0.45818400382995605 seconds

MODEL VERIFICATION:
ACCURACY ON TRAINING SET: 88.0%

[32mSAVING MODEL TO: raw_model_0.jld2 [39m
Processed ./toy_set.nc in 0.0004439353942871094 seconds
COMPLETED PROCESSING 1 FILES IN 0.0 SECONDS
RESULTANT GATES: 16[31mDataset: ./toy_set.nc[39m
Group: /

[31mDimensions[39m
   range = 5
   time = 5

[31mVariables[39m
[32m  time[39m   (5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  time
    Attributes:
    

In [9]:
###Open the first feature file for inspection. The handle stays open until close(currs) is called.
currs=h5open("output_features_0.h5")

🗂️ HDF5.File: (read-only) output_features_0.h5
├─ 🏷️ MISSING_FILL_VALUE
├─ 🏷️ Parameters
├─ 🔢 X
└─ 🔢 Y

In [12]:
###Read and display the whole X array from the handle in `currs`
currs["X"][:,:]

25×1 Matrix{Float32}:
 44.0
 57.0
  6.0
 60.0
 54.0
 22.0
 40.0
 29.0
 23.0
 63.0
 36.0
  6.0
 21.0
 37.0
  6.0
 38.0
 46.0
  4.0
 62.0
 26.0
 58.0
 12.0
 18.0
 43.0
 22.0

In [5]:
###Apply the trained models to the input using the current config and keep whatever QC_scan returns
rets = QC_scan(config)

[32mLOADING MODELS....[39m
(25, 1)[31mDataset: ./toy_set.nc[39m
Group: /

[31mDimensions[39m
   range = 5
   time = 5

[31mVariables[39m
[32m  time[39m   (5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  time
    Attributes:
     units                = [36ms[39m

[32m  range[39m   (5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  range
    Attributes:
     units                = [36mm[39m

[32m  NCP[39m   (5 × 5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  range × time
    Attributes:
     units                = [36mNCP units[39m

[32m  DBZ[39m   (5 × 5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  range × time
    Attributes:
     units                = [36mdBz[39m

[32m  VEL[39m   (5 × 5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  range × time
    Attributes:
     units                = [36mm/s[39m

[32m  VG[39m   (5 × 5)
    Datatype:    [0m[1mUnion{Mi

In [21]:
###Display the task file used for the first pass
config.task_paths[1]

"./tasks.txt"

In [8]:
###Close the file handle currently held in `currs`
close(currs)

closed Dataset

In [25]:
###Calculate features for a single open dataset without any NCP/PGG removal, using VG as the
###QC variable and VEL as the remove variable. Returns the features, the labels and an indexer.
###NOTE(review): `currs` must hold an open dataset here; the cell that opens it is not
###shown in this notebook - confirm before re-running
X, Y, indexer = process_single_file(currs, config.task_paths[1], HAS_INTERACTIVE_QC=true, 
        REMOVE_HIGH_PGG=false, REMOVE_LOW_NCP=false, QC_variable="VG", 
        replace_missing=false, remove_variable="VEL")

([25.0; 8.0; … ; 23.0; 7.0;;], [0; 0; … ; 1; 1;;], Bool[1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [26]:
###Close the dataset handle used by the process_single_file call above
close(currs)

closed Dataset

In [27]:
###Display the feature matrix returned by process_single_file
X

25×1 Matrix{Float64}:
 25.0
  8.0
 46.0
 46.0
 14.0
 38.0
 13.0
 16.0
 61.0
  6.0
  3.0
 61.0
 27.0
 62.0
 31.0
 36.0
 55.0
  8.0
 16.0
 41.0
 23.0
 63.0
 64.0
 23.0
  7.0

In [30]:
###Display the indexer returned by process_single_file
indexer

25-element BitVector:
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1

In [58]:
using JLD2
push!(LOAD_PATH, "../src/DecisionTree")
using DecisionTree



5×5 Matrix{Float64}:
 0.333333  1.0       0.952381  1.0       0.809524
 0.47619   0.571429  1.0       1.0       1.0
 0.333333  0.952381  0.714286  0.47619   1.0
 0.333333  1.0       1.0       0.952381  0.809524
 0.333333  0.761905  0.904762  0.666667  0.761905

5×5 Matrix{Float64}:
 0.333333  1.0       0.952381  1.0       0.809524
 0.47619   0.571429  1.0       1.0       1.0
 0.333333  0.952381  0.714286  0.47619   1.0
 0.333333  1.0       1.0       0.952381  0.809524
 0.333333  0.761905  0.904762  0.666667  0.761905

In [43]:
calced_probs[(calced_probs .>= .1) .& (calced_probs .<= .9)]

13-element Vector{Float64}:
 0.3333333333333333
 0.47619047619047616
 0.3333333333333333
 0.3333333333333333
 0.3333333333333333
 0.5714285714285714
 0.7619047619047619
 0.7142857142857143
 0.47619047619047616
 0.6666666666666666
 0.8095238095238095
 0.8095238095238095
 0.7619047619047619

In [44]:
(calced_probs .>= .1) .& (calced_probs .<= .9)

5×5 BitMatrix:
 1  0  0  0  1
 1  1  0  0  0
 1  0  1  1  0
 1  0  0  0  1
 1  1  0  1  1

In [49]:
currs["PASS_2_MASK"][:,:]

5×5 Matrix{Union{Missing, Float64}}:
 1.0   missing   missing   missing  1.0
 1.0  1.0        missing   missing   missing
 1.0   missing  1.0       1.0        missing
 1.0   missing   missing   missing  1.0
 1.0  1.0        missing  1.0       1.0

In [51]:
currs["VEL_QC"][:,:]

5×5 Matrix{Union{Missing, Float32}}:
 missing  -17.0       -11.0         1.0          missing
 missing     missing    7.0       -19.0       -15.0
 missing  -10.0          missing     missing   -6.0
 missing   -9.0       -14.0         6.0          missing
 missing   17.0       -15.0          missing     missing

In [52]:
calced_probs = reshape(mps, (5,5))

5×5 Matrix{Float64}:
 0.333333  1.0       0.952381  1.0       0.809524
 0.47619   0.571429  1.0       1.0       1.0
 0.333333  0.952381  0.714286  0.47619   1.0
 0.333333  1.0       1.0       0.952381  0.809524
 0.333333  0.761905  0.904762  0.666667  0.761905

In [54]:
reshape(probs,(5,5))

5×5 Matrix{Float64}:
 0.333333  1.0       0.952381  1.0       0.809524
 0.619048  0.619048  1.0       1.0       1.0
 0.142857  0.952381  0.714286  0.619048  1.0
 0.142857  1.0       1.0       0.952381  0.809524
 0.238095  0.952381  0.904762  0.47619   0.714286

In [47]:
using NCDatasets
currs = NCDataset("./toy_set.nc")

[31mDataset: ./toy_set.nc[39m
Group: /

[31mDimensions[39m
   range = 5
   time = 5

[31mVariables[39m
[32m  time[39m   (5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  time
    Attributes:
     units                = [36ms[39m

[32m  range[39m   (5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  range
    Attributes:
     units                = [36mm[39m

[32m  NCP[39m   (5 × 5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  range × time
    Attributes:
     units                = [36mNCP units[39m

[32m  DBZ[39m   (5 × 5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  range × time
    Attributes:
     units                = [36mdBz[39m

[32m  VEL[39m   (5 × 5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  range × time
    Attributes:
     units                = [36mm/s[39m

[32m  VG[39m   (5 × 5)
    Datatype:    [0m[1mUnion{Missing, Float32}[22m (Float32)
    D

In [39]:
config.met_probs

2-element Vector{Tuple{Float64, Float64}}:
 (0.1, 0.9)
 (0.1, 0.9)

In [37]:
reshape(probs, (5,5))

5×5 Matrix{Float64}:
 0.333333  1.0       0.952381  1.0       0.809524
 0.619048  0.619048  1.0       1.0       1.0
 0.142857  0.952381  0.714286  0.619048  1.0
 0.142857  1.0       1.0       0.952381  0.809524
 0.238095  0.952381  0.904762  0.47619   0.714286

In [15]:
rets = composite_prediction(config, return_probs=true)

[32mLOADING MODELS....[39m
(25, 1)[31mDataset: ./toy_set.nc[39m
Group: /

[31mDimensions[39m
   range = 5
   time = 5

[31mVariables[39m
[32m  time[39m   (5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  time
    Attributes:
     units                = [36ms[39m

[32m  range[39m   (5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  range
    Attributes:
     units                = [36mm[39m

[32m  NCP[39m   (5 × 5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  range × time
    Attributes:
     units                = [36mNCP units[39m

[32m  DBZ[39m   (5 × 5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  range × time
    Attributes:
     units                = [36mdBz[39m

[32m  VEL[39m   (5 × 5)
    Datatype:    [0m[1mFloat32[22m (Float32)
    Dimensions:  range × time
    Attributes:
     units                = [36mm/s[39m

[32m  VG[39m   (5 × 5)
    Datatype:    [0m[1mUnion{Mi

(Bool[0, 0, 0, 0, 0, 1, 0, 1, 1, 1  …  1, 1, 0, 1, 0, 0, 1, 1, 0, 0], [0; 0; … ; 1; 1;;], [[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0  …  1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]], [0.3333333333333333, 0.6190476190476191, 0.14285714285714285, 0.14285714285714285, 0.23809523809523808, 1.0, 0.6190476190476191, 0.9523809523809523, 1.0, 0.9523809523809523  …  1.0, 1.0, 0.6190476190476191, 0.9523809523809523, 0.47619047619047616, 0.8095238095238095, 1.0, 1.0, 0.8095238095238095, 0.7142857142857143])

In [17]:
###Unpack the four values returned by composite_prediction(config, return_probs=true)
###NOTE(review): the first element prints as a Bool vector, so `X` here probably holds predictions
###rather than features, and this assignment overwrites any feature matrix named `X` -- confirm
X, y, idxers, probs = rets

(Bool[0, 0, 0, 0, 0, 1, 0, 1, 1, 1  …  1, 1, 0, 1, 0, 0, 1, 1, 0, 0], [0; 0; … ; 1; 1;;], [[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0  …  1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]], [0.3333333333333333, 0.6190476190476191, 0.14285714285714285, 0.14285714285714285, 0.23809523809523808, 1.0, 0.6190476190476191, 0.9523809523809523, 1.0, 0.9523809523809523  …  1.0, 1.0, 0.6190476190476191, 0.9523809523809523, 0.47619047619047616, 0.8095238095238095, 1.0, 1.0, 0.8095238095238095, 0.7142857142857143])

In [19]:
reshape(probs, (5,5))

5×5 Matrix{Float64}:
 0.333333  1.0       0.952381  1.0       0.809524
 0.619048  0.619048  1.0       1.0       1.0
 0.142857  0.952381  0.714286  0.619048  1.0
 0.142857  1.0       1.0       0.952381  0.809524
 0.238095  0.952381  0.904762  0.47619   0.714286

In [118]:
sum(idxers)

25-element Vector{Float64}:
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0

In [112]:
###Load the serialized model stored in raw_model_0.jld2
curr_model = load_object("raw_model_0.jld2")
###Keep the second column of the predicted class probabilities for X
###NOTE(review): presumably the probability of the positive class -- confirm the column order
mps = predict_proba(curr_model, X)[:,2]
###Lay the probabilities out on the 5x5 grid of the toy scan
calced_probs = reshape(mps, (5,5))


###Entries whose probability lies inside the closed interval given by the first entry of config.met_probs
pass_2_valid = (calced_probs .>= config.met_probs[1][1]) .& (calced_probs .<= config.met_probs[1][2])
###Mask stored in the dataset under the second configured mask name, read in full for comparison
pass_2_real = currs[config.mask_names[2]][:,:]

5×5 Matrix{Union{Missing, Float64}}:
 1.0   missing   missing   missing  1.0
 1.0  1.0        missing   missing   missing
 1.0   missing  1.0       1.0        missing
 1.0   missing   missing   missing  1.0
 1.0  1.0        missing  1.0       1.0

In [113]:
###Replace every missing entry of the stored mask with 0 so it can be compared against a Bool mask
pass_2_real[ismissing.(pass_2_real)] .= 0

12-element view(reshape(::Matrix{Union{Missing, Float64}}, 25), [6, 8, 9, 11, 12, 14, 15, 16, 17, 19, 22, 23]) with eltype Union{Missing, Float64}:
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

In [116]:
Matrix{Bool}(pass_2_real) == pass_2_valid

true

In [104]:
pass_2_real

5×5 Matrix{Union{Missing, Float64}}:
 1.0   missing   missing   missing  1.0
 1.0  1.0        missing   missing   missing
 1.0   missing  1.0       1.0        missing
 1.0   missing   missing   missing  1.0
 1.0  1.0        missing  1.0       1.0

In [98]:
pass_2_real

5×5 Matrix{Union{Missing, Float64}}:
 1.0   missing   missing   missing  1.0
 1.0  1.0        missing   missing   missing
 1.0   missing  1.0       1.0        missing
 1.0   missing   missing   missing  1.0
 1.0  1.0        missing  1.0       1.0

In [89]:
###Record the positions where pass_2_valid is false (== 0)
idx = pass_2_valid .== 0 
###Widen the element type so the matrix is able to hold `missing`
pass_2_valid = Matrix{Union{Bool, Missing}}(pass_2_valid)

5×5 Matrix{Union{Missing, Bool}}:
 1  0  0  0  1
 1  1  0  0  0
 1  0  1  1  0
 1  0  0  0  1
 1  1  0  1  1

In [90]:
###Overwrite the `false` entries with `missing`
###NOTE(review): presumably done to mirror the missing-based layout of pass_2_real -- confirm
pass_2_valid[.! pass_2_valid] .= missing

12-element view(reshape(::Matrix{Union{Missing, Bool}}, 25), [6, 8, 9, 11, 12, 14, 15, 16, 17, 19, 22, 23]) with eltype Union{Missing, Bool}:
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing

In [92]:
###NOTE: `==` propagates `missing`, so on arrays that hold missing values it can evaluate to
###`missing` (as it does here) instead of true/false; `isequal` always returns a Bool
pass_2_valid == pass_2_real

missing

In [75]:
idx

5×5 BitMatrix:
 0  1  1  1  0
 0  0  1  1  1
 0  1  0  0  1
 0  1  1  1  0
 0  0  1  0  0

In [74]:
pass_2_valid[idx] .== missing

12-element Vector{Missing}:
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing

In [67]:
pass_2_valid .== pass_2_real

5×5 Matrix{Union{Missing, Bool}}:
 1   missing   missing   missing  1
 1  1          missing   missing   missing
 1   missing  1         1          missing
 1   missing   missing   missing  1
 1  1          missing  1         1

In [61]:
currs["./toy_set.nc"]

LoadError: NetCDF error: [31mVariable './toy_set.nc' not found in file ./toy_set.nc[39m (NetCDF error code: -49)

In [57]:
###Re-open the toy dataset and read the whole VEL_QC variable so it can be checked in a test
currs = NCDataset("./toy_set.nc")
QCed_set = currs["VEL_QC"][:,:]

5×5 Matrix{Union{Missing, Float32}}:
 missing  -17.0       -11.0         1.0          missing
 missing     missing    7.0       -19.0       -15.0
 missing  -10.0          missing     missing   -6.0
 missing   -9.0       -14.0         6.0          missing
 missing   17.0       -15.0          missing     missing

In [143]:
sample_DBZ

5×5 Matrix{Union{Missing, Float32}}:
  8.0  28.0  35.0  57.0  65.0
 22.0  57.0  55.0  26.0  22.0
 33.0  19.0  16.0  54.0  13.0
 33.0   2.0  48.0  53.0  43.0
 43.0  29.0  10.0  54.0  63.0

In [249]:
##Set up a toy example to do the analytical calculations 

"""
    slide_window(var, window, func; replace_missing=false)

Apply `func` to the weighted neighbourhood of every element of `var`.

For the element at `(r, c)` the neighbourhood is the part of `var` covered by `window`
when the window centre sits on `(r, c)`. Near the edges the neighbourhood is clipped to
the bounds of `var` and only the matching sub-block of `window` is used (no padding).
The neighbourhood is multiplied element-wise by those weights, the product is handed to
`func`, and the scalar it returns is stored at `(r, c)`.

Rows and columns are handled independently, so neither `var` nor `window` has to be
square. For an even-sized window dimension the centre is `div(n, 2) + 1`, which leaves
one fewer element after the centre than before it.

# Arguments
- `var`: 2-D field to process.
- `window`: weight matrix; its centre is `(div(rows, 2) + 1, div(cols, 2) + 1)`.
- `func`: reduction called on each weighted neighbourhood matrix (e.g. `std`, `sum`).

# Keywords
- `replace_missing`: accepted for call compatibility; this implementation does not use it.

# Returns
A `Matrix{Float32}` with the same size as `var`.
"""
function slide_window(var::Matrix{Union{Missing, Float32}}, window::Matrix{Union{Missing,Float32}}, func; replace_missing = false)

    nrow, ncol = size(var)
    wrow, wcol = size(window)

    ###Window center as (row, column); integer division puts it in the middle for odd sizes
    center_row = div(wrow, 2) + 1
    center_col = div(wcol, 2) + 1

    ###How far the window reaches before/after its center along each dimension
    rows_before = center_row - 1
    rows_after  = wrow - center_row
    cols_before = center_col - 1
    cols_after  = wcol - center_col

    res = zeros(Float64, nrow, ncol)
    ###Column-major traversal: the inner loop runs over the first (row) index
    for c in 1:ncol
        for r in 1:nrow

            ###Neighbourhood bounds in `var`, clipped at the array edges
            rmin = max(r - rows_before, 1)
            rmax = min(r + rows_after, nrow)
            cmin = max(c - cols_before, 1)
            cmax = min(c + cols_after, ncol)

            ###Matching sub-block of the weights, expressed relative to the window center
            wrows = (center_row - (r - rmin)):(center_row + (rmax - r))
            wcols = (center_col - (c - cmin)):(center_col + (cmax - c))

            weighted = var[rmin:rmax, cmin:cmax] .* window[wrows, wcols]
            res[r, c] = func(weighted)
        end
    end
    return Matrix{Float32}(res)
end

slide_window (generic function with 3 methods)

In [250]:
sample_DBZ[2:5, 1:5] .* sw[1:4, 1:5]

4×5 Matrix{Float32}:
 22.0  57.0  55.0  26.0  22.0
 33.0  19.0  16.0  54.0  13.0
 33.0   2.0  48.0  53.0  43.0
 43.0  29.0  10.0  54.0  63.0

In [255]:
sw

7×7 Matrix{Union{Missing, Float32}}:
 1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0

In [268]:
###Interpolate the notebook globals with `$` so the timing does not include untyped-global overhead
@btime slide_window($sample_DBZ, $sw, std)[:]

  6.942 μs (207 allocations: 14.83 KiB)


25-element Vector{Float32}:
 18.169113
 17.82045
 17.82045
 17.82045
 17.86197
 18.602419
 18.757486
 18.757486
 18.757486
 18.191597
 18.602419
 18.757486
 18.757486
 18.757486
 18.191597
 18.602419
 18.757486
 18.757486
 18.757486
 18.191597
 19.333801
 19.68295
 19.68295
 19.68295
 20.07818

In [88]:
###Write every entry of `tasks` to ./ok.txt, each one preceded by a single space
open("./ok.txt", "w") do io
    foreach(t -> write(io, " " * t), tasks)
end

In [269]:
sample_DBZ

5×5 Matrix{Union{Missing, Float32}}:
  8.0  28.0  35.0  57.0  65.0
 22.0  57.0  55.0  26.0  22.0
 33.0  19.0  16.0  54.0  13.0
 33.0   2.0  48.0  53.0  43.0
 43.0  29.0  10.0  54.0  63.0

In [281]:
####Writing tests for calculate_features 

"""
    test_calculate_features()

Exercise both call forms of `calculate_features` on the dataset at `ds_path`.

The first call passes the task names and weight matrices directly; the second passes the
path of a task file written into `scratchspace` and supplies the weights by keyword.
Assertions check that columns 1 and 2 of the first feature matrix equal the flattened
`sample_VEL` and `sample_DBZ`, that the labels equal the non-missing mask of `sample_VG`,
and that both calls return equal features and labels.

Relies on the notebook globals `ds_path`, `pw`, `sw`, `rw`, `scratchspace`, `sample_DBZ`,
`sample_VEL` and `sample_VG`. Returns the tuple `(X1, X2)`.
"""
function test_calculate_features()

    dataset_path = ds_path
    feature_tasks = ["VEL", "DBZ", "STD(DBZ)", "STD(DBZ)"]
    weights = [pw, pw, sw, rw]
    h5_out = joinpath(scratchspace, "trash_output.h5")
    interactive_qc = true

    ###Form 1: tasks and weights handed over as positional arguments
    X1, Y1 = calculate_features(dataset_path, feature_tasks, weights, h5_out, interactive_qc;
            verbose=true, REMOVE_LOW_NCP=false, NCP_THRESHOLD=Float32(.2),
            QC_variable="VG", remove_variable="VEL")

    @assert X1[:,2] == sample_DBZ[:]
    @assert X1[:,1] == sample_VEL[:]
    @assert Y1[:]   == .! map(ismissing, sample_VG[:])

    ###Form 2: tasks read from a file, every task written with a leading comma
    print(feature_tasks)
    task_file = joinpath(scratchspace, "./sample_tasks.txt")
    open(task_file, "w") do io
        foreach(t -> write(io, "," * t), feature_tasks)
    end

    X2, Y2 = calculate_features(dataset_path, task_file, h5_out, true,
                        QC_variable="VG", remove_variable="VEL",
                        weight_matrixes = weights)

    ###Both forms have to agree
    @assert X1 == X2
    @assert Y1 == Y2

    return X1, X2
end

test_calculate_features (generic function with 1 method)

In [282]:
X1, X2 = test_calculate_features()

Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 0.002089977264404297 seconds
COMPLETED PROCESSING 1 FILES IN 0.0 SECONDS
OUTPUTTING DATA IN HDF5 FORMAT TO FILE: /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/trash_output.h5

WRITING DATA TO FILE OF SHAPE (25, 4)
X TYPE: Matrix{Float32}
["VEL", "DBZ", "STD(DBZ)", "STD(DBZ)"]COMPLETED PROCESSING 1 FILES IN 0.0 SECONDS
OUTPUTTING DATA IN HDF5 FORMAT TO FILE: /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/trash_output.h5

WRITING DATA TO FILE OF SHAPE (25, 4)
X TYPE: Matrix{Float32}


(Float32[6.0 8.0 18.169113 17.461864; 18.0 22.0 17.82045 17.859392; … ; 2.0 43.0 19.682947 26.294888; 11.0 63.0 20.07818 24.581837], Float32[6.0 8.0 18.169113 17.461864; 18.0 22.0 17.82045 17.859392; … ; 2.0 43.0 19.682947 26.294888; 11.0 63.0 20.07818 24.581837])

In [138]:
X2 

25×4 Matrix{Float32}:
   6.0   8.0  22.4678  17.4619
  18.0  22.0  21.7316  17.8594
  -3.0  33.0  22.391   17.8594
   8.0  33.0  21.4871  17.8594
  18.0  43.0  22.0424  19.0189
  12.0  28.0  23.4338  19.9963
   8.0  57.0  22.2224  19.4189
   1.0  19.0  21.334   19.4189
  15.0   2.0  22.3553  19.4189
  10.0  29.0  23.604   20.4123
  -5.0  35.0  23.4338  23.7675
  19.0  55.0  22.2224  23.2029
  18.0  16.0  21.334   23.2029
   8.0  48.0  22.3553  23.2029
  18.0  10.0  23.604   23.5199
   9.0  57.0  23.4338  24.418
 -18.0  26.0  22.2224  25.0767
   2.0  54.0  21.334   25.0767
   0.0  53.0  22.3553  25.0767
 -20.0  54.0  23.604   24.0947
   3.0  65.0  25.1369  25.0951
  17.0  22.0  23.6516  26.2949
 -19.0  13.0  22.6864  26.2949
   2.0  43.0  23.3206  26.2949
  11.0  63.0  24.6194  24.5818

In [29]:
Ronin._weighted_func

_weighted_func (generic function with 3 methods)

In [31]:
using ImageFiltering

In [80]:
"""
    get_window_matrixes(x)

Print `OK ` followed by the element `x[4,4]` on its own line, then return that element.
"""
function get_window_matrixes(x)
    center = x[4,4]
    println("OK $(center)")
    return center
end

get_window_matrixes (generic function with 1 method)

In [81]:
###Slide a window the size of `aw` over sample_DBZ, padding the border with missing, and
###collect what get_window_matrixes returns for each window
mapwindow(get_window_matrixes, sample_DBZ, size(aw), border=Fill(missing))

OK 8.0
OK 8.0
OK 22.0
OK 33.0
OK 33.0
OK 43.0
OK 28.0
OK 57.0
OK 19.0
OK 2.0
OK 29.0
OK 35.0
OK 55.0
OK 16.0
OK 48.0
OK 10.0
OK 57.0
OK 26.0
OK 54.0
OK 53.0
OK 54.0
OK 65.0
OK 22.0
OK 13.0
OK 43.0
OK 63.0


5×5 Matrix{Float32}:
  8.0  28.0  35.0  57.0  65.0
 22.0  57.0  55.0  26.0  22.0
 33.0  19.0  16.0  54.0  13.0
 33.0   2.0  48.0  53.0  43.0
 43.0  29.0  10.0  54.0  63.0

In [55]:
sample_DBZ

5×5 Matrix{Union{Missing, Float32}}:
  8.0  28.0  35.0  57.0  65.0
 22.0  57.0  55.0  26.0  22.0
 33.0  19.0  16.0  54.0  13.0
 33.0   2.0  48.0  53.0  43.0
 43.0  29.0  10.0  54.0  63.0

In [60]:
things[12] === things[11]

true

In [41]:
Y

25×1 Matrix{Int64}:
 0
 0
 0
 0
 0
 1
 1
 0
 1
 1
 1
 1
 0
 1
 1
 1
 1
 0
 1
 1
 1
 1
 0
 1
 1

In [44]:
X

25×4 Matrix{Float32}:
   6.0   8.0  22.4678  17.4619
  18.0  22.0  21.7316  17.8594
  -3.0  33.0  22.391   17.8594
   8.0  33.0  21.4871  17.8594
  18.0  43.0  22.0424  19.0189
  12.0  28.0  23.4338  19.9963
   8.0  57.0  22.2224  19.4189
   1.0  19.0  21.334   19.4189
  15.0   2.0  22.3553  19.4189
  10.0  29.0  23.604   20.4123
  -5.0  35.0  23.4338  23.7675
  19.0  55.0  22.2224  23.2029
  18.0  16.0  21.334   23.2029
   8.0  48.0  22.3553  23.2029
  18.0  10.0  23.604   23.5199
   9.0  57.0  23.4338  24.418
 -18.0  26.0  22.2224  25.0767
   2.0  54.0  21.334   25.0767
   0.0  53.0  22.3553  25.0767
 -20.0  54.0  23.604   24.0947
   3.0  65.0  25.1369  25.0951
  17.0  22.0  23.6516  26.2949
 -19.0  13.0  22.6864  26.2949
   2.0  43.0  23.3206  26.2949
  11.0  63.0  24.6194  24.5818

In [43]:
std(skipmissing(things[1] .* aw))

24.619436f0

In [39]:
sample_DBZ

5×5 Matrix{Union{Missing, Float32}}:
  8.0  28.0  35.0  57.0  65.0
 22.0  57.0  55.0  26.0  22.0
 33.0  19.0  16.0  54.0  13.0
 33.0   2.0  48.0  53.0  43.0
 43.0  29.0  10.0  54.0  63.0

In [27]:
aw

7×7 Matrix{Union{Missing, Float32}}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  1.0  1.0  1.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0

In [21]:
std([8., 28., 35., 57., 22., 57.,55.,26., 65., 22.])

19.443650777453175

In [26]:
X

25×4 Matrix{Float32}:
   6.0   8.0  22.4678  17.4619
  18.0  22.0  21.7316  17.8594
  -3.0  33.0  22.391   17.8594
   8.0  33.0  21.4871  17.8594
  18.0  43.0  22.0424  19.0189
  12.0  28.0  23.4338  19.9963
   8.0  57.0  22.2224  19.4189
   1.0  19.0  21.334   19.4189
  15.0   2.0  22.3553  19.4189
  10.0  29.0  23.604   20.4123
  -5.0  35.0  23.4338  23.7675
  19.0  55.0  22.2224  23.2029
  18.0  16.0  21.334   23.2029
   8.0  48.0  22.3553  23.2029
  18.0  10.0  23.604   23.5199
   9.0  57.0  23.4338  24.418
 -18.0  26.0  22.2224  25.0767
   2.0  54.0  21.334   25.0767
   0.0  53.0  22.3553  25.0767
 -20.0  54.0  23.604   24.0947
   3.0  65.0  25.1369  25.0951
  17.0  22.0  23.6516  26.2949
 -19.0  13.0  22.6864  26.2949
   2.0  43.0  23.3206  26.2949
  11.0  63.0  24.6194  24.5818

Processed /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/toy_set.nc in 0.007937192916870117 seconds
COMPLETED PROCESSING 1 FILES IN 0.03 SECONDS
OUTPUTTING DATA IN HDF5 FORMAT TO FILE: /Users/ischluesche/.julia/scratchspaces/905eca60-9fa9-4fb3-8835-f5cd63a3719c/ronin_testing/trash_output.h5

WRITING DATA TO FILE OF SHAPE (25, 4)
X TYPE: Matrix{Float32}


(Float32[6.0 8.0 22.467754 17.461864; 18.0 22.0 21.731615 17.859392; … ; 2.0 43.0 23.320648 26.294888; 11.0 63.0 24.619436 24.581837], [0; 0; … ; 1; 1;;])

In [20]:
X

20×4 Matrix{Float32}:
  12.0  28.0  23.4338  19.9963
   8.0  57.0  22.2224  19.4189
   1.0  19.0  21.334   19.4189
  15.0   2.0  22.3553  19.4189
  10.0  29.0  23.604   20.4123
  -5.0  35.0  23.4338  23.7675
  19.0  55.0  22.2224  23.2029
  18.0  16.0  21.334   23.2029
   8.0  48.0  22.3553  23.2029
  18.0  10.0  23.604   23.5199
   9.0  57.0  23.4338  24.418
 -18.0  26.0  22.2224  25.0767
   2.0  54.0  21.334   25.0767
   0.0  53.0  22.3553  25.0767
 -20.0  54.0  23.604   24.0947
   3.0  65.0  25.1369  25.0951
  17.0  22.0  23.6516  26.2949
 -19.0  13.0  22.6864  26.2949
   2.0  43.0  23.3206  26.2949
  11.0  63.0  24.6194  24.5818

In [14]:
sample_NCP

5×5 Matrix{Float64}:
 0.1  1.0  1.0  1.0  1.0
 0.1  1.0  1.0  1.0  1.0
 0.1  1.0  1.0  1.0  1.0
 0.1  1.0  1.0  1.0  1.0
 0.1  1.0  1.0  1.0  1.0

In [13]:
sample_VG

5×5 Matrix{Union{Missing, Float32}}:
 missing  12.0       -5.0         9.0        3.0
 missing   8.0       19.0       -18.0       17.0
 missing    missing    missing     missing    missing
 missing  15.0        8.0         0.0        2.0
 missing  10.0       18.0       -20.0       11.0