In [203]:
###Script for datamining a given CFRadial file for the necessary predictors to feed to JMLQC model 

###Need to apply controls here to ensure that dimensionality stays the same throughout the processing
###Even if we do end up flattening it to put into h5 


include("./JMLQC_utils.jl")
using .JMLQC_utils
using NCDatasets
using HDF5
using ArgParse 



In [28]:
using BenchmarkTools

In [None]:
###Overall structure 
###Give script desired variables, input file (or list thereof?), and output file 
###script will compute desired variables from the data, and output into and h5 file? 

In [197]:
###Define queues for function queues
AVG_QUEUE = String[]
ISO_QUEUE = String[] 
STD_QUEUE = String[] 

function parse_commandline()
    
    s = ArgParseSettings()

    @add_arg_table s begin
        "--argfile","-f"
            help = ("File containing comma-delimited list of variables you wish the script to calculate and output\n
                    Currently supported funcs include AVG(var_name), STD(var_name), and ISO(var_name)\n
                    Example file content: DBZ, VEL, AVG(DBZ), AVG(VEL), ISO(DBZ), STD(DBZ)\n")
        "--outfile", "-o"
            help = "Location to output mined data to"
            default = "./mined_data.h5"
        "CFRad_path"
            help = "Path to input CFRadial File"
            required = true
    end

    return parse_args(s)
end

function main()
    
    parsed_args = parse_commandline()
    println("Parsed args:")
    for (arg,val) in parsed_args
        println("  $arg  =>  $val")
    end
    print(parsed_args)
    ##Load given netCDF file 
    
    cfrad = NCDataset(parsed_args["CFRad_path"])
    valid_vars = keys(cfrad)
    
    tasks = get_task_params(parsed_args["argfile"], valid_vars)
    
    fid = h5open(parsed_args["outfile"], "w")
    
    create_dataset(fid,"./X")
    
end

main()

Parsed args:
  CFRad_path  =>  /Users/ischluesche/Library/Jupyter/runtime/kernel-25c0bae8-1a5b-43f0-9f68-82adf3446daa.json
  outfile  =>  ./mined_data.h5
  argfile  =>  nothing
Dict{String, Any}("CFRad_path" => "/Users/ischluesche/Library/Jupyter/runtime/kernel-25c0bae8-1a5b-43f0-9f68-82adf3446daa.json", "outfile" => "./mined_data.h5", "argfile" => nothing)

LoadError: NetCDF error: [31mOpening path /Users/ischluesche/Library/Jupyter/runtime/kernel-25c0bae8-1a5b-43f0-9f68-82adf3446daa.json: NetCDF: Unknown file format[39m (NetCDF error code: -51)

In [182]:

func_regex = r"(\w{1,})\((\w{1,})\)"
valid_funcs = ["AVG", "ISO", "STD"]
###Parses given parameter file and ensures that specified variables are found within the 
###passed CFradial file
###Could potentially internally return this as queues for each function 

##TODO case insensitivity 
function get_task_params(params_file, variablelist; delimiter=",")
    
    tasks = readlines(params_file)
    task_param_list = String[]
    
    for line in tasks
        if line[1] == "#"
            continue
        else
            delimited = split(line, delimiter)
            for token in delimited
                expr_ret = match(func_regex,token)
                if (typeof(expr_ret) != Nothing)
                    if (expr_ret[1] ∉ valid_funcs || expr_ret[2] ∉ variablelist)
                        print(("ERROR: CANNOT CALCULATE $(expr_ret[1]) of $(expr_ret[2])\nPotentially invalid function or missing variable"))
                    else
                        print("CACLULATE $(expr_ret[1]) of $(expr_ret[2])\n")
                        push!(task_param_list, token)
                    end 
                else
                    if token in variablelist
                        push!(task_param_list, token)
                    else
                        print("\"$token\" NOT FOUND IN CFRAD FILE.... CONTINUING...\n")
                    end
                end
            end
        end 
    end 
    
    return(task_param_list)
end 

get_task_params (generic function with 1 method)

In [12]:
currset = Dataset("CFRADIAL/cfrad.19950516_221944.169_to_19950516_221946.124_TF-ELDR_AIR.nc")

[31mDataset: CFRADIAL/cfrad.19950516_221944.169_to_19950516_221946.124_TF-ELDR_AIR.nc[39m
Group: /

[31mDimensions[39m
   time = 182
   range = 384
   sweep = 1
   string_length_8 = 8
   string_length_32 = 32
   status_xml_length = 1
   r_calib = 1
   frequency = 4

[31mVariables[39m
[32m  volume_number[39m  
    Attributes:
     long_name            = [36mdata_volume_index_number[39m
     units                = 
     _FillValue           = [36m-9999[39m

[32m  platform_type[39m   (32)
    Datatype:    [0m[1mChar[22m (Char)
    Dimensions:  string_length_32
    Attributes:
     long_name            = [36mplatform_type[39m
     options              = [36mfixed, vehicle, ship, aircraft_fore, aircraft_aft, aircraft_tail, aircraft_belly, aircraft_roof, aircraft_nose, satellite_orbit, satellite_geostat[39m

[32m  primary_axis[39m   (32)
    Datatype:    [0m[1mChar[22m (Char)
    Dimensions:  string_length_32
    Attributes:
     long_name            = [36mprimary_

In [183]:
get_task_params("./test_mining_config.txt", keys(currset))

"VEL" NOT FOUND IN CFRAD FILE.... CONTINUING...
CACLULATE AVG of DBZ
"second line" NOT FOUND IN CFRAD FILE.... CONTINUING...


2-element Vector{String}:
 "DBZ"
 "AVG(DBZ)"

In [124]:
println

println (generic function with 3 methods)

In [49]:
split(result[1],",")

3-element Vector{SubString{String}}:
 "DBZ"
 "VEL"
 "AVG(DBZ)"

In [52]:
func_regex = r"(\w{1,})\((\w{1,})\)"


r"(\w{1,})\((\w{1,})\)"

In [192]:
Symbol("currset")

:currset

In [196]:
eval_var = "DBZ"
@eval $currset[eval_var][:,:]

384×182 Matrix{Union{Missing, Float32}}:
 -34.99      missing  -34.99      …  -34.99      -34.99      -34.99
    missing  missing     missing       0.75        6.25        9.63
    missing  missing     missing      11.13       10.38        9.88
    missing  missing     missing      11.38       13.13        8.63
    missing  missing     missing       9.75        9.5        11.63
    missing  missing     missing  …    6.75        9.25       11.13
    missing  missing     missing       6.88        7.25        7.38
    missing  missing     missing       5.63        8.25        7.5
    missing  missing     missing       7.38        6.25        5.75
    missing  missing     missing      10.63        8.88        9.63
    missing  missing     missing  …   13.25       13.25       14.5
    missing  missing     missing      16.25       14.25       13.63
    missing  missing     missing      21.13       18.38       14.63
   ⋮                              ⋱                ⋮         
    missing  mi

In [190]:
currset["DBZ"][:,:]

384×182 Matrix{Union{Missing, Float32}}:
 -34.99      missing  -34.99      …  -34.99      -34.99      -34.99
    missing  missing     missing       0.75        6.25        9.63
    missing  missing     missing      11.13       10.38        9.88
    missing  missing     missing      11.38       13.13        8.63
    missing  missing     missing       9.75        9.5        11.63
    missing  missing     missing  …    6.75        9.25       11.13
    missing  missing     missing       6.88        7.25        7.38
    missing  missing     missing       5.63        8.25        7.5
    missing  missing     missing       7.38        6.25        5.75
    missing  missing     missing      10.63        8.88        9.63
    missing  missing     missing  …   13.25       13.25       14.5
    missing  missing     missing      16.25       14.25       13.63
    missing  missing     missing      21.13       18.38       14.63
   ⋮                              ⋱                ⋮         
    missing  mi

In [238]:
fid = h5open("test.h5", "w")
create_dataset(fid,"./X", Float64, (100,))
attributes(fid["X"])["Column Names"] = ["AVG(DBZ), DBZ, VV"]


1-element Vector{String}:
 "AVG(DBZ), DBZ, VV"

In [None]:
###Need to apply controls here to ensure that dimensionality stays the same throughout the processing
###Even if we do end up flattening it to put into h5

In [214]:
attributes(fid["X"])["test"] = "test"

"test"

In [237]:
close(fid)

In [245]:
attributes(fid["X"])["Column Names"] = ["TEST"]

LoadError: HDF5.API.H5Error: Error creating attribute Column Names for object /X
libhdf5 Stacktrace:
 [1] [0m[1mH5A__create[22m: Attribute/Object already exists
[90m     attribute already exists[39m
  ⋮

In [243]:
fid["X"]["Column Names"][]

1-element Vector{String}:
 "AVG(DBZ), DBZ, VV"

LoadError: UndefVarError: `modify` not defined