Input validation (#132)

* Rename input table schema file
* Add validation function for input tables
* Add rudimentary example-based tests for validation
* input_tables.jl: format validation error message only when necessary
TulipaEnergy · Oct 4, 2023 · 2608212 · 2608212
1 parent 591a2d3
commit 2608212
Show file tree

Hide file tree

Showing 6 changed files with 128 additions and 44 deletions.
diff --git a/src/InputTables.jl b/src/InputTables.jl
diff --git a/src/TulipaEnergyModel.jl b/src/TulipaEnergyModel.jl
@@ -9,5 +9,6 @@ using JuMP
 
 include("io.jl")
 include("model.jl")
+include("input_tables.jl")
 
 end
diff --git a/src/input_tables.jl b/src/input_tables.jl
@@ -0,0 +1,96 @@
+struct NodeData                 # Schema for a nodes table (used with validate_df)
+    id::Int                     # Node ID
+    name::String                # Name of node (geographical?)
+    type::String                # Producer/Consumer - maybe an enum?
+    active::Bool                # Active or decommissioned
+    investable::Bool            # Whether able to invest
+    variable_cost::Float32      # kEUR/MWh
+    investment_cost::Float32    # kEUR/MW/year
+    capacity::Float32           # MW
+    initial_capacity::Float32   # MW
+    peak_demand::Float32        # MW
+end
+
+struct EdgeData                 # Schema for an edges table (presumably; usage not shown here)
+    id::Int                     # Edge ID
+    carrier::String             # (Optional?) Energy carrier
+    from_node_id::Int           # Node ID of the edge's origin (presumably NodeData.id - confirm)
+    to_node_id::Int             # Node ID of the edge's destination (presumably NodeData.id - confirm)
+    active::Bool                # Active or decommissioned
+    investable::Bool            # Whether able to invest
+    variable_cost::Float32      # kEUR/MWh
+    investment_cost::Float32    # kEUR/MW/year
+    capacity::Float32           # MW
+    initial_capacity::Float32   # MW
+end
+
+struct EdgeProfiles             # Schema for an edge profiles table (presumably one row per edge/period/step)
+    id::Int                     # Edge ID
+    rep_period_id::Int          # Representative period ID (presumably RepPeriodData.id - TODO confirm)
+    time_step::Int              # Time step within the period (TODO confirm indexing base)
+    value::Float32              # p.u.
+end
+
+struct NodeProfiles             # Schema for a node profiles table (presumably one row per node/period/step)
+    id::Int                     # Node ID
+    rep_period_id::Int          # Representative period ID (presumably RepPeriodData.id - TODO confirm)
+    time_step::Int              # Time step within the period (TODO confirm indexing base)
+    value::Float32              # p.u.
+end
+
+struct RepPeriodData            # Schema for a representative periods table (presumably; usage not shown here)
+    id::Int                     # Representative period ID
+    weight::Float32             # Weight of the period (TODO confirm units/normalisation)
+end
+
+"""
+    validate_df(df, schema; fname = "", silent = false)
+
+Check that `df` has a column for every field of `schema` and that each such
+column's element type is compatible with the field's declared type.
+
+Two types are compatible when they share a direct supertype or `promote_type`
+finds a common type other than `Any`; `Missing` in a column eltype is ignored.
+
+Returns `(col_error, col_type_err)`: the missing column names and one
+`(column, expected, actual)` tuple per incompatible column. Unless `silent` is
+true, throws an `ErrorException` mentioning `fname` if either list is non-empty.
+"""
+function validate_df(df::DataFrame, schema::DataType; fname::String = "", silent = false)
+    # Only element types are needed, so skip the other summary statistics.
+    df_t = describe(df, :eltype)
+    # Column name => element type, for constant-time lookups below.
+    found = Dict{Symbol,Type}(zip(df_t[!, :variable], df_t[!, :eltype]))
+
+    cols = collect(fieldnames(schema))
+    col_error = filter(col -> !haskey(found, col), cols)
+
+    # (column, expected type, actual eltype) for each schema column present in `df`.
+    schema_cols = zip(cols, fieldtypes(schema))
+    present = [(c, t, found[c]) for (c, t) in schema_cols if haskey(found, c)]
+    col_type_err = filter(((_, t, actual),) -> !_compatible_eltype(t, actual), present)
+
+    if !silent
+        problems = String[]
+        isempty(col_error) || push!(problems, "missing columns: $(col_error)")
+        if !isempty(col_type_err)
+            rows = ("\n     - $c::$t (expected: $e)" for (c, e, t) in col_type_err)
+            push!(problems, "incompatible column types:" * join(rows))
+        end
+        if !isempty(problems)
+            msg = join("\n [$i] $p" for (i, p) in enumerate(problems))
+            error("$fname failed validation", msg)
+        end
+    end
+    return (col_error, col_type_err)
+end
+
+# Decide whether a column with element type `col_t` satisfies the schema type `expect`.
+function _compatible_eltype(expect::Type, col_t::Type)
+    actual = nonmissingtype(col_t)
+    # `supertype` has no method for `Union`/`Union{}`, so only call it on DataTypes.
+    if expect isa DataType && actual isa DataType && supertype(expect) == supertype(actual)
+        return true
+    end
+    return promote_type(expect, actual) != Any
+end
diff --git a/test/Project.toml b/test/Project.toml
@@ -1,3 +1,5 @@
 [deps]
+CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/test/inputs/tiny/bad-nodes-data.csv b/test/inputs/tiny/bad-nodes-data.csv
@@ -0,0 +1,8 @@
+,,{producer;consumer},{true;false},{true;false},kEUR/MWh,kEUR/MW/year,MW,MW,MW
+iid,name,type,active,investable,variable_cost,investment_cost,capacity,initial_capacity,peak_demand
+1,ocgt,producer,true,t,0.07,25,100,0,t
+2,ccgt,producer,true,true,0.05,40,400,0,0
+3,wind,producer,true,true,0.001,70,50,0,0
+4,solar,producer,true,true,0,50,10,0,0
+5,ens,producer,true,false,0.18,0,0,0,0
+6,demand,consumer,true,false,0,0,0,0,1115
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,3 +1,5 @@
+using CSV
+using DataFrames
 using Graphs
 using TulipaEnergyModel
 using Test
@@ -32,3 +34,22 @@ end
               [Graphs.Edge(e) for e in [(1, 6), (2, 6), (3, 6), (4, 6), (5, 6)]]
     end
 end
+
+@testset "Input validation" begin
+    # FIXME: test separately
+    @testset "missing columns and incompatible types" begin
+        # The fixture misnames `id` as `iid` and has `t` in two typed columns.
+        fname = "bad-nodes-data.csv"
+        path = joinpath(INPUT_FOLDER, "tiny", fname)
+        table = CSV.read(path, DataFrame; header = 2)
+        schema = TulipaEnergyModel.NodeData
+
+        # FIXME: instead of examples, mutate and test
+        result = TulipaEnergyModel.validate_df(table, schema; fname = fname, silent = true)
+        missing_cols, bad_types = result
+        expected_bad_types =
+            [(:investable, Bool, String7), (:peak_demand, Float32, String7)]
+        @test missing_cols == [:id]
+        @test bad_types == expected_bad_types
+    end
+end