Skip to content

Commit

Permalink
Input validation (#132)
Browse files Browse the repository at this point in the history
* Rename input table schema file

* validation function for input tables

* Add rudimentary example-based tests for validation

* input_tables.jl: format validation error message only when necessary
  • Loading branch information
suvayu committed Oct 4, 2023
1 parent 591a2d3 commit 2608212
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 44 deletions.
44 changes: 0 additions & 44 deletions src/InputTables.jl

This file was deleted.

1 change: 1 addition & 0 deletions src/TulipaEnergyModel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ using JuMP

include("io.jl")
include("model.jl")
include("input_tables.jl")

end
96 changes: 96 additions & 0 deletions src/input_tables.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""
    NodeData

Column schema for the nodes input table: each field name is an expected column
and each field type is the type that column is expected to hold.

# Fields
- `id`: node ID
- `name`: name of the node (possibly geographical -- open question)
- `type`: producer/consumer (an enum may be a better fit -- open question)
- `active`: active or decommissioned
- `investable`: whether the node can be invested in
- `variable_cost`: kEUR/MWh
- `investment_cost`: kEUR/MW/year
- `capacity`: MW
- `initial_capacity`: MW
- `peak_demand`: MW
"""
struct NodeData
    id::Int
    name::String
    type::String
    active::Bool
    investable::Bool
    variable_cost::Float32
    investment_cost::Float32
    capacity::Float32
    initial_capacity::Float32
    peak_demand::Float32
end

"""
    EdgeData

Column schema for the edges input table: each field name is an expected column
and each field type is the type that column is expected to hold.

# Fields
- `id`: edge ID
- `carrier`: energy carrier (possibly optional -- open question)
- `from_node_id`: node ID
- `to_node_id`: node ID
- `active`: active or decommissioned
- `investable`: whether the edge can be invested in
- `variable_cost`: kEUR/MWh
- `investment_cost`: kEUR/MW/year
- `capacity`: MW
- `initial_capacity`: MW
"""
struct EdgeData
    id::Int
    carrier::String
    from_node_id::Int
    to_node_id::Int
    active::Bool
    investable::Bool
    variable_cost::Float32
    investment_cost::Float32
    capacity::Float32
    initial_capacity::Float32
end

"""
    EdgeProfiles

Column schema for the edge profiles input table.

# Fields
- `id`: edge ID
- `rep_period_id`: integer identifier of a representative period
- `time_step`: integer time step
- `value`: profile value in p.u.
"""
struct EdgeProfiles
    id::Int
    rep_period_id::Int
    time_step::Int
    value::Float32
end

"""
    NodeProfiles

Column schema for the node profiles input table.

# Fields
- `id`: node ID
- `rep_period_id`: integer identifier of a representative period
- `time_step`: integer time step
- `value`: profile value in p.u.
"""
struct NodeProfiles
    id::Int
    rep_period_id::Int
    time_step::Int
    value::Float32
end

"""
    RepPeriodData

Column schema for the representative periods input table.

# Fields
- `id`: integer identifier of the representative period
- `weight`: weight of the period, stored as `Float32`
"""
struct RepPeriodData
    id::Int
    weight::Float32
end

# Return `true` when a column whose element type is `actual` cannot hold values
# of the schema type `expect`.
#
# Two types are accepted as compatible when they share their direct supertype or
# when `promote_type` finds a common type other than `Any`. `Missing` is stripped
# from `actual` first so the verdict does not depend on whether the reported
# element type carries it, and `supertype` is only called on types that have a
# method for it (it has none for `Union` types or `Union{}`).
function _is_type_mismatch(expect::Type, actual::Type)
    actual = nonmissingtype(actual)
    if expect isa Union{DataType,UnionAll} &&
       actual isa Union{DataType,UnionAll} &&
       supertype(expect) == supertype(actual)
        return false
    end
    return promote_type(expect, actual) == Any
end

"""
    validate_df(df::DataFrame, schema::DataType; fname::String = "", silent = false)

Check the columns of `df` against the fields of `schema`.

Every field name of `schema` must be present as a column of `df`, and the
element type of that column must be compatible with the field type (same direct
supertype, or promotable to something other than `Any`). Columns of `df` that
are not fields of `schema` are ignored.

# Keywords
- `fname`: name used as the prefix of the error message (typically the file the
  table was read from).
- `silent`: when `true`, never throw; only return the findings.

# Returns
A tuple `(col_error, col_type_err)` where
- `col_error` is the vector of schema field names missing from `df`, and
- `col_type_err` is a vector of `(column, expected_type, column_eltype)` tuples,
  in schema field order, for the columns with an incompatible element type.

# Throws
- `ErrorException` (via `error`) when `silent` is `false` and at least one
  problem was found; the message lists the findings as numbered items.
"""
function validate_df(df::DataFrame, schema::DataType; fname::String = "", silent = false)
    # Only the element types are needed, so do not compute the other statistics.
    df_t = describe(df, :eltype)
    actual_types = Dict(zip(df_t[!, :variable], df_t[!, :eltype]))

    cols = Symbol[fieldnames(schema)...]
    col_error = filter(col -> !haskey(actual_types, col), cols)

    col_type_err = [
        (col, expect, actual_types[col]) for
        (col, expect) in zip(cols, fieldtypes(schema)) if
        haskey(actual_types, col) && _is_type_mismatch(expect, actual_types[col])
    ]

    if !silent
        findings = String[]
        if !isempty(col_error)
            push!(findings, "missing columns: $(col_error)")
        end
        if !isempty(col_type_err)
            details = join(
                "\n - $col::$col_t (expected: $expect)" for
                (col, expect, col_t) in col_type_err
            )
            push!(findings, "incompatible column types:" * details)
        end
        if !isempty(findings)
            # Every finding starts on its own line, numbered in order of appearance.
            msg = join("\n [$i] $finding" for (i, finding) in enumerate(findings))
            error("$fname failed validation", msg)
        end
    end
    return (col_error, col_type_err)
end
2 changes: 2 additions & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
8 changes: 8 additions & 0 deletions test/inputs/tiny/bad-nodes-data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
,,{producer;consumer},{true;false},{true;false},kEUR/MWh,kEUR/MW/year,MW,MW,MW
iid,name,type,active,investable,variable_cost,investment_cost,capacity,initial_capacity,peak_demand
1,ocgt,producer,true,t,0.07,25,100,0,t
2,ccgt,producer,true,true,0.05,40,400,0,0
3,wind,producer,true,true,0.001,70,50,0,0
4,solar,producer,true,true,0,50,10,0,0
5,ens,producer,true,false,0.18,0,0,0,0
6,demand,consumer,true,false,0,0,0,0,1115
21 changes: 21 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using CSV
using DataFrames
using Graphs
using TulipaEnergyModel
using Test
Expand Down Expand Up @@ -32,3 +34,22 @@ end
[Graphs.Edge(e) for e in [(1, 6), (2, 6), (3, 6), (4, 6), (5, 6)]]
end
end

@testset "Input validation" begin
    # FIXME: test separately
    @testset "missing columns and incompatible types" begin
        # The fixture names its first column `iid` instead of `id`, and its first
        # data row holds `t` in the `investable` and `peak_demand` columns.
        csv_name = "bad-nodes-data.csv"
        csv_path = joinpath(INPUT_FOLDER, "tiny", csv_name)
        # The column names are on the second line of the file; the first line
        # holds units and allowed values.
        bad_nodes = CSV.read(csv_path, DataFrame; header = 2)

        # FIXME: instead of examples, mutate and test
        missing_cols, mismatched_cols = TulipaEnergyModel.validate_df(
            bad_nodes,
            TulipaEnergyModel.NodeData;
            fname = csv_name,
            silent = true,
        )

        expected_mismatches =
            [(:investable, Bool, String7), (:peak_demand, Float32, String7)]
        @test missing_cols == [:id]
        @test mismatched_cols == expected_mismatches
    end
end

0 comments on commit 2608212

Please sign in to comment.