add structured_pem for linear graybox identification (#150)

* add structured_pem for linear graybox identification * add test * export * add ChainRules dep * tweak tests
baggepinnen · Apr 28, 2024 · b960093 · b960093
1 parent 8ef33cc
commit b960093
Show file tree

Hide file tree

Showing 5 changed files with 203 additions and 9 deletions.
diff --git a/Project.toml b/Project.toml
@@ -4,13 +4,15 @@ authors = ["baggepinnen <baggepinnen@gmail.com>"]
 version = "2.9.4"
 
 [deps]
+ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2"
 ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
 ControlSystemsBase = "aaaaaaaa-a6ca-5380-bf3e-84a91bcd477e"
 DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2"
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+ForwardDiffChainRules = "c9556dd2-1aed-4cfe-8560-1557cf593001"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LowLevelParticleFilters = "d9d29d28-c116-5dba-9239-57a5fe23875b"
@@ -33,29 +35,31 @@ LeastSquaresOptim = "0fc2ff8b-aaa3-5acd-a817-1944a5e08891"
 ControlSystemIdentificationLSOptExt = "LeastSquaresOptim"
 
 [compat]
+ChainRules = "1.58"
 ComponentArrays = "0.14, 0.15"
 ControlSystemsBase = "1.8"
 DSP = "0.6.1, 0.7"
 DelimitedFiles = "1"
 FFTW = "1.1"
 FillArrays = "1"
 ForwardDiff = "0.10"
+ForwardDiffChainRules = "0.2.1"
+InteractiveUtils = "1.6"
 LeastSquaresOptim = "0.8"
+LinearAlgebra = "1.6"
 LowLevelParticleFilters = "2.0, 3.0"
 MatrixEquations = "1.1, 2.0"
 MonteCarloMeasurements = "1.0"
 Optim = "<0.20, 0.20, 0.21, 0.22, 1.0"
 Parameters = "<0.13"
 QuadGK = "2.4"
+Random = "1.6"
 RecipesBase = "<0.8, 0.8, 1.0"
 StaticArrays = "1"
 Statistics = "1"
 StatsBase = "0.33, 0.34"
 TotalLeastSquares = "1.7.2"
 julia = "1.6"
-InteractiveUtils = "1.6"
-LinearAlgebra = "1.6"
-Random = "1.6"
 
 [extras]
 LeastSquaresOptim = "0fc2ff8b-aaa3-5acd-a817-1944a5e08891"

diff --git a/ext/ControlSystemIdentificationLSOptExt.jl b/ext/ControlSystemIdentificationLSOptExt.jl
@@ -86,7 +86,7 @@ end
 """
     nonlinear_pem(d, model::NonlinearPredictionErrorModel; x0, R1, R2, kwargs...)
 
-Nonlinear Prediction-Error Method initialized with a model resulting from a previous call to `nonlinear_pem`. Parameters `x0, R1, R2` can be modified to refine the optimization. Calling `nonlinear_pem` repeatedly may be beneficial when the first call uses a small `R2` for large amounts of measurement feedback, this makes it easier to find a good model when the initial parameter guess is poor. A second call can use a larger `R2` to improve the simulation performance of the estimated model.
+Nonlinear Prediction-Error Method initialized with a model resulting from a previous call to `nonlinear_pem`. Parameters `x0, R1, R2` can be modified to refine the optimization. Calling `nonlinear_pem` repeatedly may be beneficial when the first call uses a small `R2` for large amounts of measurement feedback, this makes it easier to find a good model when the initial parameter guess is poor. A second call can use a larger `R2` to improve the simulation performance of the estimated model. Alternatively, `R1` may start large and then be reduced to improve the parameter estimates. These two appraoches are equivalent for linear systems, but may behave differently for nonlinear systems.
 """
 function nonlinear_pem(d::AbstractIdData, model::NonlinearPredictionErrorModel; x0 = model.x0, R1=model.ukf.R1, R2=model.ukf.R2, kwargs...)
     nonlinear_pem(d, model.ukf.dynamics, model.ukf.measurement, model.p, x0, R1, R2, model.ukf.nu; kwargs...)

diff --git a/src/ControlSystemIdentification.jl b/src/ControlSystemIdentification.jl
@@ -45,7 +45,7 @@ export iddata,
     InputOutputFreqData
 
 export AbstractPredictionStateSpace, PredictionStateSpace, N4SIDStateSpace,
-    pem, newpem, prediction_error, prediction_error_filter, predictiondata, predict, simulate, noise_model, estimate_x0
+    pem, newpem, structured_pem, prediction_error, prediction_error_filter, predictiondata, predict, simulate, noise_model, estimate_x0
 export n4sid, subspaceid, era, okid, find_similarity_transform, schur_stab
 export getARXregressor,
     getARregressor,
@@ -156,7 +156,7 @@ x0h[2] == x0[2] # Should be exact equality
 norm(x0-x0h)    # Should be small
 ```
 """
-function estimate_x0(sys::AbstractStateSpace, d, n = min(length(d), 3slowest_time_constant(sys)); fixed = fill(NaN, sys.nx))
+function estimate_x0(sys::AbstractStateSpace, d, n = min(length(d), 3slowest_time_constant(sys)); fixed = fill(NaN, sys.nx), focus=:prediction)
     d.ny == sys.ny || throw(ArgumentError("Number of outputs of system and data do not match"))
     d.nu == sys.nu || throw(ArgumentError("Number of inputs of system and data do not match"))
     T = ControlSystemsBase.numeric_type(sys)
@@ -165,7 +165,7 @@ function estimate_x0(sys::AbstractStateSpace, d, n = min(length(d), 3slowest_tim
     nx,p,N = sys.nx, sys.ny, length(d)
     size(y,2) >= nx || throw(ArgumentError("y should be at least length sys.nx"))
 
-    if sys isa AbstractPredictionStateSpace && !iszero(sys.K)
+    if focus === :prediction && sys isa AbstractPredictionStateSpace && !iszero(sys.K)
         ε, _ = lsim(prediction_error_filter(sys), predictiondata(d))
         y = y - ε # remove influence of innovations
     end 

diff --git a/src/pem.jl b/src/pem.jl
@@ -142,7 +142,7 @@ plot(
 
 The returned model is of type `PredictionStateSpace` and contains the field `sys` with the system model, as well as covariance matrices and estimated Kalman gain for a Kalman filter.
 
-See also [`nonlinear_pem`](@ref).
+See also [`structured_pem`](@ref) and [`nonlinear_pem`](@ref).
 
 # Extended help
 This implementation uses a tridiagonal parametrization of the A-matrix that has been shown to be favourable from an optimization perspective.¹ The initial guess `sys0` is automatically transformed to a special tridiagonal modal form. 
@@ -617,4 +617,137 @@ See [Identification of nonlinear models](https://baggepinnen.github.io/ControlSy
 !!! warning "Experimental"
     This function is considered experimental and may change in the future without respecting semantic versioning. This implementation also lacks a number of features associated with good nonlinear PEM implementations, such as regularization and support for multiple datasets.
 """
-function nonlinear_pem end
+function nonlinear_pem end
+
+# ==============================================================================
+## Structured PEM
+# ==============================================================================
+using ChainRules
+using ForwardDiffChainRules
+@ForwardDiff_frule LinearAlgebra.exp!(x1::AbstractMatrix{<:ForwardDiff.Dual}) true
+
+function inner_constructor(p, constructor, Ts)
+    sys = constructor(p)
+    if iscontinuous(sys)
+        return c2d(sys, Ts, :zoh), p.K, p.x0
+    end
+    return sys, p.K, p.x0
+end
+
+"""
+    structured_pem(
+        d,
+        nx;
+        focus = :prediction,
+        p0,
+        x0 = nothing,
+        K0 = focus == :prediction ? zeros(nx, d.ny) : zeros(0,0),
+        constructor,
+        h = 1,
+        metric::F = abs2,
+        regularizer::RE = (p, P) -> 0,
+        optimizer = BFGS(
+            # alphaguess = LineSearches.InitialStatic(alpha = 0.95),
+            linesearch = LineSearches.BackTracking(),
+        ),
+        store_trace = true,
+        show_trace = true,
+        show_every = 50,
+        iterations = 10000,
+        allow_f_increases = false,
+        time_limit = 100,
+        x_tol = 0,
+        f_abstol = 1e-16,
+        g_tol = 1e-12,
+        f_calls_limit = 0,
+        g_calls_limit = 0,
+    )
+
+Linear gray-box model identification using the prediction-error method (PEM).
+
+This function differs from [`newpem`](@ref) in that here, the user controls the structure of the estimated model, while in `newpem` a generic black-box structure is used.
+
+The user provides the function `constructor(p)` that constructs the model from the parameter vector `p`. This function must return a statespace system. `p0` is the corresponding initial guess for the parameters. `K0` is an initial guess for the observer gain (only used if `focus = :prediciton`) and `x0` is the initial guess for the initial condition (estimated automatically if not provided).
+    
+For other options, see [`newpem`](@ref).
+"""
+function structured_pem(
+    d,
+    nx;
+    focus = :prediction,
+    p0,
+    x0 = nothing,
+    K0 = focus == :prediction ? 1e-10randn(nx, d.ny) : zeros(0,0), # Small random value used to not suffer loss of state dimension in observer_predictor when K0 is zero
+    constructor,
+    h = 1,
+    metric::F = abs2,
+    regularizer::RE = (p, P) -> 0,
+    optimizer = BFGS(
+        # alphaguess = LineSearches.InitialStatic(alpha = 0.95),
+        linesearch = LineSearches.BackTracking(),
+    ),
+    store_trace = true,
+    show_trace = true,
+    show_every = 50,
+    iterations = 10000,
+    allow_f_increases = false,
+    time_limit = 100,
+    x_tol = 0,
+    f_abstol = 1e-16,
+    g_tol = 1e-12,
+    f_calls_limit = 0,
+    g_calls_limit = 0,
+) where {F, RE}
+    T = promote_type(eltype(d.y), eltype(p0))
+    nu = d.nu
+    ny = d.ny
+    pred = focus === :prediction
+    pd = predictiondata(d)
+    sys0 = constructor(p0)
+    isdiscrete(sys0) || (sys0 = c2d(sys0, d.Ts))
+    if x0 === nothing
+        x0i::Vector{T} = if h == 1 || focus === :simulation
+            T.(estimate_x0(sys0, d, min(length(d), 10nx)))
+        else
+            T.(estimate_x0(prediction_error_filter(PredictionStateSpace(sys0, K0, 0, 0); h), pd, min(length(pd), 10nx)))
+        end
+    else
+        length(x0) == nx || throw(ArgumentError("x0 must have length $nx"))
+        x0i = x0
+    end
+    p0ca = ComponentArray(p = p0, K = T.(K0), x0 = x0i)
+    function predloss(p)
+        sysi, Ki, x0 = inner_constructor(p, constructor, d.Ts)
+        syso = PredictionStateSpace(sysi, Ki, 0, 0)
+        Pyh = ControlSystemsBase.observer_predictor(syso; h)
+        yh, _ = lsim(Pyh, pd; x0=Vector(x0))
+        yh .= metric.(yh .- d.y)
+        c1 = sum(yh)
+        c1 + regularizer(p, syso)
+    end
+    function simloss(p)
+        syssim, _, x0 = inner_constructor(p, constructor, d.Ts)
+        y, _ = lsim(syssim, d; x0)
+        y .= metric.(y .- d.y)
+        sum(y) + regularizer(p, syssim)
+    end
+    res = Optim.optimize(
+        pred ? predloss : simloss,
+        p0ca,
+        optimizer,
+        Optim.Options(;
+            store_trace, show_trace, show_every, iterations, allow_f_increases,
+            time_limit, x_tol, f_abstol, g_tol, f_calls_limit, g_calls_limit),
+        autodiff = :forward,
+    )
+    sys_opt, K_opt, x0_opt = inner_constructor(res.minimizer, constructor, d.Ts)
+    sysp_opt = PredictionStateSpace(
+        sys_opt,
+        pred ? K_opt : zeros(T, nx, ny),
+        zeros(T, nx, nx),
+        zeros(T, ny, ny),
+        zeros(T, nx, ny),
+    )
+    pred && !isstable(observer_predictor(sysp_opt)) && @warn("Estimated predictor dynamics A-KC is unstable")
+    (; sys=sysp_opt, x0=x0_opt, res)
+end
diff --git a/test/test_pem.jl b/test/test_pem.jl
@@ -181,6 +181,11 @@ d = iddata(yn, un, 1)
     @test mean(ControlSystemIdentification.nrmse(y, yh)) > 97
 
 
+    sysh, x0h, opt = ControlSystemIdentification.newpem(d, nx, show_every=100, safe=true, h=3)
+    @test iszero(sysh.D)
+    @test Optim.minimum(opt) < 30
+    @test freqresptest(sys, sysh) < 0.4
+
     # Non-zero D
 
     sysh, x0h, opt = ControlSystemIdentification.newpem(d, nx, show_every=10, zeroD = false, safe=true)
@@ -300,3 +305,55 @@ d = iddata(yn, un, 1)
 @test Optim.minimum(opt) < 1e-3*T # Should depend on system gramian, but too lazy to figure out
 
 end
+
+
+@testset "structured_pem" begin
+    @info "Testing structured_pem"
+    constructor = function (p)
+        Mα, Mq, Mδe, Zα, Zq, Zδe = p
+        V = 30.2
+        A = [
+            Mq Mα
+            (1+Zq / V) Zα/V
+        ]
+        B = [Mδe; Zδe / V]
+        return ss(A, B, I, 0)
+    end
+
+    # True Parameters
+    Mα_true = -83.17457223750834
+    Mq_true = -4.0485957994781785
+    Mδe_true = -80.71377329163104
+    Zα_true = -115.91677356408941
+    Zq_true = -0.573560626095269
+    Zδe_true = 32.13261325199936
+
+    Ts = 0.01
+    t = 0:Ts:10
+
+    p_true = [Mα_true, Mq_true, Mδe_true, Zα_true, Zq_true, Zδe_true]
+    p0 = [Mα_true, Mq_true, Mδe_true, Zα_true, Zq_true, Zδe_true] .* 1.2
+
+    P_true = constructor(p_true)
+    u = sign.(sin.((1 / 3 * t)')) .+ sign.(sin.((1 / 2 * t)'))
+    res_true = lsim(P_true, u, t)
+    d = iddata(res_true)
+
+    nx = 2
+    function regularizer(p, _)
+        sum(abs2, p.K)
+    end
+
+    res = ControlSystemIdentification.structured_pem(d, nx; focus=:prediction, p0, constructor, regularizer, show_trace = false, show_every = 1, iterations=300)
+    p_est = res.res.minimizer.p
+    @test norm(p_est - p_true) < 1e-6
+
+    # res = ControlSystemIdentification.structured_pem(d, nx; focus=:prediction, p0=0.98p_true, constructor, regularizer, show_trace = true, show_every = 1, iterations=300000, h=6, optimizer=NelderMead())
+    # p_est = res.res.minimizer.p
+    # @test norm(p_est - p_true)/norm(p_true) < 1e-2 # Severe convergence problems here, but it does eventually converge using NelderMead The test is deactivated since it takes a long time
+
+    res = ControlSystemIdentification.structured_pem(d, nx; focus=:simulation, p0, constructor, regularizer, show_trace = false, show_every = 1, iterations=300)
+    p_est = res.res.minimizer.p
+    @test norm(p_est - p_true) < 1e-6
+
+end