In [None]:
using Revise
using Burgers
using DataDeps, MAT, MLUtils
using NeuralOperators, Flux
using BSON
using DataDeps, MAT, MLUtils
using NeuralOperators, Flux
using CUDA, FluxTraining, BSON
import Flux: params
using BSON: @save, @load
using ProgressBars
using Zygote
using Optimisers, ParameterSchedulers

using Burgers
using FluxTraining

In [None]:
"""
    my_get_data(file_path; n = 50000, Δsamples = 1, grid_size = cld(51, Δsamples), T = Float32)

Read the variables `"a"`, `"u"`, `"safe"` and `"pf"` from the MAT file at `file_path`.

For each variable the first `n` rows and every `Δsamples`-th column are kept, the
result is transposed (so samples run along the last dimension) and converted to `T`.

# Keywords
- `n`: number of rows (samples) to keep.
- `Δsamples`: column stride used when subsampling.
- `grid_size`: number of columns left after subsampling. The default equals
  `length(1:Δsamples:51)`; this assumes the stored arrays have 51 columns —
  TODO confirm against the data files. Pass it explicitly otherwise.
- `T`: element type of the returned arrays.

# Returns
`(x_loc_data, y, safe_labels, pf_labels)` where
- `x_loc_data` is `(2, grid_size, n)`: channel 1 holds `LinRange(0, 5, grid_size)`
  repeated for every sample, channel 2 holds the `"a"` data;
- `y` is the `"u"` data reshaped to `(1, grid_size, n)`;
- `safe_labels` and `pf_labels` are `(grid_size, n)`.
"""
function my_get_data(file_path; n = 50000, Δsamples = 1, grid_size = cld(51, Δsamples), T = Float32)
    file = matopen(file_path)
    # Close the handle even when one of the reads throws (missing variable, too few rows, ...).
    x_data, y_data, safe_labels, pf_labels = try
        map(("a", "u", "safe", "pf")) do name
            T.(collect(read(file, name)[1:n, 1:Δsamples:end]'))
        end
    finally
        close(file)
    end

    x_loc_data = Array{T, 3}(undef, 2, grid_size, n)
    # Channel 1: the same coordinate grid on [0, 5] for every sample (broadcast along samples).
    x_loc_data[1, :, :] .= LinRange(0, 5, grid_size)
    # Channel 2: the "a" data; throws DimensionMismatch if grid_size does not match it.
    x_loc_data[2, :, :] .= x_data

    return x_loc_data, reshape(y_data, 1, :, n), safe_labels, pf_labels
end

"""
    my_get_dataloader(; ratio = 0.9, batchsize = 128, file_paths = (...))

Build the training and test `DataLoader`s.

Every file in `file_paths` is loaded with `my_get_data`, split with
`splitobs(...; at = ratio)`, and the per-file splits are concatenated along the
sample dimension. Only the first three fields returned by `my_get_data`
(inputs, targets, `safe` labels) are kept; the `pf` labels are dropped.

# Keywords
- `ratio`: fraction of each file that goes into the training split.
- `batchsize`: batch size of both loaders.
- `file_paths`: MAT files to load.
  NOTE(review): the default lists the same file three times, which reproduces the
  previous hard-coded behaviour (the data set is tripled) — confirm whether three
  different files were intended.

# Returns
`(loader_train, loader_test)`; only the training loader shuffles.
"""
function my_get_dataloader(;
    ratio::Float64 = 0.9,
    batchsize = 128,
    file_paths = (
        "data_bcks_hyperbolic_1_minus.mat",
        "data_bcks_hyperbolic_1_minus.mat",
        "data_bcks_hyperbolic_1_minus.mat",
    ),
)
    isempty(file_paths) &&
        throw(ArgumentError("file_paths must contain at least one MAT file"))

    # One (train, test) pair per file; each side is a tuple (x, y, safe, pf).
    splits = map(file_paths) do path
        𝐱, 𝐲, safe, pf = my_get_data(path)
        splitobs((𝐱, 𝐲, safe, pf), at = ratio)
    end
    train_parts = map(first, splits)
    test_parts = map(last, splits)

    @info "Split sizes of the last file" train_x = size(train_parts[end][1]) test_y =
        size(test_parts[end][2])

    # Concatenate one field of every part along its sample dimension; `cat`
    # materialises the split views into a plain array.
    join_field(parts, field, dim) = cat((part[field] for part in parts)...; dims = dim)

    # x and y are (channel, grid, sample); the safe labels are (grid, sample).
    data_train = (
        join_field(train_parts, 1, 3),
        join_field(train_parts, 2, 3),
        join_field(train_parts, 3, 2),
    )
    data_test = (
        join_field(test_parts, 1, 3),
        join_field(test_parts, 2, 3),
        join_field(test_parts, 3, 2),
    )

    loader_train = DataLoader(data_train, batchsize = batchsize, shuffle = true)
    loader_test = DataLoader(data_test, batchsize = batchsize, shuffle = false)

    return loader_train, loader_test
end
"""
    delete_with_probability!(list, p = 0.5)

Return a new collection holding the entries of `list` whose independent uniform
draw from `rand` fell below `p`, i.e. each entry is *kept* with probability `p`.

Despite the trailing `!`, `list` itself is not modified.
"""
function delete_with_probability!(list, p = 0.5)
    keep = rand(length(list)) .< p
    return list[keep]
end

In [None]:
"""
    loss_naive_safeset(ϕ, x, y_init)

Mean hinge penalty of `ϕ` over the columns of `x` whose label in `y_init` is `0`
(the label convention used in this file is safe: 1, unsafe: 0).

For the selected columns the penalty is `relu((2y - 1) * ϕ(x)[1, :] + 1e-6)`,
averaged over those columns. Returns `0` when no column has label `0`.

The previous version printed fixed slices (`1:10`, `end-10:end`) of its inputs on
every call; that threw a `BoundsError` for inputs with fewer than 11 columns and
ran `ϕ` several extra times, so the debug output was removed.
"""
function loss_naive_safeset(ϕ, x, y_init)
    selected = findall(iszero, y_init)
    isempty(selected) && return 0

    labels = y_init[selected]
    scores = ϕ(x[:, selected])[1, :]

    # `relu` is a scalar activation, so it is broadcast explicitly over the vector.
    penalties = relu.((2 .* labels .- 1) .* scores .+ 1e-6)
    return sum(penalties) / length(penalties)
end

"""
    loss_regularization(ϕ::Chain, x::AbstractArray, y_init::AbstractArray)

Mean of `sigmoid_fast((2y - 1) * ϕ(x)[1, :])` over the columns of `x` whose label
in `y_init` is `0`. Returns `0` when there is no such column.
"""
function loss_regularization(ϕ::Chain, x::AbstractArray, y_init::AbstractArray)
    # Label convention — safe: 1; unsafe: 0. Only the unsafe columns contribute.
    unsafe_cols = findall(label -> label == 0, y_init)
    isempty(unsafe_cols) && return 0

    signs = 2 .* y_init[unsafe_cols] .- 1
    scores = ϕ(x[:, unsafe_cols])[1, :]
    penalties = sigmoid_fast(signs .* scores)
    return sum(penalties) / size(penalties)[end]
end

In [None]:
"""
    loss_naive_safeset_end(ϕ, x, y_init; minus_safe = false)

Hinge penalty `relu((2y - 1) * ϕ + 1e-6)` evaluated either

- on the last column only (`minus_safe = false`), using `y_init[end]` and
  `ϕ(x)[1, end]`, or
- averaged over every column whose label in `y_init` is `1` (`minus_safe = true`);
  in that mode `0` is returned when no column has label `1`.
"""
function loss_naive_safeset_end(ϕ, x, y_init; minus_safe = false)
    if !minus_safe
        # Last column only.
        return relu((2 .* y_init[end] .- 1) .* ϕ(x)[1, end] .+ 1e-6)
    end

    picked = findall(label -> label == 1, y_init)
    isempty(picked) && return 0

    labels = y_init[picked]
    scores = ϕ(x[:, picked])[1, :]
    penalties = relu((2 .* labels .- 1) .* scores .+ 1e-6)
    return sum(penalties) / size(penalties)[end]
end

"""
    loss_regularization_end(ϕ::Chain, x::AbstractArray, y_init::AbstractArray; minus_safe = false)

`sigmoid_fast((2y - 1) * ϕ)` evaluated either

- on the last column only (`minus_safe = false`), using `y_init[end]` and
  `ϕ(x)[1, end]`, or
- averaged over every column whose label in `y_init` is `1` (`minus_safe = true`);
  in that mode `0` is returned when no column has label `1`.
"""
function loss_regularization_end(ϕ::Chain, x::AbstractArray, y_init::AbstractArray; minus_safe = false)
    if !minus_safe
        # Last column only.
        return sigmoid_fast((2 .* y_init[end] .- 1) .* ϕ(x)[1, end])
    end

    picked = findall(label -> label == 1, y_init)
    isempty(picked) && return 0

    labels = y_init[picked]
    scores = ϕ(x[:, picked])[1, :]
    penalties = sigmoid_fast((2 .* labels .- 1) .* scores)
    return sum(penalties) / size(penalties)[end]
end

"""
    find_derivative(vector)

Finite-difference derivative of channel 2 of `vector` with respect to channel 1,
taken along the second dimension.

`vector` is indexed as `(2, M, N)`. Interior points use a central difference, the
first point a forward difference and the last point a backward difference.

Returns a `Float64` array of size `(1, M, N)`.
"""
function find_derivative(vector)
    M, N = size(vector, 2), size(vector, 3)

    grid = vector[1, :, :]    # (M, N) abscissae
    values = vector[2, :, :]  # (M, N) ordinates

    slopes = zeros(Float64, M, N)

    # One-sided differences at the two boundary rows.
    slopes[1, :] = (values[2, :] .- values[1, :]) ./ (grid[2, :] .- grid[1, :])
    slopes[M, :] = (values[M, :] .- values[M-1, :]) ./ (grid[M, :] .- grid[M-1, :])

    # Central differences on rows 2 … M-1.
    rise = values[3:M, :] .- values[1:M-2, :]
    run = grid[3:M, :] .- grid[1:M-2, :]
    slopes[2:M-1, :] = rise ./ run

    return reshape(slopes, 1, M, N)
end

"""
    find_derivative_1step(vector)

One-step finite-difference derivative of channel 2 of `vector` with respect to
channel 1, taken along the second dimension.

`vector` is indexed as `(2, M, N)`. Points `1 … M-1` use a forward difference and
the last point a backward difference (which equals the forward difference of
point `M-1`).

Returns a `Float64` array of size `(1, M, N)`.
"""
function find_derivative_1step(vector)
    M, N = size(vector, 2), size(vector, 3)

    grid = vector[1, :, :]    # (M, N) abscissae
    values = vector[2, :, :]  # (M, N) ordinates

    slopes = zeros(Float64, M, N)

    # Forward differences between consecutive rows fill rows 1 … M-1.
    slopes[1:M-1, :] = diff(values; dims = 1) ./ diff(grid; dims = 1)

    # Backward difference at the last row.
    slopes[M, :] = (values[M, :] .- values[M-1, :]) ./ (grid[M, :] .- grid[M-1, :])

    return reshape(slopes, 1, M, N)
end



# loss_pf: mean over the selected columns of
#     relu(ϕ̇ + α * ϕ(Yt) + C * ϕ(U_0) + 1e-6),   C = α e^{-αT} / (1 - e^{-αT}),
# where ϕ̇ is, per column, the product of the gradient of ϕ with respect to its
# input (obtained from a Zygote pullback) and the supplied derivative ∇Y_t.
#
# Arguments read by the body:
#   ϕ            – the network being evaluated.
#   Yt           – input columns for ϕ; `size(Yt)` is unpacked as (state_dim, batchsize).
#   U_0          – second set of input columns for ϕ, indexed with the same column indices as Yt.
#   ∇Y_t         – derivative data; reshaped to `size(Yt)` on entry.
#   T, α         – scalars entering the constant C and the α * ϕ(Yt) term.
#   all          – when false (default) only columns with |Yt[2, :]| < ϵ are used,
#                  and of those a random subset is kept (see below).
#   ϵ            – threshold for that column selection.
#   λ_pf_batch   – optional per-column weights multiplied into the residual before the relu.
# `U`, `extended_U̇` and `y_init` are accepted but not read anywhere in this body.
#
# Returns 0 when the column selection ends up empty.
function loss_pf(ϕ::Chain, U::AbstractArray, Yt::AbstractArray, U_0,extended_U̇, ∇Y_t,T, α,y_init; all=false,ϵ = 0.5,λ_pf_batch=nothing)
    # Bring the derivative data (and the weights, if given) to one column per column of Yt / U_0.
    ∇Y_t = reshape(∇Y_t, size(Yt))
    isnothing(λ_pf_batch) || (λ_pf_batch = reshape(λ_pf_batch, size(U_0[1:1,:])))
    
    if !all
        # Keep only columns whose second state component is within ϵ of zero ...
        mask = abs.(Yt[2,:]) .< ϵ
        index = findall(x->x==true, mask)
        # ... and of those keep each one with probability 0.2 (the helper returns the
        # entries whose uniform draw is below its second argument). This makes the
        # loss value stochastic from call to call.
        index = delete_with_probability!(index, 0.2) 
        size(index)[1] == 0 && return 0
        Yt = Yt[:, index]
    
        ∇Y_t = ∇Y_t[:, index]
        U_0 = U_0[:, index]
        isnothing(λ_pf_batch) || (λ_pf_batch = λ_pf_batch[:, index])
    end

    
    state_dim, batchsize = size(Yt) # (rows of Yt, number of columns that survived the selection)
    # Gradient of ϕ with respect to its input columns.
    # NOTE(review): the pullback is seeded with an array shaped like the *input* Yt
    # and the result is divided by state_dim; presumably this compensates for the
    # seed having state_dim rows while ϕ(Yt) is indexed elsewhere in this file as
    # having a single row — confirm, and consider seeding with the output shape.
    _, ∇ϕ = Zygote.pullback(ϕ, Yt)
    ∇ϕ_Y = ∇ϕ(ones(size(Yt)))[1] ./ state_dim
    ∇ϕ_Y = reshape(∇ϕ_Y, (1, state_dim, batchsize))

    ∇Y_t = reshape(∇Y_t, (state_dim, 1, batchsize))
    
    # Per column: (1 × state_dim) * (state_dim × 1), reshaped to the shape of ϕ(Yt).
    ϕ̇ = reshape(batched_mul(∇ϕ_Y, ∇Y_t), size(ϕ(Yt)))
    
    C = (α * ℯ^(-α*T)) / (1-ℯ^(-α*T))
    l = ϕ̇ .+ α .* ϕ(Yt) .+ C .* ϕ(U_0)

    # Optional per-column weighting, applied before the relu.
    isnothing(λ_pf_batch) || (l = l .* λ_pf_batch)
    loss = relu(l .+ 1e-6)
    # Average over the last dimension (the columns).
    return sum(loss) / size(loss)[end]
end

"""
    get_model(name)

Load the BSON file called `name` from the `model/` directory next to this source
file and return what `BSON.load` yields for it.

# Throws
- `AssertionError` if `name` is not an entry of that directory. The check is
  written as an explicit `throw` rather than `@assert` so it cannot be compiled
  out, while keeping the exception type earlier callers would have seen.
"""
function get_model(name)
    model_path = joinpath(@__DIR__, "./model/")
    if !(name in readdir(model_path))
        throw(AssertionError("model file \"$name\" not found in $model_path"))
    end
    return BSON.load(joinpath(model_path, name), @__MODULE__)
end


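Before running the next cell, replace the placeholder `NEURAL_OPERATOR_PATH` (the value of `pretrained_NO`) with the file name of the pretrained neural-operator checkpoint saved by `train_hyper_all_pf.jl`. `get_model` looks the file up inside the `model/` directory next to this file, so give the file name only, not a full path.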

In [None]:

# ---------------------------------------------------------------------------
# Training configuration, models, optimisers and bookkeeping for the loop below.
# ---------------------------------------------------------------------------

cuda = true
η₀ = 1.0f-3          # learning rate of the neural-operator optimiser
λ = 1.0f-4           # third argument handed to AdamW below
total_epoch = 20
pretrained_NO = "NEURAL_OPERATOR_PATH"  # file name inside model/, or `nothing` to start fresh

# NOTE(review): `device` is selected here, but nothing visible in this file moves
# the models or the batches onto it — confirm whether GPU training is intended.
if cuda && CUDA.has_cuda()
    device = gpu
    CUDA.allowscalar(false)
    @info "Training on GPU"
else
    device = cpu
    @info "Training on CPU"
end

lr_NO = η₀
lr_CBF = 0.01  # an earlier `lr_CBF = 0.001` was overwritten immediately and has been dropped

lr_decay_rate = 0.2
lr_decay_epoch = 4

train_loader, test_loader = my_get_dataloader()

# Build the neural operator once: fresh when no checkpoint is configured,
# otherwise loaded from the checkpoint.
model_NO = if isnothing(pretrained_NO)
    FourierNeuralOperator(ch = (2, 64, 64, 64, 64, 64, 128, 1), modes = (16,),
                          σ = gelu)
else
    get_model(pretrained_NO)[:model_NO]
end

model_CBF = Chain(
        Dense(2 => 16, relu),   # activation function inside layer
        Dense(16 => 64, relu),  # activation function inside layer
        Dense(64 => 16, relu),  # activation function inside layer
        Dense(16 => 1)
    )

optim_NO = Flux.setup(Flux.Optimise.AdamW(η₀, (0.9, 0.999), λ), model_NO)
# NOTE(review): confirm that 0.1 is the intended value for NADAM's third positional argument.
optim_CBF = Flux.setup(Flux.Optimise.NADAM(lr_CBF, (0.9, 0.999), 0.1), model_CBF)
# Step schedule for the CBF learning rate, built from lr_CBF, lr_decay_rate and lr_decay_epoch.
sched_CBF = ParameterSchedulers.Stateful(Step(lr_CBF, lr_decay_rate, lr_decay_epoch))

loss_func = l₂loss
α = 0.00001
pf_ϵ = 0.1            # ϵ passed to loss_pf
λ_pf = 1              # weight of the loss_pf term
λ_reg = 1             # weight of the regularisation terms
all_flag = false
minus_safe_flag = true # cannot be false if the end can be not pf

# Per-epoch loss histories.
training_losses = Float64[]
test_losses = Float64[]
no_training_losses = Float64[]
no_test_losses = Float64[]
least_loss = 1000
test_loss = 0
loss = 0
"""
    prepare_cbf_batch(x_batch, y_batch, safe_batch, λ_pf)

Turn one `DataLoader` batch into the flattened arrays the CBF losses consume.

`x_batch` is indexed as `(2, grid, batch)`, `y_batch` as `(1, grid, batch)` and
`safe_batch` as `(grid, batch)`. Returns a named tuple with

- `ytt`: `(2, grid*batch)` columns `[x_batch channel 1; y_batch]`;
- `y_init`: the labels of `safe_batch` flattened to a vector in the same column order;
- `U_0tt`: `(2, grid*batch)` columns `[x_batch channel 1; x_batch channel 2 at the first grid point]`;
- `extended_U̇`: `(2, grid, batch)`, a row of ones stacked on `find_derivative(x_batch)`;
- `extended_∇Y_tt`: `(2, 1, grid*batch)`, a row of ones stacked on the finite-difference
  derivative of `y_batch`, one column per column of `ytt`;
- `T`: `x_batch[1, end, 1]`;
- `λ_pf_batch`: `(grid, batch)` weights, `λ_pf` for samples whose label at the last
  grid point is `1` and `0` otherwise.
"""
function prepare_cbf_batch(x_batch, y_batch, safe_batch, λ_pf)
    # Weight of the loss_pf term: constant along the grid, switched per sample.
    λ_pf_batch = zeros(size(safe_batch))
    pf_index = findall(isone, safe_batch[end, :])
    λ_pf_batch[:, pf_index] .= λ_pf

    y_init = vec(copy(safe_batch))

    grid_channel = x_batch[1:1, :, :]
    yt = cat(grid_channel, y_batch; dims = 1)
    ytt = reshape(yt, size(yt, 1), :)

    # Value of the second input channel at the first grid point, repeated along the grid.
    u0 = repeat(x_batch[2:2, 1:1, :], 1, size(x_batch, 2), 1)
    U_0t = cat(grid_channel, u0; dims = 1)
    U_0tt = reshape(U_0t, size(U_0t, 1), :)

    U̇ = find_derivative(x_batch)
    extended_U̇ = cat(ones(size(U̇)), U̇; dims = 1)

    # Empirical derivative of y_batch along the grid. The ones are stacked along
    # the *channel* axis so that, after flattening, column j is [1; dY/dt] of the
    # same point as column j of `ytt`. (Stacking after dropping the channel axis
    # concatenated along the grid instead and misaligned the two.)
    ∇Y_t = find_derivative(yt)
    extended_∇Y_t = cat(ones(size(∇Y_t)), ∇Y_t; dims = 1)
    extended_∇Y_tt = reshape(extended_∇Y_t, size(extended_∇Y_t, 1), 1, :)

    T = x_batch[1, end, 1]

    return (; ytt, y_init, U_0tt, extended_U̇, extended_∇Y_tt, T, λ_pf_batch)
end

# The five unweighted loss terms of the CBF objective for one prepared batch.
function cbf_loss_terms(m, x_batch, b)
    return (
        loss_naive_safeset(m, b.ytt, b.y_init),
        loss_regularization(m, b.ytt, b.y_init),
        loss_pf(m, x_batch, b.ytt, b.U_0tt, b.extended_U̇, b.extended_∇Y_tt, b.T, α,
                b.y_init; all = all_flag, ϵ = pf_ϵ, λ_pf_batch = b.λ_pf_batch),
        loss_naive_safeset_end(m, b.ytt, b.y_init; minus_safe = minus_safe_flag),
        loss_regularization_end(m, b.ytt, b.y_init; minus_safe = minus_safe_flag),
    )
end

# Weighted sum of the five terms using the global weights λ_reg and λ_pf.
function cbf_weighted_sum(terms)
    naive, reg, pf, naive_end, reg_end = terms
    return naive + λ_reg * reg + λ_pf * pf + naive_end + λ_reg * reg_end
end

# Directory the per-epoch checkpoints are written to.
mkpath("model")

for epoch in ProgressBar(1:total_epoch)
    training_loss_epoch = Float64[]
    test_loss_epoch = Float64[]

    for (x_batch, y_batch, safe_batch) in train_loader
        batch = prepare_cbf_batch(x_batch, y_batch, safe_batch, λ_pf)

        # train CBF
        _, CBF_grads = Flux.withgradient(model_CBF) do m
            cbf_weighted_sum(cbf_loss_terms(m, x_batch, batch))
        end
        Flux.update!(optim_CBF, model_CBF, CBF_grads[1])

        # Post-update loss for logging. loss_pf subsamples randomly, so the terms
        # are evaluated once and both the printout and the total come from them.
        loss_terms = cbf_loss_terms(model_CBF, x_batch, batch)
        @show loss_terms
        global loss = cbf_weighted_sum(loss_terms)
        push!(training_loss_epoch, loss)  # logging, outside gradient context
    end

    for (x_batch, y_batch, safe_batch) in test_loader
        batch = prepare_cbf_batch(x_batch, y_batch, safe_batch, λ_pf)
        global loss = cbf_weighted_sum(cbf_loss_terms(model_CBF, x_batch, batch))
        push!(test_loss_epoch, loss)  # logging, outside gradient context
    end

    nextlr = ParameterSchedulers.next!(sched_CBF)  # advance schedule
    Optimisers.adjust!(optim_CBF, nextlr)          # by default this changes the learning rate `eta`

    @save "model/hyper_model_$epoch.bson" model_CBF

    # Every batch loss is already a mean, so the epoch value is the mean over
    # batches. (The earlier fixed divisors 45000 / 5000 did not follow the data
    # set size, so the reported scale changes.)
    push!(training_losses, sum(training_loss_epoch) / length(training_loss_epoch))
    push!(test_losses, sum(test_loss_epoch) / length(test_loss_epoch))
end


In [None]:
# Print the per-epoch training and test loss histories filled in by the training loop.
@show training_losses, test_losses