Skip to content

Commit

Permalink
First-class support of SKCE for probability vectors and boolean targets (#85)
Browse files Browse the repository at this point in the history
  • Loading branch information
devmotion committed Apr 28, 2021
1 parent 95ea24c commit a140a02
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 54 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "CalibrationErrors"
uuid = "33913031-fe46-5864-950f-100836f47845"
authors = ["David Widmann <david.widmann@it.uu.se>"]
version = "0.5.16"
version = "0.5.17"

[deps]
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Expand Down
28 changes: 28 additions & 0 deletions src/skce/generic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,20 @@ function unsafe_skce_eval(
return result
end

# for binary classification with probabilities (corresponding to parameters of Bernoulli
# distributions) and boolean targets the expression simplifies to
# ```math
# k((p, y), (p̃, ỹ)) = (y(1-p) + (1-y)p)(ỹ(1-p̃) + (1-ỹ)p̃)(k((p, y), (p̃, ỹ)) - k((p, 1-y), (p̃, ỹ)) - k((p, y), (p̃, 1-ỹ)) + k((p, 1-y), (p̃, 1-ỹ)))
# ```
function unsafe_skce_eval(kernel::Kernel, p::Real, y::Bool, p̃::Real, ỹ::Bool)
    # Flipped targets for the four kernel evaluations of the simplified expression above.
    noty = !y
    notỹ = !ỹ
    # Alternating sum of the kernel over all combinations of original/flipped targets.
    z =
        kernel((p, y), (p̃, ỹ)) - kernel((p, noty), (p̃, ỹ)) -
        kernel((p, y), (p̃, notỹ)) + kernel((p, noty), (p̃, notỹ))
    # `y * (1 - p) + (1 - y) * p` reduces to `1 - p` for `y = true` and `p` otherwise.
    return (y ? 1 - p : p) * (ỹ ? 1 - p̃ : p̃) * z
end

# evaluation for tensor product kernels
function unsafe_skce_eval(kernel::KernelTensorProduct, p, y, p̃, ỹ)
κpredictions, κtargets = kernel.kernels
Expand All @@ -105,6 +119,10 @@ function unsafe_skce_eval(
κpredictions, κtargets = kernel.kernels
return κpredictions(p, p̃) * unsafe_skce_eval_targets(κtargets, p, y, p̃, ỹ)
end
function unsafe_skce_eval(kernel::KernelTensorProduct, p::Real, y::Bool, p̃::Real, ỹ::Bool)
    # Specialization for binary classification with scalar probabilities and boolean
    # targets: factorize the tensor product kernel into its prediction and target parts.
    kernelp, kernely = kernel.kernels
    targetterm = unsafe_skce_eval_targets(kernely, p, y, p̃, ỹ)
    return kernelp(p, p̃) * targetterm
end

function unsafe_skce_eval_targets(
κtargets::Kernel,
Expand Down Expand Up @@ -258,3 +276,13 @@ function unsafe_skce_eval_targets(
@inbounds res = (y == ỹ) - p[ỹ] - p̃[y] + dot(p, p̃)
return res
end

# Target-kernel part of the SKCE estimator for binary classification with scalar
# probabilities (parameters of Bernoulli distributions) and boolean targets.
function unsafe_skce_eval_targets(κtargets::Kernel, p::Real, y::Bool, p̃::Real, ỹ::Bool)
    # Flipped targets for the four target-kernel evaluations.
    noty = !y
    notỹ = !ỹ
    # Alternating sum of the target kernel over all combinations of original/flipped targets.
    z = κtargets(y, ỹ) - κtargets(noty, ỹ) - κtargets(y, notỹ) + κtargets(noty, notỹ)
    # `y * (1 - p) + (1 - y) * p` reduces to `1 - p` for `y = true` and `p` otherwise.
    return (y ? 1 - p : p) * (ỹ ? 1 - p̃ : p̃) * z
end
# For the white kernel on the targets the alternating sum collapses and the
# target term simplifies to `2 * (y - p) * (ỹ - p̃)`.
function unsafe_skce_eval_targets(::WhiteKernel, p::Real, y::Bool, p̃::Real, ỹ::Bool)
    residual = y - p
    residual̃ = ỹ - p̃
    return 2 * residual * residual̃
end
56 changes: 10 additions & 46 deletions test/kernels.jl
Original file line number Diff line number Diff line change
@@ -1,51 +1,15 @@
@testset "kernels.jl" begin
# alternative implementation of white kernel
struct WhiteKernel2 <: Kernel end
(::WhiteKernel2)(x, y) = x == y
kernel = TVExponentialKernel()

# alternative implementation TensorProductKernel
struct TensorProduct2{K1<:Kernel,K2<:Kernel} <: Kernel
kernel1::K1
kernel2::K2
end
function (kernel::TensorProduct2)((x1, x2), (y1, y2))
return kernel.kernel1(x1, y1) * kernel.kernel2(x2, y2)
end
# traits
@test KernelFunctions.metric(kernel) === TotalVariation()

@testset "TVExponentialKernel" begin
kernel = TVExponentialKernel()
# simple evaluation
x, y = rand(10), rand(10)
@test kernel(x, y) == exp(-totalvariation(x, y))

# traits
@test KernelFunctions.metric(kernel) === TotalVariation()

# simple evaluation
x, y = rand(10), rand(10)
@test kernel(x, y) == exp(-totalvariation(x, y))

# transformations
@test (kernel ∘ ScaleTransform(0.1))(x, y) == exp(-0.1 * totalvariation(x, y))
ard = rand(10)
@test (kernel ∘ ARDTransform(ard))(x, y) == exp(-totalvariation(ard .* x, ard .* y))
end

@testset "unsafe_skce_eval" begin
kernel = SqExponentialKernel()
kernel1 = kernel ⊗ WhiteKernel()
kernel2 = kernel ⊗ WhiteKernel2()
kernel3 = TensorProduct2(kernel, WhiteKernel())

x1, x2 = rand(10), rand(1:10)

@test CalibrationErrors.unsafe_skce_eval(kernel1, x1, x2, x1, x2) ≈
CalibrationErrors.unsafe_skce_eval(kernel2, x1, x2, x1, x2)
@test CalibrationErrors.unsafe_skce_eval(kernel1, x1, x2, x1, x2) ≈
CalibrationErrors.unsafe_skce_eval(kernel3, x1, x2, x1, x2)

y1, y2 = rand(10), rand(1:10)

@test CalibrationErrors.unsafe_skce_eval(kernel1, x1, x2, y1, y2) ≈
CalibrationErrors.unsafe_skce_eval(kernel2, x1, x2, y1, y2)
@test CalibrationErrors.unsafe_skce_eval(kernel1, x1, x2, y1, y2) ≈
CalibrationErrors.unsafe_skce_eval(kernel3, x1, x2, y1, y2)
end
# transformations
@test (kernel ∘ ScaleTransform(0.1))(x, y) == exp(-0.1 * totalvariation(x, y))
ard = rand(10)
@test (kernel ∘ ARDTransform(ard))(x, y) == exp(-totalvariation(ard .* x, ard .* y))
end
5 changes: 5 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ using Random
using Statistics
using Test

using CalibrationErrors: unsafe_skce_eval

Random.seed!(1234)

@testset "CalibrationErrors" begin
Expand All @@ -32,6 +34,9 @@ Random.seed!(1234)
end

@testset "SKCE" begin
@testset "generic" begin
include("skce/generic.jl")
end
@testset "biased" begin
include("skce/biased.jl")
end
Expand Down
23 changes: 21 additions & 2 deletions test/skce/biased.jl
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
@testset "biased.jl" begin
@testset "Two-dimensional example" begin
# categorical distributions
skce = BiasedSKCE(SqExponentialKernel() ⊗ WhiteKernel())

# only two predictions, i.e., three unique terms in the estimator
@test iszero(@inferred(skce([[1, 0], [0, 1]], [1, 2])))
@test @inferred(skce([[1, 0], [0, 1]], [1, 1])) ≈ 0.5
@test @inferred(skce([[1, 0], [0, 1]], [2, 1])) ≈ 1 - exp(-1)
@test @inferred(skce([[1, 0], [0, 1]], [2, 2])) ≈ 0.5

# probabilities
skce = BiasedSKCE((SqExponentialKernel() ∘ ScaleTransform(sqrt(2))) ⊗ WhiteKernel())
@test iszero(@inferred(skce([1, 0], [true, false])))
@test @inferred(skce([1, 0], [true, true])) ≈ 0.5
@test @inferred(skce([1, 0], [false, true])) ≈ 1 - exp(-1)
@test @inferred(skce([1, 0], [false, false])) ≈ 0.5
end

@testset "Basic properties" begin
skce = BiasedSKCE((ExponentialKernel() ∘ ScaleTransform(0.1)) ⊗ WhiteKernel())
estimates = Vector{Float64}(undef, 1_000)

# categorical distributions
for nclasses in (2, 10, 100)
dist = Dirichlet(nclasses, 1.0)

Expand All @@ -27,5 +34,17 @@

@test all(x -> x > zero(x), estimates)
end

# probabilities
predictions = Vector{Float64}(undef, 20)
targets = Vector{Bool}(undef, 20)
for i in 1:length(estimates)
rand!(predictions)
map!(targets, predictions) do p
rand() < p
end
estimates[i] = skce(predictions, targets)
end
@test all(x -> x > zero(x), estimates)
end
end
60 changes: 60 additions & 0 deletions test/skce/generic.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
@testset "generic.jl" begin
    # alternative implementation of white kernel
    struct WhiteKernel2 <: Kernel end
    (::WhiteKernel2)(x, y) = x == y

    # alternative implementation TensorProductKernel
    struct TensorProduct2{K1<:Kernel,K2<:Kernel} <: Kernel
        kernel1::K1
        kernel2::K2
    end
    function (kernel::TensorProduct2)((x1, x2), (y1, y2))
        return kernel.kernel1(x1, y1) * kernel.kernel2(x2, y2)
    end

    @testset "binary classification" begin
        # probabilities and boolean targets
        p, p̃ = rand(2)
        y, ỹ = rand(Bool, 2)
        scale = rand()
        kernel = SqExponentialKernel() ∘ ScaleTransform(scale)
        val = unsafe_skce_eval(kernel ⊗ WhiteKernel(), p, y, p̃, ỹ)
        @test unsafe_skce_eval(kernel ⊗ WhiteKernel2(), p, y, p̃, ỹ) ≈ val
        @test unsafe_skce_eval(TensorProduct2(kernel, WhiteKernel()), p, y, p̃, ỹ) ≈ val
        @test unsafe_skce_eval(TensorProduct2(kernel, WhiteKernel2()), p, y, p̃, ỹ) ≈ val

        # corresponding values and kernel for full categorical distribution
        pfull = [p, 1 - p]
        yint = y ? 1 : 2
        p̃full = [p̃, 1 - p̃]
        ỹint = ỹ ? 1 : 2
        kernelfull = SqExponentialKernel() ∘ ScaleTransform(scale / sqrt(2))

        @test unsafe_skce_eval(kernelfull ⊗ WhiteKernel(), pfull, yint, p̃full, ỹint) ≈ val
        @test unsafe_skce_eval(kernelfull ⊗ WhiteKernel2(), pfull, yint, p̃full, ỹint) ≈
            val
        @test unsafe_skce_eval(
            TensorProduct2(kernelfull, WhiteKernel()), pfull, yint, p̃full, ỹint
        ) ≈ val
        @test unsafe_skce_eval(
            TensorProduct2(kernelfull, WhiteKernel2()), pfull, yint, p̃full, ỹint
        ) ≈ val
    end

    @testset "multi-class classification" begin
        n = 10
        p = rand(n)
        p ./= sum(p)
        y = rand(1:n)
        p̃ = rand(n)
        p̃ ./= sum(p̃)
        ỹ = rand(1:n)

        kernel = SqExponentialKernel() ∘ ScaleTransform(rand())
        val = unsafe_skce_eval(kernel ⊗ WhiteKernel(), p, y, p̃, ỹ)

        @test unsafe_skce_eval(kernel ⊗ WhiteKernel2(), p, y, p̃, ỹ) ≈ val
        @test unsafe_skce_eval(TensorProduct2(kernel, WhiteKernel()), p, y, p̃, ỹ) ≈ val
        @test unsafe_skce_eval(TensorProduct2(kernel, WhiteKernel2()), p, y, p̃, ỹ) ≈ val
    end
end
71 changes: 66 additions & 5 deletions test/skce/unbiased.jl
Original file line number Diff line number Diff line change
@@ -1,18 +1,27 @@
@testset "unbiased.jl" begin
@testset "Unbiased: Two-dimensional example" begin
# categorical distributions
skce = UnbiasedSKCE(SqExponentialKernel() ⊗ WhiteKernel())

# only two predictions, i.e., one term in the estimator
@test iszero(@inferred(skce([[1, 0], [0, 1]], [1, 2])))
@test iszero(@inferred(skce([[1, 0], [0, 1]], [1, 1])))
@test @inferred(skce([[1, 0], [0, 1]], [2, 1])) ≈ -2 * exp(-1)
@test iszero(@inferred(skce([[1, 0], [0, 1]], [2, 2])))

# probabilities
skce = UnbiasedSKCE(
(SqExponentialKernel() ∘ ScaleTransform(sqrt(2))) ⊗ WhiteKernel()
)
@test iszero(@inferred(skce([1, 0], [true, false])))
@test iszero(@inferred(skce([1, 0], [true, true])))
@test @inferred(skce([1, 0], [false, true])) ≈ -2 * exp(-1)
@test iszero(@inferred(skce([1, 0], [false, false])))
end

@testset "Unbiased: Basic properties" begin
skce = UnbiasedSKCE((ExponentialKernel() ∘ ScaleTransform(0.1)) ⊗ WhiteKernel())
estimates = Vector{Float64}(undef, 1_000)

# categorical distributions
for nclasses in (2, 10, 100)
dist = Dirichlet(nclasses, 1.0)

Expand All @@ -30,13 +39,26 @@
@test any(x -> x < zero(x), estimates)
@test mean(estimates) ≈ 0 atol = 1e-3
end

# probabilities
predictions = Vector{Float64}(undef, 20)
targets = Vector{Bool}(undef, 20)
for i in 1:length(estimates)
rand!(predictions)
map!(targets, predictions) do p
rand() < p
end
estimates[i] = skce(predictions, targets)
end

@test any(x -> x > zero(x), estimates)
@test any(x -> x < zero(x), estimates)
@test mean(estimates) ≈ 0 atol = 1e-3
end

@testset "Block: Two-dimensional example" begin
# Blocks of two samples
# categorical distributions
skce = BlockUnbiasedSKCE(SqExponentialKernel() ⊗ WhiteKernel())

# only two predictions, i.e., one term in the estimator
@test iszero(@inferred(skce([[1, 0], [0, 1]], [1, 2])))
@test iszero(@inferred(skce([[1, 0], [0, 1]], [1, 1])))
@test @inferred(skce([[1, 0], [0, 1]], [2, 1])) ≈ -2 * exp(-1)
Expand All @@ -48,6 +70,21 @@
@test @inferred(skce(repeat([[1, 0], [0, 1]], 10), repeat([2, 1], 10))) ≈
-2 * exp(-1)
@test iszero(@inferred(skce(repeat([[1, 0], [0, 1]], 10), repeat([2, 2], 10))))

# probabilities
skce = BlockUnbiasedSKCE(
(SqExponentialKernel() ∘ ScaleTransform(sqrt(2))) ⊗ WhiteKernel()
)
@test iszero(@inferred(skce([1, 0], [true, false])))
@test iszero(@inferred(skce([1, 0], [true, true])))
@test @inferred(skce([1, 0], [false, true])) ≈ -2 * exp(-1)
@test iszero(@inferred(skce([1, 0], [false, false])))

# two predictions, ten times replicated
@test iszero(@inferred(skce(repeat([1, 0], 10), repeat([true, false], 10))))
@test iszero(@inferred(skce(repeat([1, 0], 10), repeat([true, true], 10))))
@test @inferred(skce(repeat([1, 0], 10), repeat([false, true], 10))) ≈ -2 * exp(-1)
@test iszero(@inferred(skce(repeat([1, 0], 10), repeat([false, false], 10))))
end

@testset "Block: Basic properties" begin
Expand All @@ -58,6 +95,7 @@
blockskce_all = BlockUnbiasedSKCE(kernel, nsamples)
estimates = Vector{Float64}(undef, 1_000)

# categorical distributions
for nclasses in (2, 10, 100)
dist = Dirichlet(nclasses, 1.0)

Expand All @@ -82,5 +120,28 @@
@test any(x -> x < zero(x), estimates)
@test mean(estimates) ≈ 0 atol = 5e-3
end

# probabilities
predictions = Vector{Float64}(undef, nsamples)
targets = Vector{Bool}(undef, nsamples)

for i in 1:length(estimates)
rand!(predictions)
map!(targets, predictions) do p
return rand() < p
end
estimates[i] = blockskce(predictions, targets)

# consistency checks
@test estimates[i] ≈ mean(
skce(predictions[(2 * i - 1):(2 * i)], targets[(2 * i - 1):(2 * i)]) for
i in 1:(nsamples ÷ 2)
)
@test skce(predictions, targets) == blockskce_all(predictions, targets)
end

@test any(x -> x > zero(x), estimates)
@test any(x -> x < zero(x), estimates)
@test mean(estimates) ≈ 0 atol = 5e-3
end
end

2 comments on commit a140a02

@devmotion
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/35591

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.5.17 -m "<description of version>" a140a02f828789be621d2fd61f2485b2a70744ed
git push origin v0.5.17

Please sign in to comment.