
Commit

Merge 316adf8 into 8e125c9
holtri committed Nov 1, 2018
2 parents 8e125c9 + 316adf8 commit c0f7cc8
Showing 16 changed files with 53 additions and 41 deletions.
14 changes: 9 additions & 5 deletions .travis.yml
@@ -1,15 +1,19 @@
language: julia
os:
# - osx
- linux
julia:
- 0.6
# - 0.7
# - nightly
- 0.7
- 1.0

matrix:
fast_finish: true
allow_failures:
- julia: 1.0

notifications:
email: false
addons:
apt_packages:
- gfortran
after_success:
- julia -e 'cd(Pkg.dir("SVDD")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())';
- julia -e 'import Pkg; cd(Pkg.dir("SVDD")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())';
11 changes: 8 additions & 3 deletions README.md
@@ -10,11 +10,16 @@ The package has been developed as part of a benchmark suite for [active-learning
> Holger Trittenbach, Adrian Englhardt, Klemens Böhm, "An Overview and a Benchmark of Active Learning for One-Class Classification" [arXiv:1808.04759](https://arxiv.org/abs/1808.04759), 14 Aug 2018
## Installation
This package works with Julia 0.6. We plan to upgrade to the latest Julia version once all dependencies support 1.0.
This package is not registered yet. Please use the following command to clone the package.
This package works with Julia 1.0.
This package is not registered yet. Please use the following commands to add the package with Pkg3.
```Julia
Pkg.clone("https://github.com/englhardt/SVDD.jl.git")
using Pkg
Pkg.add(PackageSpec(url="https://github.com/englhardt/SVDD.jl.git"))
```

The results presented in the paper are based on a previous version of the package and on Julia 0.6.
To reproduce the experiment results from the paper, use the old package manager (with `Pkg.clone`) and check out SVDD.jl at tag `v1.0`.
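
A minimal sketch of this reproduction setup on Julia 0.6 (assumptions: the `v1.0` tag exists as stated above and a `git` binary is on the PATH):
```Julia
Pkg.clone("https://github.com/englhardt/SVDD.jl.git")
cd(Pkg.dir("SVDD")) do       # Pkg.dir resolves the package path (Julia 0.6 API)
    run(`git checkout v1.0`) # pin the working copy to the paper's tag
end
```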

## Overview

[One-class classifiers](https://en.wikipedia.org/wiki/One-class_classification) learn to identify if objects belong to a specific class, often used for outlier detection.
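
A minimal end-to-end sketch, pieced together from the test files changed in this commit (the kernel parameter, cost `0.1`, and the random data are illustrative; `fit!` for `VanillaSVDD` is assumed to work like the `SSAD` method in this diff):
```Julia
using SVDD, MLKernels, Ipopt

data = randn(2, 100)  # 2 dimensions, 100 observations, one per column
model = SVDD.VanillaSVDD(data)
init_strategy = SVDD.FixedParameterInitialization(MLKernels.GaussianKernel(4), 0.1)
SVDD.initialize!(model, init_strategy)
SVDD.fit!(model, IpoptSolver(print_level=0))
scores = SVDD.predict(model, data)  # positive scores flag outliers, cf. classify in svdd_util.jl
```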
3 changes: 2 additions & 1 deletion REQUIRE
@@ -1,8 +1,9 @@
julia 0.6
julia 0.7
MLKernels
MLLabelUtils
JuMP
Ipopt
Distributions
StatsBase
Memento
Compat
1 change: 1 addition & 0 deletions src/SVDD.jl
@@ -16,6 +16,7 @@ include("init_strategies/strategies_combined.jl")

using Memento
using Compat: @__MODULE__
using LinearAlgebra, Random

const LOGGER = getlogger(@__MODULE__)

16 changes: 8 additions & 8 deletions src/classifiers/classifier_ssad.jl
@@ -67,23 +67,23 @@ end

function get_cy(model::SSAD)
cy = ones(size(model.data, 2))
haskey(model.pools, :Lout) && (cy[model.pools[:Lout]] = -1)
haskey(model.pools, :Lout) && (cy[model.pools[:Lout]] .= -1)
return cy
end

function calculate_rho(model::SSAD)
SV_candidates = find(model.alpha_values .> OPT_PRECISION)
SV_candidates_U = haskey(model.pools, :U)? SV_candidates ∩ model.pools[:U] : Int64[]
SV_candidates = findall(model.alpha_values .> OPT_PRECISION)
SV_candidates_U = haskey(model.pools, :U) ? SV_candidates ∩ model.pools[:U] : Int64[]
cy = get_cy(model)

if length(SV_candidates_U) > 0
scores = (model.alpha_values .* cy)' * model.K[:, SV_candidates_U]
sv = find(model.alpha_values[SV_candidates_U] .< model.C1 - OPT_PRECISION)
sv = findall(model.alpha_values[SV_candidates_U] .< model.C1 - OPT_PRECISION)
ρ = isempty(sv) ? maximum(scores) : minimum(scores[sv])
else
scores = model.K'model.alpha_values
SV_candidates_Lin = haskey(model.pools, :Lin) ? SV_candidates ∩ model.pools[:Lin] : Int64[]
SV_candidates_Lout = haskey(model.pools, :Lout)? SV_candidates ∩ model.pools[:Lout] : Int64[]
SV_candidates_Lout = haskey(model.pools, :Lout) ? SV_candidates ∩ model.pools[:Lout] : Int64[]
if length(SV_candidates_Lout) > 0 && length(SV_candidates_Lin) == 0
warn(LOGGER, "[CALCULATE_RHO] There are no labeled inlier SV -- check OPT_PRECISION.")
ρ = maximum(scores[SV_candidates_Lout])
@@ -98,13 +98,13 @@ end

function predict(model::SSAD, target::Array{T,2}) where T <: Real
model.state == model_fitted || throw(ModelStateException(model.state, model_fitted))
SV_candidates = find(model.alpha_values .> OPT_PRECISION)
SV_candidates = findall(model.alpha_values .> OPT_PRECISION)
function predict_observation(z)
k = vec(mapslices(x -> kernel(model.kernel_fct, z, x), model.data[:, SV_candidates], 1))
k = vec(mapslices(x -> kernel(model.kernel_fct, z, x), model.data[:, SV_candidates], dims=1))
model.alpha_values[SV_candidates]'k
end
# this is inverted from Goernitz such that outliers have positive margin to be consistent with SVDD
return model.ρ .- vec(mapslices(predict_observation, target, 1))
return model.ρ .- vec(mapslices(predict_observation, target, dims=1))
end

function fit!(model::SSAD, solver)
2 changes: 1 addition & 1 deletion src/classifiers/classifier_svdd.jl
@@ -20,7 +20,7 @@ function predict(model::SVDDClassifier, target::Array{T,2}) where T <: Real
2 * sum(α[i] * kernel(model.kernel_fct, model.data[:,i], z) for i in eachindex(α)) +
model.const_term
end
return vec(sqrt.(mapslices(predict_observation, target, 1)) - model.R)
return vec(sqrt.(mapslices(predict_observation, target, dims=1)) .- model.R)
end

function get_R_and_const_term(model::SVDDClassifier)
4 changes: 2 additions & 2 deletions src/classifiers/classifier_svdd_neg.jl
@@ -97,10 +97,10 @@ end
function get_support_vectors(model::SVDDneg)
ULin = merge_pools(model.pools, :U, :Lin)
length(ULin) > 0 || throw(ModelInvariantException("SVDDneg requires samples in pool :Lin or :U."))
sv = filter!(x -> x in ULin, find((model.alpha_values .> OPT_PRECISION) .& (model.alpha_values .< (model.C1 - OPT_PRECISION))))
sv = filter!(x -> x in ULin, findall((model.alpha_values .> OPT_PRECISION) .& (model.alpha_values .< (model.C1 - OPT_PRECISION))))
if haskey(model.pools, :Lout)
sv = append!(sv, filter!(x -> x in model.pools[:Lout],
find((model.alpha_values .> OPT_PRECISION) .& (model.alpha_values .< (model.C2 - OPT_PRECISION)))))
findall((model.alpha_values .> OPT_PRECISION) .& (model.alpha_values .< (model.C2 - OPT_PRECISION)))))
end
return sv
end
2 changes: 1 addition & 1 deletion src/classifiers/classifier_svdd_vanilla.jl
@@ -63,7 +63,7 @@ function solve!(model::VanillaSVDD, solver)
end

function get_support_vectors(model::VanillaSVDD)
find((model.alpha_values .> OPT_PRECISION) .& (model.alpha_values .< (model.C - OPT_PRECISION)))
findall((model.alpha_values .> OPT_PRECISION) .& (model.alpha_values .< (model.C - OPT_PRECISION)))
end

get_alpha_prime(model::VanillaSVDD) = model.alpha_values
4 changes: 2 additions & 2 deletions src/init_strategies/strategies_gamma.jl
@@ -10,7 +10,7 @@ calculate_gamma(model, strategy::FixedGammaStrategy) = strategy.kernel
Original publication:
Silverman, Bernard W. Density estimation for statistics and data analysis. Routledge, 2018.
"""
type RuleOfThumbSilverman <: InitializationStrategyGamma end
struct RuleOfThumbSilverman <: InitializationStrategyGamma end

function calculate_gamma(model, strategy::RuleOfThumbSilverman)
return (size(model.data, 2) * (size(model.data, 1) + 2) / 4.0)^(-1.0 / (size(model.data,1) + 4.0))
@@ -20,7 +20,7 @@ end
Original publication:
Scott, David W. Multivariate density estimation: theory, practice, and visualization. John Wiley & Sons, 2015.
"""
type RuleOfThumbScott <: InitializationStrategyGamma end
struct RuleOfThumbScott <: InitializationStrategyGamma end

function calculate_gamma(model, strategy::RuleOfThumbScott)
return size(model.data, 2)^(-1.0/(size(model.data,1) + 4))
10 changes: 5 additions & 5 deletions src/svdd_util.jl
@@ -1,17 +1,17 @@

function merge_pools(pools, names...)
(names[1] == [] || MLLabelUtils.islabelenc(collect(names), SVDD.learning_pool_enc)) || throw(ArgumentError("$(collect(names)) is not a valid label encoding."))
return reduce((r, key) -> vcat(r, haskey(pools, key) ? pools[key] : Int64[]), Int64[], unique(names))
return reduce((r, key) -> vcat(r, haskey(pools, key) ? pools[key] : Int64[]), unique(names); init=Int64[])
end

classify(x::Number) = x > 0 ? :outlier : :inlier

function adjust_kernel_matrix(K::Array{T, 2}; tolerance = 1e-15, warn_threshold = 1e-8) where T <: Real
info(LOGGER, "Adjusting Kernel Matrix.")
F = eigfact(K)
eltype(F[:values]) <: Complex && throw(ArgumentError("Matrix K has complex eigenvalues."))
F[:values][F[:values] .< tolerance] = 0.0
K_adjusted::Array{Float64, 2} = F[:vectors] * diagm(F[:values]) * inv(F[:vectors])
F = eigen(K)
eltype(F.values) <: Complex && throw(ArgumentError("Matrix K has complex eigenvalues."))
F.values[F.values .< tolerance] .= 0.0
K_adjusted::Array{Float64, 2} = F.vectors * diagm(0 => F.values) * inv(F.vectors)
K_diff = abs.(K_adjusted - K)
sum_adjustment = sum(K_diff)
max_adjustment = maximum(K_diff)
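
A quick numeric check of the eigenvalue clamping above, a hand-worked sketch reusing the `K2` matrix from the tests further down:
```Julia
using LinearAlgebra

K2 = [1.0 2.0; 2.0 1.0]         # eigenvalues 3 and -1, so K2 is not PSD
F = eigen(K2)
F.values[F.values .< 0] .= 0.0  # clamp the negative eigenvalue
K2_adjusted = F.vectors * diagm(0 => F.values) * inv(F.vectors)
# K2_adjusted ≈ [1.5 1.5; 1.5 1.5], which is positive semidefinite
```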
2 changes: 1 addition & 1 deletion test/REQUIRE
@@ -1,2 +1,2 @@
julia 0.6
julia 0.7
Coverage
2 changes: 1 addition & 1 deletion test/classifiers/classifier_ssad_test.jl
@@ -61,7 +61,7 @@
@testset "predict" begin
predictions = SVDD.predict(ssad, dummy_data)
@test length(predictions) == size(dummy_data, 2)
@test predictions[1] == SVDD.predict(ssad, [1.0 2.0]')[1]
@test predictions[1] == SVDD.predict(ssad, hcat([1.0, 2.0]))[1]
end

@testset "params" begin
2 changes: 1 addition & 1 deletion test/classifiers/classifier_svdd_vanilla_test.jl
@@ -31,7 +31,7 @@
@testset "predict" begin
predictions = SVDD.predict(vanilla_svdd, dummy_data)
@test length(predictions) == size(dummy_data, 2)
@test predictions[1] == SVDD.predict(vanilla_svdd, [1.0 4.0]')[1]
@test predictions[1] == SVDD.predict(vanilla_svdd, hcat([1.0, 4.0]))[1]
end

@testset "params" begin
3 changes: 2 additions & 1 deletion test/runtests.jl
@@ -2,7 +2,8 @@ using SVDD
using Ipopt
using StatsBase, Distributions
using MLKernels, MLLabelUtils
using Base.Test
using Test
using LinearAlgebra, Random

TEST_SOLVER = IpoptSolver(print_level=0)

10 changes: 5 additions & 5 deletions test/svdd_util_test.jl
@@ -27,22 +27,22 @@

@testset "adjust kernel" begin
K1 = [2 -1 0; -1 2 -1; 0 -1 2]
@assert all(eig(K1)[1] .> 0)
@assert all(eigen(K1).values .> 0)
@test K1 ≈ SVDD.adjust_kernel_matrix(K1)

K2 = [1 2; 2 1]
@assert any(eig(K2)[1] .< 0)
@assert any(eigen(K2).values .< 0)
K2_adjusted = SVDD.adjust_kernel_matrix(K2, warn_threshold = 2)
@test !(K2 ≈ K2_adjusted)
@test all(eig(K2_adjusted)[1] .>= 0.0)
@test all(eigen(K2_adjusted).values .>= 0.0)

srand(42)
Random.seed!(42)
dummy_data, _ = generate_mvn_with_outliers(2, 100, 42, true, true)
model = SVDD.VanillaSVDD(dummy_data)
init_strategy = SVDD.FixedParameterInitialization(MLKernels.GaussianKernel(4), 0.1)
SVDD.initialize!(model, init_strategy)
K_old = copy(model.K)
@assert any(eig(model.K)[1] .< 0)
@assert any(eigen(model.K).values .< 0)

@test_throws ArgumentError SVDD.adjust_kernel_matrix([3 -2; 4 -1])
end
8 changes: 4 additions & 4 deletions test/test_utils.jl
@@ -2,10 +2,10 @@
function generate_mvn_with_outliers(n_dim, n_observations,
seed=123, normalized=true, incl_outliers=true)

srand(seed)
norm_distribution = MvNormal(zeros(n_dim), eye(n_dim))
Random.seed!(seed)
norm_distribution = MvNormal(zeros(n_dim), Matrix(1.0I, n_dim, n_dim))
inliers = rand(norm_distribution, n_observations)
tmp = [rand(MvNormal([x, y], eye(2)), 2) for x in [4,-4] for y in [4,-4]]
tmp = [rand(MvNormal([x, y], Matrix(1.0I, 2, 2)), 2) for x in [4,-4] for y in [4,-4]]
outliers = vcat(hcat(tmp...), zeros(n_dim - 2, 8))

if incl_outliers
@@ -16,7 +16,7 @@ function generate_mvn_with_outliers(
labels = vcat(fill("inlier", n_observations))
end
if normalized
x = mapslices(normalize, x, 2)
x = mapslices(normalize, x, dims=2)
end
return (x, labels)
end
