Skip to content

Commit

Permalink
add deepest regression estimator (#13)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbytecode committed Aug 23, 2023
1 parent f7bac83 commit 313a007
Show file tree
Hide file tree
Showing 9 changed files with 163 additions and 36 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
# v0.10.2 (Upcoming Release)
# v0.11.1 (Upcoming Release)


# v0.11.0

- Deepest Regression Estimator added.


# v0.10.2

- mahalanobisSquaredBetweenPairs() return Union{Nothing, Matrix} depending on the determinant of the covariance matrix
- mahalanobisSquaredMatrix() returns Union{Nothing, Matrix} depending on the determinant of the covariance matrix
Expand Down
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "LinRegOutliers"
uuid = "6d4de0fb-32d9-4c65-aac1-cc9ed8b94b1a"
authors = ["Mehmet Hakan Satman <mhsatman@gmail.com>", "Shreesh Adiga <16567adigashreesh@gmail.com>", "Guillermo Angeris <angeris@stanford.edu>", "Emre Akadal <emre.akadal@istanbul.edu.tr>"]
version = "0.10.2"
version = "0.11.0"

[deps]
Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
Expand All @@ -14,6 +14,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
mrfDepth_jll = "53656f53-9700-50e7-bf9c-d3aea1338d1b"

[compat]
Clustering = "0.12.2, 0.13, 0.14, 0.15"
Expand All @@ -26,6 +27,7 @@ PrecompileTools = "1"
Requires = "1"
StatsModels = "0.4, 0.5, 0.6, 0.7"
julia = "1.4"
mrfDepth_jll = "1.0.14"

[extras]
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,12 @@ A Julia package for outlier detection in linear regression.
- Hadi (1994) Algorithm
- Chatterjee & Mächler (1997)
- Theil-Sen estimator for multiple regression
- Deepest Regression Estimator
- Summary


## Unimplemented Methods
- Depth based estimators (Regression depth, deepest regression, etc.) See [#13](https://github.com/jbytecode/LinRegOutliers/issues/13) for the related issue.

- Pena & Yohai (1999). See [#25](https://github.com/jbytecode/LinRegOutliers/issues/25) for the related issue.


Expand Down
4 changes: 4 additions & 0 deletions docs/src/algorithms.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ LinRegOutliers.quantileregression
LinRegOutliers.theilsen
```

## Deepest Regression Estimator
```@docs
LinRegOutliers.deepestregression
```



Expand Down
71 changes: 38 additions & 33 deletions src/LinRegOutliers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,33 @@ using Requires
# After the module is loaded, we check if Plots is installed and loaded.
# If Plots is installed and loaded, we load the corresponding modules.
function __init__()
@require Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" begin

import .Plots: RGBX
@require Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" begin

include("mveltsplot.jl")
include("dataimage.jl")
include("bchplot.jl")
import .Plots: RGBX

import .MVELTSPlot: mveltsplot
import .DataImage: dataimage
import .BCHPlot: bchplot

export mveltsplot, dataimage, bchplot, RGBX
include("mveltsplot.jl")
include("dataimage.jl")
include("bchplot.jl")

end
import .MVELTSPlot: mveltsplot
import .DataImage: dataimage
import .BCHPlot: bchplot

export mveltsplot, dataimage, bchplot, RGBX

end
end

# Basis
include("basis.jl")
import .Basis:
RegressionSetting,
createRegressionSetting,
@extractRegressionSetting,
applyColumns,
find_minimum_nonzero,
designMatrix,
responseVector
RegressionSetting,
createRegressionSetting,
@extractRegressionSetting,
applyColumns,
find_minimum_nonzero,
designMatrix,
responseVector
export RegressionSetting
export createRegressionSetting
export designMatrix
Expand Down Expand Up @@ -65,20 +65,20 @@ import .OrdinaryLeastSquares: OLS, ols, wls, residuals, predict, coef
# Regression diagnostics
include("diagnostics.jl")
import .Diagnostics:
dffit,
dffits,
dfbeta,
dfbetas,
hatmatrix,
studentizedResiduals,
adjustedResiduals,
jacknifedS,
cooks,
cooksoutliers,
mahalanobisSquaredMatrix,
covratio,
hadimeasure,
diagnose
dffit,
dffits,
dfbeta,
dfbetas,
hatmatrix,
studentizedResiduals,
adjustedResiduals,
jacknifedS,
cooks,
cooksoutliers,
mahalanobisSquaredMatrix,
covratio,
hadimeasure,
diagnose


# Hadi & Simonoff (1993) algorithm
Expand Down Expand Up @@ -205,6 +205,10 @@ import .CM97: cm97
include("theilsen.jl")
import .TheilSen: theilsen

# Deepest Regression Estimator
include("deepestregression.jl")
import .DeepestRegression: deepestregression

# All-in-one
include("summary.jl")
import .Summary: detectOutliers
Expand Down Expand Up @@ -267,6 +271,7 @@ export atkinson94, atkinsonstalactiteplot, generate_stalactite_plot
export bacon
export cm97
export theilsen
export deepestregression


# Snoop-Precompile
Expand Down
66 changes: 66 additions & 0 deletions src/deepestregression.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
module DeepestRegression

export deepestregression

import ..Basis:
RegressionSetting, @extractRegressionSetting, designMatrix, responseVector

using mrfDepth_jll: mrfDepth_jll


"""
deepestregression(setting; maxit = 1000)
Estimate Deepest Regression paramaters.
# Arguments
- `setting::RegressionSetting`: RegressionSetting object with a formula and dataset.
- `maxit`: Maximum number of iterations
# Description
Estimates Deepest Regression Estimator coefficients.
# References
Van Aelst S., Rousseeuw P.J., Hubert M., Struyf A. (2002). The
deepest regression method. Journal of Multivariate Analysis,
81, 138-166.
# Output
- `betas`: Vector of regression coefficients estimated.
"""
function deepestregression(setting::RegressionSetting; maxit::Int = 10000)
X = designMatrix(setting)
y = responseVector(setting)
if all(x -> isone(x), X[:, 1])
X = X[:, 2:end]
end
return deepestregression(X, y, maxit = maxit)
end

function deepestregression(X::Matrix{Float64}, y::Vector{Float64}; maxit::Int = 10000)::Vector{Float64}
drdata = hcat(X, y)
n, p = size(drdata)
n = Int32(n)
p = Int32(p)
betas = zeros(Float64, p)
maxit = Int32(maxit)
iter = Int32(1)
MDEPAPPR = Int32(p)
ccall((:sweepmedres_, mrfDepth_jll.libmrfDepth),
Cint,
(Ref{Float64}, # X
Ref{Int32}, # n
Ref{Int32}, # np
Ref{Float64}, # betas
Ref{Cint}, # maxit
Ref{Cint}, # iter
Ref{Cint}, # MDEPAPPR
), drdata, n, p, betas, maxit, iter, MDEPAPPR)

return vcat(betas[end], betas[1:(end-1)])
end


end # end of module DeepestRegression
1 change: 1 addition & 0 deletions src/precompile/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,6 @@ using PrecompileTools
smr98(reg)
ransac(reg, t = 0.8, w = 0.85)
theilsen(reg, 2, nsamples = 10)
deepestregression(reg)
end
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,4 @@ include("testbacon2000.jl")
include("testdataimage.jl")
include("testtheilsen.jl")
include("testsummary.jl")
include("testdeepestregression.jl")
39 changes: 39 additions & 0 deletions test/testdeepestregression.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import LinRegOutliers: DataSets

@testset "Deepest Regression" begin

@testset "Simple Data" begin
eps = 0.1

n = 100000
x1 = rand(n)
x2 = rand(n)
o = ones(Float64, n)
e = randn(n)
y = 15 .+ 10 .* x1 + 5 .* x2 + e
X = hcat(x1, x2)

result = deepestregression(X, y)

@test isapprox(result[1], 15, atol = eps)
@test isapprox(result[2], 10, atol = eps)
@test isapprox(result[3], 5, atol = eps)
end

@testset "Stackloss Data Example" begin

eps = 0.001

setting = createRegressionSetting(
@formula(stackloss ~ airflow + watertemp + acidcond),
DataSets.stackloss)

result = deepestregression(setting)

@test isapprox(result[1], -35.37610619, atol = eps)
@test isapprox(result[2], 0.82522124, atol = eps)
@test isapprox(result[3], 0.44247788, atol = eps)
@test isapprox(result[4], -0.07964602, atol = eps)

end
end

2 comments on commit 313a007

@jbytecode
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/90164

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.11.0 -m "<description of version>" 313a0070733efdc7e89bd6e48698c076dbea6e48
git push origin v0.11.0

Please sign in to comment.