# WinVector/RcppDynProg

Fetching contributors…
Cannot retrieve contributors at this time
269 lines (217 sloc) 7.52 KB

# Linear Segmentation

John Mount 2019-01-01

In this example we fit a piecewise linear function to example data. Please see here for a discussion of the methodology.

```library("RcppDynProg")
library("ggplot2")

set.seed(2018)
g <- 100
d <- data.frame(
x = 0.05*(1:(3*g))) # ordered in x
d\$y_ideal <- sin((0.3*d\$x)^2)
d\$y_observed <- d\$y_ideal + 0.25*rnorm(length(d\$y_ideal))

plt1 <- ggplot(data= d, aes(x = x)) +
geom_line(aes(y = y_ideal), linetype=2) +
geom_point(aes(y = y_observed)) +
ylab("y") +
ggtitle("raw data",
subtitle = "dots: observed values, dashed line: unobserved true values")
print(plt1)```

```x_cuts <- solve_for_partition(d\$x, d\$y_observed, penalty = 1)
print(x_cuts)```
``````##        x       pred group  what
## 1   0.05 -0.1570880     1  left
## 2   4.65  1.1593754     1 right
## 3   4.70  1.0653666     2  left
## 4   6.95 -0.9770792     2 right
## 5   7.00 -1.2254925     3  left
## 6   9.20  0.8971391     3 right
## 7   9.25  1.3792437     4  left
## 8  11.10 -1.1542021     4 right
## 9  11.15 -1.0418353     5  left
## 10 12.50  1.1519490     5 right
## 11 12.55  1.3964906     6  left
## 12 13.75 -1.2045219     6 right
## 13 13.80 -1.3791405     7  left
## 14 15.00  1.0195679     7 right
``````
```d\$estimate <- approx(x_cuts\$x, x_cuts\$pred, xout = d\$x, method = "linear", rule = 2)\$y
d\$group <- as.character(findInterval(d\$x, x_cuts[x_cuts\$what=="left", "x"]))```
`print(sum((d\$y_observed - d\$y_ideal)^2))`
``````## [1] 20.42462
``````
`print(sum((d\$estimate - d\$y_ideal)^2))`
``````## [1] 3.536541
``````
`print(sum((d\$estimate - d\$y_observed)^2))`
``````## [1] 20.53796
``````
```plt2 <- ggplot(data= d, aes(x = x)) +
geom_line(aes(y = y_ideal), linetype=2) +
geom_point(aes(y = y_observed, color = group)) +
geom_line(aes(y = estimate, color = group)) +
ylab("y") +
ggtitle("RcppDynProg piecewise linear estimate",
subtitle = "dots: observed values, segments: observed group means, dashed line: unobserved true values") +
theme(legend.position = "none")
print(plt2)```

`use_vtreat <- requireNamespace("vtreat", quietly = TRUE)`

Here we show many of the `vtreat` custom variable coders. An application of these coders can be found here.

```spline_variable <- function(varName, x, y, w = NULL) {
if(is.null(w)) {
w <- numeric(n) + 1
}
nknots <- 100
spline <- stats::smooth.spline(x, y,
w = w,
nknots = nknots,
keep.data = FALSE,
keep.stuff = FALSE,
cv = TRUE)\$fit
estimate <- stats::predict(spline, x)\$y
return(estimate)
}

customCoders = list('c.PiecewiseV.num' = vtreat::solve_piecewise,
'n.PiecewiseV.num' = vtreat::solve_piecewise,
'c.knearest.num' = vtreat::square_window,
'n.knearest.num' = vtreat::square_window,
'c.spline.num' = spline_variable,
'n.spline.num' = spline_variable,
'c.PiecewiseLin.num' = RcppDynProg::piecewise_linear,
'n.PiecewiseLin.num' = RcppDynProg::piecewise_linear,
'c.PiecewiseC.num' = RcppDynProg::piecewise_constant,
'n.PiecewiseC.num' = RcppDynProg::piecewise_constant)
codeRestriction = c("PiecewiseV",
"knearest",
"spline",
"PiecewiseLin",
"PiecewiseC")

trt <- vtreat::designTreatmentsN(
d,
'x', 'y_observed',
customCoders = customCoders,
codeRestriction = codeRestriction,
verbose = FALSE)
knitr::kable(trt\$scoreFrame[, c("varName", "rsq", "sig")])```
varName rsq sig
x_PiecewiseV 0.8064672 0
x_knearest 0.7415207 0
x_spline 0.8486192 0
x_PiecewiseLin 0.8380624 0
x_PiecewiseC 0.6652028 0
```dt <- vtreat::prepare(trt, d)
dt\$y_observed <- NULL
d <- cbind(d, dt)```
`print(sum((d\$x_PiecewiseV - d\$y_ideal)^2))`
``````## [1] 5.336369
``````
`print(sum((d\$x_PiecewiseV - d\$y_observed)^2))`
``````## [1] 25.18626
``````
`print(sum((d\$x_knearest - d\$y_ideal)^2))`
``````## [1] 3.825642
``````
`print(sum((d\$x_knearest - d\$y_observed)^2))`
``````## [1] 22.8239
``````
`print(sum((d\$x_PiecewiseLin - d\$y_ideal)^2))`
``````## [1] 3.536541
``````
`print(sum((d\$x_PiecewiseLin - d\$y_observed)^2))`
``````## [1] 20.53796
``````
`print(sum((d\$x_spline - d\$y_ideal)^2))`
``````## [1] 1.002791
``````
`print(sum((d\$x_spline - d\$y_observed)^2))`
``````## [1] 19.3061
``````
```plt3 <- ggplot(data= d, aes(x = x)) +
geom_line(aes(y = y_ideal), linetype=2) +
geom_point(aes(y = y_observed)) +
geom_line(aes(y = x_PiecewiseV)) +
ylab("y") +
ggtitle("vtreat PiecewiseV estimate",
subtitle = "dots: observed values, segments: observed group means, dashed line: unobserved true values") +
theme(legend.position = "none")
print(plt3)```

```plt4 <- ggplot(data= d, aes(x = x)) +
geom_line(aes(y = y_ideal), linetype=2) +
geom_point(aes(y = y_observed)) +
geom_line(aes(y = x_knearest)) +
ylab("y") +
ggtitle("vtreat k-nearest estimate",
subtitle = "dots: observed values, segments: observed group means, dashed line: unobserved true values") +
theme(legend.position = "none")
print(plt4)```

```plt5 <- ggplot(data= d, aes(x = x)) +
geom_line(aes(y = y_ideal), linetype=2) +
geom_point(aes(y = y_observed)) +
geom_line(aes(y = x_PiecewiseLin)) +
ylab("y") +
ggtitle("vtreat RcppDynProg piecewise linear estimate",
subtitle = "dots: observed values, segments: observed group means, dashed line: unobserved true values") +
theme(legend.position = "none")
print(plt5)```

```plt6 <- ggplot(data= d, aes(x = x)) +
geom_line(aes(y = y_ideal), linetype=2) +
geom_point(aes(y = y_observed)) +
geom_line(aes(y = x_PiecewiseC)) +
ylab("y") +
ggtitle("vtreat RcppDynProg piecewise constant estimate",
subtitle = "dots: observed values, segments: observed group means, dashed line: unobserved true values") +
theme(legend.position = "none")
print(plt6)```

```plt7 <- ggplot(data= d, aes(x = x)) +
geom_line(aes(y = y_ideal), linetype=2) +
geom_point(aes(y = y_observed)) +
geom_line(aes(y = x_spline)) +
ylab("y") +
ggtitle("spline estimate",
subtitle = "dots: observed values, segments: observed group means, dashed line: unobserved true values") +
theme(legend.position = "none")
print(plt7)```