# PRESS Residuals

In [1]:
library(tidyverse)

── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ purrr   0.2.4
✔ tibble  1.4.2     ✔ dplyr   0.7.4
✔ tidyr   0.7.2     ✔ stringr 1.2.0
✔ readr   1.1.1     ✔ forcats 0.2.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


# Import data

In [24]:
# Example data entered row-wise with tribble(): 37 rows, numeric columns M, W
# and H. H is the response in the lm(H ~ M + W) fits used in this notebook.
# NOTE(review): the last row has H = 3936 while its trailing comment records
# 2936, the value used in the "Second Example" copy of this table; presumably
# 3936 is a deliberately altered value for the first example -- confirm.
dat_HMW <- tribble(
~M, ~W, ~H,
76.2, 156.8, 3398, 
64.8, 137.5, 3020, 
71.3, 114.1, 2988, 
60.2, 129.7, 2812,
69.6, 142.6, 3048, 
72.4,  97.1, 2962, 
58.0, 142.6, 2781, 
68.9, 129.4, 3236,
74.6, 142.6, 2912, 
70.1, 129.4, 3214, 
68.9, 128.3, 3135, 
70.8, 161.7, 3389,
69.1, 142.6, 3261, 
66.5, 129.4, 2908, 
62.1, 156.8, 3030, 
66.7, 137.5, 3063,
68.7, 128.3, 3139, 
71.2, 129.4, 2956, 
65.4, 142.6, 2996, 
72.4, 145.6, 3023,
70.4, 128.3, 3248, 
69.3, 129.4, 3001, 
69.1, 142.6, 3117, 
67.4, 145.6, 2841,
63.7, 142.6, 2891, 
69.6, 161.7, 3117, 
62.1, 114.1, 2667, 
66.2, 121.3, 2733,
73.5, 142.6, 3403, 
74.5, 121.3, 2808, 
61.3, 129.4, 2999, 
67.7,  97.1, 2813,
70.1, 137.5, 3318, 
57.5, 97.1, 2615, 
79.8, 121.3, 2989, 
70.4, 113.2, 2814,
61.3, 129.4, 3936) # 2936

In [3]:
# Preview the first rows of the data to check that it was entered as intended
head(dat_HMW)

M,W,H
76.2,156.8,3398
64.8,137.5,3020
71.3,114.1,2988
60.2,129.7,2812
69.6,142.6,3048
72.4,97.1,2962


In [4]:
# Regress H on M and W using every row of dat_HMW; evaluating the bare `fit`
# displays the call and the estimated coefficients.
fit <- lm(H ~ M + W, data = dat_HMW)
fit


Call:
lm(formula = H ~ M + W, data = dat_HMW)

Coefficients:
(Intercept)            M            W  
   1536.510       10.141        6.156  


In [5]:
# Build the hat matrix H = X (X'X)^{-1} X' by hand from the design matrix of
# `fit` (note: this `H` is the hat matrix, not the data column H).
X <- model.matrix(fit)
H <- X %*% solve(t(X) %*% X) %*% t(X)

# Print the diagonal of H (the leverages h_ii) and, below the separator, the
# leverages reported by lm.influence() so the two can be compared by eye.
print(diag(H))
cat("-------------------\n")
print(lm.influence(fit)$hat)

         1          2          3          4          5          6          7 
0.15046380 0.04370126 0.07789403 0.09779817 0.03952969 0.19082498 0.16164825 
         8          9         10         11         12         13         14 
0.02875958 0.08056499 0.03264182 0.02964910 0.12158147 0.03852977 0.03074610 
        15         16         17         18         19         20         21 
0.14223376 0.03272021 0.02930312 0.03905585 0.04866858 0.06221103 0.03512185 
        22         23         24         25         26         27         28 
0.02969251 0.03852977 0.04702368 0.06368738 0.11843648 0.09671209 0.04304126 
        29         30         31         32         33         34         35 
0.06669559 0.09038515 0.07965194 0.15947117 0.03340704 0.26294919 0.20196525 
        36         37 
0.07505215 0.07965194 
-------------------
         1          2          3          4          5          6          7 
0.15046380 0.04370126 0.07789403 0.09779817 0.03952969 0.19082498 0.16164825

# PRESS residual (deleted residual)

Delete-one model (the fit obtained after removing a single observation)

In [6]:
#' Hat (projection) matrix of a fitted linear model
#'
#' Computes H = X (X'X)^{-1} X', where X is the design matrix returned by
#' `model.matrix()` for the fitted model.
#'
#' @param linear_model A fitted model object accepted by `model.matrix()`
#'   (e.g. the result of `lm()`).
#' @return A square numeric matrix with one row and one column per row of the
#'   design matrix; its diagonal holds the leverages h_ii.
get_hat_matrix <- function(linear_model){
    X <- model.matrix(linear_model)
    # Solve (X'X) B = X' for B instead of forming the explicit inverse of X'X;
    # this gives the same matrix product with better numerical behaviour.
    X %*% solve(crossprod(X), t(X))
}

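Calculate the PRESS residual from its definition: $e_{(i)} = y_i - x_i^\top \hat{\beta}_{(i)}$, where $\hat{\beta}_{(i)}$ is estimated with observation $i$ deleted.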

In [8]:
# fit1 uses every row; fit2 is the same model refitted with row 1 left out.
dat <- dat_HMW
fit1 <- lm(H ~ M + W, data = dat)
fit2 <- lm(H ~ M + W, data = dat[-1,])

# Coefficients of the leave-one-out fit (row 1 excluded)
lm_beta <- fit2$coefficients
print(lm_beta)

(Intercept)           M           W 
1659.266464    8.942384    5.816461 


In [9]:
# Extract row 1 as plain numerics: y is the observed H, x the predictors M, W.
y  <- dat[1, "H"]         %>% as.numeric
x  <- dat[1, c("M", "W")] %>% as.numeric
# Prepend 1 so x lines up with the (Intercept, M, W) coefficient vector.
x  <- c(1, x) 
print(y)
print(x)

[1] 3398
[1]   1.0  76.2 156.8


In [10]:
# Prediction for row 1 from the leave-one-out coefficients: t(lm_beta) %*% x
crossprod(lm_beta, x)

0
3252.697


In [11]:
# PRESS (deleted) residual for row 1 by definition: observed H minus the
# prediction from the model that was fitted without row 1.
y - crossprod(lm_beta, x)

0
145.3028


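Calculate the PRESS residual with the shortcut formula $e_{(i)} = \frac{e_{i}}{1 - h_{ii}}$, where $e_{i}$ is the ordinary residual and $h_{ii}$ the leverage, both taken from the full-model fit.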

In [12]:
# Obtain the deleted residual for row 1 from the full-model fit alone:
# e is the ordinary residual e_1, h is the leverage h_11, and e / (1 - h) is
# the PRESS residual -- no refit without row 1 is needed.
e <- fit1$residuals[1]
h <- lm.influence(fit1)$hat[1]
e / (1 - h)

# Compare with Slides (First Example)

In [None]:
# Re-defines dat_HMW with the first-example values (37 rows; columns M, W, H).
# NOTE(review): the last row has H = 3936 while its trailing comment records
# 2936, the value used in the "Second Example" copy of this table; presumably
# 3936 is a deliberately altered value for this example -- confirm.
dat_HMW <- tribble(
~M, ~W, ~H,
76.2, 156.8, 3398, 
64.8, 137.5, 3020, 
71.3, 114.1, 2988, 
60.2, 129.7, 2812,
69.6, 142.6, 3048, 
72.4,  97.1, 2962, 
58.0, 142.6, 2781, 
68.9, 129.4, 3236,
74.6, 142.6, 2912, 
70.1, 129.4, 3214, 
68.9, 128.3, 3135, 
70.8, 161.7, 3389,
69.1, 142.6, 3261, 
66.5, 129.4, 2908, 
62.1, 156.8, 3030, 
66.7, 137.5, 3063,
68.7, 128.3, 3139, 
71.2, 129.4, 2956, 
65.4, 142.6, 2996, 
72.4, 145.6, 3023,
70.4, 128.3, 3248, 
69.3, 129.4, 3001, 
69.1, 142.6, 3117, 
67.4, 145.6, 2841,
63.7, 142.6, 2891, 
69.6, 161.7, 3117, 
62.1, 114.1, 2667, 
66.2, 121.3, 2733,
73.5, 142.6, 3403, 
74.5, 121.3, 2808, 
61.3, 129.4, 2999, 
67.7,  97.1, 2813,
70.1, 137.5, 3318, 
57.5, 97.1, 2615, 
79.8, 121.3, 2989, 
70.4, 113.2, 2814,
61.3, 129.4, 3936) # 2936

("/media/clint/Data/GitRepo/Statistics/BIOS705/Fig/lec03_diagnosis_data_one.bmp")

![fig](lec03_diagnosis_data_one_v2.bmp)

In [27]:
# First example: fit H ~ M + W, then report MSE, PRESS and the per-row PRESS
# residuals next to the data.
dat <- dat_HMW
fit <- lm(H ~ M + W, data = dat)
# MSE = residual sum of squares divided by the residual degrees of freedom
mse <- 1 / fit$df.residual * sum(fit$residuals^2)

# PRESS residuals via e_i / (1 - h_ii); PRESS is their sum of squares
e <- fit$residuals
h <- lm.influence(fit)$hat
press_residuals <- e / (1-h)
press <- sum(press_residuals^2)

# Start again from the untouched data and append the PRESS residuals as a column
dat <- dat_HMW
dat <- cbind(dat, press_residuals)
cat("PRESS: ", press, "\n")
cat("MSE:   ", mse, "\n")
print(dat)

PRESS:  2096946 
MSE:    52297.7 
      M     W    H press_residuals
1  76.2 156.8 3398      145.302794
2  64.8 137.5 3020      -21.056659
3  71.3 114.1 2988       28.202900
4  60.2 129.7 2812     -147.936843
5  69.6 142.6 3048      -75.182008
6  72.4  97.1 2962      115.560717
7  58.0 142.6 2781     -264.299200
8  68.9 129.4 3236      210.197410
9  74.6 142.6 2912     -281.601813
10 70.1 129.4 3214      175.718930
11 68.9 128.3 3135      113.282930
12 70.8 161.7 3389      158.278488
13 69.1 142.6 3261      151.705541
14 66.5 129.4 2908     -102.666217
15 62.1 156.8 3030     -118.416164
16 66.7 137.5 3063        3.717478
17 68.7 128.3 3139      119.452709
18 71.2 129.4 2956     -103.202501
19 65.4 142.6 2996      -85.793787
20 72.4 145.6 3023     -153.631166
21 70.4 128.3 3248      215.273658
22 69.3 129.4 3001      -35.972245
23 69.1 142.6 3117        1.934913
24 67.4 145.6 2841     -288.956953
25 63.7 142.6 2891     -180.899838
26 69.6 161.7 3117     -137.024916
27 62.1 114.1 2667   

# Compare with Slides (Second Example)

In [17]:
# Re-defines dat_HMW for the second example (37 rows; columns M, W, H).
# Here the last row has H = 2936, whereas the first-example copies of this
# table use 3936 in that position.
dat_HMW <- tribble(
~M, ~W, ~H,
76.2, 156.8, 3398, 
64.8, 137.5, 3020, 
71.3, 114.1, 2988, 
60.2, 129.7, 2812,
69.6, 142.6, 3048, 
72.4,  97.1, 2962, 
58.0, 142.6, 2781, 
68.9, 129.4, 3236,
74.6, 142.6, 2912, 
70.1, 129.4, 3214, 
68.9, 128.3, 3135, 
70.8, 161.7, 3389,
69.1, 142.6, 3261, 
66.5, 129.4, 2908, 
62.1, 156.8, 3030, 
66.7, 137.5, 3063,
68.7, 128.3, 3139, 
71.2, 129.4, 2956, 
65.4, 142.6, 2996, 
72.4, 145.6, 3023,
70.4, 128.3, 3248, 
69.3, 129.4, 3001, 
69.1, 142.6, 3117, 
67.4, 145.6, 2841,
63.7, 142.6, 2891, 
69.6, 161.7, 3117, 
62.1, 114.1, 2667, 
66.2, 121.3, 2733,
73.5, 142.6, 3403, 
74.5, 121.3, 2808, 
61.3, 129.4, 2999, 
67.7,  97.1, 2813,
70.1, 137.5, 3318, 
57.5, 97.1, 2615, 
79.8, 121.3, 2989, 
70.4, 113.2, 2814,
61.3, 129.4, 2936)

![Fig](lec03_diagnosis_data_two.bmp)

In [23]:
#' PRESS residuals and PRESS statistic of a fitted linear model
#'
#' Divides each ordinary residual by one minus its leverage to obtain the
#' PRESS (deleted) residuals, and sums their squares to obtain PRESS.
#'
#' @param linear_model A fitted model with a `residuals` component that
#'   `lm.influence()` accepts (e.g. the result of `lm()`).
#' @return A list with two elements: `press_residuals`, the vector
#'   e_i / (1 - h_ii), and `press`, the sum of the squared PRESS residuals.
get_press <- function(linear_model){
    leverage <- lm.influence(linear_model)[["hat"]]
    deleted_resid <- linear_model[["residuals"]] / (1 - leverage)

    list(
        press_residuals = deleted_resid,
        press = sum(deleted_resid^2)
    )
} # end func

# Second example: fit H ~ M + W, then report MSE, PRESS and the per-row PRESS
# residuals next to the data.
dat <- dat_HMW
fit <- lm(H ~ M + W, data = dat)
# MSE = residual sum of squares divided by the residual degrees of freedom
mse <- 1 / fit$df.residual * sum(fit$residuals^2)
# NOTE: `press` holds the vector of PRESS residuals, not the PRESS statistic
press <- get_press(fit)$press_residuals

# Append the PRESS residuals as a column (named "press" after the variable)
dat <- cbind(dat, press)
cat("PRESS: ", get_press(fit)$press, "\n")
cat("MSE:   ", mse, "\n")
print(dat)

PRESS:  852966.3 
MSE:    21647.75 
      M     W    H      press
1  76.2 156.8 3398  102.30472
2  64.8 137.5 3020   33.54553
3  71.3 114.1 2988   33.21264
4  60.2 129.7 2812  -50.36861
5  69.6 142.6 3048  -59.44584
6  72.4  97.1 2962  112.72210
7  58.0 142.6 2781 -140.60281
8  68.9 129.4 3236  232.44958
9  74.6 142.6 2912 -306.69321
10 70.1 129.4 3214  188.58700
11 68.9 128.3 3135  135.65446
12 70.8 161.7 3389  163.15383
13 69.1 142.6 3261  171.39677
14 66.5 129.4 2908  -61.45871
15 62.1 156.8 3030  -35.46750
16 66.7 137.5 3063   42.69904
17 68.7 128.3 3139  143.38973
18 71.2 129.4 2956  -98.99045
19 65.4 142.6 2996  -36.19098
20 72.4 145.6 3023 -160.59523
21 70.4 128.3 3248  225.89989
22 69.3 129.4 3001  -16.84688
23 69.1 142.6 3117   21.62614
24 67.4 145.6 2841 -255.74185
25 63.7 142.6 2891 -116.63574
26 69.6 161.7 3117 -121.77161
27 62.1 114.1 2667 -140.39772
28 66.2 121.3 2733 -186.69805
29 73.5 142.6 3403  244.90509
30 74.5 121.3 2808 -276.17981
31 61.3 129.4 2999  134.59578
32 6