# DFFITS

In [1]:
library(tidyverse)

── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ purrr   0.2.4
✔ tibble  1.4.2     ✔ dplyr   0.7.4
✔ tidyr   0.7.2     ✔ stringr 1.2.0
✔ readr   1.1.1     ✔ forcats 0.2.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [2]:
# Data set 1: 37 rows with numeric columns M, W and H, entered row-wise.
# The last row's H value (3936) differs from dat_HMW_2, which has 2936 in
# the same position; every other row appears identical between the two.
dat_HMW_1 <- tribble(
~M, ~W, ~H,
76.2, 156.8, 3398, 
64.8, 137.5, 3020, 
71.3, 114.1, 2988, 
60.2, 129.7, 2812,
69.6, 142.6, 3048, 
72.4,  97.1, 2962, 
58.0, 142.6, 2781, 
68.9, 129.4, 3236,
74.6, 142.6, 2912, 
70.1, 129.4, 3214, 
68.9, 128.3, 3135, 
70.8, 161.7, 3389,
69.1, 142.6, 3261, 
66.5, 129.4, 2908, 
62.1, 156.8, 3030, 
66.7, 137.5, 3063,
68.7, 128.3, 3139, 
71.2, 129.4, 2956, 
65.4, 142.6, 2996, 
72.4, 145.6, 3023,
70.4, 128.3, 3248, 
69.3, 129.4, 3001, 
69.1, 142.6, 3117, 
67.4, 145.6, 2841,
63.7, 142.6, 2891, 
69.6, 161.7, 3117, 
62.1, 114.1, 2667, 
66.2, 121.3, 2733,
73.5, 142.6, 3403, 
74.5, 121.3, 2808, 
61.3, 129.4, 2999, 
67.7,  97.1, 2813,
70.1, 137.5, 3318, 
57.5, 97.1, 2615, 
79.8, 121.3, 2989, 
70.4, 113.2, 2814,
61.3, 129.4, 3936) # H is 2936 for this row in dat_HMW_2

# Data set 2: 37 rows with numeric columns M, W and H, entered row-wise.
# Appears to match dat_HMW_1 row for row, except that the last row's H is
# 2936 here (dat_HMW_1 has 3936 in that position).
dat_HMW_2 <- tribble(
~M, ~W, ~H,
76.2, 156.8, 3398, 
64.8, 137.5, 3020, 
71.3, 114.1, 2988, 
60.2, 129.7, 2812,
69.6, 142.6, 3048, 
72.4,  97.1, 2962, 
58.0, 142.6, 2781, 
68.9, 129.4, 3236,
74.6, 142.6, 2912, 
70.1, 129.4, 3214, 
68.9, 128.3, 3135, 
70.8, 161.7, 3389,
69.1, 142.6, 3261, 
66.5, 129.4, 2908, 
62.1, 156.8, 3030, 
66.7, 137.5, 3063,
68.7, 128.3, 3139, 
71.2, 129.4, 2956, 
65.4, 142.6, 2996, 
72.4, 145.6, 3023,
70.4, 128.3, 3248, 
69.3, 129.4, 3001, 
69.1, 142.6, 3117, 
67.4, 145.6, 2841,
63.7, 142.6, 2891, 
69.6, 161.7, 3117, 
62.1, 114.1, 2667, 
66.2, 121.3, 2733,
73.5, 142.6, 3403, 
74.5, 121.3, 2808, 
61.3, 129.4, 2999, 
67.7,  97.1, 2813,
70.1, 137.5, 3318, 
57.5, 97.1, 2615, 
79.8, 121.3, 2989, 
70.4, 113.2, 2814,
61.3, 129.4, 2936)

# DFFITS

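DFFITS $= \frac{\hat{Y}_i - \hat{Y}_{i(-i)}}{\sqrt{MSE_{-i} h_{i}}}$

Starting from this definition, one can derive the equivalent closed form

DFFITS $= r_i \big( \frac{h_i}{1 - h_i} \big)^{1/2}$, where $r_i = e_i / \sqrt{MSE_{-i} (1 - h_i)}$ is the externally studentized residual.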

**Calculate from the definition $\frac{\hat{Y}_i - \hat{Y}_{i(-i)}}{\sqrt{MSE_{-i} h_{i}}}$**

In [15]:
# Leave-one-out setup: fit H ~ M + W on every row, then again on every row
# except row i.
i <- 1

dat1 <- dat_HMW_1
fit1 <- lm(H ~ M + W, data = dat1)

dat2 <- dat1[-i, ]
fit2 <- lm(H ~ M + W, data = dat2)

In [17]:
# Design-matrix row of observation i: a leading 1 for the intercept, followed
# by its M and W values.
dat <- dat_HMW_1
x   <- c(1, as.numeric(dat[i, c("M", "W")]))

# Coefficient vectors of the full fit and of the fit without observation i.
beta1 <- coef(fit1)
beta2 <- coef(fit2)

print(x)
print(beta1)
print(beta2)

[1]   1.0  76.2 156.8
(Intercept)           M           W 
1536.509759   10.140896    6.156339 
(Intercept)           M           W 
1659.266464    8.942384    5.816461 


In [20]:
# Fitted value at x under each coefficient vector; the 1x1 cross-product
# matrix is reduced to a plain scalar by indexing its only cell.
Y_neg  <- crossprod(beta2, x)[1, 1]
Y_full <- crossprod(beta1, x)[1, 1]

print(Y_full - Y_neg)

[1] 21.86281


In [22]:
# MSE of the leave-one-out fit: residual sum of squares scaled by the
# residual degrees of freedom, both read from fit2.
mse_neg <- with(fit2, 1 / df.residual * sum(residuals^2))
print(mse_neg)

[1] 53338.95


In [24]:
# Hat value of the left-out observation in the full fit. Indexing with i
# (rather than a literal 1) keeps it in step with the row removed for fit2.
hi <- lm.influence(fit1)$hat[i]
print(hi)

        1 
0.1504638 


In [25]:
# DFFITS of observation i straight from the definition: the change in its
# fitted value, scaled by sqrt(MSE_(-i) * h_i).
(Y_full - Y_neg) / (hi * mse_neg)^0.5

**Calculate using the derived equation $r_i \big( \frac{h_i}{1 - h_i} \big)^{1/2}$**

In [63]:
# Externally studentized residual of observation i:
#   r_i = e_i / sqrt(MSE_(-i) * (1 - h_i))
e_full  <- fit1$residuals  # full component name: no reliance on `$` partial matching
e_neg   <- fit2$residuals
mse_neg <- 1 / fit2$df.residual * sum(e_neg^2)

# Index with i so these always refer to the row that was removed for fit2.
hi <- lm.influence(fit1)$hat[i]
ei <- e_full[i]

# The quantity this cell is meant to produce; the closed-form DFFITS
# expression r_i * sqrt(h_i / (1 - h_i)) needs it.
ri <- ei / (mse_neg * (1 - hi))^0.5

print(ei)
print(hi)
print(mse_neg)

     1 
123.44 
        1 
0.1504638 
[1] 53338.95


In [28]:
# DFFITS of observation i from the closed form: ri is its externally
# studentized residual and hi its hat value.
(hi / (1 - hi))^0.5 * ri

**Calculate DFFITS for all observations**

In [45]:
# Evaluate the bare name `formula` to display the object bound to it;
# get_delete_lm() below calls formula() on its str_formula argument.
formula

In [50]:
#' Mean squared error of a fitted linear model.
#'
#' @param linear_model A fitted model object exposing `residuals` and
#'   `df.residual` components (for example the result of `lm()`).
#' @return The residual sum of squares divided by the residual degrees of
#'   freedom, returned visibly as a numeric scalar.
get_mse <- function(linear_model) {
    e    <- linear_model$residuals
    df_e <- linear_model$df.residual
    # End on the value itself: ending on an assignment would return it
    # invisibly, so calling get_mse(fit) at the prompt would print nothing.
    sum(e^2) / df_e
}

#' Fit a linear model with selected rows left out.
#'
#' @param str_formula Model formula given as a string, e.g. "H ~ M + W".
#' @param dat Data frame (or tibble) holding the model variables.
#' @param idx Row index (or indices) to drop from `dat` before fitting.
#' @return The `lm` fit on `dat` without the rows in `idx`.
get_delete_lm <- function(str_formula, dat, idx) {
    lm(formula(str_formula), data = dat[-idx, ])
}

In [54]:
# Refit H ~ M + W once per observation with that observation left out, then
# collect the MSE of each leave-one-out fit.
dat <- dat_HMW_1
# seq_len() yields an empty sequence for zero rows, where 1:nrow(dat) would
# yield c(1, 0).
lst_lm <- lapply(seq_len(nrow(dat)), function(idx) {
    get_delete_lm("H ~ M + W", dat, idx)
})
# vapply() checks that every fit contributes exactly one numeric value and
# returns a plain numeric vector directly.
lst_mse <- vapply(lst_lm, get_mse, numeric(1))

In [55]:
# Residuals and hat values of the full fit, one entry per observation.
e_i <- residuals(fit1)
h_i <- hatvalues(fit1)

# The three vectors are combined element-wise below; display their lengths.
length(lst_mse)
length(h_i)
length(e_i)

In [61]:
# Spot-check the first observation: residual, hat value, leave-one-out MSE.
invisible(lapply(list(e_i[1], h_i[1], lst_mse[1]), print))

     1 
123.44 
        1 
0.1504638 
[1] 53338.95


In [64]:
# Externally studentized residuals for all observations: each residual is
# scaled by sqrt of (its own leave-one-out MSE) * (1 - its hat value).
r_i <- e_i / ((1 - h_i) * lst_mse)^0.5
print(r_i)

           1            2            3            4            5            6 
 0.579886056 -0.088718519  0.116694398 -0.608736315 -0.317904101  0.449191331 
           7            8            9           10           11           12 
-1.060125719  0.903382685 -1.187856269  0.750874078  0.482426540  0.643062346 
          13           14           15           16           17           18 
 0.644859117 -0.436690557 -0.474073372  0.015750791  0.508993419 -0.437088153 
          19           20           21           22           23           24 
-0.361205888 -0.644953129  0.922643410 -0.152704507  0.008173463 -1.243345353 
          25           26           27           28           29           30 
-0.760675366 -0.556843846 -0.926051220 -0.990285315  1.104545180 -1.059980424 
          31           32           33           34           35           36 
 0.198704776 -0.036788498  0.996711285 -0.515865909 -0.501099385 -0.600473160 
          37 
 6.867425403 


In [69]:
# DFFITS for all observations via the closed form r_i * sqrt(h_i / (1 - h_i)).
print((h_i / (1 - h_i))^0.5 * r_i)

           1            2            3            4            5            6 
 0.244043773 -0.018965507  0.033916551 -0.200420773 -0.064493486  0.218136198 
           7            8            9           10           11           12 
-0.465510773  0.155453210 -0.351622427  0.137930596  0.084328143  0.239241231 
          13           14           15           16           17           18 
 0.129090749 -0.077776799 -0.193046725  0.002896906  0.088435629 -0.088117670 
          19           20           21           22           23           24 
-0.081698345 -0.166115175  0.176029855 -0.026712876  0.001636200 -0.276190686 
          25           26           27           28           29           30 
-0.198388088 -0.204102987 -0.303013725 -0.210017850  0.295270731 -0.334131649 
          31           32           33           34           35           36 
 0.058456179 -0.016024217  0.185296230 -0.308122938 -0.252087492 -0.171047425 
          37 
 2.020300953 


# Built-in functions

In [44]:
# Cross-check against base R: print the "dffit" column of the influence
# matrix computed by influence.measures() for the full fit.
res <- influence.measures(fit1)
print(res[["infmat"]][, "dffit"])

           1            2            3            4            5            6 
 0.244043773 -0.018965507  0.033916551 -0.200420773 -0.064493486  0.218136198 
           7            8            9           10           11           12 
-0.465510773  0.155453210 -0.351622427  0.137930596  0.084328143  0.239241231 
          13           14           15           16           17           18 
 0.129090749 -0.077776799 -0.193046725  0.002896906  0.088435629 -0.088117670 
          19           20           21           22           23           24 
-0.081698345 -0.166115175  0.176029855 -0.026712876  0.001636200 -0.276190686 
          25           26           27           28           29           30 
-0.198388088 -0.204102987 -0.303013725 -0.210017850  0.295270731 -0.334131649 
          31           32           33           34           35           36 
 0.058456179 -0.016024217  0.185296230 -0.308122938 -0.252087492 -0.171047425 
          37 
 2.020300953 
