# set environment

In [1]:
library(tidyverse)
library(IRdisplay)

── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.0.0     ✔ purrr   0.2.5
✔ tibble  1.4.2     ✔ dplyr   0.7.6
✔ tidyr   0.8.1     ✔ stringr 1.3.1
✔ readr   1.1.1     ✔ forcats 0.3.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [2]:
source("./data/beetle.R")
beetle

dose,n,y,n_y
1.6907,59,6,53
1.7242,60,13,47
1.7552,62,18,44
1.7842,56,28,28
1.8113,63,52,11
1.8369,59,53,6
1.861,62,61,1
1.8839,60,60,0


In [5]:
fit_logit    = glm(cbind(y, n - y) ~ dose,             data = beetle, family = binomial(link = "logit"))
fit_logit_sq = glm(cbind(y, n - y) ~ dose + I(dose^2), data = beetle, family = binomial(link = "logit"))

fit_cloglog    = glm(cbind(y, n - y) ~ dose,             data = beetle, family = binomial(link = "cloglog"))
fit_cloglog_sq = glm(cbind(y, n - y) ~ dose + I(dose^2), data = beetle, family = binomial(link = "cloglog"))

mat = matrix(c(NA, NA, NA, NA), 2, 2)
rownames(mat) = c("linear", "square")
colnames(mat) = c("logit", "cloglog")

mat["linear", "logit"  ] = fit_logit$aic
mat["linear", "cloglog"] = fit_logit_sq$aic
mat["square", "logit"  ] = fit_cloglog$aic
mat["square", "cloglog"] = fit_cloglog_sq$aic

display(mat)

Unnamed: 0,logit,cloglog
linear,41.43027,35.39294
square,33.64448,35.60866


In [6]:
dG = function(p, alpha){
    log(1 - p) / ((1 - p)^alpha - 1) - alpha^{-1}
} # end func

tmp   = beetle
tmp$z = dG(fit_logit$fitted.values, 1)
display(tmp)

dose,n,y,n_y,z
1.6907,59,6,53,0.030498
1.7242,60,13,47,0.09225343
1.7552,62,18,44,0.24159057
1.7842,56,28,28,0.53584036
1.8113,63,52,11,0.9940139
1.8369,59,53,6,1.58567955
1.861,62,61,1,2.25112322
1.8839,60,60,0,2.9483049


In [10]:
fit_link = glm(cbind(y, n - y) ~ dose + z, data = tmp, family = binomial(link = "logit"))
anova(fit_link, test = "Chisq")

Unnamed: 0,Df,Deviance,Resid. Df,Resid. Dev,Pr(>Chi)
,,,7,284.202449,
dose,1.0,272.970218,6,11.232231,2.556089e-61
z,1.0,8.279424,5,2.952807,0.004009684


Here we try loglog link with the addition of square term

The loglog link is able to take care of the non-linear relationship. Therefore, the square term is not really significant

In [11]:
fit_link = glm(cbind(y, n - y) ~ dose + I(dose^2) + z, data = tmp, family = binomial(link = "logit"))
anova(fit_link, test = "Chisq")

Unnamed: 0,Df,Deviance,Resid. Df,Resid. Dev,Pr(>Chi)
,,,7,284.202449,
dose,1.0,272.9702184,6,11.232231,2.556089e-61
I(dose^2),1.0,8.0373258,5,3.194905,0.004582314
z,1.0,0.2851347,4,2.909771,0.5933553


In [12]:
fit_link = glm(cbind(y, n - y) ~ dose + z, data = tmp, family = binomial(link = "cloglog"))
anova(fit_link, test = "Chisq")

Unnamed: 0,Df,Deviance,Resid. Df,Resid. Dev,Pr(>Chi)
,,,7,284.202449,
dose,1.0,280.75601075,6,3.446439,5.13865e-63
z,1.0,0.01965545,5,3.426783,0.8885036


In [13]:
fit_link = glm(cbind(y, n - y) ~ dose + I(dose^2) + z, data = tmp, family = binomial(link = "cloglog"))
anova(fit_link, test = "Chisq")

Unnamed: 0,Df,Deviance,Resid. Df,Resid. Dev,Pr(>Chi)
,,,7,284.202449,
dose,1.0,280.7560107,6,3.446439,5.13865e-63
I(dose^2),1.0,0.0358181,5,3.410621,0.8498916
z,1.0,0.3023616,4,3.108259,0.5824057
