In [7]:
library(tidyverse)
library(generalhoslem)
library(IRdisplay)

In [32]:
source("./codesnippet_r/f_hosmerlem.R")

# import data

In [22]:
# Run the flour-beetle data script, then render the two data frames used below.
# NOTE(review): `flour` and `flour_n1` are presumably created by this script
# (they are not defined anywhere else in the notebook) - confirm.
# In the output shown, `flour` has columns cs, batch, y, n and `flour_n1` has
# columns cs, n, y with n = 1 in every displayed row.
source("./data/flour_beetle.R")
display(flour)
display(flour_n1)

cs,batch,y,n
49.06,0,2,29
49.06,1,4,30
52.99,0,7,30
52.99,1,6,30
56.91,0,9,28
56.91,1,9,34
60.84,0,14,27
60.84,1,14,29
64.76,0,23,30
64.76,1,29,33


cs,n,y
49.06,1,1
49.06,1,1
49.06,1,1
49.06,1,1
49.06,1,1
49.06,1,1
52.99,1,1
52.99,1,1
52.99,1,1
52.99,1,1


In [28]:
# Logistic regression of the response on `cs`, fitted on both data layouts:
#   flour_fit    - `flour` data; the response is the two-column matrix
#                  cbind(y, n - y).
#   flour_n1_fit - `flour_n1` data with `y` as the response; x = TRUE stores the
#                  model matrix in the fit, and the convergence tolerance is
#                  tightened to 1e-10.
# `epsilon` is written out in full: glm() forwards it to glm.control(), and the
# former `eps` only worked through partial argument matching. TRUE replaces the
# reassignable shorthand T. The fitted models are unchanged.
flour_fit    = glm(cbind(y, n - y) ~ cs, data = flour,    family = binomial(link = "logit"))
flour_n1_fit = glm(y ~ cs,               data = flour_n1, family = binomial(link = "logit"), x = TRUE, epsilon = 1e-10)

In [21]:
# Hosmer-Lemeshow test for the grouped-data fit, passing the observed
# proportions y / n as `obs` and asking for g = 10 groups.
# NOTE(review): the recorded result below is X-squared = NaN with a warning about
# expected frequencies < 1; presumably logitgof() expects 0/1 outcomes rather
# than proportions (as in the flour_n1 call further down) - confirm against the
# generalhoslem documentation.
logitgof(obs = flour$y / flour$n, exp = flour_fit$fitted.values, g = 10)

“At least one cell in the expected frequencies table is < 1. Chi-square approximation may be incorrect.”


	Hosmer and Lemeshow test (binary model)

data:  flour$y/flour$n, flour_fit$fitted.values
X-squared = NaN, df = 8, p-value = NA


In [33]:
# Same inputs as the logitgof() call above, passed to the helper sourced from
# ./codesnippet_r/f_hosmerlem.R (its implementation is not visible in this file).
f_hosmerlem(flour$y / flour$n, flour_fit$fitted.values, g = 10)

In [34]:
# Hosmer-Lemeshow test for the flour_n1 fit, asking for g = 8 groups.
# The recorded run warns that 8 rows could not be computed and reports df = 4.
logitgof(obs = flour_n1$y, 
         exp = flour_n1_fit$fitted.values, 
         g   = 8)

“Not possible to compute 8 rows. There might be too few observations.”


	Hosmer and Lemeshow test (binary model)

data:  flour_n1$y, flour_n1_fit$fitted.values
X-squared = 4.6967, df = 4, p-value = 0.3199


In [35]:
# Same observed/fitted pair and g as the logitgof() call above, passed to the
# helper sourced from ./codesnippet_r/f_hosmerlem.R (implementation not visible
# in this file).
f_hosmerlem(y    = flour_n1$y, 
            yhat = flour_n1_fit$fitted.values, 
            g    = 8)

In [15]:
# Show the fitted values of the grouped-data fit (the vector passed as `exp` to
# the goodness-of-fit calls above).
flour_fit$fitted.values

### Another approach to checking for linearity of effect for an ordinal variable

In [41]:
# Build an ordinal version of `cs` plus one indicator column per category.
#   ct      - 0-based category index obtained by cutting `cs` at its quartiles
#             (0%, 25%, 50%, 75%, 100%); include.lowest = TRUE keeps the minimum
#             value inside the first interval instead of turning it into NA.
#   c0..c3  - 1 when ct equals 0..3 respectively, 0 otherwise.
# `probs` and `TRUE` are spelled out (the originals relied on partial argument
# matching and the reassignable shorthand T), and `cs` is read from the data
# being mutated rather than from the global `flour`.
flour_ordinal = flour %>%
    mutate(
        ct = as.numeric(
            cut(
                cs,
                breaks = quantile(cs, probs = seq(0, 1, by = 0.25)),
                include.lowest = TRUE
            )
        ) - 1,
        c0 = ifelse(ct == 0, 1, 0),
        c1 = ifelse(ct == 1, 1, 0),
        c2 = ifelse(ct == 2, 1, 0),
        c3 = ifelse(ct == 3, 1, 0)
    )

In [None]:
# Show the augmented data frame (original columns plus ct and c0..c3).
flour_ordinal

In [42]:
# Three binomial fits (default link) of cbind(y, n - y) on the categorised covariate:
#   fit01 - indicator coding: c1, c2, c3 (c0 is left out, so category 0 is the baseline)
#   fit02 - linear term ct plus the indicators c2 and c3
#   fit03 - linear term ct only
flour_ordinal_fit01 = glm(cbind(y, n - y) ~ c1 + c2 + c3, data = flour_ordinal, family = binomial)
flour_ordinal_fit02 = glm(cbind(y, n - y) ~ ct + c2 + c3, data = flour_ordinal, family = binomial)
flour_ordinal_fit03 = glm(cbind(y, n - y) ~ ct,           data = flour_ordinal, family = binomial)

In [55]:
# Collect the coefficient vectors of the three fits in one named list; each name
# records the right-hand side of the corresponding model formula.
tmp = lapply(
    list(
        "c1_c2_c3" = flour_ordinal_fit01,
        "ct_c2_c3" = flour_ordinal_fit02,
        "ct"       = flour_ordinal_fit03
    ),
    coef
)

print(tmp)

$c1_c2_c3
(Intercept)          c1          c2          c3 
  -1.660731    1.212706    3.481478    6.456522 

$ct_c2_c3
(Intercept)          ct          c2          c3 
  -1.660731    1.212706    1.056065    2.818402 

$ct
(Intercept)          ct 
  -2.044451    1.895159 



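**observation**: the models `c1 + c2 + c3` and `ct + c2 + c3` have the same intercept, and the coefficient of `ct` equals the coefficient of `c1`; only the `c2` and `c3` coefficients differ between the two fits.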

In [60]:
# Subtract 2x (resp. 3x) the c1 coefficient from the c2 (resp. c3) coefficient of
# the c1 + c2 + c3 fit, for comparison with the c2 / c3 coefficients of the
# ct + c2 + c3 fit printed above.
print(tmp[["c1_c2_c3"]]["c2"] - 2 * tmp[["c1_c2_c3"]]["c1"])
print(tmp[["c1_c2_c3"]]["c3"] - 3 * tmp[["c1_c2_c3"]]["c1"])

      c2 
1.056065 
      c3 
2.818402 


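**observation**: the `c2` and `c3` coefficients of `ct + c2 + c3` equal (c2 − 2·c1) and (c3 − 3·c1) from the `c1 + c2 + c3` fit, i.e. they measure how far categories 2 and 3 depart from a linear trend in `ct`.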

In [62]:
# Analysis of deviance for ct + c2 + c3: terms are added one at a time in
# formula order (ct, then c2, then c3), each with a chi-square test.
anova(flour_ordinal_fit02, test = "Chisq")

Unnamed: 0,Df,Deviance,Resid. Df,Resid. Dev,Pr(>Chi)
,,,15,289.14129,
ct,1.0,266.2193483,14,22.92195,7.566708e-60
c2,1.0,0.1790424,13,22.7429,0.6721975
c3,1.0,6.607289,12,16.13561,0.01015622


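**observation**: after `ct`, adding `c2` barely changes the deviance (p ≈ 0.67) whereas adding `c3` reduces it significantly (p ≈ 0.010); the `c1 + c2 + c3` and `ct + c2 + c3` fits have the same residual deviance (16.13561) because they are the same model in two parametrisations, while the linear-only `ct` fit has a larger one (22.92195).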

In [66]:
# Print the residual deviance of each of the three fits, one per line.
cat("",
    deviance(flour_ordinal_fit01), "\n",
    deviance(flour_ordinal_fit02), "\n",
    deviance(flour_ordinal_fit03), "\n")

 16.13561 
 16.13561 
 22.92195 
