-
Notifications
You must be signed in to change notification settings - Fork 0
/
05_BHF_estimation.R
126 lines (97 loc) · 4.92 KB
/
05_BHF_estimation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# BHF estimation --------------------------------------------------------------------
# NOTE: this code relies to the R-file "7 eusilc - EBLUP example.R"
# (Direct estimation and Fay-Herriot)
# Imputation of unit level variables ------------------------------------------------
md.pattern(df_rent_unit)
# df_rent_unit$heat
# df_rent_unit$eqpinsul
# df_rent_unit$eqpfhea
# df_rent_unit$electr
# df_rent_unit$util
# subset data
df_rent_unit_BHF <- subset(df_rent_unit, select = c( "bezirk", "livingdur", "rent_unweighted",
"size", "room", "hh_personen", "hh_kinder",
"hh_inc","heat", "eqpinsul","eqpnobar" ,
"eqppark", "eqpmglass", "eqpnrj", "bgsampreg",
"eqplif","eqpair" ,"eqpsol" ,"eqpalm" ,
"eqpgar" ,"eqpter", "eqpfhea", "electr"))
# imputation of NAs
df_rent_unit_BHF_imp_mice <- mice(df_rent_unit_BHF, m=1, maxit=50, meth='pmm', seed=500)
df_rent_unit_BHF_imp <- complete(df_rent_unit_BHF_imp_mice, 1)
# check imputation
md.pattern(df_rent_unit_BHF_imp)
# Selection of area level variables -------------------------------------------------
# NOTE: we take the same Covariates as for FH estimation
Xmean_rent <- subset(df_rent_area, select = c("bezirk", "shareForeigners", "shareElderly", #"shareFamChildrHH",
"quoted_net_rent_EUR_per_m2", "shareFamHighHIncHH"))
Popsize_rent <- bezirk_size[,2:3]
# Estimation of BHF -----------------------------------------------------------------
# NOTE: REML: Verzerrung der gesch?tzten Varianzkomponenten verringert (vgl. Harville (1977))
fit_rent_BHF <- eblupBHF(formula = as.numeric(rent_unweighted) ~ size + room +
hh_kinder + hh_inc + heat + livingdur,
dom = bezirk, data = df_rent_unit_BHF_imp,
meanxpop = Xmean_rent, popnsize = Popsize_rent, method = "REML")
fit_rent_BHF
fit_rent_BHF$eblup$eblup
fit_rent_BHF_mse <- pbmseBHF(formula = as.numeric(rent_unweighted) ~ size + room +
hh_kinder + hh_inc + heat + livingdur,
dom = bezirk, data = df_rent_unit_BHF_imp,
meanxpop = Xmean_rent, popnsize = Popsize_rent, method = "REML")
# NOTE: Before bringing together EBLUP + direct, reorder results of fit_rent_EBLUP according
# to order of bezirk_name in fit_rent_direct
# sqrt MSE
fit_rent_BHF_sqrtmse <- sqrt(fit_rent_BHF_mse$mse$mse)
# cv
fit_rent_BHF_cv <- 100*sqrt(fit_rent_BHF_mse$mse$mse)/fit_rent_BHF$eblup$eblup
# putting above BHF results together
df_rent_results_BHF <- data.frame(
bezirk = fit_rent_BHF$eblup$domain,
sampsize = fit_rent_BHF$eblup$sampsize,
eblup_BHF = fit_rent_BHF$eblup$eblup,
mse_BHF = fit_rent_BHF_mse$mse$mse,
sqrtmse_BHF = fit_rent_BHF_sqrtmse,
cv_BHF = fit_rent_BHF_cv)
# merge bezirk_name
df_rent_results_BHF <- plyr::join(df_rent_results_BHF, bezirk_size[,1:2],
by = 'bezirk', type = 'left', match = 'all')
df_rent_results_BHF
# bring all results together (Direct, FH, BHF)
df_rent_results <- plyr::join(df_rent_results, df_rent_results_BHF,
by = 'bezirk', type = 'left', match = 'all')
df_rent_results[,11:12] <- NULL # redundant
df_rent_results
## goodness of fit
# AIC
k = (length(fit_rent_BHF$fit$fixed) - 1)
lnL = fit_rent_BHF$fit$loglike
AIC = 2*k - 2*lnL
AIC
# BIC
ln_n = log(dim(df_rent_unit)[1])
BIC = ln_n*k - 2*lnL
BIC
# absolute bias
BHF_goodnessfit_help <- data.frame(df_rent_unit$rent_unweighted, df_rent_unit$bezirk)
colnames(BHF_goodnessfit_help) <- c("rent_unweighted", "bezirk")
BHF_goodnessfit <- plyr::join(BHF_goodnessfit_help, df_rent_results,
by = 'bezirk', type = 'left', match = 'all')
names(BHF_goodnessfit)
# absolute bias
mean(Metrics::ae(BHF_goodnessfit$rent_unweighted, BHF_goodnessfit$eblup_BHF))
mean(BHF_goodnessfit$eblup_BHF - BHF_goodnessfit$rent_unweighted)
# relative bias
mean(((BHF_goodnessfit$eblup_BHF - BHF_goodnessfit$rent_unweighted) / BHF_goodnessfit$rent_unweighted) * 100)
# root mean squared error
mean(Metrics::rmse(BHF_goodnessfit$rent_unweighted, BHF_goodnessfit$eblup_BHF))
# relative root mean squared error
(sqrt(mean((BHF_goodnessfit$eblup_BHF - BHF_goodnessfit$rent_unweighted)^2)) * 100) / sum(BHF_goodnessfit$rent_unweighted)
# random effects BHF eblub
fit_rent_BHF_ref <- fit_rent_BHF$fit$random
fit_rent_BHF_residuals <- fit_rent_BHF$fit$residuals
# clean up
rm(list = c("fit_rent_BHF_cv", "fit_rent_BHF_sqrtmse",
"fit_rent_BHF_mse", "df_rent_unit_BHF_imp_mice",
"df_rent_results_BHF", "df_rent_unit_BHF", "curWarnings",
"BHF_goodnessfit_help", "BHF_goodnessfit"))
print("-- BHF estimation done --")
###