diff --git a/.Rbuildignore b/.Rbuildignore index 67a4ffb..031e872 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,7 +4,10 @@ ^.*\.yml$ SOP_release.txt buildCRAN +checks.R copy_Rout_to_Routsave.R vignettes/gamboostLSS_Tutorial.tex vignettes/auto vignettes/graphics +vignettes/fig-crossvalidation.pdf +vignettes/gamboostLSS_Tutorial_CRAN.Rnw \ No newline at end of file diff --git a/.RbuildignoreCRAN b/.RbuildignoreCRAN index 0d34a40..084a6ee 100644 --- a/.RbuildignoreCRAN +++ b/.RbuildignoreCRAN @@ -4,9 +4,12 @@ ^.*\.yml$ SOP_release.txt buildCRAN +checks.R +copy_Rout_to_Routsave.R tests/.*\.Rout\.save$ tests/Examples vignettes/gamboostLSS_Tutorial.tex vignettes/auto vignettes/graphics vignettes/.*\.Rout\.save$ +vignettes/cvrisk diff --git a/ChangeLog b/ChangeLog index a088602..e9e4821 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,1152 @@ -commit 73b484b0b606df5cdaaaef2cf2287b3a8f2f0521 +commit cd7944efd0b9660cc273d0d120891b9d88b216b0 +Author: Benjamin Hofner + + Release gamboostLSS 2.0-1 + +commit 1cb93d2b89db47090470741e251fbb5766ae59f3 +Author: Benjamin Hofner + + Update .Rout.save + +commit a462d46caca91fe1869c8c6d82c379df469647f2 +Author: Benjamin Hofner + + Updated Thomas et al to final reference. Closes #47 once more. 
+ +commit 00145b64e3df3ca2ca59ecdad2fbf739c7040e13 +Author: Benjamin Hofner + + fix merge conflict + +commit 6af980e94b1d594f9a3a26bff13fc38e54aac742 +Merge: 09af4f0 13e9b2d +Author: Benjamin Hofner + + Merge branch 'master' of https://github.com/boost-R/gamboostLSS + +commit 09af4f04ebf66aa66b2712c1f9838aca10af99be +Author: Benjamin Hofner + + export predict.mboostLSS + +commit 13e9b2d094e952382e52f97613ad5b5ce94604f0 +Author: Benjamin Hofner + + fix citation + +commit 780e8abaf1970eaf3c3aaf6576fce55b49e32680 +Author: Benjamin Hofner + + README + +commit 6d385df8f5d82ab0834e79792b986a1e8a75fd78 +Author: Benjamin Hofner + + Update reference for non-cyclic boosting (closes #47) + +commit 70a30c47e9c6e6112a1b31cd5f871a9356c7541c +Author: Benjamin Hofner + + Update DESCRIPTION + +commit 3449299f6b2c848d1ceb854dfd46cccbbe59fcd5 +Author: Benjamin Hofner + + Updated further manuals regarding various classes of results + +commit c962e3b4766f43e562fa03a2e94cd95ecb4994a4 +Author: Benjamin Hofner + + updated cvrisk manual (closes #46) + +commit 7f0b37cdb917d558f97595f6effd8a22839a86ad +Author: Benjamin Hofner + + streamlined interfaces of cvrisk + +commit 8750ae0652bf71fe21ade698de01fe13404347e1 +Author: Benjamin Hofner + + export various plot functions and cvrisk (see #46) + +commit 95b48b944ab9c139abc5520a889f66d2a49d9307 +Author: Benjamin Hofner + + Release gamboostLSS 2.0-0 + +commit d903d34da5ed1cbef1be1914d3f1e4628ed0baba +Author: Benjamin Hofner + + Updated release SOPs to reflect new repository structure + +commit 54dbcd9d9664078798b455586f37477e63b13248 +Author: Benjamin Hofner + + Updated check results + +commit 8b7001081b225aca47563683d32a3685c6c55987 +Author: Benjamin Hofner + + Also update inst/CITATION + +commit 8c0d47564d2a477105f3287251c978fea54ac678 +Author: Benjamin Hofner + + Updated maintainer's email address + +commit b259544ad6accd0874e31c42b288d0380179ef25 +Author: Benjamin Hofner + + Updated reference to Thomas et al (2017). 
Closes #45 + +commit 2de0e6c9944fd24c54fd365216f91e5508a9ff8c +Merge: 2599aed 8333592 +Author: Benjamin Hofner + + Merge pull request #38 from sbrockhaus/master + +commit 833359289f9b9946353caf3b3f06faa08ba43582 +Merge: ef96def 2599aed +Author: sbrockhaus + + Merge remote-tracking branch 'upstream/master' + +commit ef96defb1708701433c62ce39ddab4fdfaa5cebd +Author: sbrockhaus + + report in NEWS that mboostLSS_fit() is adapted for FDboostLSS() + +commit 2599aed3de6884b9fc2cca45879477ebf5b5ca1f +Author: Benjamin Hofner + + Updated SOPs for CRAN upload regarding vignette + +commit 658599c7add6cfefb24bee0653ae6dfc1cccef25 +Merge: 88449e3 36aee6e +Author: Benjamin Hofner + + Merge pull request #44 from boost-R/fix_stabsel_plot + +commit 95c79f2d97ec02ed8c88adfadabcf37b56f525ea +Author: sbrockhaus + + use fitted.mboost() as fitted.FDboost() returns matrix + +commit 36aee6ed2de9cef84f9d79c83429764fadf542e5 +Author: ja-thomas + + Update mboostLSS.R + +commit ecc63ed3afd5ce7e022d533a81ecacea1172f7c6 +Author: ja-thomas + + Update regtest-noncyclic_fitting.R + +commit 71ad6e9dbcb48dbc5486655ca59021956edb90c8 +Author: Janek + + add additional test + +commit fa8bcd32b2beb40471fe15157c366531eb2e1c45 +Author: Janek + + fix stabsel plot + +commit d1dbb32bc42664bdc123dbb7e2029bc62259308f +Merge: e04a6c4 88449e3 +Author: sbrockhaus + + Merge remote-tracking branch 'upstream/master' + +commit e04a6c4a7370cfd14bd2f68321d143cdfb33383c +Author: sbrockhaus + + fix computation of offset + +commit 88449e33822f9b197147c3eee7fef07aa70890f8 +Author: Benjamin Hofner + + check results of stabsel + +commit cf6ae9c669674a3d3e3e069d26bd00a06e2c9003 +Author: Benjamin Hofner + + fix summary. closes #43. 
+ +commit 57117da7d03effb1b788d3f7f1daba7da782eb9b +Merge: b66755c 1e606c7 +Author: Benjamin Hofner + + Merge pull request #41 from boost-R/fix_selected + +commit fe83199db8aa4b4199be0e30b5f8efb0a9ffce46 +Author: sbrockhaus + + new argument allow_matrix = FALSE in check_y_family() to allow for matrix response + +commit 6243369c152d5191c85ac5c3e670f2b1f321b82e +Author: sbrockhaus + + argument timeformula into dots, check_timeformula into helpers + +commit 1e606c72e9db6776a87b17e89291e744280665ec +Author: Janek + + fix selected and adapt tests + +commit bc826eae98b81915baef6ee1341bc2838c04c088 +Merge: cac3809 b66755c +Author: sbrockhaus + + solve conflict in manual + +commit b66755c06da6a6e130eede3dae1956b8a3cce149 +Author: Benjamin Hofner + + Updated tests + +commit f78a0aad44c3a657859d807ee73fada011f1cd73 +Author: Benjamin Hofner + + More tests + +commit d7a21ad784c561d734d33f3306ba1f13e82418bc +Author: Benjamin Hofner + + Started fixing stabsel (see for open issues) + +commit ccb833f17f62354f2de873a966b8008f733e26e4 +Author: Benjamin Hofner + + Update Readme + +commit 90cede61fc295c81063f00c62167dffefcaa5c63 +Author: Benjamin Hofner + + Add reverse dependency checks + +commit 3084a39a834c628a056557c2432130c7d6f45784 +Author: Benjamin Hofner + + make sure the repository is set + +commit 1f66eba5a0673726d9b04d042d2190653513bb30 +Author: Benjamin Hofner + + Fixes typo + +commit 9c8cc8a17b703c0c286b910fcc6fc2a8825943de +Author: sbrockhaus + + give more details in manual + +commit cac3809d1d7586416d39c7c3fec0d77b48b675d2 +Author: sbrockhaus + + give more details in manual + +commit a16eb633de9b6999698b012e0bc8fe86a704f6e3 +Author: sbrockhaus + + adapt mboostLSS_fit() such that it works with FDboost to get FDboostLSS + +commit 814c60d90e56aaeec91a7d1ea110e073d5440376 +Author: Benjamin Hofner + + Updated NEWS.Rd + +commit c2db923cf84db32e6e89db644ee7ef4d684d35e8 +Author: Benjamin Hofner + + cosmetics + +commit 8ef6fb1f9de3e7d3fa848f4b185afa1629cf5c54 +Author: Benjamin 
Hofner + + package now requires mboost >= 2.8-0 + +commit e29d8d1c63b42aff4cefd948562e99eaf5fcd3bf +Author: Benjamin Hofner + + Updated readme + +commit abf76529b23d2897d7f141eff3c3e0d225c0f68d +Author: Benjamin Hofner + + risk(, merge = TRUE) did not correct for new offset risk. Fixes #37 + +commit 6ab3cc0cef24648c65414c8aea4db8cc5b4aad31 +Author: Benjamin Hofner + + cosmetics + +commit 87dc35acaa93f9ec70a8e97ae5a09014ee654caa +Author: Benjamin Hofner + + Updated references and really refer to Thomas et al in the manuals + +commit 9e4238441ce7adbedadfdb226e402b70b3df9382 +Author: Benjamin Hofner + + Updated DESCIPTION + +commit c88eaa08f07c5d1a9cdb7f287224842169bdfec9 +Author: Benjamin Hofner + + revert unwanted changes in README + +commit 3602b027644ea9e52af63efdd119a10885d19a11 +Merge: b0263d8 59566be +Author: Benjamin Hofner + + Merge pull request #36 from boost-R/devel + +commit 59566be7abd6d4004e4739774732f6afa5bc5dd8 +Author: Benjamin Hofner + + Use trusty distribution on travis + +commit af370568e8c0a7158564b5af81b8288f2080f9c2 +Author: Benjamin Hofner + + Update NEWS.Rd + +commit 880294c5978d11251b68999e9700c8a39a7a8f84 +Merge: 8dc9450 b0263d8 +Author: Benjamin Hofner + + Merge recent changes from master branch to devel + +commit 8dc9450b2288f42add3cc33b39e3284654a7637e +Author: Benjamin Hofner + + changed order of suggested packages + +commit 0ae4dc73b7ee837ce1cf41b095cc2b581f8ef60b +Author: Benjamin Hofner + + typo + +commit ce94c7b8fe65c799b34dddfc45d2662bc1832458 +Author: Benjamin Hofner + + re-indent code + +commit bb9a1da0f489f2f3be5d9bfbe9eb7fabfd15849e +Merge: 4f21eb6 6309410 +Author: Benjamin Hofner + + Merge pull request #31 from boost-R/refactor_noncyc + +commit 6309410fc4538f1a4bac3e15c673185bc9c78d06 +Author: Janek + + initialize combined_risk in subset + +commit 4c2d159f4ecfbddba851eda3e85a3ab67d42e1f0 +Author: Janek + + remove notes + +commit 86ebeb2a55e37c727d7443a192895bb313d15e87 +Author: Benjamin Hofner + + initialize combined risk + 
+commit 019666251f6e372511293f513245b72d36186769 +Author: Benjamin Hofner + + reindent lines + +commit 50ddb7b036f82b3abeb071d597407beae12634e5 +Author: Benjamin Hofner + + shorten \usage lines wider than 90 characters + +commit de4be0dec1c1e9b0fe7205ad9feb7521cb01a73b +Author: Benjamin Hofner + + make appveyor use github packages + +commit b0263d8e8d1272f6d066103be6aa195875991b5d +Author: Benjamin Hofner + + make appveyor use github packages + +commit 5b5190d7256f57a85feddd62959648f77fee271d +Author: Janek + + fix citation... again + +commit 0dd7fe31853b29db050f3fba30f275abd4d0eb51 +Author: Janek + + fix trace + +commit 4f21eb6b32d17fb91e7aa2e9df573bdd976e14f7 +Author: Benjamin Hofner + + Update DESCRIPTION + +commit b41a55627245ca5b9526bdbeafec55bb32c52a2a +Author: Janek + + fix citation + +commit f6b551c828643a16e4e18625c4e4433f3191e748 +Author: Janek + + fix subset increase from zero + +commit f76606c86576472def047946a97ad3bea50b3a08 +Merge: 7e061b4 d675d3d +Author: Janek + + mstop=0 + +commit 7e061b49534ebca65365e7c9fa977cc9bb23baed +Author: Janek + + fix risk + +commit 17c9674d59d4907014b6d304e7c1813b3331ef3b +Author: Janek + + start with mstop=0 + +commit d675d3dd2e18f2ad4fc662e10038d5a6d1e5cea3 +Author: mayrandy + + fix tutorial issue #32 also for devel + +commit 691048a4c7752faee57f8cb86741fbdceba9310a +Author: Benjamin Hofner + + Make risk work again (Fixes #32) + +commit 0218d4efcc07eec8091f74b388e3b4343a26b248 +Author: mayrandy + + make mixed type 4-parametric families work, fixes #28 + +commit 4a2d2349b7b2f3a9bfe8c6c171c482c7cc5a3876 +Author: mayrandy + + update README + +commit 371267b3da8a4896ef8bb150cac9925fd7dedd87 +Author: Janek + + stop destroying the CITATION file... 
+ +commit f22530c035f332de281315718ee172a4c7c75f91 +Author: Janek + + missing , -.- + +commit 942b6317f6463720e472b0c71531a29bb852364b +Author: Janek + + cleanup + +commit 9585836295cd2a8761a92e798ae5a0254acf845e +Author: Janek + + slight speed improvements + +commit ec4b3f356c5ea1e194edcdba3a60a56fcfc3b4e9 +Author: Janek + + Readme + +commit 993167f528f4b187b383f4eccd5650a37d5064b8 +Author: Janek + + citation + +commit 34891bda05346e525f15f6c2e6deae43ff9d2b9a +Author: Janek + + remove outer version, rename inner to noncyclical + +commit c865ee7caad15c035b4d52ddb40bb61ffd44f6c9 +Author: Janek + + refactor noncyc fitting + +commit 7f542a51fc12bd7b6ac1d2f186594f4ce7c98383 +Author: mayrandy + + Include official tutoral paper in README.md + +commit 94cd3e45fae059592f85c853f083a577cc24eaaa +Author: mayrandy + + incorporate JSS reference in vignette + +commit cb5c07f8c0f4b49ceed0610195b210e8c20fa580 +Author: mayrandy + + update ULRs using canonical form + +commit 4f2f392ea81e1e6c2e36c76b1395ad74ec769c44 +Author: mayrandy + + Update citations for JSS paper and CRAN update + +commit 0d94a467398e0964662ac7c82f2e58b64f5f1bc9 +Author: mayrandy + + Adding some tests + +commit 32f0e6d621ef5bf441ccb0abb258c762fc927ad7 +Author: mayrandy + + Include new option stabilization = "L2" which divides ngradient through mean(ngr^2). + +commit f7a32e3e25d463c20655060a6762968bdf622c6b +Author: mayrandy + + re-order arguments in rd file + +commit a981957a7f0d01f806a7ef3a0244dcdbd6cb4938 +Author: mayrandy + + Include different links in as.families(), see #25 + +commit 4619cfe889679f9b7009ab193e514499cda77050 +Author: Benjamin Hofner + + Do not use plot.cvrisk from mboost but plot.cvriskLSS(..., type = "line") + fixed some typoes etc. 
+ +commit ba403b17c316d790f7331f0ece6103ce6f1f9dd0 +Author: Benjamin Hofner + + merged changes from #18 to devel + +commit eb4e59aca44a9f98f4b3afd3b996252115068502 +Author: Benjamin Hofner + + reindent lines + +commit 4abbee41c80a53ec17a5c44d6c785ae49b3f4fb3 +Merge: 2321180 e377769 +Author: Benjamin Hofner + + Merge pull request #20 from boost-R/mstop_nc_fit + +commit e377769df5f515917b34bef16bf249f5fb6aec53 +Author: Benjamin Hofner + + use correct syntax; closes #19 + +commit 73ca5bc3c891355f9d01b21435814a9493020979 +Author: mayrandy + + Update families.R + +commit e64737dc9320d5020aabe61d3dbc6cd39ab4ee74 +Author: Janek + + change mstop definition for noncyclical fitting + +commit 23211807854e2c5ae4d96179416d792336182ca8 +Author: Benjamin Hofner + + Added test and close #12 + +commit 109daf34aba93ff1821bb29df5c480348b99d2b3 +Author: mayrandy + + Update as.families.R + +commit e4b7cd07060fa34f9fdecd415565f012b2794f47 +Merge: 84e7740 8e9083a +Author: Benjamin Hofner + + Merge branch 'devel' of https://github.com/boost-R/gamboostLSS into devel + +commit 84e7740683bf6a40da3229cd0e2334990eb36853 +Author: Benjamin Hofner + + Increased version to 1.5-0 + Changed sequence of authors + +commit b810261ca6968eba945c1a4a22a23e68fd873ba0 +Author: Benjamin Hofner + + Update README.md + +commit 8e9083a8b9d6baa8d9b97980e4a97d24067f84f3 +Author: Benjamin Hofner + + Update README.md + +commit 21b6e197ba14c011ccfcf2abc26734b7274d6dc7 +Author: Benjamin Hofner + + Added tests for cvrisk and risk + +commit ecfe9d22b818d440d1174a0015bb2cdb817ac51c +Author: Benjamin Hofner + + fixed coverage badge + +commit 82683adb416a44fca3a194836c292b22de2c6eb0 +Author: Benjamin Hofner + + Readme in devel branch supposed to show devel stats + +commit bb3dc5e99ba719f6cf2ad7dedae6e61ca9d84799 +Author: Benjamin Hofner + + Updated README.md + +commit df49b82334ae34588dd43d4084e400adecab9989 +Author: Benjamin Hofner + + Updated README.md + +commit d303e4b276f025477557ac8e15ec6c426562a581 +Author: Benjamin 
Hofner + + Updated travis-ci and appveyor + +commit 7a9baad69ca73f9d994746d8f5b9ef5e330ee431 +Author: Benjamin Hofner + + Fixed .travis.yml + Updated .Rbuildignore + +commit a82f782cdb5df7c0ca49ec8c1e38e73f9d402e80 +Author: Benjamin Hofner + + Fixed .travis.yml + Updated .Rbuildignore + +commit 62d1cfeb765f23e65a1b4b3c3d4d999799a396d2 +Author: Benjamin Hofner + + Updated travis-ci and appveyor + +commit 9f50b4377aaaf1bfb24761aa8d03bdd23bbeddf5 +Author: Benjamin Hofner + + Moved patch/ to parent directory + +commit aab749b8104d7de448217e3c0697cbc8d0835e14 +Author: Benjamin Hofner + + Moved pkg/ to parent folder + +commit 530dafd8fd814437f5ad697408b1419ddeceb22c +Author: Benjamin Hofner + + Updated NEWS.md + +commit f0f1c060e98c1bf376094089a184d4c1be380551 +Author: Benjamin Hofner + + added Janek as author + +commit 19626ead366e06b973553620525509560141b29e +Author: mayrandy + + fix error in manual + +commit 4d2455196c712f6163acd3e6672b00c8afe676e2 +Merge: 6f6aa6d 97e8f86 +Author: ja-thomas + + Merge pull request #14 from fabian-s/fix-stabsel-for-listdata + +commit 97e8f86da75512da45e02a1bf85645d90f72a69e +Author: fabian-s + + make stabsel work for list-shaped coming from FDboost: use `[[1]]$ydim` instead of `dim(attr(, "data"))` + +commit 0ef3caf74db0d92158b058b1d6e9ae98158bef26 +Author: fabian-s + + make stabsel work fpr list-shaped : use `[[1]]$ydim` instead of `dim(attr(, "data"))` + +commit 6f6aa6dacae4aa5e79250b3865b0856c6c21c6f8 +Author: janek + + help file for stabsel, closes #11 + +commit b6f80aeb4ad1b1e43306f1f09a4e79381084c556 +Author: janek + + fixed issue #10 + +commit 7fbb813a8ddd19aba2c39fe4b823ca79e40dbc9f +Author: Benjamin Hofner + + Update appveyor.yml + +commit 0f6f5e528281e943fbe4016b15df3f9565267d5d +Author: Benjamin Hofner + + Update repository to boost-R + +commit 9cab89468aca9f35ac56e542401c3512cffa155b +Author: Benjamin Hofner + + Merged gamboostLSS 1.2-1 to pkg + +commit bf344e5dcd36eb34eb312f0980838615b3d77842 +Author: Benjamin Hofner + + 
gamboostLSS 1.2-1 on its way to CRAN + +commit 1486382e5c226ab743756bbe5379a818d1c7c230 +Author: Benjamin Hofner + + Updated .Rout.save files + +commit 2c552a9a61c8befccbb28ebec102a473a7e7c0a4 +Author: Benjamin Hofner + + Cleanup + +commit 082c25cda9627b5e4074357da9f36dba4b11228c +Merge: 1464e83 d9667c6 +Author: Benjamin Hofner + + Merge branch 'master' of github.com:hofnerb/gamboostLSS + +commit 1464e83e50ff921f2b15baf4e8e69b2d87892034 +Author: Benjamin Hofner + + Merge changes from patch to pkg + +commit 11548528476620626780709bf1df8b3016ab76f1 +Author: Benjamin Hofner + + Release candidate 1.2-1 + +commit a28230488ccf390979fc3674a7a36c867341909f +Author: Benjamin Hofner + + Fixed bug in cvrisk when no families are specified. Closes #9 + +commit 8b93bdc1f8ae2131dcdaf89726fd754dd43f763a +Author: Benjamin Hofner + + Added gamboostLSS_intern to NAMESPACE and patch/... + +commit d9667c60ecbd5189b6cc57d1be161ddadc00a646 +Merge: 7daead6 bfa9727 +Author: janek + + Merge branch 'noncyclical_fitting' + +commit bfa97275fd5cc896167ceec9bbbd49a65713d6b1 +Author: janek + + replaced = with <- + +commit 7daead622a6abcca16b88549e4ced3ebf9dc8e96 +Merge: 07747d1 becee65 +Author: Benjamin Hofner + + Merge branch 'master' of github.com:hofnerb/gamboostLSS + +commit 07747d1882bebd2e2bd71c1586c44de3b3610330 +Author: Benjamin Hofner + + Added gamboostLSS_intern to make FDboost happy + +commit becee653b8a8f467e1578877356b649c69d12170 +Author: Benjamin Hofner + + Update README.md + +commit 9dbbb204883ec3b15091d66f1f9f4eaa6d20e2c8 +Author: Benjamin Hofner + + Update README.md + +commit c0c634c957291a96b7af6ada3ea684af2e6507d8 +Author: Benjamin Hofner + + Update README.md + +commit 9d7202f5503f18f45bb4531c9db8382be2aeb114 +Author: janek + + fixed missing match.arg in mboostLSS + +commit ae00aaa56dc27c8d4c7cd649d53816b2323581ae +Author: Benjamin Hofner + + AppVeyor + +commit 38634d2b5cb030f7a7fd1a178e23f81a78d57204 +Author: janek + + added selected merge, and stabsel for nc fitting + 
+commit 4c0942b343bd4c4280f5a5efc58fe3e0f71cd0ab +Author: Benjamin Hofner + + also modified test for pkg/ + +commit 3cec7e04e6bd92a1353e6068b302fb1a84cc1571 +Author: Benjamin Hofner + + Modified test as stabilized WeibullLSS cannot be fitted to the simulated data + +commit 355a0a89cb399ed3dc14a2076c629386e61c36a0 +Author: Benjamin Hofner + + Updated .Rout.save files + +commit 7b320a72eda720c858565cdb7b303053abc9564d +Author: Benjamin Hofner + + AppVeyor: try to make artefacts available + +commit 592dd9ad4d35082c1fcc997cadae9e3c6bc67625 +Author: janek + + fixed bug for outer fitting, where u was not updated + +commit 8d1798c63f136278bf9016e723f9d14fabbcb4c7 +Author: janek + + fixed wrong definition again, names are confusing... + +commit e26ccc01ba9af8ad0810c42d1b732665268afa0e +Author: janek + + fixed wrong definition of Iboost inner and outer + +commit d76bb85be5122d4010c9d17e99f231a990296e3e +Author: janek + + new tests, model reduction for inner still not correctly working + +commit df0c35bc9226081c5eb24eb1445a3a271408bd40 +Author: janek + + moved fitting function to method call, which uses different iBoost variants + +commit b9bd78cd2b8fcfbc62a8274521edb4b99569b46e +Author: janek + + fixed bug with oobrisk but no weights + +commit 50773ef27a93f1f1c706147ba594a1ad14492d26 +Author: janek + + added gammboost outer loss functionality + +commit f6db804758da48a1f7d58d9bd8e83c5d822ee5f8 +Author: janek + + outer loss fitting with bols + +commit 2149e2f293e12ba0e1241dee21dcf954e0fab376 +Author: janek + + experimental fitting with outer loss + +commit 89d0535bd08f98e64f2485b8c9bfc0b39de40084 +Author: Benjamin Hofner + + Improve output of print and summary for models (closes #7) + +commit e833780fc0a7f67dafc6150e1320959dc2b5fb1a +Author: janek + + second fix for wrong oob weights + +commit 729391a47bba44934e1301e1fd37a99a5a9a463a +Author: janek + + fixed bug, where the subset function startet with wrong nuisence update + +commit e5422ba15c16f37082b34086730dfd7427cd86e1 
+Merge: a5a9267 daa7a4a +Author: janek + + Merge branch 'master' of https://github.com/hofnerb/gamboostLSS into noncyclical_fitting + +commit a5a92676b41790ae9194f21827e5908f4ecd4b3a +Author: janek + + test for oob fitting bug + +commit 033800421678f35501edc0b479af0f1734e3e6de +Author: janek + + fixed bug where oob weights where used to fit the model + +commit 95fd89d4f1e2dee4a8aceb298de03a469004a375 +Author: janek + + added test function for nc fitting + +commit 9e9077fa57b772b015e90883594314b9b5db3ae0 +Author: janek + + fixed error in combined_risk init + +commit 0b3c4b477fc08c826d9749e2f6fddda883ff7a37 +Author: janek + + fixed error in check for minimal mstop value + +commit daa7a4a64eedc8d2688cbb4c759289e0b4e7436a +Author: Benjamin Hofner + + Update appveyor.yml + +commit 800174b8d10e0b51149d880d3da6ffd836001f2c +Author: Benjamin Hofner + + Update .travis.yml + +commit a0ac809b13e468d519aa146a0d4d0288a9caf44a +Author: Benjamin Hofner + + Update README.md + +commit a44080732e9b30d7339b925348ad9f7ba1297991 +Author: Benjamin Hofner + + Update .travis.yml + +commit 2212a38178c321c1669afb81bfccff6460846355 +Author: janek + + fixed bug that cv_risk.nc couldn't use combined_risk + +commit ade28506f863b91fc6f997ede33237f6e0693cce +Author: Benjamin Hofner + + Update README.md + +commit 3130ea497a548c0867d229e06ddb0bdc17e16209 +Merge: 68ddb5b 326e2db +Author: Benjamin Hofner + + Merge branch 'AppVeyor' of github.com:hofnerb/gamboostLSS into AppVeyor + +commit 68ddb5bd52b5425ac71faa309e2dce83ab8c5f21 +Author: Benjamin Hofner + + Added missing ) + +commit 326e2dbe971daea3c4ac1cb48198447f73de840c +Author: Benjamin Hofner + + Update appveyor.yml + +commit 0d4b24239d714448dcd189140b719473228ca032 +Author: Benjamin Hofner + + Update appveyor.yml + +commit c660e9ecc78dd06b35f43d8a049f30b4e9ba50f8 +Author: Benjamin Hofner + + Update appveyor.yml + +commit bd4082b7e481e851658cf974a7671db65c67cd03 +Author: Benjamin Hofner + + Update appveyor.yml + +commit 
cc9ec9fca122257175a5c2cd2d6a8added1b4fee +Author: janek + + fixed trace function for cycling = TRUE + +commit 09fda11bb0ddeeb7d31b55c0b036cf8c051fad46 +Author: Benjamin Hofner + + Update appveyor.yml + +commit f5616d8f4e7528a1f11c95da612312de4a22509a +Author: Benjamin Hofner + + Update appveyor.yml + +commit 58eb2230d8876ba700e68a3601d8032369b5b31c +Author: Benjamin Hofner + + Update appveyor.yml + +commit 1b7c5490565d8a9be5bd62604bb5d35647ab6844 +Author: Benjamin Hofner + + Update appveyor.yml + +commit 93421b24ab0814ca5ebf47ac3fa7ccb1acbd2db3 +Author: janek + + added documentation for new parameter for cycling/noncycling algorithm + +commit 62d0615d5254b40f8197280e60f22e862695afd9 +Author: janek + + moved cyclical fitting to mboostLSS, glmboostLSS.. etc. + +commit dee063f63209f982d815030cd1491165ac61a234 +Author: Benjamin Hofner + + Update appveyor.yml + +commit df05ae5d7e51a2dc3dde547e3a472c1f3274f137 +Author: Benjamin Hofner + + Update appveyor.yml + +commit dc70cea2481b5d3c0c1d09b9300b36b377248afe +Author: Benjamin Hofner + + Update appveyor.yml + +commit b05e951c7236129530e13028bb544f772e57adf1 +Author: Benjamin Hofner + + Update appveyor.yml + +commit 9c60e09b9097e1a692c56f48eae20b479d5e13ed +Author: Benjamin Hofner + + Update appveyor.yml + +commit d4372f9079d070b3b154aa0133ed947ef801f634 +Author: Benjamin Hofner + + Update appveyor.yml + +commit 72887f057ec34cad2e01dbe9a05f3e1a26e974ea +Author: Benjamin Hofner + + Update appveyor.yml + +commit 5955acc1fc6c2d43bf7b41218f08c73296627200 +Author: janek + + added nc functions to namespace and aliased them in rd files + +commit 48f163c24307422db45b79016b081459ca3127e7 +Author: janek + + added nc_mboostLSS as class in nc_mboostLSS_fit + +commit 9522320b2165be0bad6cd9734220cfd6e930a6e7 +Author: janek + + added nc_mboostLSS and nc_blackboostLSS + +commit 101a30530585425038f45bb47b53938306ef8609 +Author: Benjamin Hofner + + Update appveyor.yml + +commit 2fe6f316dd028998de926589b4d63d39733c7766 +Author: Benjamin 
Hofner + + Update appveyor.yml + +commit 604b4f4db5d8dbeb41d8ff74bddc7ade5d74b245 +Author: janek + + moved new alternative fitting algorithm to this branch + +commit 9132bf94510e1f902c32396be850043ecdc58f45 +Author: Benjamin Hofner + + Update appveyor.yml + +commit 6a477b30723663658579f1e168003ec050d36829 +Author: Benjamin Hofner + + Update appveyor.yml + +commit 8867fb9690a731080ab7c053182e92b0bc5f2266 +Author: Benjamin Hofner + + Update appveyor.yml + +commit a676e856ee015cb60e89f2ccef8a2d74f6d75d3f +Author: Benjamin Hofner + + Update appveyor.yml + +commit 10c6ba28345bc89e19c4d6e0d3f38c82fc24fd41 +Author: Benjamin Hofner + + Update appveyor.yml + +commit 07e6346b1b0a6fea0daedf83cae51fcb7410f5ad +Author: Benjamin Hofner + + Update appveyor.yml + +commit 7915e605413ff8d29113f058f65939a52bf66676 +Author: Benjamin Hofner + + make code consistent by always using response(fitted()); close #5 + +commit 1bfc50e434ab81775a809e5446594db056cdb95e +Author: Benjamin Hofner + + Experimental version of AppVeyor for multiple folders + +commit 4a57c766bbacd92312300ba5abba5fd9ea1e95d8 +Author: Benjamin Hofner + + Update README.md + +commit 5ea9292df17e31cc7e09dc2e63267fa1088fc787 +Author: Benjamin Hofner + + Experimental version of .travis.yml (5) + +commit 20b3f462651282df21bee9344bc3c44726ccc5e7 +Author: Benjamin Hofner + + Experimental version of .travis.yml (4) + +commit 94a32b081f2be2f8cb9fe9c62ec122d81b10e659 +Author: Benjamin Hofner + + Experimental version of .travis.yml (3) + +commit f08e24445a42cbf167941631af2c9b577e7a05db +Author: Benjamin Hofner + + Experimental version of .travis.yml (2) + +commit 8fd062a832a806454ad90a0be06320ee58d3e825 +Author: Benjamin Hofner + + Experimental version of .travis.yml + +commit e57b086f5a1a6d6b67045cce927869a521d764a2 +Author: Benjamin Hofner + + Updated .travis.yml to make vignette compilation possible + +commit 669efbbce96f282399845c23d67e64562e089097 +Author: Benjamin Hofner + + Merged latest release to pkg/ + +commit 
573a5de3782bb996b493dc41121f67d09afe20db +Author: Benjamin Hofner + + Fixed inst/CITATION + +commit 39204d00c6538d575539275130bbd77e2a2e4254 Author: Benjamin Hofner Release version 1.2-0 diff --git a/DESCRIPTION b/DESCRIPTION index a64d718..ae2e616 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,19 @@ Package: gamboostLSS Type: Package Title: Boosting Methods for 'GAMLSS' -Version: 2.0-0 -Date: 2016-xx-yy -Author: Benjamin Hofner, Andreas Mayr, Nora Fenske, Janek Thomas, Matthias Schmid -Maintainer: Benjamin Hofner +Version: 2.0-1 +Date: 2018-06-14 +Authors@R: c(person("Benjamin", "Hofner", role = c("aut", "cre"), + email = "benjamin.hofner@pei.de", + comment = c(ORCID = "0000-0003-2810-3186")), + person("Andreas", "Mayr", role = "aut"), + person("Nora", "Fenske", role = "aut"), + person("Janek", "Thomas", role = "aut"), + person("Matthias", "Schmid", role = "aut")) Description: Boosting models for fitting generalized additive models for location, shape and scale ('GAMLSS') to potentially high dimensional data. 
-Depends: R (>= 2.10.0), mboost (>= 2.3-0), stabs (>= 0.5-0), parallel +Depends: R (>= 2.10.0), mboost (>= 2.8-0), stabs (>= 0.5-0), parallel Imports: graphics, grDevices, stats, utils Suggests: gamlss, gamlss.dist, survival, BayesX, R2BayesX LazyLoad: yes diff --git a/NAMESPACE b/NAMESPACE index 50dee1b..d87c6bb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,9 +14,12 @@ export(glmboostLSS, blackboostLSS, mboostLSS, mboostLSS_fit, + plot.glmboostLSS, plot.gamboostLSS, model.weights, PI, predint, plot.predint, - make.grid, cvrisk.mboostLSS, + predict.mboostLSS, + make.grid, cvrisk.mboostLSS, cvrisk.nc_mboostLSS, + plot.cvriskLSS, plot.nc_cvriskLSS, selected, selected.mboostLSS, selected.stabsel_mboostLSS, stabsel.mboostLSS, Families, diff --git a/R/cvrisk.R b/R/cvrisk.R index f85c70d..d174281 100644 --- a/R/cvrisk.R +++ b/R/cvrisk.R @@ -98,7 +98,7 @@ make.grid <- function(max, length.out = 10, min = NULL, log = TRUE, cvrisk.mboostLSS <- function(object, folds = cv(model.weights(object)), grid = make.grid(mstop(object)), papply = mclapply, trace = TRUE, - fun = NULL, ...) { + mc.preschedule = FALSE, fun = NULL, ...) { weights <- model.weights(object) if (any(weights == 0)) @@ -191,9 +191,17 @@ cvrisk.mboostLSS <- function(object, folds = cv(model.weights(object)), OOBweights <- matrix(rep(weights, ncol(folds)), ncol = ncol(folds)) OOBweights[folds > 0] <- 0 - oobrisk <- papply(1:ncol(folds), - function(i) dummyfct(i, weights = folds[, i], - oobweights = OOBweights[, i]), ...) + if (all.equal(papply, mclapply) == TRUE) { + oobrisk <- papply(1:ncol(folds), + function(i) dummyfct(i, weights = folds[, i], + oobweights = OOBweights[, i]), + mc.preschedule = mc.preschedule, ...) + } else { + oobrisk <- papply(1:ncol(folds), + function(i) dummyfct(i, weights = folds[, i], + oobweights = OOBweights[, i]), + ...) 
+ } ## get errors if mclapply is used if (any(idx <- sapply(oobrisk, is.character))) stop(sapply(oobrisk[idx], function(x) x)) diff --git a/R/cvrisk.nc_mboostLSS.R b/R/cvrisk.nc_mboostLSS.R index 0743e34..d3c3140 100644 --- a/R/cvrisk.nc_mboostLSS.R +++ b/R/cvrisk.nc_mboostLSS.R @@ -93,12 +93,12 @@ risk.nc_mboostLSS <- function(object, merge = FALSE, } -plot.nc_cvriskLSS <- function(x, type = "lines", - xlab = "Number of boosting iterations", ylab = NULL, +plot.nc_cvriskLSS <- function(x, xlab = "Number of boosting iterations", ylab = NULL, ylim = range(x), main = attr(x, "type"), ...) { - if (type != "lines") + dots <- list(...) + if ("type" %in% names(dots) && dots$type != "lines") warning("Only ", sQuote('type = "lines"'), " supported for noncyclical fitting") plot.cvriskLSS(x = x, type = "lines", xlab = xlab, ylab = ylab, ylim = ylim, main = main, ...) diff --git a/R/helpers.R b/R/helpers.R index d7a593c..fd1114d 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -139,10 +139,25 @@ do_trace <- function(current, risk, mstart, } } -## helper function copied from mboost_2.2-3 -### check measurement scale of response for some losses -check_y_family <- function(y, family) - family@check_y(y) +## helper function copied from mboost_2.2-3; changed for gamboostLSS +## check measurement scale of response for some losses +check_y_family <- function(y, family, allow_matrix = FALSE){ + + if(is.null(dim((y)))) allow_matrix <- FALSE + + if(!allow_matrix){ + + return(family@check_y(y)) ## check response as it is + + }else{ + + tmp <- family@check_y(as.vector(y)) ## convert matrix to vector for check + y <- matrix(tmp, ncol = ncol(y), nrow = nrow(y)) ## convert back to matrix + return(y) + } + +} + ################################################################################ # sapply function that differentiates between data.frames and (numeric) vectors @@ -188,3 +203,34 @@ check_stabilization <- function(stabilization = c("none", "MAD", "L2")) { } stabilization } + + 
+################################################################################ + +# check timeformula for FDboost +# timeformula is named list with names according to distribution parameters of families +# timeformula: the formula for expansion of the effects along t for functional response y(t) +# families: specify the response distribution; see, e.g., mboostLSS_fit() +check_timeformula <- function(timeformula, families){ + + # timeformula is named list + if (is.list(timeformula)){ + if (!all(names(timeformula) %in% names(families)) || + length(unique(names(timeformula))) != length(names(families))) + stop(sQuote("timeformula"), " can be either a one-sided formula or a named list", + " of timeformulas with same names as ", sQuote("families"), ".") + } else { + # timeformula is only one formula -> set up named list + tmp <- vector("list", length = length(families)) + names(tmp) <- names(families) + for (i in 1:length(tmp)) + tmp[i] <- list(timeformula) + timeformula <- tmp + } + + timeformula + +} + + + diff --git a/R/mboostLSS.R b/R/mboostLSS.R index 3e9fd19..a3f5bfc 100644 --- a/R/mboostLSS.R +++ b/R/mboostLSS.R @@ -107,6 +107,7 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), } mstop <- mstoparg <- control$mstop + combined_risk <- NA control$mstop <- 0 if (method == "cyclic") mstop <- check(mstop, "mstop", names(families)) @@ -122,15 +123,39 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), trace <- control$trace control$trace <- FALSE + ## generate adequate model weights w <- weights + if (is.null(weights)){ - if (!is.list(response)) { - weights <- rep.int(1, NROW(response)) - } else { - weights <- rep.int(1, NROW(response[[1]])) - } + if (!is.list(response)) { + weights <- rep.int(1, NROW(response)) + # expand weights if the response is a matrix (functional response) + if(funchar == "FDboost" && !is.null(dim(response)) && !any(dim(response) == 1)) + weights <- rep.int(weights, ncol(response)) + + } 
else { + weights <- rep.int(1, NROW(response[[1]])) + # expand weights if the response is a matrix (functional response) + if(funchar == "FDboost" && !is.null(dim(response[[1]])) && !any(dim(response[[1]]) == 1)) + weights <- rep.int(weights, ncol(response[[1]])) + } } + weights <- rescale_weights(weights) + + ## set up timeformula for FDboost + if (funchar == "FDboost"){ + + # get timeformula from dots + dots <- list(...) + timeformula <- dots$timeformula + dots$timeformula <- NULL + + # deal with argument timeformula in case of FDboost() + # timeformula is named list with names according to + # distribution parameters of families + timeformula <- check_timeformula(timeformula, families) + } fit <- vector("list", length = length(families)) names(fit) <- names(families) @@ -141,11 +166,15 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), names(offset) <- names(families) for (j in mods){ if (!is.list(response)) { - response <- check_y_family(response, families[[j]]) - offset[[j]] <- families[[j]]@offset(y = response, w = weights) + response <- check_y_family(response, families[[j]], + allow_matrix = (funchar == "FDboost")) + offset[[j]] <- families[[j]]@offset(y = if(funchar != "FDboost") response else c(response), + w = weights) } else { - response[[j]] <- check_y_family(response[[j]], families[[j]]) - offset[[j]] <- families[[j]]@offset(y = response[[j]], w = weights) + response[[j]] <- check_y_family(response[[j]], families[[j]], + allow_matrix = (funchar == "FDboost")) + offset[[j]] <- families[[j]]@offset(y = if(funchar != "FDboost") response[[j]] else c(response[[j]]), + w = weights) } for (k in mods){ for (l in mods){ @@ -158,17 +187,28 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), for (j in mods){ ## update value of nuisance parameters in families for (k in mods[-j]){ - if (!is.null(fit[[k]])) - assign(names(fit)[k], fitted(fit[[k]], type = "response"), + if (!is.null(fit[[k]])) ## use 
fitted.mboost() as fitted.FDboost() returns a matrix + assign(names(fit)[k], families[[k]]@response(fitted.mboost(fit[[k]])), environment(families[[j]]@ngradient)) } ## use appropriate nu for the model control$nu <- nu[[j]] ## Do we need to recompute ngradient? - fit[[j]] <- do.call(fun, list(formula[[names(families)[[j]]]], - data = data, family = families[[j]], - control=control, weights = w, - ...)) + if(funchar != "FDboost"){ + fit[[j]] <- do.call(fun, list(formula[[names(families)[[j]]]], + data = data, family = families[[j]], + control = control, weights = w, + ...)) + }else{ + fit[[j]] <- do.call(fun, c(list(formula[[names(families)[[j]]]], + timeformula = timeformula[[names(families)[[j]]]], + data = data, family = families[[j]], + control = control, weights = w, + # always use scalar offset, as offsets are treated within the Family + offset = "scalar"), + dots)) + } + } iBoost <- function(niter, method) { @@ -182,6 +222,8 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), # this is the case for boosting from the beginning if (is.null(attr(fit, "combined_risk")) | niter == 0) { combined_risk <- vapply(fit, risk, numeric(1)) + } else { + combined_risk <- attr(fit, "combined_risk")() } best <- which(names(fit) == tail(names(combined_risk), 1)) @@ -204,7 +246,8 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), ## update value of nuisance parameters ## use response(fitted()) as this is much quicker than fitted(, type = response) for( k in mods[-best]) { - assign(names(fit)[best], families[[best]]@response(fitted(fit[[best]])), + ## use fitted.mboost() as fitted.FDboost() returns a matrix + assign(names(fit)[best], families[[best]]@response(fitted.mboost(fit[[best]])), environment(get("ngradient", environment(fit[[k]]$subset)))) } @@ -248,8 +291,8 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), for (j in mods){ ## update value of nuisance parameters ## use response(fitted()) 
as this is much quicker than fitted(, type = response) - for (k in mods[-j]) - assign(names(fit)[k], families[[k]]@response(fitted(fit[[k]])), + for (k in mods[-j]) ## use fitted.mboost() as fitted.FDboost() returns a matrix + assign(names(fit)[k], families[[k]]@response(fitted.mboost(fit[[k]])), environment(get("ngradient", environment(fit[[j]]$subset)))) ## update value of u, i.e. compute ngradient with new nuisance parameters @@ -389,7 +432,7 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), ENV <- lapply(mods, function(j) environment(fit[[j]]$subset)) for(j in names(new_stop_value)){ for( k in setdiff(names(new_stop_value), j)){ - assign(k, families[[k]]@response(fitted(fit[[k]])), + assign(k, families[[k]]@response(fitted.mboost(fit[[k]])), environment(get("ngradient", environment(fit[[j]]$subset)))) } } @@ -438,11 +481,22 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), ## re-use user specified offset only ## (since it depends on weights otherwise) ## this is achieved via a re-evaluation of the families argument - mboostLSS_fit(formula = formula, data = data, - families = eval(call[["families"]]), weights = weights, - control = control, fun = fun, funchar = funchar, - call = call, oobweights = oobweights, - method = method) + + if(funchar != "FDboost"){ + mboostLSS_fit(formula = formula, data = data, + families = eval(call[["families"]]), weights = weights, + control = control, fun = fun, funchar = funchar, + call = call, oobweights = oobweights, + method = method) + }else{ + mboostLSS_fit(formula = formula, data = data, + families = eval(call[["families"]]), weights = weights, + control = control, fun = fun, funchar = funchar, + call = call, oobweights = oobweights, + method = method, + timeformula = timeformula[[names(families)[[j]]]]) + } + } attr(fit, "control") <- control attr(fit, "call") <- call diff --git a/R/methods.R b/R/methods.R index ec52214..1be37d7 100644 --- a/R/methods.R +++ 
b/R/methods.R @@ -47,7 +47,7 @@ risk.mboostLSS <- function(object, merge = FALSE, parameter = names(object), ... RES <- sapply(parameter, get_rsk, object = object) RES <- as.vector(t(RES)) - names(RES) <- rep(names(parameter), mstop(object)[1]) + names(RES) <- rep(names(parameter), mstop(object)[1] + 1) ## drop unwanted NAs if (lo != 1) RES <- RES[!is.na(RES)] @@ -97,13 +97,17 @@ selected.mboostLSS <- function(object, merge = FALSE, parameter = names(object), #merge is different for noncyclical fitting if (merge) { if (inherits(object, "nc_mboostLSS")){ - RET <- names(attr(object, "combined_risk")()) + #get the names of parameter selected in each iteration (drop initial offset risk values) + RET <- names(attr(object, "combined_risk")())[-seq_along(parameter)] + names(RET) <- RET #set the names of the vector as we will overwrite the values. + + #overwrite names in the vector with the selected BLs in the correct order for(p in names(parameter)){ RET[RET == p] <- object[[p]]$xselect() } - RET <- as.numeric(RET) - names(RET) <- names(attr(object, "combined_risk")()) + mode(RET) = "numeric" #ensure numeric values -> as.numeric drops the names + return(RET) } else { @@ -362,16 +366,22 @@ summary.mboostLSS <- function(object, ...) 
{ } } - cat("Selection frequencies:\n") - for (i in 1:length(object)) { - cat("Parameter ", names(object)[i], ":\n", sep = "") - nm <- variable.names(object[[i]]) - selprob <- tabulate(selected(object[[i]]), nbins = length(nm)) / - length(selected(object[[i]])) - names(selprob) <- names(nm) - selprob <- sort(selprob, decreasing = TRUE) - selprob <- selprob[selprob > 0] - print(selprob) + if (!all(is_null <- sapply(selected(object), is.null))) { + cat("Selection frequencies:\n") + for (i in 1:length(object)) { + cat("Parameter ", names(object)[i], ":\n", sep = "") + if (is_null[i]){ + print(NULL) + next + } + nm <- variable.names(object[[i]]) + selprob <- tabulate(selected(object[[i]]), nbins = length(nm)) / + length(selected(object[[i]])) + names(selprob) <- names(nm) + selprob <- sort(selprob, decreasing = TRUE) + selprob <- selprob[selprob > 0] + print(selprob) + } } invisible(object) } @@ -386,13 +396,21 @@ stabsel.mboostLSS <- function(x, cutoff, q, PFER, cll <- match.call() p <- sum(sapply(x, function(obj) length(variable.names(obj)))) - n <- if(inherits(x, "FDboostLSS")) { - x[[1]]$ydim[1] + + if(inherits(x, "FDboostLSS")) { + if(is.null(x[[1]]$ydim)){ + n <- length(attr(x, "(weights)")) # scalar response + }else{ + n <- x[[1]]$ydim[1] # functional reponse + # correct the wrong default folds if necessary + if(nrow(folds) == length(model.weights(x))){ + folds <- subsample(rep(1, n), B = B) + } + } } else { - nrow(attr(x, "data")) + n <- nrow(attr(x, "data")) } - ## extract names of base-learners (and add paramter name) nms <- lapply(x, function(obj) variable.names(obj)) nms <- lapply(1:length(nms), function(i) @@ -413,13 +431,11 @@ stabsel.mboostLSS <- function(x, cutoff, q, PFER, stop(sQuote("mstop"), " has to be an integer larger than ", length(x)) } - mstop_min <- length(x) } else { if (is.null(mstop)) mstop <- mstop(x) mstop <- check(mstop, "mstop", names(x)) - mstop_min <- 1 } if (length(unique(mstop)) != 1) @@ -429,39 +445,43 @@ stabsel.mboostLSS <- 
function(x, cutoff, q, PFER, if (verbose) cat("Run stabsel ") - ## set mstop = 1 to speed things up - x <- update(x, weights = model.weights(x), mstop = mstop_min) + ## set mstop = 0 to speed things up + x <- update(x, weights = model.weights(x), mstop = 0) ## define the fitting function (args.fitfun is not used but needed for ## compatibility with run_stabsel fit_model <- function(i, folds, q, args.fitfun) { if (verbose) cat(".") - ## start by fitting 1 step in each component - mod <- update(x, weights = folds[, i], mstop = mstop_min) + ## start by setting up model on subset and fit first q iterations + mod <- update(x, weights = folds[, i], mstop = q) ## make sure dispatch works correctly class(mod) <- class(x) xs <- selected(mod) nsel <- length(mod) ## now update model until we obtain q different base-learners altogether - for (m in mstop_min:max(mstop)) { + for (m in (q+1):max(mstop)) { if (nsel >= q) break - mstop(mod) <- m xs <- selected(mod) nsel <- sum(sapply(xs, function(selection) length(unique(selection)))) - if (nsel >= q) - break } #this changes nothing for method = "cyclic" but fixes mstop for method = "noncyclic" mstop <- check(mstop, "mstop", names(x)) ## complete paths if (any(sapply(xs, length) < mstop)) { for (j in 1:length(xs)) { + +## What happens if component j was never selected, i.e. xs[[j]] = NULL? +## Can we use NA as proposed? We need to see what happens later. + if (is.null(xs[[j]])) + xs[[j]][1] <- NA start <- length(xs[[j]]) + 1 xs[[j]][start:mstop[j]] <- xs[[j]][1] +## + } } @@ -484,9 +504,13 @@ stabsel.mboostLSS <- function(x, cutoff, q, PFER, res[xs[[i]][j], j:mstop[[i]]] <- TRUE res }) - + +## What is this error message about? No user will know what you mean. Please fix coding issue and remove stop() or +## provide a relevant error message. 
if (any(mstop < max(mstop))) stop("simply add the last column to the smaller matrices") +## + ## now merge sequences for (i in 1:ncol(sequences[[1]])) { for (j in 1:length(sequences)) { @@ -496,8 +520,7 @@ stabsel.mboostLSS <- function(x, cutoff, q, PFER, sequence <- matrix(c(sequences[[i]][, j], other_params)) } else { - tmp <- unlist(lapply(sequences[1:j], function(x) x[, - i])) + tmp <- unlist(lapply(sequences[1:j], function(x) x[, i])) other_params <- rep(FALSE, sum(sapply(sequences, nrow)[-(1:j)])) tmp <- c(tmp, other_params) @@ -509,8 +532,7 @@ stabsel.mboostLSS <- function(x, cutoff, q, PFER, lapply(sequences[(j+1):length(sequences)], function(x) x[, i - 1]))) } else { - tmp <- unlist(lapply(sequences[1:j], function(x) x[, - i])) + tmp <- unlist(lapply(sequences[1:j], function(x) x[, i])) } sequence <- cbind(sequence, tmp) } diff --git a/README.md b/README.md index 367a9bb..3e868f1 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ gamboostLSS =========== -[![Build Status (Linux)](https://travis-ci.org/boost-R/gamboostLSS.svg?branch=devel)](https://travis-ci.org/boost-R/gamboostLSS) -[![Build status (Windows)](https://ci.appveyor.com/api/projects/status/373t0tvx5v1i5ooq/branch/devel?svg=true)](https://ci.appveyor.com/project/hofnerb/gamboostlss-s2whe/branch/devel) -[![CRAN Status Badge](http://www.r-pkg.org/badges/version/gamboostLSS)](http://cran.r-project.org/package=gamboostLSS) -[![Coverage Status](https://coveralls.io/repos/github/boost-R/gamboostLSS/badge.svg?branch=devel)](https://coveralls.io/github/boost-R/gamboostLSS?branch=devel) -[![](http://cranlogs.r-pkg.org/badges/gamboostLSS)](http://cran.rstudio.com/web/packages/gamboostLSS/index.html) +[![Build Status (Linux)](https://travis-ci.org/boost-R/gamboostLSS.svg?branch=master)](https://travis-ci.org/boost-R/gamboostLSS) +[![Build status 
(Windows)](https://ci.appveyor.com/api/projects/status/373t0tvx5v1i5ooq/branch/master?svg=true)](https://ci.appveyor.com/project/hofnerb/gamboostlss-s2whe/branch/master) +[![CRAN Status Badge](http://www.r-pkg.org/badges/version/gamboostLSS)](https://CRAN.R-project.org/package=gamboostLSS) +[![Coverage Status](https://coveralls.io/repos/github/boost-R/gamboostLSS/badge.svg?branch=master)](https://coveralls.io/github/boost-R/gamboostLSS?branch=master) +[![](http://cranlogs.r-pkg.org/badges/gamboostLSS)](https://CRAN.R-project.org/package=gamboostLSS) `gamboostLSS` implements boosting algorithms for fitting generalized linear, additive and interaction models for to potentially high-dimensional data. @@ -17,13 +17,18 @@ shape). ## Using gamboostLSS -For installation instructions see below. +- For installation instructions see below. -Instructions on how to use `gamboostLSS` can be found here: -- [gamboostLSS tutorial](https://www.jstatsoft.org/article/view/v074i01). +- Instructions on how to use `gamboostLSS` can be found in the + [gamboostLSS tutorial](https://www.jstatsoft.org/article/view/v074i01). -Details on the noncyclical fitting method can be found here: -- [noncyclical fitting](https://arxiv.org/abs/1611.10171); This is a preliminary version currently under review. +- Details on the noncyclical fitting method can be found in + + Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. (2018), + Gradient boosting for distributional regression - faster tuning and improved + variable selection via noncyclical updates. + *Statistics and Computing*. 28: 673-687. DOI [10.1007/s11222-017-9754-6](http://dx.doi.org/10.1007/s11222-017-9754-6). + (Preliminary version: [ArXiv 1611.10171](http://arxiv.org/abs/1611.10171)). 
## Issues & Feature Requests diff --git a/SOP_release.txt b/SOP_release.txt index 89b1f2a..0449a05 100644 --- a/SOP_release.txt +++ b/SOP_release.txt @@ -7,8 +7,6 @@ Update ChangeLog ------------------ - Go to patch/pkg directory of project: - git log abba..HEAD --pretty=short > ChangeLog @@ -24,7 +22,7 @@ ../relative/path/to/R-devel/bin/R Run - install.packages(c("gamlss", "BayesX", "gamlss.dist", "mboost", "R2BayesX")) + install.packages(c("gamlss", "gamlss.dist", "mboost", "BayesX", "R2BayesX")) Quit R and run ../relative/path/to/R-devel/bin/R CMD check --as-cran --run-dontrun --run-donttest gamboostLSS_XXX.tar.gz @@ -35,9 +33,9 @@ Increase patch or minor level in DESCRIPTION Update Date: field in DESCRIPTION - Update NEWS - - R CMD build --resave-data --compact-vignettes [[pkg OR patch]] && + Update inst/NEWS.Rd + + R CMD build --resave-data --compact-vignettes . && R CMD check --as-cran --run-dontrun --run-donttest gamboostLSS_XXX.tar.gz Run check with R-devel @@ -45,18 +43,12 @@ If differences to .Rout.save occure: - Copy new .Rout files to .Rout.save [1,2]: - - Rscript copy_Rout_to_Routsave.R "path='pkg'" "vignettes=FALSE" - or - Rscript copy_Rout_to_Routsave.R "path='patch'" "vignettes=FALSE" - + Rscript copy_Rout_to_Routsave.R "vignettes=FALSE" + - Update vignette .Rout.save files if necessary [1,2]: + Rscript copy_Rout_to_Routsave.R "vignettes=TRUE" - Rscript copy_Rout_to_Routsave.R "path='pkg'" "vignettes=TRUE" - or - Rscript copy_Rout_to_Routsave.R "path='patch'" "vignettes=TRUE" - - [1] For details see + [1] For details see http://r.789695.n4.nabble.com/Generate-Rout-save-files-for-vignettes-td4652752.html [2] NOTE: Reference output should be produced without having the --timings option set. @@ -69,14 +61,16 @@ Commit changes - Update ChangeLog (see above) + Update ChangeLog (see above) and amend previous commit + + Remove cvrisk results to reduce package size (see above) but do NOT COMMIT these changes to github. 
Now build package without test folder to be submitted to CRAN - - R CMD buildCRAN --resave-data --compact-vignettes pkg && R CMD check --as-cran gamboostLSS_XXX.tar.gz - or - R CMD buildCRAN --resave-data --compact-vignettes patch && R CMD check --as-cran gamboostLSS_XXX.tar.gz - - To use the script copy it to R RHOME (< enter this in the console) /bin and make it executable. - + Therefore, do the following: + + mv vignettes/gamboostLSS_Tutorial_CRAN.Rnw vignettes/gamboostLSS_Tutorial.Rnw + mv .RbuildignoreCRAN .Rbuildignore + R CMD build --resave-data --compact-vignettes . && R CMD check --as-cran --run-donttest gamboostLSS_XXX.tar.gz + git checkout -- .RbuildignoreCRAN .Rbuildignore vignettes/gamboostLSS_Tutorial_CRAN.Rnw vignettes/gamboostLSS_Tutorial.Rnw + Use web form at http://xmpalantir.wu.ac.at/cransubmit/. diff --git a/checks.R b/checks.R new file mode 100644 index 0000000..aa0f642 --- /dev/null +++ b/checks.R @@ -0,0 +1,48 @@ +require("tools") + +make_check <- function(srcpkg, dir = "./") { + if (dir == "") dir <- "./" + .libPaths("") + options(repos = "http://CRAN.at.R-project.org") + odir <- setwd(dir) + + pkg <- strsplit(srcpkg, "_")[[1]][1] + + cdir <- paste(pkg, "CRAN", sep = "_") + ddir <- paste(pkg, "devel", sep = "_") + + if (!file.exists(cdir)) { + dir.create(cdir) + } else { + system(paste("rm -rf", cdir)) + dir.create(cdir) + } + if (!file.exists(ddir)) { + dir.create(ddir) + } else { + system(paste("rm -rf", ddir)) + dir.create(ddir) + } + file.copy(srcpkg, ddir) + download.packages(pkg, repos = options("repos"), destdir = cdir) + + check_packages_in_dir(cdir, reverse = list(), Ncpus = 4) + check_packages_in_dir(ddir, reverse = list(), Ncpus = 4) + + cat("\n\nReverse tests with CRAN package:\n") + summarize_check_packages_in_dir_results(cdir, all = TRUE) + summarize_check_packages_in_dir_timings(cdir, all = TRUE) + + cat("\n\nReverse tests with NEW package:\n") + summarize_check_packages_in_dir_results(ddir, all = TRUE) + 
summarize_check_packages_in_dir_timings(ddir, all = TRUE) + + cat("\n\nComparison of results:\n") + check_packages_in_dir_changes(ddir, cdir, outputs = TRUE, sources = TRUE) + # setwd(odir) +} + +# package_dependencies("gamboostLSS", available.packages(), reverse = TRUE) +.owd <- setwd("../") +make_check(srcpkg = "gamboostLSS_2.0-0.tar.gz") +setwd(.owd) \ No newline at end of file diff --git a/copy_Rout_to_Routsave.R b/copy_Rout_to_Routsave.R index e9adcfa..cd3088d 100644 --- a/copy_Rout_to_Routsave.R +++ b/copy_Rout_to_Routsave.R @@ -7,36 +7,23 @@ # USAGE: # Use # ## To copy test output -# Rscript copy_Rout_to_Routsave.R "path='pkg'" "vignettes=FALSE" +# Rscript copy_Rout_to_Routsave.R "vignettes=FALSE" # ## To copy vignette output -# Rscript copy_Rout_to_Routsave.R "path='pkg'" "vignettes=TRUE" -# -# or use -# ## To copy test output -# Rscript copy_Rout_to_Routsave.R "path='patch'" "vignettes=FALSE" -# ## To copy vignette output -# Rscript copy_Rout_to_Routsave.R "path='patch'" "vignettes=TRUE" +# Rscript copy_Rout_to_Routsave.R "vignettes=TRUE" # ################################################################################ ## Get command line arguments args <- commandArgs(TRUE) -if (length(args) > 2) - stop("specify (at maximum) two arguments (i.e., which and vignettes)") +if (length(args) > 1) + stop("specify (at maximum) one argument (i.e., vignettes)") eval(parse(text=args)) -if (length(args) == 0) { - vignettes <- FALSE - path <- "pkg" -} - -if (is.null(path)) - path <- "pkg" - -if (is.null(vignettes)) +if (length(args) == 0) vignettes <- FALSE - + which <- "gamboostLSS" -check_path <- "gamboostLSS.Rcheck/" +path <- "." 
+check_path <- "../gamboostLSS.Rcheck/" ################################################################################ ## Copy output of test files diff --git a/inst/CITATION b/inst/CITATION index 0c4012e..10ae371 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -69,23 +69,26 @@ citEntry( citEntry( - entry= "TechReport", - title = "Stability selection for component-wise gradient boosting in multiple dimensions", + entry= "Article", + title = "Gradient boosting for distributional regression - faster tuning and improved variable selection via noncyclical updates", author = personList(as.person("Janek Thomas"), as.person("Andreas Mayr"), - as.person("Bernd Bischl"), - as.person("Matthias Schmid"), - as.person("Adam Smith"), + as.person("Bernd Bischl"), + as.person("Matthias Schmid"), + as.person("Adam Smith"), as.person("Benjamin Hofner")), - year = "2016", - institution = "ArXiv", + year = "2018", + journal = "{Statistics and Computing}", + volume = "28", + number = "3", + pages = "673--687", + doi = "10.1007/s11222-017-9754-6", header = "To cite the noncyclical fitting method of 'gamboostLSS' use:", - url = "https://arxiv.org/abs/1611.10171", textVersion = - paste("Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. (2016).", - "Stability selection for component-wise gradient boosting in multiple dimensions.", - "arXiv preprint arXiv:1611.10171.") + paste("Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. (2018).", + "Gradient boosting for distributional regression - faster tuning and improved variable selection via noncyclical updates.", + "Statistics and Computing. 28(3): 673-687. 
DOI 10.1007/s11222-017-9754-6") ) -citFooter('\nUse ', sQuote('toBibtex(citation("gamboostLSS"))'), ' to extract BibTeX references.') +citFooter('\nUse ', sQuote('toBibtex(citation("gamboostLSS"))'), ' to extract BibTeX references.\n') diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd index a2619cb..8de95a5 100644 --- a/inst/NEWS.Rd +++ b/inst/NEWS.Rd @@ -1,38 +1,78 @@ \name{NEWS} \title{News for Package 'gamboostLSS'} -\section{Changes in gamboostLSS version 1.5-0 (2016-yy-zz)}{ +\section{Changes in gamboostLSS version 2.0-2 (2018-06-xx)}{ + \subsection{Bug-fixes}{ + \itemize{ + \item{Initialize \code{combined_risk} in \code{mboostLSS_fit} to avoid writing + (and overwriting) the combined risk in the global environment. + Closes issue \href{https://github.com/boost-R/gamboostLSS/issues/49}{#49}.} + } + } +} + +\section{Changes in gamboostLSS version 2.0-1 (2018-06-14)}{ + \subsection{Miscellaneous}{ + \itemize{ + \item{Export plot functions, streamlined interface of cvrisk and updated manuals. + Closes issue \href{https://github.com/boost-R/gamboostLSS/issues/46}{#46}.} + \item{Export \code{predict.mboostLSS} to make \pkg{betaboost} happy.} + \item{Updated reference for Thomas et al. + Closes issues \href{https://github.com/boost-R/gamboostLSS/issues/47}{#47}.} + } + } +} + +\section{Changes in gamboostLSS version 2.0-0 (2017-05-05)}{ \subsection{User-visible changes}{ \itemize{ - \item Added new non-cyclic fitting \code{method}s for \code{gamboostLSS} models: - \itemize{ - \item \code{method = "cycling"} is the standard approach which was also - used previously. - \item \code{method = "inner"} and \code{"outer"} provide two new - non-cyclic algorithms. For details see XXX. - \item Non-cyclic methods allow faster cross-validation and better results - for stability selection (see below). 
- } + \item Added new fitting algorithm which can be used via the argument + \code{method}: + \itemize{ + \item \code{method = "cyclic"} is the standard approach which was also + used previously. + \item \code{method = "noncyclic"} provides a new non-cyclical algorithm. + } + The \code{"noncyclic"} method allow faster cross-validation and better results + for stability selection (see below). For details see + + Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. (2017), + Gradient boosting for distributional regression - faster tuning and improved + variable selection via noncyclical updates. + \emph{Statistics and Computing}. Online First. + DOI \href{http://dx.doi.org/10.1007/s11222-017-9754-6}{10.1007/s11222-017-9754-6}\cr + (Preliminary version: \url{http://arxiv.org/abs/1611.10171}). \item Stability selection (\code{stabsel}) implemented for \code{gamboostLSS} models. - \item The (\code{as.families}) interface to incorporate 'gamlss' distributions for \code{gamboostLSS} - models was adapted to allow for other link functions. - \item Added option \code{stabilization = "L2"} to use the mean L2 norm of - the negative gradient to make the updated for the different distribution parameters comparable. + \item Models can now be fitted with zero steps (i.e., models containing only + the offset). This change was mainly realized with the release of mboost 2.8-0. + This closes issue \href{https://github.com/boost-R/gamboostLSS/issues/22}{#22}. + \item The \code{as.families} interface to incorporate \pkg{gamlss} distributions + for \code{gamboostLSS} models was adapted to allow for other link functions. + \item Added option \code{stabilization = "L2"} to use the average L2 norm of + the negative gradient to make the updates for the different distribution + parameters comparable. 
} } \subsection{Bug-fixes}{ \itemize{ - \item Fixed \code{as.families("BB")} and \code{as.families("BI")}, which is actually an mboost family - Closes issue + \item Fixed \code{as.families("BB")} and \code{as.families("BI")}, which is + actually an \pkg{mboost} family. Closes issue \href{https://github.com/boost-R/gamboostLSS/issues/12}{#12}. + \item Fixed \code{as.families} for 4-parametric \code{"Mixed"}-type families + (e.g. \code{as.families("BEOI")} and \code{as.families("BEINF")}). + Closes issue + \href{https://github.com/boost-R/gamboostLSS/issues/28}{#28}. } } \subsection{Miscellaneous}{ \itemize{ \item Added Janek Thomas as new author. - \item Updated \file{inst/CITATION} due to release of JSS tutorial paper. + \item Updated \file{inst/CITATION} due to release of JSS tutorial paper and + added new noncyclical fitting paper (also to manuals). + The latter closes issue \href{https://github.com/boost-R/gamboostLSS/issues/27}{#27}. \item Updated URL in \file{DESCRIPTION}. + \item Adapt \code{mboostLSS_fit()} such that it can be called by \code{FDboost::FDboostLSS()}. } } } diff --git a/man/cvrisk.Rd b/man/cvrisk.Rd index 23db946..2b09eee 100644 --- a/man/cvrisk.Rd +++ b/man/cvrisk.Rd @@ -14,23 +14,28 @@ \usage{ \method{cvrisk}{mboostLSS}(object, folds = cv(model.weights(object)), grid = make.grid(mstop(object)), papply = mclapply, - trace = TRUE, fun = NULL, ...) + trace = TRUE, mc.preschedule = FALSE, fun = NULL, ...) make.grid(max, length.out = 10, min = NULL, log = TRUE, dense_mu_grid = TRUE) + +\method{cvrisk}{nc_mboostLSS}(object, folds = cv(model.weights(object)), + grid = 1:sum(mstop(object)), papply = mclapply, + trace = TRUE, mc.preschedule = FALSE, fun = NULL, ...) \method{plot}{cvriskLSS}(x, type = c("heatmap", "lines"), xlab = NULL, ylab = NULL, ylim = range(x), main = attr(x, "type"), ...) 
-\method{plot}{nc_cvriskLSS}(x, type = "lines", - xlab = "Number of boosting iterations", ylab = NULL, +\method{plot}{nc_cvriskLSS}(x, xlab = "Number of boosting iterations", ylab = NULL, ylim = range(x), main = attr(x, "type"), ...) } \arguments{ \item{object}{ - an object of class \code{mboostLSS}, i.e., a boosted GAMLSS model. + an object of class \code{mboostLSS} (i.e., a boosted GAMLSS model with + \code{method = "cyclic"}) or class \code{nc_mboostLSS} (i.e., a boosted + GAMLSS model with \code{method = "noncyclic"}) } \item{folds}{ a weight matrix with number of rows equal to the number of @@ -60,6 +65,10 @@ make.grid(max, length.out = 10, min = NULL, log = TRUE, should status information beein printed during cross-validation? Default: \code{TRUE}. } + \item{mc.preschedule}{ + preschedule tasks if are parallelized using \code{\link{mclapply}} + (default: \code{FALSE})? For details see \code{\link{mclapply}}. + } \item{fun}{ if \code{fun} is NULL, the out-of-sample risk is returned. \code{fun}, as a function of \code{object}, may extract any other characteristic @@ -96,8 +105,8 @@ make.grid(max, length.out = 10, min = NULL, log = TRUE, computational costs. For details see examples. } \item{x}{ - an object of class \code{cvriskLSS}, which results from running - \code{cvrisk}. + an object of class \code{cvriskLSS} (cyclic fitting) or \code{nc_cvriskLSS} + (non-cyclic fitting), which results from running \code{cvrisk}. } \item{type}{ should \code{"lines"} or a \code{"heatmap"} (default) be plotted? @@ -132,15 +141,17 @@ make.grid(max, length.out = 10, min = NULL, log = TRUE, fold to fold. The heatmap shows only the average risk but in a nicer fashion. - For the \code{method = "noncyclic"} only the line plot for exists. + For the \code{method = "noncyclic"} only the line plot exists. Hofner et al. (2016) provide a detailed description of cross-validation for \code{\link{gamboostLSS}} models and show a - worked example. + worked example. Thomas et al. 
(2018) compare cross-validation for the + the cyclic and non-cyclic boosting approach and provide worked examples. } \value{ - An object of class \code{cvriskLSS} (when \code{fun} wasn't - specified), basically a matrix containing estimates of the empirical + An object of class \code{cvriskLSS} or \code{nc_cvriskLSS} for cyclic and + non-cyclic fitting, respectively, (when \code{fun} wasn't specified); + Basically a matrix containing estimates of the empirical risk for a varying number of bootstrap iterations. \code{plot} and \code{print} methods are available as well as an \code{mstop} method. } @@ -150,6 +161,13 @@ make.grid(max, length.out = 10, min = NULL, log = TRUE, Journal of Statistical Software, 74(1), 1-31. Available as \code{vignette("gamboostLSS_Tutorial")}. + + Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. (2018), + Gradient boosting for distributional regression - faster tuning and improved + variable selection via noncyclical updates. + \emph{Statistics and Computing}. 28: 673-687. + DOI \href{http://dx.doi.org/10.1007/s11222-017-9754-6}{10.1007/s11222-017-9754-6}\cr + (Preliminary version: \url{http://arxiv.org/abs/1611.10171}). } \seealso{ \code{\link{cvrisk.mboost}} and \code{\link{cv}} (both in package diff --git a/man/gamboostLSS-package.Rd b/man/gamboostLSS-package.Rd index 02a437f..f65a403 100644 --- a/man/gamboostLSS-package.Rd +++ b/man/gamboostLSS-package.Rd @@ -15,7 +15,11 @@ additive models for location, scale and shape). For information on GAMLSS theory see Rigby and Stasinopoulos (2005), or the information provided at \url{http://gamlss.org}. For a tutorial on - \code{\link{gamboostLSS}} see Hofner et al. (2015). + \code{\link{gamboostLSS}} see Hofner et al. (2015). Thomas et al. (2018) + developed a novel non-cyclic approach to fit \code{\link{gamboostLSS}} models. + This approach is suitable for the combination with \code{\link{stabsel}} and + speeds up model tuning via \code{\link{cvrisk}}. 
+ The fitting methods \code{\link{glmboostLSS}} and \code{\link{gamboostLSS}}, are alternatives for the algorithms @@ -45,7 +49,7 @@ \author{ Benjamin Hofner, Andreas Mayr, Nora Fenske, Janek Thomas, Matthias Schmid - Maintainer: Benjamin Hofner + Maintainer: Benjamin Hofner } \references{ @@ -83,6 +87,13 @@ Hothorn, T., Buehlmann, P., Kneib, T., Schmid, M. and Hofner, B. (2015). mboost: Model-based boosting. R package version 2.4-2. \url{https://CRAN.R-project.org/package=mboost} + + Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. (2018), + Gradient boosting for distributional regression - faster tuning and improved + variable selection via noncyclical updates. + \emph{Statistics and Computing}. 28: 673-687. + DOI \href{http://dx.doi.org/10.1007/s11222-017-9754-6}{10.1007/s11222-017-9754-6}\cr + (Preliminary version: \url{http://arxiv.org/abs/1611.10171}). } \keyword{ package } diff --git a/man/mboostLSS.Rd b/man/mboostLSS.Rd index 348e0a3..b269d67 100644 --- a/man/mboostLSS.Rd +++ b/man/mboostLSS.Rd @@ -10,10 +10,15 @@ } \description{ Functions for fitting GAMLSS (generalized additive models for - location, scale and shape) using boosting techniques. The algorithm + location, scale and shape) using boosting techniques. + Two algorithms are implemented: (a) The cyclic algorithm iteratively rotates between the distribution parameters, updating one while using the current fits of the others as offsets (for details see Mayr et - al., 2012). + al., 2012). + (b) The noncyclic algorithm selects in each step the update of a base-learner + for the distribution parameter that best fits the negative gradient + (algorithm with inner loss of Thomas et al., 2018). + } \usage{ mboostLSS(formula, data = list(), families = GaussianLSS(), @@ -50,6 +55,10 @@ mboostLSS_fit(formula, data = list(), families = GaussianLSS(), \item{control}{ a list of parameters controlling the algorithm. For more details see \code{\link{boost_control}}. 
} \item{weights}{ a numeric vector of weights (optional). } + \item{method}{ fitting method, currently two methods are supported: + \code{"cyclic"} (see Mayr et al., 2012) and \code{"noncyclic"} + (algorithm with inner loss of Thomas et al., 2018). + The latter requires a one dimensional \code{mstop} value.} \item{fun}{ fit function. Either \code{\link{mboost}}, \code{\link{glmboost}}, \code{\link{gamboost}} or \code{\link{blackboost}}. Specified directly via the corresponding LSS @@ -61,9 +70,6 @@ mboostLSS_fit(formula, data = list(), families = GaussianLSS(), \item{call}{ used to forward the call from \code{mboostLSS}, \code{glmboostLSS}, \code{gamboostLSS} and \code{blackboostLSS}. This argument should not be directly specified by users!} - \item{method}{ fitting method, currently two methods are supported: - \code{"cyclic"} and \code{"noncyclic"}. The latter two requires a one dimensional \code{mstop} - value.} \item{\dots}{Further arguments to be passed to \code{mboostLSS_fit}. In \code{mboostLSS_fit}, \code{\dots} represent further arguments to be passed to \code{\link{mboost}} and \code{\link{mboost_fit}}. So @@ -74,7 +80,10 @@ mboostLSS_fit(formula, data = list(), families = GaussianLSS(), For information on GAMLSS theory see Rigby and Stasinopoulos (2005) or the information provided at \url{http://gamlss.org}. For a tutorial on - \code{\link{gamboostLSS}} see Hofner et al. (2016). + \code{\link{gamboostLSS}} see Hofner et al. (2016). Thomas et al. (2018) + developed a novel non-cyclic approach to fit gamboostLSS models. This approach + is suitable for the combination with \code{\link{stabsel}} and speeds up + model tuning via \code{\link{cvrisk}} (see also below). \code{glmboostLSS} uses \code{\link[mboost]{glmboost}} to fit the distribution parameters of a GAMLSS -- a linear boosting model is @@ -85,7 +94,7 @@ mboostLSS_fit(formula, data = list(), families = GaussianLSS(), default with smooth effects) is fitted for each parameter. 
With the \code{formula} argument, a wide range of different base-learners can be specified (see \code{\link[mboost]{baselearners}}). The - base-learners inply the type of effect each covariate has on the + base-learners imply the type of effect each covariate has on the corresponding distribution parameter. \code{mboostLSS} uses \code{\link[mboost]{mboost}} to fit the @@ -100,14 +109,15 @@ mboostLSS_fit(formula, data = list(), families = GaussianLSS(), \code{blackboostLSS} all call \code{mboostLSS_fit} while \code{fun} is the corresponding \code{\link{mboost}} function, i.e., the same function without \code{LSS}. For further possible arguments see - these functions as well as \code{\link{mboost_fit}}. + these functions as well as \code{\link{mboost_fit}}. + Note that \code{mboostLSS_fit} is usually not called directly by the user. For \code{method = "cyclic"} it is possible to specify one or multiple \code{mstop} and \code{nu} values via \code{\link{boost_control}}. In the case of one single value, this value is used for all distribution parameters of the GAMLSS model. Alternatively, a (named) vector or a (named) list with separate values - for each component can be used to specify a seperate value for each + for each component can be used to specify a separate value for each parameter of the GAMLSS model. The names of the list must correspond to the names of the distribution parameters of the GAMLSS family. If no names are given, the order of the \code{mstop} or \code{nu} values @@ -115,13 +125,14 @@ mboostLSS_fit(formula, data = list(), families = GaussianLSS(), \code{families}. For one-dimensional stopping, the user therefore can specify, e.g., \code{mstop = 100} via \code{\link{boost_control}}. For more-dimensional stopping, one can specify, e.g., \code{mstop = - list(mu = 100, sigma = 200)} (see examples). + list(mu = 100, sigma = 200)} (see examples). If \code{method} is set to \code{"noncyclic"}, \code{mstop} has to be a one dimensional integer. 
Instead of cycling through all distribution - parameters, in each iteration only the best baselearner is used. One baselearner of every - parameter is selected via RSS, the distribution parameter is then chosen via the loss. - For details on the noncyclic fitting method see Thomas et. al. (2016). + parameters, in each iteration only the best base-learner is used. One base-learner of every + parameter is selected via RSS, the distribution parameter is then chosen via the loss + (in Thomas et. al., 2018, called inner loss). + For details on the noncyclic fitting method see Thomas et. al. (2018). To (potentially) stabilize the model estimation by standardizing the negative gradients one can use the argument \code{stabilization} of @@ -129,8 +140,13 @@ mboostLSS_fit(formula, data = list(), families = GaussianLSS(), } \value{ - An object of class \code{mboostLSS} with corresponding methods to - extract information. + An object of class \code{mboostLSS} or \code{nc_mboostLSS} (inheriting from + class \code{mboostLSS}) for models fitted with \code{method = "cyclic"} + and \code{method = "non-cyclic"}, respectively, with corresponding methods to + extract information. A \code{mboostLSS} model object is a named list + with one list entry for each modelled distribution parameter. + Special "subclasses" inheriting from \code{mboostLSS} exist for each of the + model-types (with the same name as the function, e.g., \code{gamboostLSS}). } \references{ B. Hofner, A. Mayr, M. Schmid (2016). gamboostLSS: An R Package for @@ -156,9 +172,12 @@ Statistical Society, Series C (Applied Statistics), 54, 507-554. Buehlmann, P. and Hothorn, T. (2007), Boosting algorithms: Regularization, prediction and model fitting. Statistical Science, 22(4), 477--505. -Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. (2016), -Stability selection for component-wise gradient boosting in multiple dimensions. -arXiv preprint arXiv:1611.10171. 
+ Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. (2018), + Gradient boosting for distributional regression - faster tuning and improved + variable selection via noncyclical updates. + \emph{Statistics and Computing}. 28: 673-687. + DOI \href{http://dx.doi.org/10.1007/s11222-017-9754-6}{10.1007/s11222-017-9754-6}\cr + (Preliminary version: \url{http://arxiv.org/abs/1611.10171}). } \seealso{ diff --git a/man/methods.Rd b/man/methods.Rd index 7c63c02..a48eec5 100644 --- a/man/methods.Rd +++ b/man/methods.Rd @@ -8,6 +8,7 @@ \alias{risk} \alias{risk.mboostLSS} +\alias{risk.nc_mboostLSS} \alias{[.mboostLSS} diff --git a/man/stabsel.mboostLSS.Rd b/man/stabsel.mboostLSS.Rd index 84800a0..833dca6 100644 --- a/man/stabsel.mboostLSS.Rd +++ b/man/stabsel.mboostLSS.Rd @@ -68,8 +68,13 @@ } \details{ - For details see \code{\link[stabs]{stabsel}} in package \pkg{stabs} - and Hofner et al. (2014). + Stability selection is to be preferably used with non-cyclic \code{\link{gamboostLSS}} + models, as proposed by Thomas et al. (2018). In this publication, the combination + of package \pkg{gamboostLSS} with stability selection was devoloped and is + investigated in depth. + + For details on stability selection see \code{\link[stabs]{stabsel}} in package + \pkg{stabs} and Hofner et al. (2014). } \value{ @@ -89,10 +94,9 @@ } \references{ - B. Hofner, L. Boccuto and M. Goeker (2014), + B. Hofner, L. Boccuto and M. Goeker (2015), Controlling false discoveries in high-dimensional situations: Boosting - with stability selection. \emph{Technical Report}, arXiv:1411.1285.\cr - \url{http://arxiv.org/abs/1411.1285}. + with stability selection. \emph{BMC Bioinformatics}, \bold{16:144}. N. Meinshausen and P. Buehlmann (2010), Stability selection. \emph{Journal of the Royal Statistical Society, Series B}, @@ -101,7 +105,13 @@ R.D. Shah and R.J. Samworth (2013), Variable selection with error control: another look at stability selection. 
\emph{Journal of the Royal Statistical Society, Series B}, \bold{75}, 55--80. - + + Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. (2018), + Gradient boosting for distributional regression - faster tuning and improved + variable selection via noncyclical updates. + \emph{Statistics and Computing}. 28: 673-687. + DOI \href{http://dx.doi.org/10.1007/s11222-017-9754-6}{10.1007/s11222-017-9754-6}\cr + (Preliminary version: \url{http://arxiv.org/abs/1611.10171}). } \seealso{ \code{\link[stabs]{stabsel}} and diff --git a/tests/Examples/gamboostLSS-Ex.Rout.save b/tests/Examples/gamboostLSS-Ex.Rout.save index 356229c..855c5a2 100644 --- a/tests/Examples/gamboostLSS-Ex.Rout.save +++ b/tests/Examples/gamboostLSS-Ex.Rout.save @@ -1,7 +1,7 @@ -R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree" -Copyright (C) 2015 The R Foundation for Statistical Computing -Platform: x86_64-pc-linux-gnu (64-bit) +R version 3.4.3 (2017-11-30) -- "Kite-Eating Tree" +Copyright (C) 2017 The R Foundation for Statistical Computing +Platform: x86_64-w64-mingw32/x64 (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. @@ -20,6 +20,7 @@ Type 'q()' to quit R. > pkgname <- "gamboostLSS" > source(file.path(R.home("share"), "R", "examples-header.R")) > options(warn = 1) +> options(pager = "console") > base::assign(".ExTimings", "gamboostLSS-Ex.timings", pos = 'CheckExEnv') > base::cat("name\tuser\tsystem\telapsed\n", file=base::get(".ExTimings", pos = 'CheckExEnv')) > base::assign(".format_ptime", @@ -36,13 +37,14 @@ Type 'q()' to quit R. Loading required package: mboost Loading required package: parallel Loading required package: stabs -This is mboost 2.6-0. See ‘package?mboost’ and ‘news(package = "mboost")’ +Warning: package 'stabs' was built under R version 3.4.4 +This is mboost 2.9-0. See 'package?mboost' and 'news(package = "mboost")' for a complete list of changes. 
-Attaching package: ‘gamboostLSS’ +Attaching package: 'gamboostLSS' -The following object is masked from ‘package:stats’: +The following object is masked from 'package:stats': model.weights @@ -81,11 +83,12 @@ The following object is masked from ‘package:stats’: + glmss <- glmboostLSS(y ~ x, families = as.families(NO())) + } Loading required package: gamlss.dist +Warning: package 'gamlss.dist' was built under R version 3.4.4 Loading required package: MASS -Attaching package: ‘gamlss.dist’ +Attaching package: 'gamlss.dist' -The following object is masked from ‘package:mboost’: +The following object is masked from 'package:mboost': Family @@ -102,10 +105,12 @@ $sigma > > ## compare to gamlss > library(gamlss) +Warning: package 'gamlss' was built under R version 3.4.4 Loading required package: splines Loading required package: gamlss.data +Warning: package 'gamlss.data' was built under R version 3.4.4 Loading required package: nlme - ********** GAMLSS Version 4.3-8 ********** + ********** GAMLSS Version 5.1-0 ********** For more on GAMLSS look at http://www.gamlss.org/ Type gamlssNews() to see new features/changes/bug fixes. 
@@ -127,8 +132,8 @@ GAMLSS-RS iteration 3: Global Deviance = 3844.149 > base::cat("as.families", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() -detaching ‘package:gamlss’, ‘package:nlme’, ‘package:gamlss.data’, - ‘package:splines’, ‘package:gamlss.dist’, ‘package:MASS’ +detaching 'package:gamlss', 'package:nlme', 'package:gamlss.data', + 'package:splines', 'package:gamlss.dist', 'package:MASS' > nameEx("cvrisk") > ### * cvrisk @@ -138,7 +143,8 @@ detaching ‘package:gamlss’, ‘package:nlme’, ‘package:gamlss.data’, > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: cvrisk.mboostLSS > ### Title: Cross-Validation -> ### Aliases: cvrisk cvrisk.mboostLSS make.grid plot.cvriskLSS +> ### Aliases: cvrisk cvrisk.mboostLSS cvrisk.nc_mboostLSS make.grid +> ### plot.cvriskLSS plot.nc_cvriskLSS > ### Keywords: models regression > > ### ** Examples @@ -167,2643 +173,7 @@ detaching ‘package:gamlss’, ‘package:nlme’, ‘package:gamlss.data’, > grid <- make.grid(mstop(model), length.out = 5, dense_mu_grid = FALSE) > plot(grid) > -> ## No test: -> ### Do not test the following code per default on CRAN as it takes some time to run: -> ### a tiny toy example (5-fold bootsrap with maximum stopping value 100) -> ## (to run it on multiple cores of a Linux or Mac OS computer remove -> ## set papply = mclapply (default) and set mc.nodes to the -> ## appropriate number of nodes) -> cvr <- cvrisk(model, folds = cv(model.weights(model), B = 5), -+ papply = lapply, grid = grid) -Starting cross-validation... 
-[fold] [current mstop] - [1] [1,1] - [1] [3,1] - [1] [10,1] - [1] [32,1] - [1] [100,1] - [1] [1,3] - [1] [3,3] - [1] [10,3] - [1] [32,3] - [1] [100,3] - [1] [1,10] - [1] [3,10] - [1] [10,10] - [1] [32,10] - [1] [100,10] - [1] [1,32] - [1] [3,32] - [1] [10,32] - [1] [32,32] - [1] [100,32] - [1] [1,100] - [1] [3,100] - [1] [10,100] - [1] [32,100] - [1] [100,100] - [2] [1,1] - [2] [3,1] - [2] [10,1] - [2] [32,1] - [2] [100,1] - [2] [1,3] - [2] [3,3] - [2] [10,3] - [2] [32,3] - [2] [100,3] - [2] [1,10] - [2] [3,10] - [2] [10,10] - [2] [32,10] - [2] [100,10] - [2] [1,32] - [2] [3,32] - [2] [10,32] - [2] [32,32] - [2] [100,32] - [2] [1,100] - [2] [3,100] - [2] [10,100] - [2] [32,100] - [2] [100,100] - [3] [1,1] - [3] [3,1] - [3] [10,1] - [3] [32,1] - [3] [100,1] - [3] [1,3] - [3] [3,3] - [3] [10,3] - [3] [32,3] - [3] [100,3] - [3] [1,10] - [3] [3,10] - [3] [10,10] - [3] [32,10] - [3] [100,10] - [3] [1,32] - [3] [3,32] - [3] [10,32] - [3] [32,32] - [3] [100,32] - [3] [1,100] - [3] [3,100] - [3] [10,100] - [3] [32,100] - [3] [100,100] - [4] [1,1] - [4] [3,1] - [4] [10,1] - [4] [32,1] - [4] [100,1] - [4] [1,3] - [4] [3,3] - [4] [10,3] - [4] [32,3] - [4] [100,3] - [4] [1,10] - [4] [3,10] - [4] [10,10] - [4] [32,10] - [4] [100,10] - [4] [1,32] - [4] [3,32] - [4] [10,32] - [4] [32,32] - [4] [100,32] - [4] [1,100] - [4] [3,100] - [4] [10,100] - [4] [32,100] - [4] [100,100] - [5] [1,1] - [5] [3,1] - [5] [10,1] - [5] [32,1] - [5] [100,1] - [5] [1,3] - [5] [3,3] - [5] [10,3] - [5] [32,3] - [5] [100,3] - [5] [1,10] - [5] [3,10] - [5] [10,10] - [5] [32,10] - [5] [100,10] - [5] [1,32] - [5] [3,32] - [5] [10,32] - [5] [32,32] - [5] [100,32] - [5] [1,100] - [5] [3,100] - [5] [10,100] - [5] [32,100] - [5] [100,100] -> cvr - - Cross-validated risk - glmboostLSS(formula = y ~ ., data = dat, families = NBinomialLSS(), control = boost_control(mstop = 100), center = TRUE) - - 1,1 3,1 10,1 32,1 100,1 1,3 3,3 10,3 -3.302710 3.289003 3.254355 3.201048 3.132957 3.299061 3.285124 3.249949 - 32,3 
100,3 1,10 3,10 10,10 32,10 100,10 1,32 -3.195998 3.128614 3.292346 3.277996 3.240225 3.182837 3.117276 3.289440 - 3,32 10,32 32,32 100,32 1,100 3,100 10,100 32,100 -3.273384 3.228145 3.151754 3.088634 3.290803 3.272903 3.222363 3.120687 - 100,100 -3.020682 - - Optimal number of boosting iterations: 100 100 -> ## plot the results -> par(mfrow = c(1, 2)) -> plot(cvr) -> plot(cvr, type = "lines") -> ## extract optimal mstop (here: grid to small) -> mstop(cvr) - mu sigma - 100 100 -> ### END (don't test automatically) -> ## End(No test) -> -> ## No test: -> ### Do not test the following code per default on CRAN as it takes some time to run: -> ### a more realistic example -> grid <- make.grid(c(mu = 400, sigma = 400), dense_mu_grid = FALSE) -> plot(grid) -> cvr <- cvrisk(model, grid = grid) -Starting cross-validation... -[fold] [current mstop] - [2] [1,1] - [2] [2,1] - [2] [4,1] - [2] [7,1] - [2] [14,1] - [1] [1,1] - [1] [2,1] - [1] [4,1] - [1] [7,1] - [2] [28,1] - [1] [14,1] - [1] [28,1] - [2] [54,1] - [1] [54,1] - [2] [106,1] - [1] [106,1] - [2] [206,1] - [1] [206,1] - [2] [400,1] - [2] [1,2] - [2] [2,2] - [2] [4,2] - [2] [7,2] - [2] [14,2] - [1] [400,1] - [1] [1,2] - [1] [2,2] - [2] [28,2] - [1] [4,2] - [1] [7,2] - [1] [14,2] - [2] [54,2] - [1] [28,2] - [1] [54,2] - [2] [106,2] - [1] [106,2] - [2] [206,2] - [1] [206,2] - [2] [400,2] - [2] [1,4] - [2] [2,4] - [2] [4,4] - [2] [7,4] - [2] [14,4] - [2] [28,4] - [2] [54,4] - [1] [400,2] - [1] [1,4] - [1] [2,4] - [1] [4,4] - [1] [7,4] - [1] [14,4] - [2] [106,4] - [1] [28,4] - [1] [54,4] - [1] [106,4] - [2] [206,4] - [1] [206,4] - [2] [400,4] - [2] [1,7] - [2] [2,7] - [2] [4,7] - [2] [7,7] - [2] [14,7] - [2] [28,7] - [1] [400,4] - [2] [54,7] - [1] [1,7] - [1] [2,7] - [1] [4,7] - [1] [7,7] - [1] [14,7] - [2] [106,7] - [1] [28,7] - [1] [54,7] - [2] [206,7] - [1] [106,7] - [1] [206,7] - [2] [400,7] - [2] [1,14] - [2] [2,14] - [2] [4,14] - [2] [7,14] - [2] [14,14] - [2] [28,14] - [2] [54,14] - [1] [400,7] - [1] [1,14] - [1] 
[2,14] - [1] [4,14] - [1] [7,14] - [1] [14,14] - [1] [28,14] - [2] [106,14] - [1] [54,14] - [2] [206,14] - [1] [106,14] - [1] [206,14] - [2] [400,14] - [2] [1,28] - [2] [2,28] - [2] [4,28] - [1] [400,14] - [2] [7,28] - [1] [1,28] - [2] [14,28] - [1] [2,28] - [2] [28,28] - [1] [4,28] - [2] [54,28] - [1] [7,28] - [2] [106,28] - [1] [14,28] - [1] [28,28] - [1] [54,28] - [2] [206,28] - [1] [106,28] - [1] [206,28] - [2] [400,28] - [2] [1,54] - [1] [400,28] - [2] [2,54] - [1] [1,54] - [2] [4,54] - [1] [2,54] - [2] [7,54] - [1] [4,54] - [1] [7,54] - [2] [14,54] - [1] [14,54] - [2] [28,54] - [1] [28,54] - [2] [54,54] - [1] [54,54] - [2] [106,54] - [1] [106,54] - [2] [206,54] - [1] [206,54] - [2] [400,54] - [1] [400,54] - [2] [1,106] - [1] [1,106] - [2] [2,106] - [1] [2,106] - [2] [4,106] - [1] [4,106] - [2] [7,106] - [1] [7,106] - [2] [14,106] - [1] [14,106] - [2] [28,106] - [1] [28,106] - [1] [54,106] - [2] [54,106] - [1] [106,106] - [2] [106,106] - [1] [206,106] - [2] [206,106] - [1] [400,106] - [2] [400,106] - [1] [1,206] - [2] [1,206] - [1] [2,206] - [2] [2,206] - [2] [4,206] - [1] [4,206] - [1] [7,206] - [2] [7,206] - [2] [14,206] - [1] [14,206] - [2] [28,206] - [1] [28,206] - [2] [54,206] - [1] [54,206] - [2] [106,206] - [1] [106,206] - [2] [206,206] - [2] [400,206] - [1] [206,206] - [1] [400,206] - [2] [1,400] - [1] [1,400] - [2] [2,400] - [1] [2,400] - [2] [4,400] - [1] [4,400] - [2] [7,400] - [1] [7,400] - [1] [14,400] - [2] [14,400] - [1] [28,400] - [2] [28,400] - [1] [54,400] - [2] [54,400] - [1] [106,400] - [2] [106,400] - [1] [206,400] - [2] [206,400] - [1] [400,400] - [3] [1,1] - [3] [2,1] - [3] [4,1] - [3] [7,1] - [3] [14,1] - [3] [28,1] - [3] [54,1] - [3] [106,1] - [2] [400,400] - [4] [1,1] - [4] [2,1] - [4] [4,1] - [4] [7,1] - [4] [14,1] - [4] [28,1] - [4] [54,1] - [4] [106,1] - [3] [400,1] - [3] [1,2] - [3] [2,2] - [3] [4,2] - [3] [7,2] - [3] [14,2] - [3] [28,2] - [4] [206,1] - [3] [54,2] - [3] [106,2] - [3] [206,2] - [4] [400,1] - [4] [1,2] - [4] [2,2] - 
[4] [4,2] - [4] [7,2] - [4] [14,2] - [4] [28,2] - [4] [54,2] - [4] [106,2] - [3] [400,2] - [3] [1,4] - [3] [2,4] - [3] [4,4] - [3] [7,4] - [3] [14,4] - [3] [28,4] - [4] [206,2] - [3] [54,4] - [3] [106,4] - [4] [400,2] - [4] [1,4] - [4] [2,4] - [4] [4,4] - [4] [7,4] - [4] [14,4] - [4] [28,4] - [4] [54,4] - [4] [106,4] - [4] [206,4] - [3] [400,4] - [3] [1,7] - [3] [2,7] - [3] [4,7] - [3] [7,7] - [3] [14,7] - [3] [28,7] - [3] [54,7] - [3] [106,7] - [4] [400,4] - [4] [1,7] - [3] [206,7] - [4] [2,7] - [4] [4,7] - [4] [7,7] - [4] [14,7] - [4] [28,7] - [4] [54,7] - [4] [106,7] - [3] [400,7] - [3] [1,14] - [3] [2,14] - [3] [4,14] - [3] [7,14] - [4] [206,7] - [3] [14,14] - [3] [28,14] - [3] [54,14] - [3] [106,14] - [4] [400,7] - [3] [206,14] - [4] [1,14] - [4] [2,14] - [4] [4,14] - [4] [7,14] - [4] [14,14] - [4] [28,14] - [4] [54,14] - [3] [400,14] - [4] [106,14] - [3] [1,28] - [3] [2,28] - [3] [4,28] - [4] [206,14] - [3] [7,28] - [3] [14,28] - [3] [28,28] - [3] [54,28] - [3] [106,28] - [3] [206,28] - [4] [400,14] - [4] [1,28] - [4] [2,28] - [4] [4,28] - [4] [7,28] - [3] [400,28] - [4] [14,28] - [4] [28,28] - [4] [54,28] - [3] [1,54] - [4] [106,28] - [3] [2,54] - [4] [206,28] - [3] [4,54] - [3] [7,54] - [3] [14,54] - [4] [400,28] - [3] [28,54] - [4] [1,54] - [3] [54,54] - [4] [2,54] - [3] [106,54] - [4] [4,54] - [4] [7,54] - [3] [206,54] - [4] [14,54] - [4] [28,54] - [4] [54,54] - [3] [400,54] - [4] [106,54] - [4] [206,54] - [3] [1,106] - [3] [2,106] - [4] [400,54] - [3] [4,106] - [4] [1,106] - [3] [7,106] - [4] [2,106] - [3] [14,106] - [4] [4,106] - [3] [28,106] - [4] [7,106] - [3] [54,106] - [3] [106,106] - [4] [14,106] - [3] [206,106] - [4] [28,106] - [3] [400,106] - [4] [54,106] - [4] [106,106] - [4] [206,106] - [3] [1,206] - [4] [400,106] - [3] [2,206] - [4] [1,206] - [3] [4,206] - [4] [2,206] - [3] [7,206] - [4] [4,206] - [3] [14,206] - [4] [7,206] - [3] [28,206] - [4] [14,206] - [3] [54,206] - [4] [28,206] - [3] [106,206] - [4] [54,206] - [3] [206,206] - [4] 
[106,206] - [3] [400,206] - [4] [206,206] - [4] [400,206] - [3] [1,400] - [4] [1,400] - [3] [2,400] - [4] [2,400] - [3] [4,400] - [4] [4,400] - [3] [7,400] - [4] [7,400] - [3] [14,400] - [4] [14,400] - [3] [28,400] - [4] [28,400] - [3] [54,400] - [4] [54,400] - [3] [106,400] - [4] [106,400] - [3] [206,400] - [3] [400,400] - [5] [1,1] - [5] [2,1] - [5] [4,1] - [5] [7,1] - [5] [14,1] - [5] [28,1] - [4] [206,400] - [5] [54,1] - [5] [106,1] - [5] [206,1] - [5] [400,1] - [5] [1,2] - [5] [2,2] - [5] [4,2] - [5] [7,2] - [5] [14,2] - [4] [400,400] - [5] [28,2] - [6] [1,1] - [6] [2,1] - [6] [4,1] - [6] [7,1] - [6] [14,1] - [5] [54,2] - [6] [28,1] - [5] [106,2] - [6] [54,1] - [6] [106,1] - [5] [206,2] - [6] [206,1] - [5] [400,2] - [5] [1,4] - [5] [2,4] - [5] [4,4] - [5] [7,4] - [6] [400,1] - [6] [1,2] - [6] [2,2] - [6] [4,2] - [6] [7,2] - [6] [14,2] - [5] [54,4] - [6] [28,2] - [6] [54,2] - [6] [106,2] - [5] [106,4] - [6] [206,2] - [5] [206,4] - [6] [400,2] - [6] [1,4] - [6] [2,4] - [6] [4,4] - [6] [7,4] - [5] [400,4] - [5] [1,7] - [5] [2,7] - [6] [14,4] - [5] [4,7] - [5] [7,7] - [5] [14,7] - [5] [28,7] - [6] [54,4] - [5] [54,7] - [6] [106,4] - [5] [106,7] - [6] [206,4] - [5] [206,7] - [6] [400,4] - [6] [1,7] - [6] [2,7] - [5] [400,7] - [6] [4,7] - [5] [1,14] - [6] [7,7] - [6] [14,7] - [5] [2,14] - [6] [28,7] - [5] [4,14] - [5] [7,14] - [6] [54,7] - [5] [14,14] - [5] [28,14] - [6] [106,7] - [5] [54,14] - [5] [106,14] - [6] [206,7] - [5] [206,14] - [6] [400,7] - [6] [1,14] - [6] [2,14] - [6] [4,14] - [6] [7,14] - [6] [14,14] - [5] [400,14] - [6] [28,14] - [6] [54,14] - [5] [1,28] - [5] [2,28] - [6] [106,14] - [5] [4,28] - [5] [7,28] - [5] [14,28] - [5] [28,28] - [6] [206,14] - [5] [54,28] - [5] [106,28] - [5] [206,28] - [6] [400,14] - [6] [1,28] - [6] [2,28] - [6] [4,28] - [5] [400,28] - [6] [7,28] - [6] [14,28] - [5] [1,54] - [6] [28,28] - [5] [2,54] - [6] [54,28] - [6] [106,28] - [5] [4,54] - [6] [206,28] - [5] [7,54] - [5] [14,54] - [5] [28,54] - [5] [54,54] - [6] [400,28] 
- [5] [106,54] - [6] [1,54] - [6] [2,54] - [5] [206,54] - [6] [4,54] - [6] [7,54] - [5] [400,54] - [6] [14,54] - [6] [28,54] - [6] [54,54] - [5] [1,106] - [6] [106,54] - [6] [206,54] - [5] [2,106] - [5] [4,106] - [6] [400,54] - [5] [7,106] - [6] [1,106] - [5] [14,106] - [6] [2,106] - [5] [28,106] - [5] [54,106] - [6] [4,106] - [5] [106,106] - [6] [7,106] - [5] [206,106] - [6] [14,106] - [5] [400,106] - [6] [28,106] - [6] [54,106] - [6] [106,106] - [5] [1,206] - [6] [206,106] - [6] [400,106] - [5] [2,206] - [6] [1,206] - [5] [4,206] - [6] [2,206] - [5] [7,206] - [6] [4,206] - [5] [14,206] - [6] [7,206] - [5] [28,206] - [6] [14,206] - [5] [54,206] - [6] [28,206] - [5] [106,206] - [6] [54,206] - [5] [206,206] - [6] [106,206] - [5] [400,206] - [6] [206,206] - [6] [400,206] - [5] [1,400] - [6] [1,400] - [5] [2,400] - [6] [2,400] - [5] [4,400] - [6] [4,400] - [5] [7,400] - [6] [7,400] - [5] [14,400] - [6] [14,400] - [5] [28,400] - [6] [28,400] - [5] [54,400] - [6] [54,400] - [5] [106,400] - [6] [106,400] - [5] [206,400] - [5] [400,400] - [7] [1,1] - [7] [2,1] - [7] [4,1] - [7] [7,1] - [7] [14,1] - [6] [206,400] - [7] [28,1] - [7] [54,1] - [7] [106,1] - [7] [206,1] - [6] [400,400] - [8] [1,1] - [8] [2,1] - [8] [4,1] - [8] [7,1] - [8] [14,1] - [8] [28,1] - [7] [400,1] - [7] [1,2] - [7] [2,2] - [7] [4,2] - [7] [7,2] - [7] [14,2] - [8] [54,1] - [7] [28,2] - [7] [54,2] - [7] [106,2] - [8] [106,1] - [7] [206,2] - [8] [206,1] - [7] [400,2] - [8] [400,1] - [8] [1,2] - [8] [2,2] - [8] [4,2] - [7] [4,4] - [7] [7,4] - [7] [14,4] - [8] [28,2] - [7] [28,4] - [8] [54,2] - [7] [54,4] - [8] [106,2] - [7] [106,4] - [8] [206,2] - [7] [206,4] - [8] [400,2] - [8] [1,4] - [7] [400,4] - [8] [2,4] - [8] [4,4] - [7] [1,7] - [8] [7,4] - [8] [14,4] - [7] [2,7] - [7] [4,7] - [8] [28,4] - [7] [7,7] - [7] [14,7] - [7] [28,7] - [7] [54,7] - [8] [106,4] - [7] [106,7] - [8] [206,4] - [7] [206,7] - [8] [400,4] - [8] [1,7] - [8] [2,7] - [8] [4,7] - [8] [7,7] - [8] [14,7] - [8] [28,7] - [8] [54,7] - [7] 
[400,7] - [7] [1,14] - [8] [106,7] - [7] [2,14] - [7] [4,14] - [7] [7,14] - [7] [14,14] - [7] [28,14] - [7] [54,14] - [8] [206,7] - [7] [106,14] - [7] [206,14] - [8] [400,7] - [8] [1,14] - [8] [2,14] - [8] [4,14] - [8] [7,14] - [8] [14,14] - [8] [28,14] - [8] [54,14] - [7] [400,14] - [8] [106,14] - [7] [1,28] - [7] [2,28] - [7] [4,28] - [8] [206,14] - [7] [7,28] - [7] [14,28] - [7] [28,28] - [7] [54,28] - [7] [106,28] - [8] [400,14] - [7] [206,28] - [8] [1,28] - [8] [2,28] - [8] [4,28] - [8] [7,28] - [8] [14,28] - [8] [28,28] - [8] [54,28] - [7] [400,28] - [8] [106,28] - [7] [1,54] - [8] [206,28] - [7] [2,54] - [7] [4,54] - [7] [7,54] - [8] [400,28] - [7] [14,54] - [8] [1,54] - [7] [28,54] - [7] [54,54] - [8] [2,54] - [7] [106,54] - [8] [4,54] - [7] [206,54] - [8] [7,54] - [8] [14,54] - [8] [28,54] - [8] [54,54] - [8] [106,54] - [7] [400,54] - [8] [206,54] - [7] [1,106] - [7] [2,106] - [8] [400,54] - [7] [4,106] - [8] [1,106] - [7] [7,106] - [7] [14,106] - [8] [2,106] - [8] [4,106] - [7] [28,106] - [8] [7,106] - [7] [54,106] - [7] [106,106] - [8] [14,106] - [7] [206,106] - [8] [28,106] - [7] [400,106] - [8] [54,106] - [8] [106,106] - [8] [206,106] - [7] [1,206] - [8] [400,106] - [7] [2,206] - [8] [1,206] - [7] [4,206] - [8] [2,206] - [7] [7,206] - [8] [4,206] - [7] [14,206] - [8] [7,206] - [7] [28,206] - [8] [14,206] - [7] [54,206] - [8] [28,206] - [7] [106,206] - [7] [206,206] - [8] [54,206] - [7] [400,206] - [8] [106,206] - [8] [206,206] - [8] [400,206] - [7] [1,400] - [8] [1,400] - [7] [2,400] - [8] [2,400] - [7] [4,400] - [8] [4,400] - [7] [7,400] - [8] [7,400] - [7] [14,400] - [8] [14,400] - [7] [28,400] - [8] [28,400] - [7] [54,400] - [8] [54,400] - [7] [106,400] - [7] [206,400] - [8] [106,400] - [7] [400,400] - [9] [1,1] - [9] [2,1] - [9] [4,1] - [9] [7,1] - [9] [14,1] - [9] [28,1] - [9] [54,1] - [8] [206,400] - [9] [106,1] - [9] [206,1] - [9] [400,1] - [9] [1,2] - [9] [2,2] - [9] [4,2] - [9] [7,2] - [9] [14,2] - [9] [28,2] - [8] [400,400] - [10] [1,1] - 
[10] [2,1] - [9] [54,2] - [10] [7,1] - [10] [14,1] - [9] [106,2] - [10] [28,1] - [9] [206,2] - [10] [54,1] - [10] [106,1] - [9] [400,2] - [9] [1,4] - [9] [2,4] - [10] [206,1] - [9] [7,4] - [9] [14,4] - [9] [28,4] - [9] [54,4] - [9] [106,4] - [9] [206,4] - [10] [400,1] - [10] [1,2] - [10] [2,2] - [10] [4,2] - [10] [7,2] - [10] [14,2] - [10] [28,2] - [10] [54,2] - [10] [106,2] - [9] [400,4] - [9] [1,7] - [9] [2,7] - [9] [4,7] - [9] [7,7] - [9] [14,7] - [10] [206,2] - [9] [28,7] - [9] [54,7] - [9] [106,7] - [10] [400,2] - [10] [1,4] - [10] [2,4] - [10] [4,4] - [10] [7,4] - [10] [14,4] - [9] [206,7] - [10] [28,4] - [10] [54,4] - [10] [106,4] - [10] [206,4] - [9] [400,7] - [9] [1,14] - [9] [2,14] - [9] [4,14] - [9] [7,14] - [9] [14,14] - [9] [28,14] - [9] [54,14] - [10] [400,4] - [10] [1,7] - [10] [2,7] - [10] [4,7] - [10] [7,7] - [10] [14,7] - [10] [28,7] - [10] [54,7] - [9] [206,14] - [10] [106,7] - [10] [206,7] - [9] [400,14] - [9] [1,28] - [9] [2,28] - [9] [4,28] - [10] [400,7] - [10] [1,14] - [10] [2,14] - [9] [14,28] - [10] [4,14] - [10] [7,14] - [9] [28,28] - [10] [14,14] - [10] [28,14] - [10] [54,14] - [9] [106,28] - [10] [106,14] - [9] [206,28] - [9] [400,28] - [10] [400,14] - [10] [1,28] - [9] [1,54] - [10] [2,28] - [10] [4,28] - [10] [7,28] - [9] [2,54] - [10] [14,28] - [10] [28,28] - [10] [54,28] - [9] [4,54] - [10] [106,28] - [9] [7,54] - [10] [206,28] - [9] [14,54] - [9] [28,54] - [9] [54,54] - [10] [400,28] - [9] [106,54] - [10] [1,54] - [9] [206,54] - [10] [2,54] - [10] [4,54] - [10] [7,54] - [9] [400,54] - [10] [14,54] - [10] [28,54] - [9] [1,106] - [10] [54,54] - [10] [106,54] - [9] [2,106] - [10] [206,54] - [9] [4,106] - [10] [400,54] - [9] [7,106] - [10] [1,106] - [9] [14,106] - [10] [2,106] - [9] [28,106] - [10] [4,106] - [9] [54,106] - [9] [106,106] - [10] [7,106] - [9] [206,106] - [10] [14,106] - [9] [400,106] - [10] [28,106] - [10] [54,106] - [10] [106,106] - [9] [1,206] - [10] [206,106] - [10] [400,106] - [9] [2,206] - [10] [1,206] - [9] [4,206] 
- [10] [2,206] - [9] [7,206] - [10] [4,206] - [9] [14,206] - [10] [7,206] - [9] [28,206] - [10] [14,206] - [9] [54,206] - [10] [28,206] - [9] [106,206] - [10] [54,206] - [9] [206,206] - [10] [106,206] - [9] [400,206] - [10] [206,206] - [10] [400,206] - [9] [1,400] - [10] [1,400] - [9] [2,400] - [10] [2,400] - [9] [4,400] - [10] [4,400] - [9] [7,400] - [10] [7,400] - [9] [14,400] - [10] [14,400] - [9] [28,400] - [9] [54,400] - [10] [28,400] - [10] [54,400] - [9] [106,400] - [9] [206,400] - [10] [106,400] - [9] [400,400] - [11] [1,1] - [11] [2,1] - [11] [4,1] - [11] [7,1] - [11] [14,1] - [11] [28,1] - [10] [206,400] - [11] [54,1] - [11] [106,1] - [11] [206,1] - [11] [400,1] - [11] [1,2] - [11] [2,2] - [11] [4,2] - [11] [7,2] - [11] [14,2] - [11] [28,2] - [10] [400,400] - [12] [1,1] - [12] [2,1] - [12] [4,1] - [12] [7,1] - [11] [54,2] - [12] [14,1] - [12] [28,1] - [12] [54,1] - [12] [106,1] - [11] [106,2] - [11] [206,2] - [12] [206,1] - [11] [400,2] - [11] [1,4] - [11] [2,4] - [11] [4,4] - [12] [400,1] - [12] [1,2] - [12] [2,2] - [12] [4,2] - [12] [7,2] - [12] [14,2] - [11] [28,4] - [12] [28,2] - [11] [54,4] - [12] [54,2] - [12] [106,2] - [11] [106,4] - [12] [206,2] - [11] [206,4] - [11] [400,4] - [11] [1,7] - [11] [2,7] - [11] [4,7] - [11] [7,7] - [11] [14,7] - [11] [28,7] - [11] [54,7] - [12] [400,2] - [12] [1,4] - [12] [2,4] - [12] [4,4] - [12] [7,4] - [12] [14,4] - [12] [28,4] - [12] [54,4] - [12] [106,4] - [11] [206,7] - [12] [206,4] - [11] [400,7] - [11] [1,14] - [11] [2,14] - [11] [4,14] - [11] [7,14] - [12] [400,4] - [11] [14,14] - [12] [1,7] - [11] [28,14] - [12] [2,7] - [12] [4,7] - [12] [7,7] - [12] [14,7] - [11] [54,14] - [12] [28,7] - [12] [54,7] - [11] [106,14] - [12] [106,7] - [11] [206,14] - [12] [206,7] - [11] [400,14] - [11] [1,28] - [11] [2,28] - [11] [4,28] - [12] [400,7] - [12] [1,14] - [11] [7,28] - [11] [14,28] - [12] [2,14] - [11] [28,28] - [12] [4,14] - [12] [7,14] - [12] [14,14] - [12] [28,14] - [11] [54,28] - [12] [54,14] - [11] [106,28] - 
[12] [106,14] - [11] [206,28] - [12] [206,14] - [11] [400,28] - [12] [400,14] - [11] [1,54] - [12] [1,28] - [12] [2,28] - [12] [4,28] - [12] [7,28] - [11] [4,54] - [12] [14,28] - [12] [28,28] - [11] [7,54] - [12] [54,28] - [12] [106,28] - [11] [14,54] - [11] [28,54] - [12] [206,28] - [11] [54,54] - [11] [106,54] - [11] [206,54] - [12] [400,28] - [12] [1,54] - [12] [2,54] - [11] [400,54] - [12] [4,54] - [11] [1,106] - [12] [7,54] - [12] [14,54] - [12] [28,54] - [11] [2,106] - [12] [54,54] - [12] [106,54] - [11] [4,106] - [11] [7,106] - [12] [400,54] - [11] [14,106] - [12] [1,106] - [11] [28,106] - [12] [2,106] - [11] [54,106] - [11] [106,106] - [11] [206,106] - [12] [7,106] - [11] [400,106] - [12] [14,106] - [12] [28,106] - [11] [1,206] - [12] [54,106] - [12] [106,106] - [12] [206,106] - [11] [2,206] - [12] [400,106] - [11] [4,206] - [12] [1,206] - [11] [7,206] - [12] [2,206] - [11] [14,206] - [12] [4,206] - [11] [28,206] - [12] [7,206] - [11] [54,206] - [12] [14,206] - [11] [106,206] - [11] [206,206] - [12] [28,206] - [11] [400,206] - [12] [54,206] - [12] [106,206] - [12] [206,206] - [11] [1,400] - [12] [400,206] - [11] [2,400] - [12] [1,400] - [11] [4,400] - [12] [2,400] - [11] [7,400] - [12] [4,400] - [11] [14,400] - [12] [7,400] - [11] [28,400] - [12] [14,400] - [11] [54,400] - [12] [28,400] - [11] [106,400] - [12] [54,400] - [12] [106,400] - [11] [206,400] - [11] [400,400] - [13] [1,1] - [13] [2,1] - [13] [4,1] - [13] [7,1] - [13] [14,1] - [13] [28,1] - [13] [54,1] - [13] [106,1] - [13] [206,1] - [12] [400,400] - [14] [1,1] - [14] [2,1] - [14] [4,1] - [14] [7,1] - [14] [14,1] - [14] [28,1] - [13] [400,1] - [13] [1,2] - [13] [2,2] - [13] [4,2] - [14] [54,1] - [13] [7,2] - [13] [14,2] - [13] [28,2] - [13] [54,2] - [14] [106,1] - [13] [106,2] - [14] [206,1] - [13] [206,2] - [13] [400,2] - [13] [1,4] - [13] [2,4] - [13] [4,4] - [13] [7,4] - [13] [14,4] - [13] [28,4] - [13] [54,4] - [14] [400,1] - [14] [1,2] - [14] [2,2] - [14] [4,2] - [14] [7,2] - [14] [14,2] - 
[14] [28,2] - [14] [54,2] - [14] [106,2] - [13] [206,4] - [14] [206,2] - [13] [400,4] - [13] [1,7] - [13] [2,7] - [13] [4,7] - [13] [7,7] - [13] [14,7] - [13] [28,7] - [13] [54,7] - [14] [400,2] - [14] [1,4] - [14] [2,4] - [14] [4,4] - [14] [7,4] - [14] [14,4] - [13] [106,7] - [14] [28,4] - [14] [54,4] - [13] [206,7] - [14] [106,4] - [14] [206,4] - [13] [400,7] - [13] [1,14] - [13] [2,14] - [13] [4,14] - [13] [7,14] - [13] [14,14] - [13] [28,14] - [14] [400,4] - [13] [54,14] - [14] [1,7] - [14] [2,7] - [14] [4,7] - [14] [7,7] - [14] [14,7] - [13] [106,14] - [14] [28,7] - [14] [54,7] - [14] [106,7] - [13] [206,14] - [14] [206,7] - [13] [400,14] - [13] [1,28] - [13] [2,28] - [13] [4,28] - [14] [400,7] - [14] [1,14] - [13] [7,28] - [14] [2,14] - [14] [4,14] - [13] [14,28] - [14] [7,14] - [14] [14,14] - [13] [28,28] - [14] [28,14] - [14] [54,14] - [13] [54,28] - [14] [106,14] - [13] [106,28] - [14] [206,14] - [13] [206,28] - [14] [400,14] - [14] [1,28] - [13] [400,28] - [14] [2,28] - [14] [4,28] - [13] [1,54] - [14] [14,28] - [14] [28,28] - [13] [2,54] - [14] [54,28] - [14] [106,28] - [13] [4,54] - [13] [7,54] - [14] [206,28] - [13] [14,54] - [13] [28,54] - [14] [400,28] - [13] [54,54] - [13] [106,54] - [14] [1,54] - [13] [206,54] - [14] [2,54] - [14] [4,54] - [14] [7,54] - [13] [400,54] - [14] [14,54] - [14] [28,54] - [14] [54,54] - [13] [1,106] - [14] [106,54] - [14] [206,54] - [13] [2,106] - [13] [4,106] - [14] [400,54] - [13] [7,106] - [14] [1,106] - [14] [2,106] - [13] [14,106] - [14] [4,106] - [13] [54,106] - [14] [7,106] - [13] [106,106] - [14] [14,106] - [13] [206,106] - [14] [28,106] - [13] [400,106] - [14] [54,106] - [14] [106,106] - [14] [206,106] - [13] [1,206] - [14] [400,106] - [13] [2,206] - [14] [1,206] - [13] [4,206] - [14] [2,206] - [14] [4,206] - [13] [7,206] - [14] [7,206] - [13] [14,206] - [14] [14,206] - [13] [28,206] - [14] [28,206] - [13] [54,206] - [14] [54,206] - [13] [106,206] - [14] [106,206] - [14] [206,206] - [13] [400,206] - [14] 
[400,206] - [13] [1,400] - [14] [1,400] - [13] [2,400] - [14] [2,400] - [13] [4,400] - [14] [4,400] - [13] [7,400] - [14] [7,400] - [13] [14,400] - [14] [14,400] - [13] [28,400] - [14] [28,400] - [13] [54,400] - [14] [54,400] - [13] [106,400] - [14] [106,400] - [13] [206,400] - [14] [206,400] - [13] [400,400] - [15] [1,1] - [15] [2,1] - [15] [4,1] - [15] [7,1] - [15] [14,1] - [15] [28,1] - [15] [54,1] - [14] [400,400] - [16] [1,1] - [16] [2,1] - [16] [4,1] - [16] [7,1] - [16] [14,1] - [16] [28,1] - [16] [54,1] - [15] [206,1] - [16] [106,1] - [16] [206,1] - [15] [400,1] - [15] [1,2] - [15] [2,2] - [15] [4,2] - [15] [7,2] - [15] [14,2] - [15] [28,2] - [16] [1,2] - [16] [2,2] - [16] [4,2] - [16] [7,2] - [16] [14,2] - [15] [54,2] - [16] [28,2] - [16] [54,2] - [15] [106,2] - [16] [106,2] - [15] [206,2] - [16] [206,2] - [15] [400,2] - [15] [1,4] - [15] [2,4] - [15] [4,4] - [15] [7,4] - [15] [14,4] - [16] [400,2] - [16] [1,4] - [16] [2,4] - [16] [4,4] - [16] [7,4] - [16] [14,4] - [15] [54,4] - [16] [28,4] - [16] [54,4] - [15] [106,4] - [15] [206,4] - [16] [106,4] - [16] [206,4] - [15] [400,4] - [15] [1,7] - [15] [2,7] - [15] [4,7] - [15] [7,7] - [15] [14,7] - [15] [28,7] - [15] [54,7] - [15] [106,7] - [16] [400,4] - [16] [1,7] - [16] [2,7] - [16] [4,7] - [16] [7,7] - [16] [14,7] - [16] [28,7] - [16] [54,7] - [15] [206,7] - [16] [106,7] - [16] [206,7] - [15] [400,7] - [15] [1,14] - [15] [2,14] - [15] [4,14] - [15] [7,14] - [16] [400,7] - [15] [14,14] - [15] [28,14] - [16] [2,14] - [15] [54,14] - [16] [4,14] - [16] [7,14] - [16] [14,14] - [15] [106,14] - [16] [28,14] - [16] [54,14] - [15] [206,14] - [16] [106,14] - [16] [206,14] - [15] [400,14] - [15] [1,28] - [15] [2,28] - [15] [4,28] - [16] [400,14] - [15] [7,28] - [16] [1,28] - [15] [14,28] - [16] [2,28] - [15] [54,28] - [16] [4,28] - [15] [106,28] - [16] [14,28] - [16] [28,28] - [15] [206,28] - [16] [54,28] - [16] [106,28] - [16] [206,28] - [15] [400,28] - [15] [1,54] - [16] [400,28] - [15] [2,54] - [16] [1,54] - [15] 
[4,54] - [16] [2,54] - [15] [7,54] - [16] [4,54] - [15] [14,54] - [16] [7,54] - [16] [14,54] - [15] [28,54] - [16] [28,54] - [15] [54,54] - [16] [54,54] - [15] [106,54] - [16] [106,54] - [15] [206,54] - [16] [206,54] - [15] [400,54] - [16] [400,54] - [15] [1,106] - [16] [1,106] - [15] [2,106] - [16] [2,106] - [15] [4,106] - [16] [4,106] - [15] [7,106] - [16] [7,106] - [15] [14,106] - [16] [14,106] - [15] [28,106] - [15] [54,106] - [16] [28,106] - [15] [106,106] - [16] [54,106] - [15] [206,106] - [16] [106,106] - [16] [206,106] - [15] [400,106] - [16] [400,106] - [15] [1,206] - [16] [1,206] - [15] [2,206] - [16] [2,206] - [15] [4,206] - [16] [4,206] - [15] [7,206] - [16] [7,206] - [15] [14,206] - [16] [14,206] - [15] [28,206] - [16] [28,206] - [15] [54,206] - [16] [54,206] - [15] [106,206] - [16] [106,206] - [15] [206,206] - [16] [206,206] - [15] [400,206] - [16] [400,206] - [15] [1,400] - [16] [1,400] - [15] [2,400] - [16] [2,400] - [15] [4,400] - [16] [4,400] - [15] [7,400] - [16] [7,400] - [15] [14,400] - [16] [14,400] - [15] [28,400] - [16] [28,400] - [16] [54,400] - [15] [54,400] - [16] [106,400] - [15] [106,400] - [16] [206,400] - [15] [206,400] - [16] [400,400] - [18] [1,1] - [18] [2,1] - [18] [4,1] - [18] [7,1] - [18] [14,1] - [18] [28,1] - [18] [54,1] - [15] [400,400] - [18] [106,1] - [17] [1,1] - [17] [2,1] - [17] [4,1] - [17] [7,1] - [17] [14,1] - [17] [28,1] - [18] [206,1] - [17] [54,1] - [17] [106,1] - [18] [400,1] - [18] [1,2] - [18] [2,2] - [18] [4,2] - [18] [7,2] - [18] [14,2] - [18] [28,2] - [17] [206,1] - [18] [54,2] - [18] [106,2] - [18] [206,2] - [17] [400,1] - [17] [1,2] - [17] [2,2] - [17] [4,2] - [17] [7,2] - [17] [14,2] - [17] [28,2] - [17] [54,2] - [17] [106,2] - [18] [400,2] - [18] [1,4] - [18] [2,4] - [18] [4,4] - [18] [7,4] - [18] [14,4] - [17] [206,2] - [18] [28,4] - [18] [54,4] - [18] [106,4] - [18] [206,4] - [17] [400,2] - [17] [1,4] - [17] [2,4] - [17] [4,4] - [17] [7,4] - [17] [14,4] - [17] [28,4] - [17] [54,4] - [17] [106,4] - [18] 
[400,4] - [18] [1,7] - [18] [2,7] - [18] [4,7] - [18] [7,7] - [18] [14,7] - [18] [28,7] - [17] [206,4] - [18] [54,7] - [18] [106,7] - [18] [206,7] - [17] [400,4] - [17] [1,7] - [17] [2,7] - [17] [4,7] - [17] [7,7] - [17] [14,7] - [17] [28,7] - [17] [54,7] - [17] [106,7] - [18] [400,7] - [18] [1,14] - [18] [2,14] - [17] [206,7] - [18] [4,14] - [18] [7,14] - [18] [14,14] - [18] [28,14] - [18] [54,14] - [18] [106,14] - [17] [400,7] - [17] [1,14] - [18] [206,14] - [17] [2,14] - [17] [4,14] - [17] [7,14] - [17] [14,14] - [17] [28,14] - [17] [54,14] - [17] [106,14] - [18] [400,14] - [18] [1,28] - [17] [206,14] - [18] [2,28] - [18] [4,28] - [18] [7,28] - [18] [14,28] - [18] [28,28] - [18] [54,28] - [17] [400,14] - [18] [106,28] - [17] [1,28] - [17] [2,28] - [17] [4,28] - [18] [206,28] - [17] [7,28] - [17] [14,28] - [17] [28,28] - [17] [54,28] - [17] [106,28] - [18] [400,28] - [17] [206,28] - [18] [1,54] - [18] [2,54] - [18] [4,54] - [17] [400,28] - [18] [7,54] - [17] [1,54] - [18] [14,54] - [17] [2,54] - [18] [28,54] - [18] [54,54] - [17] [4,54] - [18] [106,54] - [17] [7,54] - [18] [206,54] - [17] [14,54] - [17] [28,54] - [17] [54,54] - [18] [400,54] - [17] [106,54] - [17] [206,54] - [18] [1,106] - [18] [2,106] - [17] [400,54] - [18] [4,106] - [17] [1,106] - [18] [7,106] - [17] [2,106] - [18] [14,106] - [17] [4,106] - [18] [28,106] - [17] [7,106] - [18] [54,106] - [18] [106,106] - [17] [14,106] - [18] [206,106] - [17] [28,106] - [18] [400,106] - [17] [54,106] - [17] [106,106] - [18] [1,206] - [17] [206,106] - [17] [400,106] - [18] [2,206] - [17] [1,206] - [18] [4,206] - [17] [2,206] - [18] [7,206] - [17] [4,206] - [18] [14,206] - [17] [7,206] - [18] [28,206] - [18] [54,206] - [17] [14,206] - [18] [106,206] - [17] [28,206] - [18] [206,206] - [18] [400,206] - [17] [54,206] - [17] [106,206] - [17] [206,206] - [18] [1,400] - [17] [400,206] - [18] [2,400] - [17] [1,400] - [17] [2,400] - [18] [4,400] - [17] [4,400] - [18] [7,400] - [17] [7,400] - [18] [14,400] - [17] [14,400] - 
[18] [28,400] - [17] [28,400] - [18] [54,400] - [17] [54,400] - [18] [106,400] - [17] [106,400] - [18] [206,400] - [17] [206,400] - [18] [400,400] - [20] [1,1] - [20] [2,1] - [20] [4,1] - [20] [7,1] - [20] [14,1] - [20] [28,1] - [20] [54,1] - [17] [400,400] - [19] [1,1] - [19] [2,1] - [19] [4,1] - [19] [7,1] - [20] [106,1] - [19] [14,1] - [19] [28,1] - [19] [54,1] - [19] [106,1] - [20] [206,1] - [19] [206,1] - [20] [400,1] - [20] [1,2] - [20] [2,2] - [20] [4,2] - [20] [7,2] - [20] [14,2] - [20] [28,2] - [20] [54,2] - [19] [400,1] - [19] [1,2] - [19] [2,2] - [19] [4,2] - [19] [7,2] - [19] [14,2] - [20] [106,2] - [19] [28,2] - [19] [54,2] - [20] [206,2] - [19] [106,2] - [19] [206,2] - [20] [400,2] - [20] [1,4] - [20] [2,4] - [20] [4,4] - [20] [7,4] - [20] [14,4] - [20] [28,4] - [20] [54,4] - [20] [106,4] - [19] [400,2] - [19] [1,4] - [19] [2,4] - [19] [4,4] - [19] [7,4] - [19] [14,4] - [19] [28,4] - [20] [206,4] - [19] [54,4] - [19] [106,4] - [19] [206,4] - [20] [400,4] - [20] [1,7] - [19] [400,4] - [20] [2,7] - [19] [1,7] - [20] [4,7] - [19] [2,7] - [20] [7,7] - [20] [14,7] - [19] [4,7] - [19] [7,7] - [20] [28,7] - [19] [14,7] - [19] [28,7] - [20] [54,7] - [19] [54,7] - [20] [106,7] - [19] [106,7] - [19] [206,7] - [20] [206,7] - [19] [400,7] - [20] [400,7] - [19] [1,14] - [20] [1,14] - [19] [2,14] - [20] [2,14] - [19] [4,14] - [20] [4,14] - [19] [7,14] - [20] [7,14] - [19] [14,14] - [20] [14,14] - [19] [28,14] - [20] [28,14] - [19] [54,14] - [20] [54,14] - [19] [106,14] - [20] [106,14] - [20] [206,14] - [19] [206,14] - [20] [400,14] - [20] [1,28] - [19] [400,14] - [20] [2,28] - [19] [1,28] - [20] [4,28] - [20] [7,28] - [19] [4,28] - [20] [14,28] - [19] [7,28] - [20] [28,28] - [19] [14,28] - [20] [54,28] - [19] [28,28] - [19] [54,28] - [20] [106,28] - [19] [106,28] - [20] [206,28] - [19] [206,28] - [20] [400,28] - [20] [1,54] - [19] [400,28] - [19] [1,54] - [19] [2,54] - [20] [4,54] - [19] [4,54] - [20] [7,54] - [19] [7,54] - [20] [14,54] - [19] [14,54] - [20] 
[28,54] - [19] [28,54] - [20] [54,54] - [19] [54,54] - [20] [106,54] - [19] [106,54] - [19] [206,54] - [20] [206,54] - [20] [400,54] - [19] [400,54] - [19] [1,106] - [20] [1,106] - [19] [2,106] - [20] [2,106] - [19] [4,106] - [20] [4,106] - [20] [7,106] - [19] [7,106] - [20] [14,106] - [19] [14,106] - [20] [28,106] - [19] [28,106] - [20] [54,106] - [19] [54,106] - [20] [106,106] - [19] [106,106] - [20] [206,106] - [19] [206,106] - [20] [400,106] - [19] [400,106] - [20] [1,206] - [19] [1,206] - [20] [2,206] - [19] [2,206] - [20] [4,206] - [19] [4,206] - [20] [7,206] - [19] [7,206] - [20] [14,206] - [19] [14,206] - [20] [28,206] - [19] [28,206] - [20] [54,206] - [19] [54,206] - [20] [106,206] - [19] [106,206] - [20] [206,206] - [19] [206,206] - [20] [400,206] - [19] [400,206] - [20] [1,400] - [19] [1,400] - [20] [2,400] - [19] [2,400] - [20] [4,400] - [19] [4,400] - [20] [7,400] - [19] [7,400] - [20] [14,400] - [19] [14,400] - [19] [28,400] - [20] [28,400] - [19] [54,400] - [20] [54,400] - [19] [106,400] - [20] [106,400] - [19] [206,400] - [20] [206,400] - [19] [400,400] - [21] [1,1] - [21] [2,1] - [21] [4,1] - [21] [7,1] - [21] [14,1] - [20] [400,400] - [21] [28,1] - [22] [1,1] - [22] [2,1] - [22] [4,1] - [22] [7,1] - [22] [14,1] - [21] [54,1] - [22] [28,1] - [22] [54,1] - [21] [106,1] - [22] [106,1] - [21] [206,1] - [22] [206,1] - [21] [400,1] - [21] [1,2] - [21] [2,2] - [21] [4,2] - [21] [7,2] - [21] [14,2] - [22] [400,1] - [22] [1,2] - [21] [28,2] - [22] [2,2] - [22] [4,2] - [22] [7,2] - [22] [14,2] - [21] [54,2] - [22] [28,2] - [22] [54,2] - [21] [106,2] - [22] [106,2] - [21] [206,2] - [22] [206,2] - [21] [400,2] - [21] [1,4] - [21] [2,4] - [21] [4,4] - [21] [7,4] - [21] [14,4] - [21] [28,4] - [21] [54,4] - [22] [400,2] - [22] [1,4] - [22] [2,4] - [21] [106,4] - [22] [7,4] - [22] [14,4] - [22] [28,4] - [22] [54,4] - [21] [206,4] - [22] [106,4] - [22] [206,4] - [21] [400,4] - [21] [1,7] - [21] [2,7] - [21] [4,7] - [21] [7,7] - [21] [14,7] - [21] [28,7] - [21] 
[54,7] - [21] [106,7] - [22] [400,4] - [22] [1,7] - [21] [206,7] - [22] [2,7] - [22] [4,7] - [22] [7,7] - [22] [14,7] - [22] [28,7] - [22] [54,7] - [22] [106,7] - [21] [400,7] - [21] [1,14] - [22] [206,7] - [21] [2,14] - [21] [4,14] - [21] [7,14] - [21] [14,14] - [21] [28,14] - [21] [54,14] - [21] [106,14] - [22] [400,7] - [22] [1,14] - [22] [2,14] - [22] [4,14] - [22] [7,14] - [22] [14,14] - [22] [28,14] - [21] [206,14] - [22] [54,14] - [22] [106,14] - [22] [206,14] - [21] [400,14] - [21] [1,28] - [21] [2,28] - [21] [4,28] - [21] [7,28] - [22] [400,14] - [21] [14,28] - [22] [1,28] - [21] [28,28] - [22] [2,28] - [22] [4,28] - [21] [106,28] - [22] [7,28] - [22] [14,28] - [22] [28,28] - [22] [54,28] - [21] [206,28] - [22] [106,28] - [22] [206,28] - [21] [400,28] - [22] [400,28] - [21] [1,54] - [22] [1,54] - [21] [2,54] - [22] [2,54] - [21] [4,54] - [22] [4,54] - [21] [7,54] - [22] [14,54] - [21] [14,54] - [22] [28,54] - [21] [28,54] - [21] [54,54] - [22] [54,54] - [21] [106,54] - [22] [106,54] - [21] [206,54] - [22] [206,54] - [21] [400,54] - [22] [400,54] - [21] [1,106] - [22] [1,106] - [21] [2,106] - [22] [2,106] - [21] [4,106] - [22] [4,106] - [21] [7,106] - [22] [7,106] - [21] [14,106] - [22] [14,106] - [21] [28,106] - [21] [54,106] - [22] [28,106] - [21] [106,106] - [22] [54,106] - [21] [206,106] - [22] [106,106] - [21] [400,106] - [22] [206,106] - [22] [400,106] - [21] [1,206] - [22] [1,206] - [21] [2,206] - [21] [4,206] - [22] [2,206] - [21] [7,206] - [22] [4,206] - [21] [14,206] - [22] [7,206] - [21] [28,206] - [22] [14,206] - [21] [54,206] - [22] [28,206] - [21] [106,206] - [22] [54,206] - [21] [206,206] - [22] [106,206] - [21] [400,206] - [22] [206,206] - [22] [400,206] - [21] [1,400] - [22] [1,400] - [21] [2,400] - [22] [2,400] - [21] [4,400] - [22] [4,400] - [22] [7,400] - [21] [7,400] - [22] [14,400] - [21] [14,400] - [21] [28,400] - [22] [28,400] - [21] [54,400] - [22] [54,400] - [21] [106,400] - [22] [106,400] - [21] [206,400] - [22] [206,400] - [21] 
[400,400] - [23] [1,1] - [23] [2,1] - [23] [4,1] - [23] [7,1] - [23] [14,1] - [23] [28,1] - [22] [400,400] - [24] [1,1] - [23] [54,1] - [24] [2,1] - [24] [4,1] - [24] [7,1] - [24] [14,1] - [24] [28,1] - [23] [106,1] - [24] [54,1] - [24] [106,1] - [23] [206,1] - [24] [206,1] - [23] [400,1] - [23] [1,2] - [23] [2,2] - [23] [4,2] - [23] [7,2] - [24] [400,1] - [24] [1,2] - [23] [14,2] - [24] [2,2] - [24] [4,2] - [24] [7,2] - [23] [28,2] - [24] [14,2] - [24] [28,2] - [23] [54,2] - [24] [54,2] - [23] [106,2] - [24] [106,2] - [23] [206,2] - [24] [206,2] - [23] [400,2] - [23] [1,4] - [23] [2,4] - [23] [4,4] - [23] [7,4] - [23] [14,4] - [23] [28,4] - [23] [54,4] - [24] [400,2] - [24] [1,4] - [24] [2,4] - [24] [4,4] - [24] [7,4] - [24] [14,4] - [24] [28,4] - [23] [106,4] - [24] [54,4] - [24] [106,4] - [23] [206,4] - [24] [206,4] - [23] [400,4] - [24] [400,4] - [24] [1,7] - [23] [1,7] - [23] [2,7] - [24] [4,7] - [23] [4,7] - [24] [7,7] - [23] [7,7] - [24] [14,7] - [23] [14,7] - [24] [28,7] - [23] [28,7] - [24] [54,7] - [23] [54,7] - [24] [106,7] - [23] [106,7] - [24] [206,7] - [23] [206,7] - [24] [400,7] - [23] [400,7] - [24] [1,14] - [23] [1,14] - [24] [2,14] - [23] [2,14] - [24] [4,14] - [24] [7,14] - [23] [4,14] - [24] [14,14] - [23] [7,14] - [24] [28,14] - [23] [14,14] - [23] [28,14] - [24] [54,14] - [23] [54,14] - [24] [106,14] - [23] [106,14] - [24] [206,14] - [23] [206,14] - [24] [400,14] - [23] [400,14] - [24] [1,28] - [23] [1,28] - [24] [2,28] - [23] [2,28] - [24] [4,28] - [23] [4,28] - [24] [7,28] - [23] [7,28] - [24] [14,28] - [24] [28,28] - [23] [14,28] - [24] [54,28] - [23] [28,28] - [23] [54,28] - [24] [106,28] - [23] [106,28] - [24] [206,28] - [23] [206,28] - [24] [400,28] - [23] [400,28] - [24] [1,54] - [23] [1,54] - [24] [2,54] - [23] [2,54] - [24] [4,54] - [23] [4,54] - [24] [7,54] - [23] [7,54] - [24] [14,54] - [23] [14,54] - [24] [28,54] - [23] [28,54] - [24] [54,54] - [23] [54,54] - [24] [106,54] - [23] [106,54] - [24] [206,54] - [23] [206,54] - [23] 
[400,54] - [24] [400,54] - [23] [1,106] - [24] [1,106] - [23] [2,106] - [24] [2,106] - [23] [4,106] - [24] [4,106] - [23] [7,106] - [24] [7,106] - [23] [14,106] - [24] [14,106] - [23] [28,106] - [24] [28,106] - [23] [54,106] - [24] [54,106] - [23] [106,106] - [24] [106,106] - [23] [206,106] - [24] [206,106] - [23] [400,106] - [24] [400,106] - [23] [1,206] - [24] [1,206] - [24] [2,206] - [23] [2,206] - [24] [4,206] - [23] [4,206] - [24] [7,206] - [23] [7,206] - [23] [14,206] - [24] [14,206] - [23] [28,206] - [24] [28,206] - [23] [54,206] - [24] [54,206] - [23] [106,206] - [24] [106,206] - [23] [206,206] - [24] [206,206] - [23] [400,206] - [24] [400,206] - [23] [1,400] - [24] [1,400] - [23] [2,400] - [24] [2,400] - [23] [4,400] - [24] [4,400] - [23] [7,400] - [24] [7,400] - [24] [14,400] - [23] [14,400] - [23] [28,400] - [24] [28,400] - [23] [54,400] - [24] [54,400] - [23] [106,400] - [24] [106,400] - [23] [206,400] - [24] [206,400] - [23] [400,400] - [24] [400,400] - [25] [1,1] - [25] [2,1] - [25] [4,1] - [25] [7,1] - [25] [14,1] - [25] [28,1] - [25] [54,1] - [25] [106,1] - [25] [206,1] - [25] [400,1] - [25] [1,2] - [25] [2,2] - [25] [4,2] - [25] [7,2] - [25] [14,2] - [25] [28,2] - [25] [54,2] - [25] [106,2] - [25] [206,2] - [25] [400,2] - [25] [1,4] - [25] [2,4] - [25] [4,4] - [25] [7,4] - [25] [14,4] - [25] [28,4] - [25] [54,4] - [25] [106,4] - [25] [206,4] - [25] [400,4] - [25] [1,7] - [25] [2,7] - [25] [4,7] - [25] [7,7] - [25] [14,7] - [25] [28,7] - [25] [54,7] - [25] [106,7] - [25] [206,7] - [25] [400,7] - [25] [1,14] - [25] [2,14] - [25] [4,14] - [25] [7,14] - [25] [14,14] - [25] [28,14] - [25] [54,14] - [25] [106,14] - [25] [206,14] - [25] [400,14] - [25] [1,28] - [25] [2,28] - [25] [4,28] - [25] [7,28] - [25] [14,28] - [25] [28,28] - [25] [54,28] - [25] [106,28] - [25] [206,28] - [25] [400,28] - [25] [1,54] - [25] [2,54] - [25] [4,54] - [25] [7,54] - [25] [14,54] - [25] [28,54] - [25] [54,54] - [25] [106,54] - [25] [206,54] - [25] [400,54] - [25] [1,106] - 
[25] [2,106] - [25] [4,106] - [25] [7,106] - [25] [14,106] - [25] [28,106] - [25] [54,106] - [25] [106,106] - [25] [206,106] - [25] [400,106] - [25] [1,206] - [25] [2,206] - [25] [4,206] - [25] [7,206] - [25] [14,206] - [25] [28,206] - [25] [54,206] - [25] [106,206] - [25] [206,206] - [25] [400,206] - [25] [1,400] - [25] [2,400] - [25] [4,400] - [25] [7,400] - [25] [14,400] - [25] [28,400] - [25] [54,400] - [25] [106,400] - [25] [206,400] - [25] [400,400] -> mstop(cvr) - mu sigma - 400 400 -> ## set model to optimal values: -> mstop(model) <- mstop(cvr) -> ### END (don't test automatically) -> ## End(No test) +> > > ### Other grids: > plot(make.grid(mstop(model), length.out = 3, dense_mu_grid = FALSE)) @@ -2885,7 +255,7 @@ Starting cross-validation... + risk <- function(y, f, w = 1) { + sum(w * loss(y = y, f = f, df = df)) + } -+ # ngradient is the negative derivate w.r.t. mu ++ # ngradient is the negative derivate w.r.t. mu (=f) + ngradient <- function(y, f, w = 1) { + (df + 1) * (y - f)/(df + (y - f)^2) + } @@ -2910,7 +280,8 @@ Starting cross-validation... + risk <- function(y, f, w = 1) { + sum(w * loss(y = y, f = f, mu = mu)) + } -+ # ngradient is the negative derivate w.r.t. df ++ # ngradient is the negative derivate of the loss w.r.t. f ++ # in this case, just the derivative of the log-likelihood + ngradient <- function(y, f, w = 1) { + exp(f)/2 * (digamma((exp(f) + 1)/2) - digamma(exp(f)/2)) - + 0.5 - (exp(f)/2 * log(1 + (y - mu)^2 / (exp(f) )) - @@ -2926,87 +297,6 @@ Starting cross-validation... 
> newStudentT <- Families(mu= newStudentTMu(mu=mu, df=df), + df=newStudentTDf(mu=mu, df=df)) > -> ## No test: -> ### Do not test the following code per default on CRAN as it takes some time to run: -> ### usage of the new Student's t distribution: -> library(gamlss) ## required for rTF -Loading required package: splines -Loading required package: gamlss.data -Loading required package: gamlss.dist -Loading required package: MASS - -Attaching package: ‘gamlss.dist’ - -The following object is masked from ‘package:mboost’: - - Family - -Loading required package: nlme - ********** GAMLSS Version 4.3-8 ********** -For more on GAMLSS look at http://www.gamlss.org/ -Type gamlssNews() to see new features/changes/bug fixes. - -> set.seed(1907) -> n <- 5000 -> x1 <- runif(n) -> x2 <- runif(n) -> mu <- 2 -1*x1 - 3*x2 -> df <- exp(1 + 0.5*x1 ) -> y <- rTF(n = n, mu = mu, nu = df) -> -> ## model fitting -> model <- glmboostLSS(y ~ x1 + x2, families = newStudentT, -+ control = boost_control(mstop = 100), -+ center = TRUE) -> ## shrinked effect estimates -> coef(model, off2int = TRUE) -$mu -(Intercept) x1 x2 - 2.0013497 -0.9745979 -2.9987269 - -$df -(Intercept) x1 - 1.0798780 0.1213042 - -> -> ## compare to pre-defined three parametric t-distribution: -> model2 <- glmboostLSS(y ~ x1 + x2, families = StudentTLSS(), -+ control = boost_control(mstop = 100), -+ center = TRUE) -> coef(model2, off2int = TRUE) -$mu -(Intercept) x1 x2 - 1.9900788 -0.9658828 -2.9889112 - -$sigma -(Intercept) x1 x2 - 0.01314517 -0.02731268 0.03867761 - -$df -(Intercept) x1 - 1.3213044 0.1913091 - -> -> ## with effect on sigma: -> sigma <- 3+ 1*x2 -> y <- rTF(n = n, mu = mu, nu = df, sigma=sigma) -> model3 <- glmboostLSS(y ~ x1 + x2, families = StudentTLSS(), -+ control = boost_control(mstop = 100), -+ center = TRUE) -> coef(model3, off2int = TRUE) -$mu -(Intercept) x2 - 0.5625243 -1.1556618 - -$sigma -(Intercept) x1 x2 - 1.17619100 -0.09742086 0.32657481 - -$df -(Intercept) x1 x2 - 1.14269183 0.33422503 
-0.02387359 - -> ## End(No test) > > > @@ -3014,10 +304,6 @@ $df > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("families", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() - -detaching ‘package:gamlss’, ‘package:nlme’, ‘package:gamlss.dist’, - ‘package:MASS’, ‘package:gamlss.data’, ‘package:splines’ - > nameEx("gamboostLSS-package") > ### * gamboostLSS-package > @@ -3085,12 +371,13 @@ $sigma + drawmap(india, map = india.bnd, regionvar = "mcdist", plotvar = "stunting") + } Loading required package: BayesX +Warning: package 'BayesX' was built under R version 3.4.4 Loading required package: shapefiles Loading required package: foreign -Attaching package: ‘shapefiles’ +Attaching package: 'shapefiles' -The following objects are masked from ‘package:foreign’: +The following objects are masked from 'package:foreign': read.dbf, write.dbf @@ -3106,7 +393,7 @@ Note: Function plotsurf depends on akima which has > base::cat("india", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() -detaching ‘package:BayesX’, ‘package:shapefiles’, ‘package:foreign’ +detaching 'package:BayesX', 'package:shapefiles', 'package:foreign' > nameEx("mboostLSS") > ### * mboostLSS @@ -3158,38 +445,6 @@ $sigma > names(NBinomialLSS()) # names of the family [1] "mu" "sigma" > -> ## No test: -> ### Do not test the following code per default on CRAN as it takes some time to run: -> # Note: Multiple formulas must be specified via a _named list_ -> # where the names correspond to the names of the distribution parameters -> # in the family (see above) -> model2 <- glmboostLSS(formula = list(mu = y ~ x1 + x2 + x3 + x4, -+ sigma = y ~ x3 + x4 + x5 + x6), -+ families = NBinomialLSS(), data = dat, -+ 
control = boost_control(mstop = 400, trace = TRUE), -+ center = TRUE) -[ 1] ...................................... -- risk: 3102.966 -[ 41] ...................................... -- risk: 3026.774 -[ 81] ...................................... -- risk: 2963.095 -[ 121] ...................................... -- risk: 2901.378 -[ 161] ...................................... -- risk: 2841.725 -[ 201] ...................................... -- risk: 2788.547 -[ 241] ...................................... -- risk: 2746.665 -[ 281] ...................................... -- risk: 2717.919 -[ 321] ...................................... -- risk: 2700.384 -[ 361] ...................................... -Final risk: 2690.598 -> coef(model2, off2int = TRUE) -$mu -(Intercept) x1 x2 x3 x4 - 1.6068970 0.9754679 0.4773532 -0.4662399 -0.8897124 - -$sigma -(Intercept) x3 x4 x5 x6 - -0.1230354 -0.3630657 -0.2697535 0.1403489 0.3301615 - -> ### END (don't test automatically) -> ## End(No test) > > > ### Offset needs to be specified via the arguments of families object: @@ -3218,60 +473,11 @@ attr(,"offset") > log(mean(sigma)) [1] 0.1828927 > -> ## No test: -> ### Do not test the following code per default on CRAN as it takes some time to run: -> ### use different mstop values for the two distribution parameters -> ### (two-dimensional early stopping) -> ### the number of iterations is passed to boost_control via a named list -> model3 <- glmboostLSS(formula = list(mu = y ~ x1 + x2 + x3 + x4, -+ sigma = y ~ x3 + x4 + x5 + x6), -+ families = NBinomialLSS(), data = dat, -+ control = boost_control(mstop = list(mu = 400, -+ sigma = 300), -+ trace = TRUE), -+ center = TRUE) -[ 1] ...................................... -- risk: 3102.966 -[ 41] ...................................... -- risk: 3026.774 -[ 81] ...................................... -- risk: 2963.095 -[ 121] ...................................... -- risk: 2901.378 -[ 161] ...................................... 
-- risk: 2841.725 -[ 201] ...................................... -- risk: 2788.547 -[ 241] ...................................... -- risk: 2746.665 -[ 281] ...................................... -- risk: 2721.225 -[ 321] ...................................... -- risk: 2709.597 -[ 361] ...................................... -Final risk: 2703.712 -> coef(model3, off2int = TRUE) -$mu -(Intercept) x1 x2 x3 x4 - 1.6111180 0.9718734 0.4694390 -0.4644485 -0.8799773 - -$sigma -(Intercept) x3 x4 x5 x6 --0.29089309 -0.36306567 -0.26975349 0.06302647 0.23897680 - -> -> ### Alternatively we can change mstop of model2: -> # here it is assumed that the first element in the vector corresponds to -> # the first distribution parameter of model2 etc. -> mstop(model2) <- c(400, 300) -Model first reduced to mstop = 300. -Now continue ... -[ 301] ...................................... -- risk: 2714.448 -[ 341] ...................................... -- risk: 2706.16 -[ 381] .................. -Final risk: 2703.712 -> par(mfrow = c(1,2)) -> plot(model2, xlim = c(0, max(mstop(model2)))) -> ## all.equal(coef(model2), coef(model3)) # same! 
-> ### END (don't test automatically) -> ## End(No test) > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("mboostLSS", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") -> graphics::par(get("par.postscript", pos = 'CheckExEnv')) > cleanEx() > nameEx("methods") > ### * methods @@ -3282,11 +488,11 @@ Final risk: 2703.712 > ### Name: methods > ### Title: Methods for mboostLSS > ### Aliases: print.mboostLSS summary.mboostLSS coef.mboostLSS -> ### coef.glmboostLSS risk risk.mboostLSS [.mboostLSS mstop.mboostLSS -> ### mstop.oobag mstop.cvriskLSS selected selected.mboostLSS -> ### fitted.mboostLSS predict.mboostLSS predint PI plot.glmboostLSS -> ### plot.gamboostLSS plot.predint update.mboostLSS model.weights -> ### model.weights.default model.weights.mboostLSS +> ### coef.glmboostLSS risk risk.mboostLSS risk.nc_mboostLSS [.mboostLSS +> ### mstop.mboostLSS mstop.oobag mstop.cvriskLSS selected +> ### selected.mboostLSS fitted.mboostLSS predict.mboostLSS predint PI +> ### plot.glmboostLSS plot.gamboostLSS plot.predint update.mboostLSS +> ### model.weights model.weights.default model.weights.mboostLSS > ### Keywords: methods > > ### ** Examples @@ -3311,53 +517,18 @@ Final risk: 2703.712 > model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = 100)) > -> ## No test: -> ### Do not test the following line per default on CRAN as it takes some time to run: -> ### use a model with more iterations for a better fit -> mstop(model) <- 400 -> ## End(No test) > ### extract coefficients > coef(model) $mu $mu$`bbs(x1, df = dfbase)` 1 2 3 4 5 6 - 3.58171139 2.47642513 1.38321567 0.35846954 -0.55844450 -1.37386804 - 7 8 9 10 11 12 --2.06458712 -2.59383111 -2.94780872 -3.13830905 -3.18456719 -3.09471615 - 13 14 15 16 17 18 --2.86320001 -2.46725579 -1.87824927 
-1.08842591 -0.09273062 1.10805073 - 19 20 21 22 23 24 - 2.48864870 4.00437945 5.60292622 7.22757798 8.84769996 10.46670845 - -$mu$`bbs(x2, df = dfbase)` - 1 2 3 4 5 6 --0.587567018 -0.597235489 -0.606895359 -0.616493825 -0.625795262 -0.634226162 - 7 8 9 10 11 12 --0.640764144 -0.643706523 -0.640665097 -0.629114205 -0.606465892 -0.565608617 - 13 14 15 16 17 18 --0.499359388 -0.406250941 -0.289077229 -0.151094932 -0.001816165 0.145180788 - 19 20 21 22 23 24 - 0.286945776 0.419084241 0.545588456 0.669020116 0.789859912 0.910388836 - -$mu$`bbs(x3, df = dfbase)` - 1 2 3 4 5 6 - 0.56795274 0.55780411 0.54748652 0.53585628 0.51985234 0.49117077 + 1.12319367 0.71021708 0.30207869 -0.07821225 -0.41073709 -0.68864711 7 8 9 10 11 12 - 0.42717980 0.28842829 0.05046406 -0.25478976 -0.54504698 -0.75532084 +-0.90747173 -1.06720601 -1.17544613 -1.24116448 -1.26789847 -1.24860860 13 14 15 16 17 18 --0.86898074 -0.89962648 -0.87265488 -0.81495209 -0.74603751 -0.67649672 +-1.16452195 -0.98825060 -0.69178712 -0.25652249 0.32409064 1.04219294 19 20 21 22 23 24 --0.61055474 -0.54837543 -0.48890118 -0.43106752 -0.37389397 -0.31679225 - -$mu$`bbs(x4, df = dfbase)` - 1 2 3 4 5 6 - 2.567451544 2.183750845 1.800221518 1.415979616 1.034193922 0.667283578 - 7 8 9 10 11 12 - 0.322037905 0.009307943 -0.257545411 -0.481058705 -0.669711734 -0.813301926 - 13 14 15 16 17 18 --0.899364082 -0.932976652 -0.928286940 -0.899492000 -0.856040251 -0.804129859 - 19 20 21 22 23 24 --0.747632885 -0.688917493 -0.629440065 -0.569818844 -0.510251318 -0.450694813 + 1.87642291 2.79735337 3.77145080 4.76261955 5.75135840 6.73946889 attr(,"offset") [1] 8.987845 @@ -3365,111 +536,61 @@ attr(,"offset") $sigma $sigma$`bbs(x1, df = dfbase)` 1 2 3 4 5 6 - 0.89659893 0.78713824 0.67616360 0.55710969 0.42730638 0.29198962 + 0.86207415 0.74904269 0.63511275 0.51632356 0.39102403 0.26276196 7 8 9 10 11 12 - 0.16213560 0.05105498 -0.03122623 -0.07636368 -0.07847080 -0.03347975 + 0.14010106 0.03466296 -0.04353613 -0.08711014 
-0.09194466 -0.05666238 13 14 15 16 17 18 - 0.05806291 0.17903227 0.30635650 0.42810619 0.53761725 0.62880999 + 0.01551477 0.11002846 0.20900409 0.30099114 0.37917858 0.43977891 19 20 21 22 23 24 - 0.69997365 0.75391415 0.79637610 0.83506324 0.87429112 0.91365873 + 0.48283417 0.51150467 0.53082810 0.54708328 0.56376074 0.58054853 $sigma$`bbs(x2, df = dfbase)` - 1 2 3 4 5 6 --0.563658099 -0.511757632 -0.459777813 -0.407331656 -0.354135638 -0.300469789 - 7 8 9 10 11 12 --0.248309055 -0.199527201 -0.154491030 -0.108894039 -0.058401175 -0.002441743 - 13 14 15 16 17 18 - 0.056485633 0.113495345 0.164241662 0.204929795 0.235290786 0.264020513 - 19 20 21 22 23 24 - 0.296512365 0.333895128 0.373456407 0.413559904 0.453779441 0.494022639 - -$sigma$`bbs(x3, df = dfbase)` - 1 2 3 4 5 6 --0.06993609 0.21499858 0.50028302 0.78648040 1.06468531 1.31096130 - 7 8 9 10 11 12 - 1.49064677 1.57871832 1.56759726 1.44135824 1.19071516 0.83344537 - 13 14 15 16 17 18 - 0.42510438 0.03477713 -0.30056403 -0.55740737 -0.72441644 -0.78900239 - 19 20 21 22 23 24 --0.75566343 -0.65077256 -0.49956583 -0.32729936 -0.14808084 0.03192021 - -$sigma$`bbs(x4, df = dfbase)` - 1 2 3 4 5 6 - 0.96287395 0.92067646 0.87872609 0.83856810 0.80135245 0.76642932 - 7 8 9 10 11 12 - 0.73502975 0.69991232 0.65251688 0.58964226 0.51238783 0.41489277 - 13 14 15 16 17 18 - 0.29149880 0.14358469 -0.01622096 -0.18308842 -0.36174266 -0.55165256 - 19 20 21 22 23 24 --0.75216843 -0.96121405 -1.17697566 -1.39582787 -1.61463881 -1.83317154 - -$sigma$`bbs(x5, df = dfbase)` - 1 2 3 4 5 6 --0.35133403 -0.31818178 -0.28487218 -0.25048316 -0.21414472 -0.17551639 - 7 8 9 10 11 12 --0.13478066 -0.09043638 -0.04036516 0.01140995 0.05490236 0.08804387 - 13 14 15 16 17 18 - 0.11539467 0.13861908 0.15950497 0.17917571 0.19811973 0.21841061 - 19 20 21 22 23 24 - 0.23971872 0.26186474 0.28466756 0.30781904 0.33074669 0.35356977 - -$sigma$`bbs(x6, df = dfbase)` 1 2 3 4 5 --0.2844651880 -0.2186158355 -0.1534117880 -0.0928846917 
-0.0448736951 +-0.3084370522 -0.2828511396 -0.2572366040 -0.2314539552 -0.2054021439 6 7 8 9 10 --0.0144611066 -0.0007836747 0.0043527943 0.0116311707 0.0272086244 +-0.1791726420 -0.1534018589 -0.1287416748 -0.1053850596 -0.0821077870 11 12 13 14 15 - 0.0517831869 0.0848199173 0.1248441166 0.1671483807 0.2026641167 +-0.0572887494 -0.0298604913 0.0003318223 0.0324249908 0.0646804993 16 17 18 19 20 - 0.2336020503 0.2661041115 0.3023380856 0.3417998069 0.3836024077 + 0.0950414716 0.1227414933 0.1495224831 0.1770388262 0.2056443388 21 22 23 24 - 0.4268453691 0.4711745653 0.5160781652 0.5610761947 + 0.2348918284 0.2644328623 0.2940230289 0.3236218625 -attr(,"offset") -[1] -2.427222 - -> -> ### only for distribution parameter mu -> coef(model, parameter = "mu") -$`bbs(x1, df = dfbase)` +$sigma$`bbs(x3, df = dfbase)` 1 2 3 4 5 6 - 3.58171139 2.47642513 1.38321567 0.35846954 -0.55844450 -1.37386804 + 0.59910262 0.67045019 0.74177471 0.81250768 0.87887880 0.93220571 7 8 9 10 11 12 --2.06458712 -2.59383111 -2.94780872 -3.13830905 -3.18456719 -3.09471615 + 0.95786423 0.93903295 0.86423014 0.72936356 0.54142463 0.31659946 13 14 15 16 17 18 --2.86320001 -2.46725579 -1.87824927 -1.08842591 -0.09273062 1.10805073 + 0.07928798 -0.14504818 -0.33831119 -0.48959099 -0.59533997 -0.65500733 19 20 21 22 23 24 - 2.48864870 4.00437945 5.60292622 7.22757798 8.84769996 10.46670845 +-0.67444210 -0.66621553 -0.64159409 -0.60933661 -0.57443579 -0.53923521 -$`bbs(x2, df = dfbase)` +$sigma$`bbs(x4, df = dfbase)` 1 2 3 4 5 6 --0.587567018 -0.597235489 -0.606895359 -0.616493825 -0.625795262 -0.634226162 + 1.056216416 0.969750219 0.883337364 0.797254322 0.712113354 0.629070036 7 8 9 10 11 12 --0.640764144 -0.643706523 -0.640665097 -0.629114205 -0.606465892 -0.565608617 + 0.549376494 0.473113681 0.399795129 0.326531456 0.249608183 0.167647563 13 14 15 16 17 18 --0.499359388 -0.406250941 -0.289077229 -0.151094932 -0.001816165 0.145180788 + 0.081403318 -0.008272202 -0.098290327 -0.188187103 
-0.280297223 -0.374966387 19 20 21 22 23 24 - 0.286945776 0.419084241 0.545588456 0.669020116 0.789859912 0.910388836 +-0.472376762 -0.572284577 -0.674275407 -0.777223345 -0.880174184 -0.983046592 -$`bbs(x3, df = dfbase)` +attr(,"offset") +[1] -2.427222 + +> +> ### only for distribution parameter mu +> coef(model, parameter = "mu") +$`bbs(x1, df = dfbase)` 1 2 3 4 5 6 - 0.56795274 0.55780411 0.54748652 0.53585628 0.51985234 0.49117077 + 1.12319367 0.71021708 0.30207869 -0.07821225 -0.41073709 -0.68864711 7 8 9 10 11 12 - 0.42717980 0.28842829 0.05046406 -0.25478976 -0.54504698 -0.75532084 +-0.90747173 -1.06720601 -1.17544613 -1.24116448 -1.26789847 -1.24860860 13 14 15 16 17 18 --0.86898074 -0.89962648 -0.87265488 -0.81495209 -0.74603751 -0.67649672 +-1.16452195 -0.98825060 -0.69178712 -0.25652249 0.32409064 1.04219294 19 20 21 22 23 24 --0.61055474 -0.54837543 -0.48890118 -0.43106752 -0.37389397 -0.31679225 - -$`bbs(x4, df = dfbase)` - 1 2 3 4 5 6 - 2.567451544 2.183750845 1.800221518 1.415979616 1.034193922 0.667283578 - 7 8 9 10 11 12 - 0.322037905 0.009307943 -0.257545411 -0.481058705 -0.669711734 -0.813301926 - 13 14 15 16 17 18 --0.899364082 -0.932976652 -0.928286940 -0.899492000 -0.856040251 -0.804129859 - 19 20 21 22 23 24 --0.747632885 -0.688917493 -0.629440065 -0.569818844 -0.510251318 -0.450694813 + 1.87642291 2.79735337 3.77145080 4.76261955 5.75135840 6.73946889 attr(,"offset") [1] 8.987845 @@ -3479,13 +600,13 @@ attr(,"offset") $mu $mu$`bbs(x1, df = dfbase)` 1 2 3 4 5 6 - 3.58171139 2.47642513 1.38321567 0.35846954 -0.55844450 -1.37386804 + 1.12319367 0.71021708 0.30207869 -0.07821225 -0.41073709 -0.68864711 7 8 9 10 11 12 --2.06458712 -2.59383111 -2.94780872 -3.13830905 -3.18456719 -3.09471615 +-0.90747173 -1.06720601 -1.17544613 -1.24116448 -1.26789847 -1.24860860 13 14 15 16 17 18 --2.86320001 -2.46725579 -1.87824927 -1.08842591 -0.09273062 1.10805073 +-1.16452195 -0.98825060 -0.69178712 -0.25652249 0.32409064 1.04219294 19 20 21 22 23 24 - 
2.48864870 4.00437945 5.60292622 7.22757798 8.84769996 10.46670845 + 1.87642291 2.79735337 3.77145080 4.76261955 5.75135840 6.73946889 attr(,"offset") [1] 8.987845 @@ -3493,13 +614,13 @@ attr(,"offset") $sigma $sigma$`bbs(x1, df = dfbase)` 1 2 3 4 5 6 - 0.89659893 0.78713824 0.67616360 0.55710969 0.42730638 0.29198962 + 0.86207415 0.74904269 0.63511275 0.51632356 0.39102403 0.26276196 7 8 9 10 11 12 - 0.16213560 0.05105498 -0.03122623 -0.07636368 -0.07847080 -0.03347975 + 0.14010106 0.03466296 -0.04353613 -0.08711014 -0.09194466 -0.05666238 13 14 15 16 17 18 - 0.05806291 0.17903227 0.30635650 0.42810619 0.53761725 0.62880999 + 0.01551477 0.11002846 0.20900409 0.30099114 0.37917858 0.43977891 19 20 21 22 23 24 - 0.69997365 0.75391415 0.79637610 0.83506324 0.87429112 0.91365873 + 0.48283417 0.51150467 0.53082810 0.54708328 0.56376074 0.58054853 attr(,"offset") [1] -2.427222 @@ -3519,46 +640,11 @@ attr(,"offset") > par(mfrow = c(1, 1)) > plot(model, which = 1, parameter = 2) > -> ## No test: -> ### Do not test the following code per default on CRAN as it takes some time to run: -> ### plot marginal prediction interval -> pi <- predint(model, pi = 0.9, which = "x1") -> pi <- predint(model, pi = c(0.8, 0.9), which = "x1") -> plot(pi, log = "y") # warning as some y values are below 0 -Warning in xy.coords(x, y, xlabel, ylabel, log) : - 229 y values <= 0 omitted from logarithmic plot -> ## here it would be better to plot x1 against -> ## sqrt(y) and sqrt(pi) -> -> ### set model to mstop = 300 (one-dimensional) -> mstop(model) <- 300 -> ### END (don't test automatically) -> ## End(No test) > > par(mfrow = c(2, 2)) > plot(risk(model, parameter = "mu")[[1]]) > plot(risk(model, parameter = "sigma")[[1]]) > -> ## No test: -> ### Do not test the following code per default on CRAN as it takes some time to run: -> ### get back to orignal fit -> mstop(model) <- 400 -> plot(risk(model, parameter = "mu")[[1]]) -> plot(risk(model, parameter = "sigma")[[1]]) -> -> ### use different 
mstop values for the components -> mstop(model) <- c(100, 200) -> ## same as -> mstop(model) <- c(mu = 100, sigma = 200) -> ## or -> mstop(model) <- list(mu = 100, sigma = 200) -> ## or -> mstop(model) <- list(100, 200) -> -> plot(risk(model, parameter = "mu")[[1]]) -> plot(risk(model, parameter = "sigma")[[1]]) -> ### END (don't test automatically) -> ## End(No test) > > > @@ -3566,6 +652,46 @@ Warning in xy.coords(x, y, xlabel, ylabel, log) : > base::cat("methods", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > graphics::par(get("par.postscript", pos = 'CheckExEnv')) > cleanEx() +> nameEx("stabsel.mboostLSS") +> ### * stabsel.mboostLSS +> +> flush(stderr()); flush(stdout()) +> +> base::assign(".ptime", proc.time(), pos = "CheckExEnv") +> ### Name: stabsel +> ### Title: Stability Selection +> ### Aliases: stabsel.mboostLSS selected.stabsel_mboostLSS +> ### Keywords: nonparametric +> +> ### ** Examples +> +> +> ### Data generating process: +> set.seed(1907) +> x1 <- rnorm(500) +> x2 <- rnorm(500) +> x3 <- rnorm(500) +> x4 <- rnorm(500) +> x5 <- rnorm(500) +> x6 <- rnorm(500) +> mu <- exp(1.5 +1 * x1 +0.5 * x2 -0.5 * x3 -1 * x4) +> sigma <- exp(-0.4 * x3 -0.2 * x4 +0.2 * x5 +0.4 * x6) +> y <- numeric(500) +> for( i in 1:500) ++ y[i] <- rnbinom(1, size = sigma[i], mu = mu[i]) +> dat <- data.frame(x1, x2, x3, x4, x5, x6, y) +> +> ### linear model with y ~ . 
for both components: 400 boosting iterations +> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 400), ++ center = TRUE, method = "noncyclic") +> +> +> +> +> base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") +> base::cat("stabsel.mboostLSS", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") +> cleanEx() > nameEx("weighted_median") > ### * weighted_median > @@ -3601,7 +727,7 @@ Warning in xy.coords(x, y, xlabel, ylabel, log) : > ### > options(digits = 7L) > base::cat("Time elapsed: ", proc.time() - base::get("ptime", pos = 'CheckExEnv'),"\n") -Time elapsed: 38.826 0.288 690.576 376.315 0.543 +Time elapsed: 11.31 0.23 11.65 NA NA > grDevices::dev.off() null device 1 diff --git a/tests/bugfixes.R b/tests/bugfixes.R index b8eef2e..e4cb011 100644 --- a/tests/bugfixes.R +++ b/tests/bugfixes.R @@ -68,3 +68,43 @@ round(data.frame(BB_gamlss = coef(m1), BI_gamlss = coef(m2), BB_gamboostLSS = coef(m3, off2int = TRUE, parameter = "mu"), BI_gamboostLSS = coef(m4, off2int = TRUE)), 3) + + +## make sure that combined_risk is not written to the global environment +## (and thus replaced if another model is fitted) +set.seed(1907) +x1 <- rnorm(1000) +x2 <- rnorm(1000) +x3 <- rnorm(1000) +x4 <- rnorm(1000) +x5 <- rnorm(1000) +x6 <- rnorm(1000) +mu <- exp(1.5 +1 * x1 +0.5 * x2 -0.5 * x3 -1 * x4) +sigma <- exp(-0.4 * x3 -0.2 * x4 +0.2 * x5 +0.4 * x6) +y <- numeric(1000) +for( i in 1:1000) + y[i] <- rnbinom(1, size = sigma[i], mu = mu[i]) +dat <- data.frame(x1, x2, x3, x4, x5, x6, y) + +model_0 <- mboostLSS(x1 ~ ., families = GaussianLSS(), data = dat, + control = boost_control(mstop = 30),method = "noncyclic") +length_0 <- length(risk(model_0, merge = TRUE)) +if (length_0 != 32) + stop("combined risk not correct.") + +model_1 <- glmboostLSS(y ~ ., families = NBinomialLSS(), data 
= dat, + control = boost_control(mstop = 20), method = "noncyclic") +length_1 <- length(risk(model_1, merge = TRUE)) +if (length_1 != 22) + stop("combined risk not correct.") +if (length(risk(model_0, merge = TRUE)) != length_0) + stop("Combined risk overwritten by new model. Scoping error.") + +model_2 <- mboostLSS(x1 ~ ., families = GaussianLSS(), data = dat, + control = boost_control(mstop = 11),method = "cyclic") +if (length(risk(model_2, merge = TRUE)) != 24) + stop("combined risk not correct.") +if (length(risk(model_0, merge = TRUE)) != length_0) + stop("Combined risk overwritten by new model. Scoping error.") +if (length(risk(model_1, merge = TRUE)) != length_1) + stop("Combined risk overwritten by new model. Scoping error.") diff --git a/tests/bugfixes.Rout.save b/tests/bugfixes.Rout.save index a74595d..ebf7307 100644 --- a/tests/bugfixes.Rout.save +++ b/tests/bugfixes.Rout.save @@ -1,7 +1,7 @@ -R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree" -Copyright (C) 2015 The R Foundation for Statistical Computing -Platform: x86_64-pc-linux-gnu (64-bit) +R version 3.4.3 (2017-11-30) -- "Kite-Eating Tree" +Copyright (C) 2017 The R Foundation for Statistical Computing +Platform: x86_64-w64-mingw32/x64 (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. @@ -23,7 +23,7 @@ Loading required package: gamboostLSS Loading required package: mboost Loading required package: parallel Loading required package: stabs -This is mboost 2.6-0. See 'package?mboost' and 'news(package = "mboost")' +This is mboost 2.9-0. See 'package?mboost' and 'news(package = "mboost")' for a complete list of changes. 
@@ -33,6 +33,30 @@ The following object is masked from 'package:stats': model.weights +Warning message: +package 'stabs' was built under R version 3.4.4 +> require("gamlss") +Loading required package: gamlss +Loading required package: splines +Loading required package: gamlss.data +Loading required package: gamlss.dist +Loading required package: MASS + +Attaching package: 'gamlss.dist' + +The following object is masked from 'package:mboost': + + Family + +Loading required package: nlme + ********** GAMLSS Version 5.1-0 ********** +For more on GAMLSS look at http://www.gamlss.org/ +Type gamlssNews() to see new features/changes/bug fixes. + +Warning messages: +1: package 'gamlss' was built under R version 3.4.4 +2: package 'gamlss.data' was built under R version 3.4.4 +3: package 'gamlss.dist' was built under R version 3.4.4 > > ## subset method was missing if initial mstop = 1 > set.seed(1907) @@ -81,8 +105,7 @@ Loss function: -(lgamma(y + exp(f)) - lgamma(exp(f)) - lgamma(y + 1) + exp(f) * + list(mu = mboost::selected(model[[1]]), + sigma = mboost::selected(model[[2]])))) > -> -> # If the families argument is not specified explicitly in mboostLSSone gets an +> # If the families argument is not specified explicitly in mboostLSS one gets an > # error in cvrisk.mboostLSS() (spotted by Almond Stöcker). > # (https://github.com/boost-R/gamboostLSS/issues/9) > set.seed(1907) @@ -104,6 +127,41 @@ In make.grid(mstop(object)) : > if (inherits(cvr, "try-error")) + stop("cvrisk does not work if no family was (explicitly) chosen") > +> +> # Make sure that gamlss.dist::BB and gamlss.dist::BI work (spotted by F. 
Scheipl) +> # (https://github.com/boost-R/gamboostLSS/issues/12) +> set.seed(123) +> n <- 100 +> x <- rnorm(n) +> z <- rnorm(n) +> data <- data.frame(y = rbinom(n, p = plogis(x + z), size = 60), x = x, z= z) +> data$ymat <- with(data, cbind(success = data$y, fail = 60 - data$y)) +> +> m1 <- gamlss(ymat ~ x + z, data = data, family = BB) +GAMLSS-RS iteration 1: Global Deviance = 699.795 +GAMLSS-RS iteration 2: Global Deviance = 534.6991 +GAMLSS-RS iteration 3: Global Deviance = 517.8675 +GAMLSS-RS iteration 4: Global Deviance = 517.8664 +GAMLSS-RS iteration 5: Global Deviance = 517.8664 +> m2 <- gamlss(ymat ~ x + z, data = data, family = BI) +GAMLSS-RS iteration 1: Global Deviance = 517.8684 +GAMLSS-RS iteration 2: Global Deviance = 517.8684 +> # same with boosting +> m3 <- glmboostLSS(ymat ~ x + z, data = data, families = as.families("BB")) +> m4 <- glmboost(ymat ~ x + z, data = data, family = as.families("BI")) +Warning message: +In as.families("BI") : + For boosting one-parametric families, please use the mboost package. 
+> +> round(data.frame(BB_gamlss = coef(m1), ++ BI_gamlss = coef(m2), ++ BB_gamboostLSS = coef(m3, off2int = TRUE, parameter = "mu"), ++ BI_gamboostLSS = coef(m4, off2int = TRUE)), 3) + BB_gamlss BI_gamlss BB_gamboostLSS BI_gamboostLSS +(Intercept) -0.014 -0.014 -0.006 -0.014 +x 1.009 1.009 0.964 1.009 +z 0.965 0.965 0.925 0.965 +> > proc.time() user system elapsed - 1.989 0.104 2.296 + 6.45 0.18 7.05 diff --git a/tests/regtest-families.Rout.save b/tests/regtest-families.Rout.save index 5cbebe9..96669fd 100644 --- a/tests/regtest-families.Rout.save +++ b/tests/regtest-families.Rout.save @@ -1,7 +1,7 @@ -R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree" -Copyright (C) 2015 The R Foundation for Statistical Computing -Platform: x86_64-pc-linux-gnu (64-bit) +R version 3.4.3 (2017-11-30) -- "Kite-Eating Tree" +Copyright (C) 2017 The R Foundation for Statistical Computing +Platform: x86_64-w64-mingw32/x64 (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. @@ -23,7 +23,7 @@ Loading required package: gamboostLSS Loading required package: mboost Loading required package: parallel Loading required package: stabs -This is mboost 2.6-0. See 'package?mboost' and 'news(package = "mboost")' +This is mboost 2.9-0. See 'package?mboost' and 'news(package = "mboost")' for a complete list of changes. @@ -33,6 +33,8 @@ The following object is masked from 'package:stats': model.weights +Warning message: +package 'stabs' was built under R version 3.4.4 > require("gamlss") Loading required package: gamlss Loading required package: splines @@ -47,10 +49,14 @@ The following object is masked from 'package:mboost': Family Loading required package: nlme - ********** GAMLSS Version 4.3-8 ********** + ********** GAMLSS Version 5.1-0 ********** For more on GAMLSS look at http://www.gamlss.org/ Type gamlssNews() to see new features/changes/bug fixes. 
+Warning messages: +1: package 'gamlss' was built under R version 3.4.4 +2: package 'gamlss.data' was built under R version 3.4.4 +3: package 'gamlss.dist' was built under R version 3.4.4 > > > ### check families with only one offset specified (other to choose via optim) @@ -231,6 +237,155 @@ $df attr(,"offset") [1] 0.7357846 +> +> ### check as families +> +> ### Gaussian: two different ways +> model <- glmboostLSS(y ~ x1 + x2, families = as.families("NO"), ++ data = data, ++ control = boost_control(mstop = 10), center = TRUE) +> +> model2 <- glmboostLSS(y ~ x1 + x2, families = GaussianLSS(), ++ data = data, ++ control = boost_control(mstop = 10), center = TRUE) +> +> coef(model, off2int = TRUE) # as.families("NO") +$mu +(Intercept) x2 + 0.04778256 -0.13022745 + +$sigma +(Intercept) x1 x2 + 1.0141654 -0.1381848 1.3859472 + +> coef(model2, off2int = TRUE) # GaussianLSS() +$mu +(Intercept) x2 + 0.04778256 -0.13022745 + +$sigma +(Intercept) x1 x2 + 1.0141654 -0.1381848 1.3859472 + +> +> ### change link function inside as.families() +> model2 <- glmboostLSS(abs(y) ~ x1 + x2, families = as.families("NO", mu.link = "log"), ++ data = data, ++ control = boost_control(mstop = 10), center = TRUE) +> coef(model2) +$mu +(Intercept) x2 + -0.5210051 1.0517957 +attr(,"offset") +[1] 1.248357 + +$sigma +(Intercept) x2 + -0.7673921 1.2107247 +attr(,"offset") +[1] 1.539722 + +> +> +> model3 <- glmboostLSS(abs(y)/(max(y)+.01) ~ x1 + x2, families = as.families("BE", mu.link = "logit", ++ sigma.link = "log"), ++ data = data, ++ control = boost_control(mstop = 10), center = TRUE) +> coef(model3) +$mu +(Intercept) x2 + -0.2137522 0.7177991 +attr(,"offset") +[1] -2.931116 + +$sigma +(Intercept) x2 +-0.68253369 0.09271978 +attr(,"offset") +[1] -0.6931472 + +> +> +> model4 <- glmboostLSS(y ~ x1 + x2, families = as.families("TF", mu.link = "identity", ++ sigma.link = "log", ++ nu.link = "log"), ++ data = data, ++ control = boost_control(mstop = 10), center = TRUE) +> coef(model4) +$mu 
+(Intercept) x2 + 0.05795847 -0.11700550 +attr(,"offset") +[1] -0.01672538 + +$sigma +(Intercept) x2 + -0.6968961 1.0374093 +attr(,"offset") +[1] 1.761574 + +$nu +(Intercept) x2 + 0.0572190 -0.1155127 +attr(,"offset") +[1] 2.302585 + +> +> ### Additionally use stabilization +> +> model4 <- glmboostLSS(y ~ x1 + x2, families = as.families("TF", mu.link = "identity", ++ sigma.link = "log", ++ nu.link = "log", ++ stabilization = "L2"), ++ data = data, ++ control = boost_control(mstop = 10), center = TRUE) +> coef(model4) +$mu +(Intercept) x2 + 0.4233792 -0.8547101 +attr(,"offset") +[1] -0.01672538 + +$sigma +(Intercept) x2 + -0.6004438 0.8826834 +attr(,"offset") +[1] 1.761574 + +$nu +(Intercept) x2 + 0.2748352 -0.5548323 +attr(,"offset") +[1] 2.302585 + +> +> +> model4 <- glmboostLSS(y ~ x1 + x2, families = as.families("TF", mu.link = "identity", ++ sigma.link = "log", ++ nu.link = "log", ++ stabilization = "MAD"), ++ data = data, ++ control = boost_control(mstop = 10), center = TRUE) +> coef(model4) +$mu +(Intercept) x2 + 1.146445 -2.314422 +attr(,"offset") +[1] -0.01672538 + +$sigma +(Intercept) x1 x2 + -1.586238 -0.701680 2.484648 +attr(,"offset") +[1] 1.761574 + +$nu +(Intercept) x1 x2 + -0.5822664 3.5450017 -1.0655973 +attr(,"offset") +[1] 2.302585 + +> > > ### check survival families > @@ -280,9 +435,9 @@ Loglik(model)= -6051 Loglik(intercept only)= -6199.6 n= 1000 > model <- glmboostLSS(Surv(time, status) ~ x1 + x2 + x3, families = LogNormalLSS(), + control = boost_control(trace = TRUE), center = TRUE) -[ 1] ...................................... -- risk: 1552.695 -[ 41] ...................................... -- risk: 1543.948 -[ 81] .................. +[ 1] ........................................ -- risk: 1552.695 +[ 41] ........................................ -- risk: 1543.948 +[ 81] ................... 
Final risk: 1543.569 > stopifnot(sum(abs(coef(model, off2int = TRUE)[[1]] - c(3, 1, 2, 0))) + < sum(abs(coef(m1) - c(3, 1, 2, 0)))) @@ -309,18 +464,18 @@ Loglik(model)= -6681.6 Loglik(intercept only)= -6721.9 n= 1000 > model <- glmboostLSS(Surv(time, status) ~ x1 + x2 + x3, families = LogLogLSS(), + control = boost_control(trace = TRUE), center = TRUE) -[ 1] ...................................... -- risk: 2246.758 -[ 41] ...................................... -- risk: 2240.168 -[ 81] .................. +[ 1] ........................................ -- risk: 2246.758 +[ 41] ........................................ -- risk: 2240.168 +[ 81] ................... Final risk: 2238.548 > model[350] -[ 101] ...................................... -- risk: 2236.777 -[ 141] ...................................... -- risk: 2235.907 -[ 181] ...................................... -- risk: 2235.422 -[ 221] ...................................... -- risk: 2235.153 -[ 261] ...................................... -- risk: 2235.003 -[ 301] ...................................... -- risk: 2234.92 -[ 341] ........ +[ 101] ........................................ -- risk: 2236.777 +[ 141] ........................................ -- risk: 2235.907 +[ 181] ........................................ -- risk: 2235.422 +[ 221] ........................................ -- risk: 2235.153 +[ 261] ........................................ -- risk: 2235.003 +[ 301] ........................................ -- risk: 2234.92 +[ 341] ......... Final risk: 2234.906 LSS Models fitted via Model-based Boosting @@ -380,16 +535,16 @@ n= 1000 > model <- glmboostLSS(Surv(time, status) ~ x1 + x2 + x3, + families = WeibullLSS(), + control = boost_control(trace = TRUE), center = TRUE) -[ 1] ...................................... -- risk: 2134.384 -[ 41] ...................................... -- risk: 1904.389 -[ 81] .................. +[ 1] ........................................ 
-- risk: 2134.384 +[ 41] ........................................ -- risk: 1904.389 +[ 81] ................... Final risk: 1858.757 > model[300] -[ 101] ...................................... -- risk: 1804.158 -[ 141] ...................................... -- risk: 1782.137 -[ 181] ...................................... -- risk: 1775.546 -[ 221] ...................................... -- risk: 1773.984 -[ 261] ...................................... +[ 101] ........................................ -- risk: 1804.158 +[ 141] ........................................ -- risk: 1782.137 +[ 181] ........................................ -- risk: 1775.546 +[ 221] ........................................ -- risk: 1773.984 +[ 261] ....................................... Final risk: 1773.656 LSS Models fitted via Model-based Boosting @@ -506,4 +661,4 @@ Error in Families(mu = NBinomialMu2(mu = mu, sigma = sigma), sigma = NBinomialSi > > proc.time() user system elapsed - 7.791 0.111 8.166 + 13.13 0.23 13.85 diff --git a/tests/regtest-gamboostLSS.Rout.save b/tests/regtest-gamboostLSS.Rout.save index 455f0f7..3286d3a 100644 --- a/tests/regtest-gamboostLSS.Rout.save +++ b/tests/regtest-gamboostLSS.Rout.save @@ -1,7 +1,7 @@ -R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree" -Copyright (C) 2015 The R Foundation for Statistical Computing -Platform: x86_64-pc-linux-gnu (64-bit) +R version 3.4.3 (2017-11-30) -- "Kite-Eating Tree" +Copyright (C) 2017 The R Foundation for Statistical Computing +Platform: x86_64-w64-mingw32/x64 (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. @@ -23,7 +23,7 @@ Loading required package: gamboostLSS Loading required package: mboost Loading required package: parallel Loading required package: stabs -This is mboost 2.6-0. See 'package?mboost' and 'news(package = "mboost")' +This is mboost 2.9-0. See 'package?mboost' and 'news(package = "mboost")' for a complete list of changes. 
@@ -33,6 +33,8 @@ The following object is masked from 'package:stats': model.weights +Warning message: +package 'stabs' was built under R version 3.4.4 > > set.seed(1907) > x1 <- rnorm(1000) @@ -55,45 +57,45 @@ The following object is masked from 'package:stats': $mu $mu$`bbs(x1, df = dfbase)` 1 2 3 4 5 6 - 1.15444380 0.93545384 0.71693694 0.50042422 0.28570766 0.07534539 + 1.15444358 0.93545366 0.71693679 0.50042410 0.28570758 0.07534535 7 8 9 10 11 12 --0.11839907 -0.27732533 -0.38543942 -0.43568218 -0.42822228 -0.36389776 +-0.11839908 -0.27732531 -0.38543938 -0.43568214 -0.42822224 -0.36389774 13 14 15 16 17 18 --0.24633715 -0.08712616 0.09162065 0.27043062 0.45036696 0.63285558 +-0.24633715 -0.08712618 0.09162060 0.27043055 0.45036688 0.63285550 19 20 21 22 23 24 - 0.81691750 1.00419544 1.19460226 1.38640555 1.57832857 1.77025951 + 0.81691740 1.00419534 1.19460215 1.38640544 1.57832844 1.77025938 $mu$`bbs(x2, df = dfbase)` 1 2 3 4 5 --0.3672899875 -0.3558804237 -0.3444700127 -0.3330501572 -0.3215346330 +-0.3672899861 -0.3558804221 -0.3444700110 -0.3330501554 -0.3215346309 6 7 8 9 10 --0.3097128631 -0.2972908134 -0.2840287809 -0.2698403885 -0.2552168139 +-0.3097128609 -0.2972908111 -0.2840287784 -0.2698403860 -0.2552168115 11 12 13 14 15 --0.2393630063 -0.2180543333 -0.1868538301 -0.1421989720 -0.0805408296 +-0.2393630042 -0.2180543316 -0.1868538288 -0.1421989711 -0.0805408289 16 17 18 19 20 --0.0007499793 0.0957227225 0.2045754639 0.3171470502 0.4253103459 +-0.0007499782 0.0957227241 0.2045754665 0.3171470538 0.4253103506 21 22 23 24 - 0.5293373790 0.6314883314 0.7326624464 0.8337638331 + 0.5293373847 0.6314883382 0.7326624542 0.8337638420 $mu$`bbs(x3, df = dfbase)` - 1 2 3 4 5 6 - 1.62690753 1.66179015 1.69621917 1.72718130 1.74723416 1.73977904 - 7 8 9 10 11 12 - 1.66796113 1.47592050 1.11762042 0.61818391 0.03382329 -0.54796310 - 13 14 15 16 17 18 --1.03626818 -1.38264045 -1.58369400 -1.66929672 -1.67606559 -1.63348348 - 19 20 21 22 23 24 --1.56155293 
-1.47222602 -1.37332081 -1.26954034 -1.16391410 -1.05812520 + 1 2 3 4 5 6 7 + 1.6269075 1.6617902 1.6962192 1.7271813 1.7472342 1.7397791 1.6679611 + 8 9 10 11 12 13 14 + 1.4759205 1.1176204 0.6181839 0.0338233 -0.5479631 -1.0362682 -1.3826404 + 15 16 17 18 19 20 21 +-1.5836940 -1.6692967 -1.6760656 -1.6334835 -1.5615529 -1.4722260 -1.3733208 + 22 23 24 +-1.2695403 -1.1639141 -1.0581252 $mu$`bbs(x4, df = dfbase)` 1 2 3 4 5 6 - 2.55229363 2.24960553 1.94876411 1.65787562 1.38476696 1.13402108 + 2.55229361 2.24960552 1.94876409 1.65787561 1.38476695 1.13402107 7 8 9 10 11 12 - 0.90460104 0.69199958 0.48667665 0.27458711 0.04879986 -0.17271442 + 0.90460104 0.69199958 0.48667665 0.27458711 0.04879986 -0.17271441 13 14 15 16 17 18 --0.36207876 -0.50902480 -0.61846342 -0.69684702 -0.75045489 -0.78790732 +-0.36207875 -0.50902479 -0.61846341 -0.69684702 -0.75045489 -0.78790731 19 20 21 22 23 24 --0.81606432 -0.83940053 -0.86068781 -0.88116968 -0.90145775 -0.92172844 +-0.81606431 -0.83940053 -0.86068780 -0.88116968 -0.90145775 -0.92172844 attr(,"offset") [1] 4.055461 @@ -101,65 +103,65 @@ attr(,"offset") $sigma $sigma$`bbs(x1, df = dfbase)` 1 2 3 4 5 6 --0.030414460 -0.022125586 -0.013853684 -0.005537589 0.003564304 0.013555209 +-0.030414452 -0.022125579 -0.013853677 -0.005537584 0.003564307 0.013555209 7 8 9 10 11 12 - 0.022918018 0.029180388 0.030332229 0.026724515 0.021145904 0.015189258 + 0.022918014 0.029180381 0.030332222 0.026724511 0.021145904 0.015189262 13 14 15 16 17 18 - 0.010297457 0.008747271 0.011992089 0.020480823 0.032398964 0.046426108 + 0.010297464 0.008747280 0.011992096 0.020480825 0.032398957 0.046426092 19 20 21 22 23 24 - 0.061714551 0.077812287 0.094364484 0.111135167 0.128012802 0.144908293 + 0.061714526 0.077812251 0.094364438 0.111135110 0.128012734 0.144908214 $sigma$`bbs(x2, df = dfbase)` 1 2 3 4 5 6 --1.00531259 -0.89894200 -0.79242156 -0.68500258 -0.57580592 -0.46457099 +-1.00531259 -0.89894200 -0.79242156 -0.68500259 -0.57580592 -0.46457099 
7 8 9 10 11 12 --0.35361689 -0.24906416 -0.15890635 -0.08197539 -0.01224824 0.05171766 +-0.35361690 -0.24906416 -0.15890635 -0.08197540 -0.01224825 0.05171766 13 14 15 16 17 18 - 0.11197233 0.16734047 0.20896077 0.23023383 0.22855164 0.21542982 + 0.11197233 0.16734047 0.20896077 0.23023382 0.22855164 0.21542982 19 20 21 22 23 24 - 0.20447996 0.20878255 0.22816320 0.25642982 0.28851624 0.32095755 + 0.20447995 0.20878254 0.22816319 0.25642980 0.28851622 0.32095753 $sigma$`bbs(x3, df = dfbase)` 1 2 3 4 5 6 - 0.12063540 0.25402927 0.38682572 0.51548480 0.63424539 0.73889097 + 0.12063540 0.25402927 0.38682571 0.51548479 0.63424538 0.73889096 7 8 9 10 11 12 - 0.82637001 0.89629039 0.94519455 0.94149973 0.85858321 0.66899777 + 0.82637000 0.89629038 0.94519454 0.94149972 0.85858321 0.66899776 13 14 15 16 17 18 - 0.37527216 0.03122394 -0.30529938 -0.58985492 -0.78942381 -0.90269096 + 0.37527216 0.03122394 -0.30529939 -0.58985492 -0.78942381 -0.90269096 19 20 21 22 23 24 --0.94431872 -0.93456023 -0.89272664 -0.83376343 -0.76851225 -0.70247345 +-0.94431872 -0.93456023 -0.89272664 -0.83376343 -0.76851225 -0.70247346 $sigma$`bbs(x4, df = dfbase)` 1 2 3 4 5 6 0.86989263 0.87994191 0.88853846 0.88929979 0.87589781 0.84275362 7 8 9 10 11 12 - 0.78591633 0.70378776 0.60586559 0.50846766 0.41948694 0.32797963 + 0.78591632 0.70378776 0.60586559 0.50846766 0.41948694 0.32797963 13 14 15 16 17 18 - 0.21454813 0.07793247 -0.07497320 -0.23830307 -0.39954949 -0.55170872 + 0.21454813 0.07793246 -0.07497320 -0.23830308 -0.39954949 -0.55170872 19 20 21 22 23 24 --0.69771925 -0.84192690 -0.98877500 -1.13761935 -1.28717664 -1.43691338 +-0.69771925 -0.84192690 -0.98877499 -1.13761935 -1.28717664 -1.43691337 $sigma$`bbs(x5, df = dfbase)` 1 2 3 4 5 --0.0023004785 -0.0022666544 -0.0022291319 -0.0021710972 -0.0020661463 +-0.0023004782 -0.0022666543 -0.0022291318 -0.0021710972 -0.0020661463 6 7 8 9 10 --0.0018478101 -0.0014703493 -0.0009604533 -0.0003984815 0.0001909117 +-0.0018478101 -0.0014703494 
-0.0009604535 -0.0003984816 0.0001909115 11 12 13 14 15 - 0.0008694374 0.0017723943 0.0029451304 0.0043234390 0.0057278955 + 0.0008694373 0.0017723942 0.0029451303 0.0043234390 0.0057278955 16 17 18 19 20 - 0.0070223602 0.0081077392 0.0090216913 0.0098791882 0.0107300396 + 0.0070223603 0.0081077393 0.0090216914 0.0098791885 0.0107300399 21 22 23 24 - 0.0116063910 0.0125359458 0.0134878183 0.0144412251 + 0.0116063914 0.0125359462 0.0134878188 0.0144412255 $sigma$`bbs(x6, df = dfbase)` 1 2 3 4 5 6 -0.21860926 -0.21538642 -0.21197455 -0.20629754 -0.19256418 -0.16763016 7 8 9 10 11 12 --0.12961304 -0.07657484 -0.01111446 0.05578871 0.11239796 0.15326313 +-0.12961305 -0.07657484 -0.01111446 0.05578871 0.11239796 0.15326313 13 14 15 16 17 18 - 0.18085349 0.19695872 0.20194124 0.20159996 0.20232816 0.20546306 + 0.18085349 0.19695872 0.20194123 0.20159996 0.20232816 0.20546306 19 20 21 22 23 24 - 0.21031886 0.21775473 0.22809248 0.24020569 0.25283988 0.26553388 + 0.21031886 0.21775473 0.22809249 0.24020570 0.25283989 0.26553389 attr(,"offset") [1] -1.650591 @@ -229,4 +231,4 @@ Loss function: -(lgamma(y + exp(f)) - lgamma(exp(f)) - lgamma(y + 1) + exp(f) * > > proc.time() user system elapsed - 6.422 0.087 6.639 + 8.37 0.10 8.59 diff --git a/tests/regtest-glmboostLSS.Rout.save b/tests/regtest-glmboostLSS.Rout.save index 43ea365..aef6bf3 100644 --- a/tests/regtest-glmboostLSS.Rout.save +++ b/tests/regtest-glmboostLSS.Rout.save @@ -1,7 +1,7 @@ -R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree" -Copyright (C) 2015 The R Foundation for Statistical Computing -Platform: x86_64-pc-linux-gnu (64-bit) +R version 3.4.3 (2017-11-30) -- "Kite-Eating Tree" +Copyright (C) 2017 The R Foundation for Statistical Computing +Platform: x86_64-w64-mingw32/x64 (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. 
@@ -23,7 +23,7 @@ Loading required package: gamboostLSS Loading required package: mboost Loading required package: parallel Loading required package: stabs -This is mboost 2.6-0. See 'package?mboost' and 'news(package = "mboost")' +This is mboost 2.9-0. See 'package?mboost' and 'news(package = "mboost")' for a complete list of changes. @@ -33,6 +33,8 @@ The following object is masked from 'package:stats': model.weights +Warning message: +package 'stabs' was built under R version 3.4.4 > require("gamlss") Loading required package: gamlss Loading required package: splines @@ -47,10 +49,14 @@ The following object is masked from 'package:mboost': Family Loading required package: nlme - ********** GAMLSS Version 4.3-8 ********** + ********** GAMLSS Version 5.1-0 ********** For more on GAMLSS look at http://www.gamlss.org/ Type gamlssNews() to see new features/changes/bug fixes. +Warning messages: +1: package 'gamlss' was built under R version 3.4.4 +2: package 'gamlss.data' was built under R version 3.4.4 +3: package 'gamlss.dist' was built under R version 3.4.4 > > set.seed(1907) > n <- 5000 @@ -161,12 +167,12 @@ Loss function: { > model <- glmboostLSS(y ~ x1 + x2, families = StudentTLSS(), + control = boost_control(mstop = 10, trace =TRUE), + center = TRUE) -[ 1] ........ +[ 1] ......... Final risk: 13591.79 > model[100] -[ 11] ...................................... -- risk: 12681.81 -[ 51] ...................................... -- risk: 12508.02 -[ 91] ........ +[ 11] ........................................ -- risk: 12681.81 +[ 51] ........................................ -- risk: 12508.02 +[ 91] ......... Final risk: 12491.57 LSS Models fitted via Model-based Boosting @@ -221,7 +227,7 @@ Loss function: { + families = StudentTLSS(), + control = boost_control(mstop = 10, trace =TRUE), + center = TRUE) -[ 1] ........ +[ 1] ......... 
Final risk: 13810.72 > > stopifnot(all.equal(lapply(coef(model, which = ""), function(x) names(x)[-1]), @@ -233,7 +239,7 @@ Final risk: 13810.72 + families = StudentTLSS(), + control = boost_control(mstop = 10, trace =TRUE), + center = TRUE) -[ 1] ........ +[ 1] ......... Final risk: 13810.72 > > stopifnot(all.equal(lapply(coef(model, which = ""), function(x) names(x)[-1]), @@ -248,7 +254,7 @@ Final risk: 13810.72 + families = StudentTLSS(), + control = boost_control(mstop = 10, trace =TRUE), + center = TRUE) -[ 1] ........ +[ 1] ......... Final risk: 14135.24 Warning message: In mboostLSS_fit(formula = formula, data = data, families = families, : @@ -305,7 +311,7 @@ In mboostLSS_fit(formula = formula, data = data, families = families, : + families = StudentTLSS(), + control = boost_control(mstop = 10, trace =TRUE), + center = TRUE) -[ 1] ........ +[ 1] ......... Final risk: 13810.99 > > model2 <- glmboostLSS(list(mu = y ~ x2, @@ -314,11 +320,11 @@ Final risk: 13810.99 + data = dat2, families = StudentTLSS(), + control = boost_control(mstop = 10, trace =TRUE), + center = TRUE) -[ 1] ........ +[ 1] ......... Final risk: 13810.99 > > stopifnot(all.equal(coef(model), coef(model2))) > > proc.time() user system elapsed - 7.002 0.110 7.310 + 8.19 0.26 8.43 diff --git a/tests/regtest-mstop.Rout.save b/tests/regtest-mstop.Rout.save index b6ce8df..61d1af9 100644 --- a/tests/regtest-mstop.Rout.save +++ b/tests/regtest-mstop.Rout.save @@ -1,7 +1,7 @@ -R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree" -Copyright (C) 2015 The R Foundation for Statistical Computing -Platform: x86_64-pc-linux-gnu (64-bit) +R version 3.4.3 (2017-11-30) -- "Kite-Eating Tree" +Copyright (C) 2017 The R Foundation for Statistical Computing +Platform: x86_64-w64-mingw32/x64 (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. 
@@ -24,7 +24,7 @@ Loading required package: gamboostLSS Loading required package: mboost Loading required package: parallel Loading required package: stabs -This is mboost 2.6-0. See 'package?mboost' and 'news(package = "mboost")' +This is mboost 2.9-0. See 'package?mboost' and 'news(package = "mboost")' for a complete list of changes. @@ -34,6 +34,8 @@ The following object is masked from 'package:stats': model.weights +Warning message: +package 'stabs' was built under R version 3.4.4 > > ### create some data first > set.seed(1907) @@ -98,13 +100,13 @@ The following object is masked from 'package:stats': > model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = ms, trace = TRUE), + center = TRUE) -[ 1] .................. +[ 1] ................... Final risk: 3197.832 > > model[c(20, 30)] # check if two values can be specified Model first reduced to mstop = 10. Now continue ... -[ 11] .................. +[ 11] ................... Final risk: 3150.898 LSS Models fitted via Model-based Boosting @@ -132,7 +134,7 @@ Loss function: -(lgamma(y + exp(f)) - lgamma(exp(f)) - lgamma(y + 1) + exp(f) * > modela <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = ms, trace = TRUE), + center = TRUE) -[ 1] ............................ +[ 1] ............................. Final risk: 3150.898 > stopifnot(max(abs(coef(model)[[1]] - coef(modela)[[1]])) + < sqrt(.Machine$double.eps)) @@ -142,7 +144,7 @@ Final risk: 3150.898 > model[40] # check if one value can be specified Model first reduced to mstop = 20. Now continue ... -[ 21] .................. +[ 21] ................... 
Final risk: 3098.51 LSS Models fitted via Model-based Boosting @@ -172,17 +174,17 @@ Loss function: -(lgamma(y + exp(f)) - lgamma(exp(f)) - lgamma(y + 1) + exp(f) * > modelb <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = 40, trace = TRUE), + center = TRUE) -[ 1] ...................................... +[ 1] ....................................... Final risk: 3098.51 > stopifnot(all.equal(risk(model), risk(modelb))) > > model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = 10, trace = TRUE), + center = TRUE) -[ 1] ........ +[ 1] ......... Final risk: 3209.164 > model[20] -[ 11] ........ +[ 11] ......... Final risk: 3162.029 LSS Models fitted via Model-based Boosting @@ -209,7 +211,7 @@ Loss function: -(lgamma(y + exp(f)) - lgamma(exp(f)) - lgamma(y + 1) + exp(f) * > model2 <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = 20, trace = TRUE), + center = TRUE) -[ 1] .................. +[ 1] ................... Final risk: 3162.029 > stopifnot(all.equal(risk(model), risk(model2))) > @@ -217,12 +219,12 @@ Final risk: 3162.029 > model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = ms, trace = TRUE), + center = TRUE) -[ 1] .................. +[ 1] ................... Final risk: 3197.832 > model[c(5,10)] Model first reduced to mstop = 5. Now continue ... -[ 6] ... +[ 6] .... Final risk: 3232.999 LSS Models fitted via Model-based Boosting @@ -250,7 +252,7 @@ Loss function: -(lgamma(y + exp(f)) - lgamma(exp(f)) - lgamma(y + 1) + exp(f) * > model2 <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = ms, trace = TRUE), + center = TRUE) -[ 1] ........ +[ 1] ......... 
Final risk: 3232.999 > stopifnot(all.equal(risk(model), risk(model2))) > @@ -261,10 +263,10 @@ Final risk: 3232.999 > model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = ms, trace = TRUE), + center = TRUE) -[ 1] .................. +[ 1] ................... Final risk: 3197.832 > model[c(10,25)] -[21] ... +[21] .... Final risk: 3193.834 LSS Models fitted via Model-based Boosting @@ -295,7 +297,7 @@ Loss function: -(lgamma(y + exp(f)) - lgamma(exp(f)) - lgamma(y + 1) + exp(f) * > model2 <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = ms, trace = TRUE), + center = TRUE) -[ 1] ....................... +[ 1] ........................ Final risk: 3193.834 > stopifnot(all.equal(risk(model), risk(model2))) > @@ -304,7 +306,7 @@ Final risk: 3193.834 > model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = ms, trace = TRUE), + center = TRUE) -[ 1] .................. +[ 1] ................... Final risk: 3197.832 > model[c(10,15)] Model first reduced to mstop = 15. @@ -338,7 +340,7 @@ Loss function: -(lgamma(y + exp(f)) - lgamma(exp(f)) - lgamma(y + 1) + exp(f) * > model2 <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = ms, trace = TRUE), + center = TRUE) -[ 1] ............. +[ 1] .............. Final risk: 3202.876 > stopifnot(all.equal(risk(model), risk(model2))) > @@ -347,7 +349,7 @@ Final risk: 3202.876 > model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = ms, trace = TRUE), + center = TRUE) -[ 1] .................. +[ 1] ................... Final risk: 3197.832 > model[c(10,9)] Model first reduced to mstop = 9. 
@@ -383,7 +385,7 @@ Loss function: -(lgamma(y + exp(f)) - lgamma(exp(f)) - lgamma(y + 1) + exp(f) * > model2 <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = ms, trace = TRUE), + center = TRUE) -[ 1] ........ +[ 1] ......... Final risk: 3210.562 > stopifnot(all.equal(risk(model), risk(model2))) > @@ -393,11 +395,11 @@ Final risk: 3210.562 > model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = 10, nu = nus, trace = TRUE), + center = TRUE) -[ 1] ........ +[ 1] ......... Final risk: 3261.092 > stopifnot(all(coef(model)[[1]] == 0)) > stopifnot(any(coef(model)[[2]] != 0)) > > proc.time() user system elapsed - 3.761 0.078 4.077 + 4.63 0.26 4.89 diff --git a/tests/regtest-noncyclic_fitting.R b/tests/regtest-noncyclic_fitting.R index a312024..2894024 100644 --- a/tests/regtest-noncyclic_fitting.R +++ b/tests/regtest-noncyclic_fitting.R @@ -123,3 +123,58 @@ mapply(compare_models, m1 = mod, m2 = mod2) mstop(mod3) <- 1 mapply(compare_models, m1 = mod, m2 = mod3) +## check selected +set.seed(1907) +x1 <- rnorm(500) +x2 <- rnorm(500) +x3 <- rnorm(500) +x4 <- rnorm(500) +x5 <- rnorm(500) +x6 <- rnorm(500) +mu <- exp(1.5 +1 * x1 +0.5 * x2 -0.5 * x3 -1 * x4) +sigma <- exp(-0.4 * x3 -0.2 * x4 +0.2 * x5 +0.4 * x6) +y <- numeric(500) +for( i in 1:500) + y[i] <- rnbinom(1, size = sigma[i], mu = mu[i]) +dat <- data.frame(x1, x2, x3, x4, x5, x6, y) + +model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = 10), + center = TRUE, method = "cyclic") +selected(model) # ok (at least in principle) +selected(model, merge = TRUE) # ok + +model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = 10), + center = TRUE, method = "noncyclic") +selected(model) # ok (at least in principle) +selected(model, merge = TRUE) ## BROKEN + +## with informative sigma: +sigma <- exp(-0.4 * x3 -0.2 * x4 +0.2 * x5 + 1 * x6) +y <- 
numeric(500) +for( i in 1:500) + y[i] <- rnbinom(1, size = sigma[i], mu = mu[i]) +dat <- data.frame(x1, x2, x3, x4, x5, x6, y) + +model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = 20), + center = TRUE, method = "cyclic") +selected(model) # ok (at least in principle) +selected(model, merge = TRUE) # ok + +model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = 20), + center = TRUE, method = "noncyclic") +selected(model) # ok (at least in principle) +selected(model, merge = TRUE) ## BROKEN + + +## Check merged risk for reducing mstop to 0, and increasing it again does not contain an NA +stopifnot(all(!is.na(risk(model, merge = TRUE)))) +mstop(model) <- 0 +stopifnot(all(!is.na(risk(model, merge = TRUE)))) +mstop(model) <- 10 +stopifnot(all(!is.na(risk(model, merge = TRUE)))) + + diff --git a/tests/regtest-noncyclic_fitting.Rout.save b/tests/regtest-noncyclic_fitting.Rout.save new file mode 100644 index 0000000..8b8246a --- /dev/null +++ b/tests/regtest-noncyclic_fitting.Rout.save @@ -0,0 +1,350 @@ + +R version 3.4.3 (2017-11-30) -- "Kite-Eating Tree" +Copyright (C) 2017 The R Foundation for Statistical Computing +Platform: x86_64-w64-mingw32/x64 (64-bit) + +R is free software and comes with ABSOLUTELY NO WARRANTY. +You are welcome to redistribute it under certain conditions. +Type 'license()' or 'licence()' for distribution details. + +R is a collaborative project with many contributors. +Type 'contributors()' for more information and +'citation()' on how to cite R or R packages in publications. + +Type 'demo()' for some demos, 'help()' for on-line help, or +'help.start()' for an HTML browser interface to help. +Type 'q()' to quit R. + +> require("gamboostLSS") +Loading required package: gamboostLSS +Loading required package: mboost +Loading required package: parallel +Loading required package: stabs +This is mboost 2.9-0. 
See 'package?mboost' and 'news(package = "mboost")' +for a complete list of changes. + + +Attaching package: 'gamboostLSS' + +The following object is masked from 'package:stats': + + model.weights + +Warning message: +package 'stabs' was built under R version 3.4.4 +> +> ###negbin dist, linear### +> +> set.seed(2611) +> x1 <- rnorm(1000) +> x2 <- rnorm(1000) +> x3 <- rnorm(1000) +> x4 <- rnorm(1000) +> x5 <- rnorm(1000) +> x6 <- rnorm(1000) +> mu <- exp(1.5 + x1^2 +0.5 * x2 - 3 * sin(x3) -1 * x4) +> sigma <- exp(-0.2 * x4 +0.2 * x5 +0.4 * x6) +> y <- numeric(1000) +> for (i in 1:1000) ++ y[i] <- rnbinom(1, size = sigma[i], mu = mu[i]) +> dat <- data.frame(x1, x2, x3, x4, x5, x6, y) +> +> #fit models at number of params + 1 +> +> #glmboost +> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 3), method = "noncyclic") +> +> #linear baselearner with bols +> model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 3), method = "noncyclic", ++ baselearner = "bols") +> +> #nonlinear bbs baselearner +> +> model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 3), method = "noncyclic", ++ baselearner = "bbs") +> +> #reducing model and increasing it afterwards should yield the same fit +> +> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 50), method = "noncyclic") +> +> m_co <- coef(model) +> +> mstop(model) <- 5 +> mstop(model) <- 50 +> +> stopifnot(all.equal(m_co, coef(model))) +> +> +> model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 50), method = "noncyclic", ++ baselearner = "bols") +> +> m_co <- coef(model) +> +> mstop(model) <- 5 +> mstop(model) <- 50 +> +> stopifnot(all.equal(m_co, coef(model))) +> +> +> model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 50), method = 
"noncyclic", ++ baselearner = "bbs") +> +> m_co <- coef(model) +> +> mstop(model) <- 5 +> mstop(model) <- 50 +> +> stopifnot(all.equal(m_co, coef(model))) +> +> +> model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 50), method = "noncyclic", ++ baselearner = "bbs") +> +> m_co <- coef(model) +> +> mstop(model) <- 5 +> mstop(model) <- 50 +> +> stopifnot(all.equal(m_co, coef(model))) +> +> ## check cvrisk for noncyclic models +> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 3), method = "noncyclic") +> cvr1 <- cvrisk(model, grid = 1:50, cv(model.weights(model), B = 5)) +Starting cross-validation... +[Fold: 1] +[ 1] ........................................ -- risk: 1805.447 +[ 41] ......... +Final risk: 1799.837 + +[Fold: 2] +[ 1] ........................................ -- risk: 1734.54 +[ 41] ......... +Final risk: 1732.471 + +[Fold: 3] +[ 1] ........................................ -- risk: 1836.44 +[ 41] ......... +Final risk: 1830.995 + +[Fold: 4] +[ 1] ........................................ -- risk: 1644.972 +[ 41] ......... +Final risk: 1643.654 + +[Fold: 5] +[ 1] ........................................ -- risk: 1748.201 +[ 41] ......... 
+Final risk: 1745.054 +> cvr1 + + Cross-validated + glmboostLSS(formula = y ~ ., data = dat, families = NBinomialLSS(), control = boost_control(mstop = 3), method = "noncyclic") + + 1 2 3 4 5 6 7 8 +4.857889 4.857889 4.854336 4.851760 4.848367 4.845487 4.843456 4.840692 + 9 10 11 12 13 14 15 16 +4.837335 4.835769 4.833281 4.831312 4.829041 4.827176 4.824955 4.822376 + 17 18 19 20 21 22 23 24 +4.820667 4.819740 4.817552 4.816011 4.813472 4.812703 4.810620 4.809356 + 25 26 27 28 29 30 31 32 +4.807388 4.806713 4.804566 4.803307 4.801508 4.800541 4.799011 4.798048 + 33 34 35 36 37 38 39 40 +4.796181 4.795305 4.793928 4.793380 4.791924 4.790579 4.789696 4.788866 + 41 42 43 44 45 46 47 48 +4.787808 4.786352 4.785354 4.784043 4.783679 4.782704 4.781571 4.780256 + 49 50 +4.779861 4.778943 + + Optimal number of boosting iterations: 50 +> plot(cvr1) +> +> risk(model, merge = TRUE) + mu sigma sigma mu mu sigma mu mu +1770.175 1770.175 1768.441 1768.050 1767.677 1766.076 1765.724 1765.388 + sigma mu mu sigma mu mu sigma sigma +1763.912 1763.594 1763.292 1761.933 1761.646 1761.373 1760.615 1759.371 + mu mu mu sigma sigma mu mu sigma +1759.107 1758.855 1758.614 1757.906 1756.754 1756.521 1756.299 1755.650 + mu mu sigma sigma mu mu mu sigma +1755.432 1755.224 1754.151 1753.555 1753.354 1753.162 1752.977 1751.993 + sigma mu mu mu sigma sigma mu mu +1751.437 1751.259 1751.089 1750.925 1750.025 1749.507 1749.349 1749.182 + mu sigma sigma mu mu mu sigma sigma +1749.036 1748.201 1747.724 1747.584 1747.450 1747.321 1746.559 1746.116 + mu mu mu sigma +1745.992 1745.873 1745.759 1745.054 +> risk(model, merge = FALSE) +$mu +[1] 4755.327 4746.600 + +$sigma +[1] 4755.327 4752.028 4749.214 + +attr(,"class") +[1] "inbag" +> +> +> ## test that mstop = 0 is possible +> compare_models <- function (m1, m2) { ++ stopifnot(all.equal(coef(m1), coef(m2))) ++ stopifnot(all.equal(predict(m1), predict(m2))) ++ stopifnot(all.equal(fitted(m1), fitted(m2))) ++ stopifnot(all.equal(selected(m1), 
selected(m2))) ++ stopifnot(all.equal(risk(m1), risk(m2))) ++ ## remove obvious differences from objects ++ m1$control <- m2$control <- NULL ++ m1$call <- m2$call <- NULL ++ if (!all.equal(m1, m2)) ++ stop("Objects of offset model + 1 step and model with 1 step not identical") ++ invisible(NULL) ++ } +> +> # set up models +> mod <- glmboostLSS(y ~ ., data = dat, method = "noncyclic", control = boost_control(mstop = 0)) +> mod2 <- glmboostLSS(y ~ ., data = dat, method = "noncyclic", control = boost_control(mstop = 1)) +> mod3 <- glmboostLSS(y ~ ., data = dat, method = "noncyclic", control = boost_control(mstop = 1)) +> +> lapply(coef(mod), function(x) stopifnot(is.null(x))) +$mu +NULL + +$sigma +NULL + +> +> mstop(mod3) <- 0 +> mapply(compare_models, m1 = mod, m2 = mod3) +$mu +NULL + +$sigma +NULL + +> +> mstop(mod) <- 1 +> mapply(compare_models, m1 = mod, m2 = mod2) +$mu +NULL + +$sigma +NULL + +> +> mstop(mod3) <- 1 +> mapply(compare_models, m1 = mod, m2 = mod3) +$mu +NULL + +$sigma +NULL + +> +> ## check selected +> set.seed(1907) +> x1 <- rnorm(500) +> x2 <- rnorm(500) +> x3 <- rnorm(500) +> x4 <- rnorm(500) +> x5 <- rnorm(500) +> x6 <- rnorm(500) +> mu <- exp(1.5 +1 * x1 +0.5 * x2 -0.5 * x3 -1 * x4) +> sigma <- exp(-0.4 * x3 -0.2 * x4 +0.2 * x5 +0.4 * x6) +> y <- numeric(500) +> for( i in 1:500) ++ y[i] <- rnbinom(1, size = sigma[i], mu = mu[i]) +> dat <- data.frame(x1, x2, x3, x4, x5, x6, y) +> +> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 10), ++ center = TRUE, method = "cyclic") +> selected(model) # ok (at least in principle) +$mu + [1] 2 2 5 2 5 2 5 5 2 5 + +$sigma + [1] 5 5 4 5 4 3 5 4 3 2 + +> selected(model, merge = TRUE) # ok + mu sigma mu sigma mu sigma mu sigma mu sigma mu sigma mu + 2 5 2 5 5 4 2 5 5 4 2 3 5 +sigma mu sigma mu sigma mu sigma + 5 5 4 2 3 5 2 +> +> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 10), ++ center = TRUE, method 
= "noncyclic") +> selected(model) # ok (at least in principle) +$mu + [1] 2 2 5 2 5 2 5 2 5 5 + +$sigma +NULL + +> selected(model, merge = TRUE) ## BROKEN +mu mu mu mu mu mu mu mu mu mu + 2 2 5 2 5 2 5 2 5 5 +> +> ## with informative sigma: +> sigma <- exp(-0.4 * x3 -0.2 * x4 +0.2 * x5 + 1 * x6) +> y <- numeric(500) +> for( i in 1:500) ++ y[i] <- rnbinom(1, size = sigma[i], mu = mu[i]) +> dat <- data.frame(x1, x2, x3, x4, x5, x6, y) +> +> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 20), ++ center = TRUE, method = "cyclic") +> selected(model) # ok (at least in principle) +$mu + [1] 5 5 5 5 5 2 5 5 2 5 2 5 2 5 2 5 5 2 5 2 + +$sigma + [1] 4 5 4 5 4 5 4 5 4 5 7 4 5 2 7 4 5 2 7 4 + +> selected(model, merge = TRUE) # ok + mu sigma mu sigma mu sigma mu sigma mu sigma mu sigma mu + 5 4 5 5 5 4 5 5 5 4 2 5 5 +sigma mu sigma mu sigma mu sigma mu sigma mu sigma mu sigma + 4 5 5 2 4 5 5 2 7 5 4 2 5 + mu sigma mu sigma mu sigma mu sigma mu sigma mu sigma mu + 5 2 2 7 5 4 5 5 2 2 5 7 2 +sigma + 4 +> +> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 20), ++ center = TRUE, method = "noncyclic") +> selected(model) # ok (at least in principle) +$mu + [1] 5 5 5 5 5 2 5 2 5 2 5 2 5 5 2 5 + +$sigma +[1] 5 4 5 4 + +> selected(model, merge = TRUE) ## BROKEN + mu mu mu mu mu mu mu mu mu mu mu mu sigma + 5 5 5 5 5 2 5 2 5 2 5 2 5 + mu sigma mu sigma mu sigma mu + 5 4 5 5 2 4 5 +> +> +> ## Check merged risk for reducing mstop to 0, and increasing it again does not contain an NA +> stopifnot(all(!is.na(risk(model, merge = TRUE)))) +> mstop(model) <- 0 +> stopifnot(all(!is.na(risk(model, merge = TRUE)))) +> mstop(model) <- 10 +> stopifnot(all(!is.na(risk(model, merge = TRUE)))) +> +> +> +> proc.time() + user system elapsed + 9.82 0.17 10.06 diff --git a/tests/regtest-stabilization.Rout.save b/tests/regtest-stabilization.Rout.save index 85c89ed..07a4560 100644 --- 
a/tests/regtest-stabilization.Rout.save +++ b/tests/regtest-stabilization.Rout.save @@ -1,7 +1,7 @@ -R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree" -Copyright (C) 2015 The R Foundation for Statistical Computing -Platform: x86_64-pc-linux-gnu (64-bit) +R version 3.4.3 (2017-11-30) -- "Kite-Eating Tree" +Copyright (C) 2017 The R Foundation for Statistical Computing +Platform: x86_64-w64-mingw32/x64 (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. @@ -23,7 +23,7 @@ Loading required package: gamboostLSS Loading required package: mboost Loading required package: parallel Loading required package: stabs -This is mboost 2.6-0. See 'package?mboost' and 'news(package = "mboost")' +This is mboost 2.9-0. See 'package?mboost' and 'news(package = "mboost")' for a complete list of changes. @@ -33,6 +33,8 @@ The following object is masked from 'package:stats': model.weights +Warning message: +package 'stabs' was built under R version 3.4.4 > > ## simulate Gaussian data > set.seed(0804) @@ -110,6 +112,32 @@ $sigma Loss function: -dnorm(x = y, mean = mu, sd = exp(f), log = TRUE) +attr(,"class") +[1] "families" +attr(,"qfun") +function (p, mu = 0, sigma = 1, lower.tail = TRUE, log.p = FALSE) +{ + qnorm(p = p, mean = mu, sd = sigma, lower.tail = lower.tail, + log.p = log.p) +} + +attr(,"name") +[1] "Gaussian" +> GaussianLSS(stabilization = "L2") +$mu + + Normal distribution: mu(id link) + +Loss function: -dnorm(x = y, mean = f, sd = sigma, log = TRUE) + + +$sigma + + Normal distribution: sigma (log link) + +Loss function: -dnorm(x = y, mean = mu, sd = exp(f), log = TRUE) + + attr(,"class") [1] "families" attr(,"qfun") @@ -149,11 +177,11 @@ attr(,"name") [1] "Gaussian" > res <- try(GaussianLSS(stabilization = "test"), silent = TRUE) > res -[1] "Error in match.arg(stabilization) : 'arg' should be one of \"none\", \"MAD\"\n" +[1] "Error in match.arg(stabilization) : \n 'arg' should be one of 
\"none\", \"MAD\", \"L2\"\n" attr(,"class") [1] "try-error" attr(,"condition") - + > > > ############################################################ @@ -171,24 +199,34 @@ attr(,"condition") + m_MAD <- glmboostLSS(y ~ x1 + x2 + x3 + x4, + families = FAMILIES[[i]](stabilization = "MAD"), + data=dat) ++ m_L2 <- glmboostLSS(y ~ x1 + x2 + x3 + x4, ++ families = FAMILIES[[i]](stabilization = "L2"), ++ data=dat) ++ + stopifnot(tail(risk(m_none, merge = TRUE), 1) != tail(risk(m_MAD, merge = TRUE), 1)) + cat('Risks:\n stabilization = "none":', + tail(risk(m_none, merge = TRUE), 1), + '\n stabilization = "MAD":', -+ tail(risk(m_MAD, merge = TRUE), 1), "\n") ++ tail(risk(m_MAD, merge = TRUE), 1), ++ '\n stabilization = "L2":', ++ tail(risk(m_L2, merge = TRUE), 1), "\n") + } Risks: stabilization = "none": 126.3867 stabilization = "MAD": 126.3865 + stabilization = "L2": 126.3866 Risks: stabilization = "none": 152.9363 stabilization = "MAD": 151.7991 + stabilization = "L2": 152.3966 Risks: stabilization = "none": -16.74875 stabilization = "MAD": -16.85658 + stabilization = "L2": -16.81103 Risks: stabilization = "none": 126.3869 stabilization = "MAD": 126.3865 + stabilization = "L2": 126.3868 > > ## check as.families interface for 2:4 parametric families > dat$y <- rnorm(1000, mean = 10, sd = 1) @@ -202,17 +240,24 @@ Risks: + m_MAD <- glmboostLSS(y ~ x1 + x2 + x3 + x4, + families = as.families(FAMILIES[[i]], stabilization = "MAD"), + data=dat) ++ m_L2 <- glmboostLSS(y ~ x1 + x2 + x3 + x4, ++ families = as.families(FAMILIES[[i]], stabilization = "L2"), ++ data=dat) + cat('Risks:\n stabilization = "none":', + tail(risk(m_none, merge = TRUE), 1), + '\n stabilization = "MAD":', -+ tail(risk(m_MAD, merge = TRUE), 1), "\n") ++ tail(risk(m_MAD, merge = TRUE), 1), ++ '\n stabilization = "L2":', ++ tail(risk(m_L2, merge = TRUE), 1), "\n") + } Risks: stabilization = "none": 1417.42 stabilization = "MAD": 1417.416 + stabilization = "L2": 1417.42 Risks: stabilization = "none": 1424.665 
stabilization = "MAD": 1417.082 + stabilization = "L2": 1419.756 > > FAMILIES <- list("BCT") > require("gamlss.dist") @@ -225,6 +270,8 @@ The following object is masked from 'package:mboost': Family +Warning message: +package 'gamlss.dist' was built under R version 3.4.4 > dat$y <- rBCT(1000, mu = 100, sigma = 0.1, nu = 0, tau = 2) > for (i in 1:length(FAMILIES)) { + m_none <- glmboostLSS(y ~ x1 + x2 + x3 + x4, @@ -234,32 +281,28 @@ The following object is masked from 'package:mboost': + families = as.families(FAMILIES[[i]], stabilization = "MAD"), + data=dat), silent = TRUE) + if (inherits(m_MAD, "try-error")) { -+ warning("BCT cannot be fitted with stabilization", immediate. = TRUE) ++ warning("BCT cannot be fitted with stabilization = 'MAD'", immediate. = TRUE) + break + } ++ ++ m_L2 <- try(glmboostLSS(y ~ x1 + x2 + x3 + x4, ++ families = as.families(FAMILIES[[i]], stabilization = "L2"), ++ data=dat), silent = TRUE) ++ if (inherits(m_L2, "try-error")) { ++ warning("BCT cannot be fitted with stabilization = 'L2'", immediate. 
= TRUE) ++ break ++ } + cat('Risks:\n stabilization = "none":', + tail(risk(m_none, merge = TRUE), 1), + '\n stabilization = "MAD":', -+ tail(risk(m_MAD, merge = TRUE), 1), "\n") ++ tail(risk(m_MAD, merge = TRUE), 1), ++ '\n stabilization = "L2":', ++ tail(risk(m_L2, merge = TRUE), 1), "\n") + } -Warning: BCT cannot be fitted with stabilization -Warning messages: -1: In min(which(cumsum(w)/sum(w) > 0.5)) : - no non-missing arguments to min; returning Inf -2: In max(which(cumsum(w)/sum(w) <= 0.5)) : - no non-missing arguments to max; returning -Inf -3: In min(which(cumsum(w)/sum(w) > 0.5)) : - no non-missing arguments to min; returning Inf -4: In max(which(cumsum(w)/sum(w) <= 0.5)) : - no non-missing arguments to max; returning -Inf -5: In min(which(cumsum(w)/sum(w) > 0.5)) : - no non-missing arguments to min; returning Inf -6: In max(which(cumsum(w)/sum(w) <= 0.5)) : - no non-missing arguments to max; returning -Inf -7: In min(which(cumsum(w)/sum(w) > 0.5)) : - no non-missing arguments to min; returning Inf -8: In max(which(cumsum(w)/sum(w) <= 0.5)) : - no non-missing arguments to max; returning -Inf +Risks: + stabilization = "none": 4336.164 + stabilization = "MAD": 4289.131 + stabilization = "L2": 4303.955 > > ## check survival families > dat$zens <- sample(c(0, 1), 1000, replace = TRUE) @@ -280,22 +323,33 @@ Loading required package: survival + data=dat), silent = TRUE) + + if (inherits(m_MAD, "try-error")) { -+ warning(families[i], "cannot be fitted with stabilization", immediate. = TRUE) ++ warning(families[i], " cannot be fitted with stabilization = 'MAD'", immediate. = TRUE) + break + } -+ cat('Risks:\n stabilization = "none":', ++ m_L2 <- try(glmboostLSS(Surv(y, zens) ~ x1 + x2 + x3 + x4, ++ families = FAMILIES[[i]](stabilization = "L2"), ++ data=dat), silent = TRUE) ++ ++ if (inherits(m_L2, "try-error")) { ++ warning(families[i], " cannot be fitted with stabilization = 'L2'", immediate. 
= TRUE) ++ break ++ } ++ cat('Risks:\n stabilization = "none":', + tail(risk(m_none, merge = TRUE), 1), + '\n stabilization = "MAD":', -+ tail(risk(m_MAD, merge = TRUE), 1), "\n") ++ tail(risk(m_MAD, merge = TRUE), 1), ++ '\n stabilization = "L2":', ++ tail(risk(m_L2, merge = TRUE), 1), "\n") + } LogNormalLSS Risks: stabilization = "none": 385.8235 stabilization = "MAD": 385.5457 + stabilization = "L2": 387.9062 WeibullLSS -Warning: WeibullLSScannot be fitted with stabilization +Warning: WeibullLSS cannot be fitted with stabilization = 'MAD' > > ## check count data > dat$y <- rnbinom(1000, size=10, mu=5) @@ -310,21 +364,30 @@ Warning: WeibullLSScannot be fitted with stabilization + m_MAD <- glmboostLSS(y ~ x1 + x2 + x3 + x4, + families = FAMILIES[[i]](stabilization = "MAD"), + data=dat) ++ m_L2 <- glmboostLSS(y ~ x1 + x2 + x3 + x4, ++ families = FAMILIES[[i]](stabilization = "L2"), ++ data=dat) ++ + cat('Risks:\n stabilization = "none":', + tail(risk(m_none, merge = TRUE), 1), + '\n stabilization = "MAD":', -+ tail(risk(m_MAD, merge = TRUE), 1), "\n") ++ tail(risk(m_MAD, merge = TRUE), 1), ++ '\n stabilization = "L2":', ++ tail(risk(m_L2, merge = TRUE), 1), "\n") + } Risks: stabilization = "none": 2364.811 stabilization = "MAD": 2363.998 + stabilization = "L2": 2364.279 Risks: stabilization = "none": 2414.483 stabilization = "MAD": 2401.002 + stabilization = "L2": 2393.06 Risks: stabilization = "none": 2366.661 stabilization = "MAD": 2363.148 + stabilization = "L2": 2364.33 > > proc.time() user system elapsed - 19.599 0.078 19.842 + 29.32 0.28 29.67 diff --git a/tests/regtest-stabsel.R b/tests/regtest-stabsel.R new file mode 100644 index 0000000..e843b35 --- /dev/null +++ b/tests/regtest-stabsel.R @@ -0,0 +1,50 @@ +require("gamboostLSS") +### Data generating process: +set.seed(1907) +x1 <- rnorm(500) +x2 <- rnorm(500) +x3 <- rnorm(500) +x4 <- rnorm(500) +x5 <- rnorm(500) +x6 <- rnorm(500) +mu <- exp(1.5 +1 * x1 +0.5 * x2 -0.5 * x3 -1 * x4) +sigma <- exp(-0.4 * x3 
-0.2 * x4 +0.2 * x5 +0.4 * x6) +y <- numeric(500) +for( i in 1:500) + y[i] <- rnbinom(1, size = sigma[i], mu = mu[i]) +dat <- data.frame(x1, x2, x3, x4, x5, x6, y) + +model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = 10), + center = TRUE, method = "cyclic") +s1 <- stabsel(model, q = 5, PFER = 1, B = 10) ## warning is expected +plot(s1) +plot(s1, type = "paths") + +model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = 10), + center = TRUE, method = "noncyclic") +s2 <- stabsel(model, q = 5, PFER = 1, B = 10) ## warning is expected +plot(s2) +plot(s2, type = "paths") + +## with informative sigma: +sigma <- exp(-0.4 * x3 -0.2 * x4 +0.2 * x5 + 1 * x6) +y <- numeric(500) +for( i in 1:500) + y[i] <- rnbinom(1, size = sigma[i], mu = mu[i]) +dat <- data.frame(x1, x2, x3, x4, x5, x6, y) + +model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = 10), + center = TRUE, method = "cyclic") +s3 <- stabsel(model, q = 5, PFER = 1, B = 10) ## warning is expected +plot(s3) +plot(s3, type = "paths") + +model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, + control = boost_control(mstop = 10), + center = TRUE, method = "noncyclic") +s4 <- stabsel(model, q = 5, PFER = 1, B = 10) ## warning is expected +plot(s4) +plot(s4, type = "paths") diff --git a/tests/regtest-stabsel.Rout.save b/tests/regtest-stabsel.Rout.save new file mode 100644 index 0000000..34664cf --- /dev/null +++ b/tests/regtest-stabsel.Rout.save @@ -0,0 +1,103 @@ + +R version 3.4.3 (2017-11-30) -- "Kite-Eating Tree" +Copyright (C) 2017 The R Foundation for Statistical Computing +Platform: x86_64-w64-mingw32/x64 (64-bit) + +R is free software and comes with ABSOLUTELY NO WARRANTY. +You are welcome to redistribute it under certain conditions. +Type 'license()' or 'licence()' for distribution details. + +R is a collaborative project with many contributors. 
+Type 'contributors()' for more information and +'citation()' on how to cite R or R packages in publications. + +Type 'demo()' for some demos, 'help()' for on-line help, or +'help.start()' for an HTML browser interface to help. +Type 'q()' to quit R. + +> require("gamboostLSS") +Loading required package: gamboostLSS +Loading required package: mboost +Loading required package: parallel +Loading required package: stabs +This is mboost 2.9-0. See 'package?mboost' and 'news(package = "mboost")' +for a complete list of changes. + + +Attaching package: 'gamboostLSS' + +The following object is masked from 'package:stats': + + model.weights + +Warning message: +package 'stabs' was built under R version 3.4.4 +> ### Data generating process: +> set.seed(1907) +> x1 <- rnorm(500) +> x2 <- rnorm(500) +> x3 <- rnorm(500) +> x4 <- rnorm(500) +> x5 <- rnorm(500) +> x6 <- rnorm(500) +> mu <- exp(1.5 +1 * x1 +0.5 * x2 -0.5 * x3 -1 * x4) +> sigma <- exp(-0.4 * x3 -0.2 * x4 +0.2 * x5 +0.4 * x6) +> y <- numeric(500) +> for( i in 1:500) ++ y[i] <- rnbinom(1, size = sigma[i], mu = mu[i]) +> dat <- data.frame(x1, x2, x3, x4, x5, x6, y) +> +> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 10), ++ center = TRUE, method = "cyclic") +> s1 <- stabsel(model, q = 5, PFER = 1, B = 10) ## warning is expected +Run stabsel .................... +Warning message: +In stabsel.mboostLSS(model, q = 5, PFER = 1, B = 10) : + 'mstop' too small in 13 of the 20 subsampling replicates to select 'q' base-learners; Increase 'mstop' bevor applying 'stabsel' +> plot(s1) +> plot(s1, type = "paths") +> +> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 10), ++ center = TRUE, method = "noncyclic") +> s2 <- stabsel(model, q = 5, PFER = 1, B = 10) ## warning is expected +Run stabsel .................... 
+Warning message: +In stabsel.mboostLSS(model, q = 5, PFER = 1, B = 10) : + 'mstop' too small in 20 of the 20 subsampling replicates to select 'q' base-learners; Increase 'mstop' bevor applying 'stabsel' +> plot(s2) +> plot(s2, type = "paths") +> +> ## with informative sigma: +> sigma <- exp(-0.4 * x3 -0.2 * x4 +0.2 * x5 + 1 * x6) +> y <- numeric(500) +> for( i in 1:500) ++ y[i] <- rnbinom(1, size = sigma[i], mu = mu[i]) +> dat <- data.frame(x1, x2, x3, x4, x5, x6, y) +> +> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 10), ++ center = TRUE, method = "cyclic") +> s3 <- stabsel(model, q = 5, PFER = 1, B = 10) ## warning is expected +Run stabsel .................... +Warning message: +In stabsel.mboostLSS(model, q = 5, PFER = 1, B = 10) : + 'mstop' too small in 19 of the 20 subsampling replicates to select 'q' base-learners; Increase 'mstop' bevor applying 'stabsel' +> plot(s3) +> plot(s3, type = "paths") +> +> model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, ++ control = boost_control(mstop = 10), ++ center = TRUE, method = "noncyclic") +> s4 <- stabsel(model, q = 5, PFER = 1, B = 10) ## warning is expected +Run stabsel .................... 
+Warning message: +In stabsel.mboostLSS(model, q = 5, PFER = 1, B = 10) : + 'mstop' too small in 20 of the 20 subsampling replicates to select 'q' base-learners; Increase 'mstop' bevor applying 'stabsel' +> plot(s4) +> plot(s4, type = "paths") +> +> proc.time() + user system elapsed + 6.63 0.26 6.89 diff --git a/vignettes/fig-crossvalidation.pdf b/vignettes/fig-crossvalidation.pdf new file mode 100644 index 0000000..01ac423 Binary files /dev/null and b/vignettes/fig-crossvalidation.pdf differ diff --git a/vignettes/gamboostLSS_Tutorial.Rnw b/vignettes/gamboostLSS_Tutorial.Rnw index cd523bb..5af45c1 100644 --- a/vignettes/gamboostLSS_Tutorial.Rnw +++ b/vignettes/gamboostLSS_Tutorial.Rnw @@ -61,7 +61,7 @@ if (all(is.na(pd))){ ## for the exact reproduction of the plots R2BayesX >= 0.3.2 is needed suppressWarnings(if (!require("R2BayesX")) - install.packages("R2BayesX")) + install.packages("R2BayesX", repos = repos)) ## remove (possibly) altered versions of "india" from working environment suppressWarnings(rm("india")) diff --git a/vignettes/gamboostLSS_Tutorial_CRAN.Rnw b/vignettes/gamboostLSS_Tutorial_CRAN.Rnw new file mode 100644 index 0000000..a79b978 --- /dev/null +++ b/vignettes/gamboostLSS_Tutorial_CRAN.Rnw @@ -0,0 +1,1638 @@ +\documentclass[shortnames,nojss]{jss} % onecolumn + +% \VignetteIndexEntry{gamboostLSS Tutorial} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Setup LaTeX +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} + +\usepackage{booktabs} +\usepackage{multirow} + +\usepackage{amsmath,amssymb,amsfonts} +\usepackage{bm} +\usepackage{dsfont} + +\usepackage{pdflscape,rotating} + +\usepackage{algorithm} + +%%% Needed for RCS +\usepackage{ulem} % use this for sout +\normalem % set \emph to \textit again + +\shortcites{borghi_who,bmc_growth,schmid2013beta,villarini2009} + +%%% RCS System +\def\rcsmark#1{\colorbox{yellow}{#1}} 
+\def\rcscom#1{\noindent\newline\vspace*{0.5cm}\colorbox{yellow}{\parbox{\textwidth}{#1}}\vspace*{0.5cm}} +\def\rcsdel#1{{\color{red}\sout{#1}}} % delete +\def\rcsdelp#1{{\color{red}$<$DEL$>$#1$<$/DEL$>$}} % delete paragraph +\def\rcsadd#1{{\color{blue}#1}} % added + +%%% Used for diamond at the end of the case study parts +\DeclareSymbolFont{extraup}{U}{zavm}{m}{n} +\DeclareMathSymbol{\varheart}{\mathalpha}{extraup}{86} +\DeclareMathSymbol{\vardiamond}{\mathalpha}{extraup}{87} + +%% do not \usepackage{Sweave} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Setup R +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +<>= +options(prompt = "R> ", continue = "+ ", width = 76, useFancyQuotes = FALSE) +repos <- "http://cran.at.r-project.org" +## check if a current version of gamboostLSS is installed +suppressWarnings(pd <- packageDescription("gamboostLSS")) +if (all(is.na(pd))){ + install.packages("gamboostLSS", repos = repos) + pd <- packageDescription("gamboostLSS") +} else { + if (compareVersion(pd$Version, "1.2-0") < 0){ + warning(sQuote("gamboostLSS"), " (1.2-0 or newer) is being installed!") + install.packages("gamboostLSS", repos = repos) + } +} + +## for the exact reproduction of the plots R2BayesX >= 0.3.2 is needed +suppressWarnings(if (!require("R2BayesX")) + install.packages("R2BayesX", repos = repos)) + +## remove (possibly) altered versions of "india" from working environment +suppressWarnings(rm("india")) +require("gamboostLSS") + +## make graphics directory if not existing +if (!file.exists("graphics")) + dir.create("graphics") +if (!file.exists("cvrisk")) + dir.create("cvrisk") +@ + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Title Information +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\title{\pkg{gamboostLSS}: An \proglang{R} Package for Model Building and + Variable Selection in the GAMLSS 
Framework} +\Plaintitle{gamboostLSS: An R Package for Model Building and Variable Selection + in the GAMLSS Framework} +\Shorttitle{\pkg{gamboostLSS}: Model Building and Variable Selection for GAMLSS} +\author{Benjamin Hofner\\FAU Erlangen-Nürnberg \And + Andreas Mayr\\FAU Erlangen-Nürnberg \And + Matthias Schmid\\University of Bonn} +\Plainauthor{Benjamin Hofner, Andreas Mayr, Matthias Schmid} + +\Abstract{ \emph{This vignette is a slightly modified version of + \citet{Hofner:gamboostLSS:2015} which appeared in the + \textbf{Journal of Statistical Software}. Please cite that article when + using the package \pkg{gamboostLSS} in your work.}\\[1em] + + Generalized additive models for location, scale and shape are a + flexible class of regression models that allow to model multiple parameters of + a distribution function, such as the mean and the standard deviation, + simultaneously. With the \proglang{R} package \pkg{gamboostLSS}, we provide a + boosting method to fit these models. Variable selection and model choice are + naturally available within this regularized regression framework. To introduce + and illustrate the \proglang{R} package \pkg{gamboostLSS} and its + infrastructure, we use a data set on stunted growth in India. In addition to + the specification and application of the model itself, we present a variety of + convenience functions, including methods for tuning parameter selection, + prediction and visualization of results. 
The package \pkg{gamboostLSS} is + available from CRAN (\url{http://cran.r-project.org/package=gamboostLSS}).} + +\Keywords{additive models, prediction intervals, high-dimensional data} + +\Address{ + Benjamin Hofner \& Andreas Mayr\\ + Department of Medical Informatics, Biometry and Epidemiology\\ + Friedrich-Alexander-Universität Erlangen-Nürnberg\\ + Waldstra{\ss}e 6\\ + 91054 Erlangen, Germany\\ + E-mail: \email{benjamin.hofner@fau.de},\\ + \hphantom{E-mail: }\email{andreas.mayr@fau.de}\\ + URL: \url{http://www.imbe.med.uni-erlangen.de/cms/benjamin_hofner.html},\\ + \hphantom{URL: }\url{http://www.imbe.med.uni-erlangen.de/ma/A.Mayr/}\\ + + Matthias Schmid\\ + Department of Medical Biometry, Informatics and Epidemiology\\ + University of Bonn\\ + Sigmund-Freud-Stra{\ss}e 25\\ + 53105 Bonn\\ + E-mail: \email{matthias.schmid@imbie.uni-bonn.de}\\ + URL: \url{http://www.imbie.uni-bonn.de} +} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Begin Document +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{document} +\SweaveOpts{concordance=FALSE} +\SweaveOpts{echo = TRUE, results = verbatim, prefix.string=graphics/fig} +\SweaveOpts{width = 5, height = 5} +\SweaveOpts{keep.source = TRUE} + +\maketitle + +\section{Introduction} +\label{sec:intro} + +Generalized additive models for location, scale and shape (GAMLSS) are a +flexible statistical method to analyze the relationship between a response +variable and a set of predictor variables. Introduced by \citet{rs}, GAMLSS are +an extension of the classical GAM (generalized additive model) approach +\citep{hastietib}. The main difference between GAMs and GAMLSS is that GAMLSS do +not only model the conditional mean of the outcome distribution (location) but +\textit{several} of its parameters, including scale and shape parameters (hence +the extension ``LSS''). 
In Gaussian regression, for example, the density of the +outcome variable $Y$ conditional on the predictors $\mathbf{X}$ may depend on +the mean parameter $\mu$, and an additional scale parameter $\sigma$, which +corresponds to the standard deviation of $Y| \mathbf{X}$. Instead of assuming +$\sigma$ to be fixed, as in classical GAMs, the Gaussian GAMLSS regresses both +parameters on the predictor variables, +\begin{eqnarray} +\label{Gaussian:mu} +\mu = \E(y \, | \, \mathbf{X}) & = & \eta_\mu = \beta_{\mu,0} + \sum_j f_{\mu,j}(x_j), \\ +\label{Gaussian:sigma} +\log(\sigma) = \log ( \sqrt{\VAR(y \, | \, \mathbf{X})}) & = & +\eta_{\sigma} = \beta_{\sigma,0} +\sum_j f_{\sigma, j}(x_j), +\end{eqnarray} +where $\eta_\mu$ and $\eta_\sigma$ are \emph{additive predictors} with parameter +specific intercepts $\beta_{\mu,0}$ and $\beta_{\sigma,0}$, and functions +$f_{\mu,j}(x_j)$ and $f_{\sigma, j}(x_j)$, which represent the effects of +predictor $x_j$ on $\mu$ and $\sigma$, respectively. In this notation, the +functional terms $f(\cdot)$ can denote various types of effects (e.g., linear, +smooth, random). + +In our case study, we will analyze the prediction of stunted growth for children +in India via a Gaussian GAMLSS. The response variable is a stunting score, which +is commonly used to relate the growth of a child to a reference population in +order to assess effects of malnutrition in early childhood. In our analysis, we +model the expected value ($\mu$) of this stunting score and also its variability +($\sigma$) via smooth effects for mother- or child-specific predictors, as well +as a spatial effect to account for the region of India where the child is +growing up. This way, we are able to construct point predictors (via +$\eta_{\mu}$) and additionally child-specific prediction intervals (via +$\eta_{\mu}$ and $\eta_{\sigma}$) to evaluate the individual risk of stunted +growth. 
+ +In recent years, due to their versatile nature, GAMLSS have been used to address +research questions in a variety of fields. Applications involving GAMLSS range +from the normalization of complementary DNA microarray data +\citep{khondoker2009} and the analysis of flood frequencies +\citep{villarini2009} to the development of rainfall models +\citep{serinaldi2012} and stream-flow forecasting models \citep{ogtrop2011}. The +most prominent application of GAMLSS is the estimation of centile curves, e.g., +for reference growth charts \citep{onis2006child, borghi_who, bmc_growth}. The +use of GAMLSS in this context has been recommended by the World Health +Organization \citep[see][and the references therein]{rigby:smoothing:2014}. + +Classical estimation of a GAMLSS is based on backfitting-type Gauss-Newton +algorithms with AIC-based selection of relevant predictors. This strategy is +implemented in the \proglang{R} \citep{R:3.1.0} package \pkg{gamlss} +\citep{rs,gamlss:jss:2007,pkg:gamlss:4.3-0}, which provides a great variety of +functions for estimation, hyper-parameter selection, variable selection and +hypothesis testing in the GAMLSS framework. + +In this article we present the \proglang{R} package \pkg{gamboostLSS} +\citep{pkg:gamboostLSS:1.2-0}, which is designed as an alternative to +\pkg{gamlss} for high-dimensional data settings where variable selection is of +major importance. Specifically, \pkg{gamboostLSS} implements the +\emph{gamboostLSS} algorithm, which is a new fitting method for GAMLSS that was +recently introduced by \cite{mayretal}. The \emph{gamboostLSS} algorithm uses +the same optimization criterion as the Gauss-Newton type algorithms implemented +in the package \pkg{gamlss} (namely, the log-likelihood of the model under +consideration) and hence fits the same type of statistical model. 
In contrast to +\pkg{gamlss}, however, the \pkg{gamboostLSS} package operates within the +component-wise gradient boosting framework for model fitting and variable +selection \citep{BuehlmannYu2003,BuhlmannHothorn07}. As demonstrated in +\cite{mayretal}, replacing Gauss-Newton optimization by boosting techniques +leads to a considerable increase in flexibility: Apart from being able to fit +basically any type of GAMLSS, \pkg{gamboostLSS} implements an efficient +mechanism for variable selection and model choice. As a consequence, +\pkg{gamboostLSS} is a convenient alternative to the AIC-based variable +selection methods implemented in \pkg{gamlss}. The latter methods can be +unstable, especially when it comes to selecting possibly different sets of +variables for multiple distribution parameters. Furthermore, model fitting via +\emph{gamboostLSS} is also possible for high-dimensional data with more +candidate variables than observations ($p > n$), where the classical fitting +methods become unfeasible. + +The \pkg{gamboostLSS} package is a comprehensive implementation of the most +important issues and aspects related to the use of the \emph{gamboostLSS} +algorithm. The package is available on CRAN +(\url{http://cran.r-project.org/package=gamboostLSS}). Current development +versions are hosted on GitHub (\url{https://github.com/hofnerb/gamboostLSS}). As +will be demonstrated in this paper, the package provides a large number of +response distributions \citep[e.g., distributions for continuous data, count +data and survival data, including \textit{all} distributions currently available +in the \pkg{gamlss} framework; see][]{pkg:gamlss.dist:4.3-0}. Moreover, users of +\pkg{gamboostLSS} can choose among many different possibilities for modeling +predictor effects. These include linear effects, smooth effects and trees, as +well as spatial and random effects, and interaction terms. 
+ +After starting with a toy example (Section~\ref{sec:toy-example}) for +illustration, we will provide a brief theoretical overview of GAMLSS and +component-wise gradient boosting (Section~\ref{sec:boost-gamlss-models}). In +Section~\ref{sec:india}, we will introduce the \code{india} data set, which is +shipped with the \proglang{R} package \pkg{gamboostLSS}. We present the +infrastructure of \pkg{gamboostLSS}, discuss model comparison methods and model +tuning, and will show how the package can be used to build regression models in +the GAMLSS framework (Section~\ref{sec:package}). In particular, we will give a +step by step introduction to \pkg{gamboostLSS} by fitting a flexible GAMLSS +model to the \code{india} data. In addition, we will present a variety of +convenience functions, including methods for the selection of tuning parameters, +prediction and the visualization of results (Section~\ref{sec:methods}). + +\section[A toy example]{A toy example} +\label{sec:toy-example} + +Before we discuss the theoretical aspects of the \emph{gamboostLSS} algorithm +and the details of the implementation, we present a short, illustrative toy +example. This highlights the ease of use of the \pkg{gamboostLSS} package in +simple modeling situations. Before we start, we load the package +<>= +library("gamboostLSS") +@ +Note that \pkg{gamboostLSS} 1.2-0 or newer is needed. We simulate data +from a heteroscedastic normal distribution, i.e., both the mean and the variance +depend on covariates: +<>= +set.seed(1907) +n <- 150 +x1 <- rnorm(n) +x2 <- rnorm(n) +x3 <- rnorm(n) +toydata <- data.frame(x1 = x1, x2 = x2, x3 = x3) +toydata$y <- rnorm(n, mean = 1 + 2 * x1 - x2, + sd = exp(0.5 - 0.25 * x1 + 0.5 * x3)) +@ +Next we fit a linear model for location, scale and shape to the simulated data +<>= +lmLSS <- glmboostLSS(y ~ x1 + x2 + x3, data = toydata) +@ +and extract the coefficients using \code{coef(lmLSS)}. 
When we add the offset +(i.e., the starting values of the fitting algorithm) to the intercept, we obtain +<>= +coef(lmLSS, off2int = TRUE) +@ +Usually, model fitting involves additional tuning steps, which are skipped here +for the sake of simplicity (see Section~\ref{sec:model_tuning} for details). +Nevertheless, the coefficients coincide well with the true effects, which are +$\beta_\mu = (1, 2, -1, 0)$ and $\beta_\sigma = (0.5, - 0.25, 0, 0.5)$. To get a +graphical display, we \code{plot} the resulting model +<>= +par(mfrow = c(1, 2), mar = c(4, 4, 2, 5)) +plot(lmLSS, off2int = TRUE) +@ + +\setkeys{Gin}{width = 0.9\textwidth} +\begin{figure}[ht!] + \centering +<>= +<> +@ +\caption{Coefficient paths for linear LSS models, which depict the + change of the coefficients over the iterations of the algorithm.} +\end{figure} + +To extract fitted values for the mean, we use the function \code{fitted(, + parameter = "mu")}. The results are very similar to the true values: +<<>>= +muFit <- fitted(lmLSS, parameter = "mu") +rbind(muFit, truth = 1 + 2 * x1 - x2)[, 1:5] +@ +The same can be done for the standard deviation, but we need to make sure that +we apply the response function (here $\exp(\eta)$) to the fitted values by +additionally using the option \code{type = "response"}: +<<>>= +sigmaFit <- fitted(lmLSS, parameter = "sigma", type = "response")[, 1] +rbind(sigmaFit, truth = exp(0.5 - 0.25 * x1 + 0.5 * x3))[, 1:5] +@ + +For new observations stored in a data set \code{newData} we could use +\code{predict(lmLSS, newdata = newData)} essentially in the same way. As +presented in Section~\ref{sec:methods}, the complete distribution could also be +depicted as marginal prediction intervals via the function \code{predint()}. 
+ +\section[Boosting GAMLSS models]{Boosting GAMLSS models} +\label{sec:boost-gamlss-models} + +\emph{GamboostLSS} is an algorithm to fit GAMLSS models via component-wise +gradient boosting \citep{mayretal} adapting an earlier strategy by +\cite{schmidetal}. While the concept of boosting emerged from the field of +supervised machine learning, boosting algorithms are nowadays often applied as +flexible alternative to estimate and select predictor effects in statistical +regression models \citep[statistical boosting,][]{mayr_boosting_part1}. The key +idea of statistical boosting is to iteratively fit the different predictors with +simple regression functions (base-learners) and combine the estimates to an +additive predictor. In case of gradient boosting, the base-learners are fitted +to the negative gradient of the loss function; this procedure can be described +as gradient descent in function space \citep{BuhlmannHothorn07}. For GAMLSS, we +use the negative log-likelihood as loss function. Hence, the negative gradient +of the loss function equals the (positive) gradient of the log-likelihood. To +avoid confusion we directly use the gradient of the log-likelihood in the +remainder of the article. + +To adapt the standard boosting algorithm to fit additive predictors for all +distribution parameters of a GAMLSS we extended the component-wise fitting to +multiple parameter dimensions: In each iteration, \emph{gamboostLSS} calculates +the partial derivatives of the log-likelihood function $l(y, \bm{\theta})$ with +respect to each of the additive predictors $\eta_{\theta_k}$, $k=1,\ldots,K$. +The predictors are related to the parameter vector $\bm{\theta} = +(\theta_k)^\top_{k = 1,\ldots,K}$ via parameter-specific link functions $g_k$, +$\theta_k = g_k^{-1}(\eta_{\theta_k})$. Typically, we have at maximum $K = 4$ +distribution parameters \citep{rs}, but in principle more are possible. 
The +predictors are updated successively in each iteration, while the current +estimates of the other distribution parameters are used as offset values. A +schematic representation of the updating process of \emph{gamboostLSS} with four +parameters in iteration $m+1$ looks as follows: + +\begin{alignat*}{8} + \frac{\partial}{\partial \eta_{\mu}}\, l(&y, \hat{\mu}^{[m]}&,& \hat{\sigma}^{[m]}&,& \hat{\nu}^{[m]}&,& + \hat{\tau}^{[m]}&) + &\quad \stackrel{\rm update}{\longrightarrow}& \hat{\eta}_\mu^{[\boldmath{m+1}]} \Longrightarrow \hat{\mu}^{[\emph{m+1}]} \ ,& \\ +% + \frac{\partial}{\partial \eta_{\sigma}}\, l(&y, \hat{\mu}^{[\emph{m+1}]}&,& \hat{\sigma}^{[m]}&,& \hat{\nu}^{[m]}&,& + \hat{\tau}^{[m]}&) + &\quad \stackrel{\rm update }{\longrightarrow} \quad & \hat{\eta}_\sigma^{[\emph{m+1}]} \Longrightarrow \hat{\sigma}^{[\emph{m+1}]} \ ,& \\ +% + \frac{\partial}{\partial \eta_{\nu}}\, l(&y, \hat{\mu}^{[\emph{m+1}]}&,& \hat{\sigma}^{[\emph{m+1}]}&,& + \hat{\nu}^{[m]}&,& \hat{\tau}^{[m]}&) + &\quad \stackrel{\rm update }{\longrightarrow}& \hat{\eta}_\nu^{[\emph{m+1}]} \Longrightarrow \hat{\nu}^{[\emph{m+1}]} \ , \\ +% + \frac{\partial}{\partial \eta_{\tau}}\, l(&y, \hat{\mu}^{[\emph{m+1}]}&,& \hat{\sigma}^{[\emph{m+1}]}&,& + \hat{\nu}^{[\emph{m+1}]}&,& \hat{\tau}^{[m]}&) + &\quad \stackrel{\rm update}{\longrightarrow}& \hat{\eta}_\tau^{[\emph{m+1}]} \Longrightarrow + \hat{\tau}^{[\emph{m+1}]} \ . +\end{alignat*} + + +The algorithm hence circles through the different parameter dimensions: in every +dimension, it carries out one boosting iteration, updates the corresponding +additive predictor and includes the new prediction in the loss function for the +next dimension. + +As in classical statistical boosting, inside each boosting iteration only the +best fitting base-learner is included in the update. 
Typically, each
+base-learner corresponds to one component of $\mathbf{X}$, and in every boosting
+iteration only a small proportion (a typical value of the \textit{step-length}
+is 0.1) of the fit of the selected base-learner is added to the current additive
+predictor $\eta^{[m]}_{\theta_k}$. This procedure effectively leads to
+data-driven variable selection which is controlled by the stopping iterations
+$\bm{m}_{\text{stop}} = (m_{\text{stop},1},...,m_{\text{stop},K})^{\top}$: Each
+additive predictor $\eta_{\theta_k}$ is updated until the corresponding stopping
+iteration $m_{\text{stop},k}$ is reached. If $m$ is greater than
+$m_{\text{stop},k}$, the $k$th distribution parameter dimension is no longer
+updated. Predictor variables that have never been selected up to iteration
+$m_{\text{stop},k}$ are effectively excluded from the resulting model. The
+vector $\bm{m}_{\text{stop}}$ is a tuning parameter that can, for example, be
+determined using multi-dimensional cross-validation (see
+Section~\ref{sec:model_tuning} for details). A discussion of model comparison
+methods and diagnostic checks can be found in
+Section~\ref{sec:model_complexity}. The complete \emph{gamboostLSS} algorithm
+can be found in Appendix~\ref{algorithm} and is described in detail in
+\cite{mayretal}.
+
+\paragraph{Scalability of boosting algorithms} One of the main advantages of
+boosting algorithms in practice, besides the automated variable selection, is
+their applicability in situations with more variables than observations ($p >
+n$). Despite the growing model complexity, the run time of boosting algorithms
+for GAMs increases only linearly with the number of base-learners
+\cite{BuehlmannYu2007}. An evaluation of computing times for up to $p = 10000$
+predictors can be found in \cite{BinMueSch2012}. In case of boosting GAMLSS, the
+computational complexity additionally increases with the number of distribution
+parameters $K$. 
For an example on the performance of \emph{gamboostLSS} in case +of $p > n$ see the simulation studies provided in \cite{mayretal}. To speed up +computations for the tuning of the algorithm via cross-validation or resampling, +\pkg{gamboostLSS} incorporates parallel computing (see +Section~\ref{sec:model_tuning}). + +\section{Childhood malnutrition in India} +\label{sec:india} + +Eradicating extreme poverty and hunger is one of the Millennium Development +Goals that all 193 member states of the United Nations have agreed to achieve by +the year 2015. Yet, even in democratic, fast-growing emerging countries like +India, which is one of the biggest global economies, malnutrition of children is +still a severe problem in some parts of the population. Childhood malnutrition +in India, however, is not necessarily a consequence of extreme poverty but can +also be linked to low educational levels of parents and cultural factors +\citep{india_nut}. Following a bulletin of the World Health Organization, growth +assessment is the best available way to define the health and nutritional status +of children \citep{who}. Stunted growth is defined as a reduced growth rate +compared to a standard population and is considered as the first consequence of +malnutrition of the mother during pregnancy, or malnutrition of the child during +the first months after birth. Stunted growth is often measured via a $Z$ score +that compares the anthropometric measures of the child with a reference +population: +\begin{eqnarray*} +Z_i = \frac{\text{AI}_i - \text{MAI}}{s} +\end{eqnarray*} +In our case, the individual anthropometric indicator ($\text{AI}_i$) will be the +height of the child $i$, while MAI and $s$ are the median and the standard +deviation of the height of children in a reference population. This $Z$ score +will be denoted as \textit{stunting score} in the following. 
Negative values of
+the score indicate that the child's growth is below the expected growth of a
+child with normal nutrition.
+
+The stunting score will be the outcome (response) variable in our application:
+we analyze the relationship of the mother's and the child's body mass index
+(BMI) and age with stunted growth resulting from malnutrition in early
+childhood. Furthermore, we will investigate regional differences by including
+also the district of India in which the child is growing up. The aim of the
+analysis is both to explain the underlying structure in the data as well as to
+develop a prediction model for children growing up in India. A prediction rule,
+based also on regional differences, could help to increase awareness for the
+individual risk of a child to suffer from stunted growth due to malnutrition.
+For an in-depth analysis on the multi-factorial nature of child stunting in
+India, based on boosted quantile regression, see \citet{Fenske:2011:JASA}, and
+\citet{fenske2013plos}.
+
+The data set that we use in this analysis is based on the Standard Demographic
+and Health Survey, 1998-99, on malnutrition of children in India, which can be
+downloaded after registration from \url{http://www.measuredhs.com}. For
+illustrative purposes, we use a random subset of \Sexpr{nrow(india)}
+observations from the original data (approximately 12\%) and only a (very small)
+subset of variables. For details on the data set and the data source see the
+help file of the \code{india} data set in the \pkg{gamboostLSS} package and
+\citet{FahrmeirKneib:2011}.
+
+\paragraph{Case study: Childhood malnutrition in India}
+
+First of all we load the data sets \code{india} and \code{india.bnd} into the
+workspace. The first data set includes the outcome and \Sexpr{ncol(india) - 2}
+explanatory variables. The latter data set consists of a special boundary file
+containing the neighborhood structure of the districts in India. 
+ +<>= +data("india") +data("india.bnd") +names(india) +@ + +The outcome variable \code{stunting} is depicted with its spatial structure in +Figure~\ref{fig:india}. An overview of the data set can be found in +Table~\ref{tab:summary_india}. One can clearly see a trend towards malnutrition +in the data set as even the 75\% quantile of the stunting score is below zero. +\hfill $\vardiamond$ + +\setkeys{Gin}{width = 0.45\textwidth} +\begin{figure}[ht!] + \centering +<>= +library("R2BayesX") +## plot mean +FUN <- mean +india_agg <- data.frame(mcdist = names(tapply(india$stunting, india$mcdist, FUN, + na.rm = TRUE)), + stunting = tapply(india$stunting, india$mcdist, FUN, + na.rm = TRUE)) +par(mar = c(1, 0, 2, 0)) +plotmap(india.bnd, india_agg, pos = "bottomright", + range = c(-5.1, 1.50), main = "Mean", mar.min = NULL) +@ +\hfill +<>= +## plot sd +FUN <- sd +india_agg <- data.frame(mcdist = names(tapply(india$stunting, india$mcdist, FUN, + na.rm = TRUE)), + stunting = tapply(india$stunting, india$mcdist, FUN, + na.rm = TRUE)) +## remove missing values (= no variation) +india_agg <- india_agg[complete.cases(india_agg),] +par(mar = c(1, 0, 2, 0)) +plotmap(india.bnd, india_agg, pos = "bottomright", + range = c(0, 4.00), main = "Standard deviation") +@ +\caption{Spatial structure of stunting in India. The raw mean per district is + given in the left figure, ranging from dark blue (low stunting score), to dark + red (higher scores). The right figure depicts the standard deviation of the + stunting score in the district, ranging from dark blue (no variation) to dark + red (maximal variability). Dashed regions represent regions without + data.}\label{fig:india} +\end{figure} + +\begin{table}[h!] + \centering + \begin{tabular}{llrrrrrr} +<>= +out <- data.frame(Description = c("Stunting", "BMI (child)", "Age (child; months)", + "BMI (mother)", "Age (mother; years)"), + Variable = paste0("\\code{", c(names(india)[1:5]),"}"), + "Min." 
= apply(india[, 1:5], 2, min), + "25\\% Quantile" = apply(india[, 1:5], 2, quantile, p = 0.25), + "Median" = apply(india[, 1:5], 2, median), + "Mean" = apply(india[, 1:5], 2, mean), + "75\\% Quantile" = apply(india[, 1:5], 2, quantile, p = 0.75), + "Max." = apply(india[, 1:5], 2, max)) +names(out)[c(4, 7)] <- c("25\\% Qu.", "75\\% Qu.") +names(out)[1:2] <- "" +out[, 3:8] <- round(out[, 3:8], 2) +cat("\\toprule \n") +cat(paste(colnames(out), collapse = " & "), "\\\\ \n \\cmidrule{3-8}", "\n") +out <- apply(out, 1, function(x) cat(paste(x, collapse = " & "), " \\\\ \n")) +cat("\\bottomrule \n") +@ + \end{tabular} + \caption{Overview of \code{india} data.\label{tab:summary_india}} +\end{table} +\pagebreak[3] + +\section[The Package gamboostLSS]{The package \pkg{gamboostLSS}} +\label{sec:package} +The \emph{gamboostLSS} algorithm is implemented in the publicly available +\proglang{R} add-on package \pkg{gamboostLSS} \citep{pkg:gamboostLSS:1.2-0}. The +package makes use of the fitting algorithms and some of the infrastructure of +\pkg{mboost} +\citep{BuhlmannHothorn07,Hothorn+Buehlmann+Kneib+Schmid+Hofner:mboost:2010,pkg:mboost:2.4-0}. +Furthermore, many naming conventions and features are implemented in analogy to +\pkg{mboost}. By relying on the \pkg{mboost} package, \pkg{gamboostLSS} +incorporates a wide range of base-learners and hence offers a great flexibility +when it comes to the types of predictor effects on the parameters of a GAMLSS +distribution. In addition to making the infrastructure available for GAMLSS, +\pkg{mboost} constitutes a well-tested, mature software package in the back end. +For the users of \pkg{mboost}, \pkg{gamboostLSS} offers the advantage of +providing a drastically increased number of possible distributions to be fitted +by boosting. + +As a consequence of this partial dependency on \pkg{mboost}, we recommend users +of \pkg{gamboostLSS} to make themselves familiar with the former before using +the latter package. 
To make this tutorial self-contained, we try to shortly +explain all relevant features here as well. However, a dedicated hands-on +tutorial is available for an applied introduction to \pkg{mboost} +\citep{Hofner:mboost:2014}. + +\subsection{Model fitting}\label{sec:model-fitting} + +The models can be fitted using the function \code{glmboostLSS()} for linear +models. For all kinds of structured additive models the function +\code{gamboostLSS()} can be used. The function calls are as +follows: + +\begin{Sinput} +R> glmboostLSS(formula, data = list(), families = GaussianLSS(), ++ control = boost_control(), weights = NULL, ...) +R> gamboostLSS(formula, data = list(), families = GaussianLSS(), ++ control = boost_control(), weights = NULL, ...) +\end{Sinput} + +Note that here and in the remainder of the paper we sometimes focus on the most +relevant (or most interesting) arguments of a function only. Further arguments +might exist. Thus, for a complete list of arguments and their description we +refer the reader to the respective help file. + +The \code{formula} can consist of a single \code{formula} object, yielding the +same candidate model for all distribution parameters. For example, +\begin{Sinput} +R> glmboostLSS(y ~ x1 + x2 + x3, data = toydata) +\end{Sinput} +specifies linear models with predictors \code{x1} to \code{x3} for all GAMLSS +parameters (here $\mu$ and $\sigma$ of the Gaussian distribution). As an +alternative, one can also use a named list to specify different candidate models +for different parameters, e.g., +\begin{Sinput} +R> glmboostLSS(list(mu = y ~ x1 + x2, sigma = y ~ x1 + x3), data = toydata) +\end{Sinput} +fits a linear model with predictors \code{x1} and \code{x2} for the \code{mu} +component and a linear model with predictors \code{x1} and \code{x3} for the +\code{sigma} component. As for all \proglang{R} functions with a formula +interface, one must specify the data set to be used (argument \code{data}). 
+
+Additionally, \code{weights} can be specified for weighted regression. Instead
+of specifying the argument \code{family} as in \pkg{mboost} and other modeling
+packages, the user needs to specify the argument \code{families}, which
+basically consists of a list of sub-families, i.e., one family for each of the
+GAMLSS distribution parameters. These sub-families define the parameters of the
+GAMLSS distribution to be fitted. Details are given in the next section.
+
+The initial number of boosting iterations as well as the step-lengths
+($\nu_{\text{sl}}$; see Appendix~\ref{algorithm}) are specified via the function
+\code{boost\_control()} with the same arguments as in \pkg{mboost}. However, in
+order to give the user the possibility to choose different values for each
+additive predictor (corresponding to the different parameters of a GAMLSS), they
+can be specified via a vector or list. Preferably a \emph{named} vector or list
+should be used, where the names correspond to the names of the sub-families. For
+example, one can specify:
+
+\begin{Sinput}
+R> boost_control(mstop = c(mu = 100, sigma = 200),
++ nu = c(mu = 0.2, sigma = 0.01))
+\end{Sinput}
+
+Specifying a single value for the stopping iteration \code{mstop} or the
+step-length \code{nu} results in equal values for all sub-families. The default
+is \code{mstop = 100} for the initial number of boosting iterations and \code{nu
+ = 0.1} for the step-length. Additionally, the user can specify if status
+information should be printed by setting \code{trace = TRUE} in
+\code{boost_control}. Note that the argument \code{nu} can also refer to one of
+the GAMLSS distribution parameters in some families (and is also used in
+\pkg{gamlss} as the name of a distribution parameter). In \code{boost\_control},
+however, \code{nu} always represents the step-length $\nu_{\text{sl}}$. 
+ +\subsection{Distributions}\label{sec:distributions} + +Some GAMLSS distributions are directly implemented in the \proglang{R} add-on +package \pkg{gamboostLSS} and can be specified via the \code{families} argument +in the fitting function \code{gamboostLSS()} and \code{glmboostLSS()}. An +overview of the implemented families is given in +Table~\ref{tab:gamboostlss_families}. The parametrization of the negative +binomial distribution, the log-logistic distribution and the $t$ distribution in +boosted GAMLSS models is given in \citet{mayretal}. The derivation of boosted +beta regression, another special case of GAMLSS, can be found in +\cite{schmid2013beta}. In our case study we will use the default +\code{GaussianLSS()} family to model childhood malnutrition in India. The +resulting object of the family looks as follows: + +<>= +str(GaussianLSS(), 1) +@ + +We obtain a list of class \code{"families"} with two sub-families, one for the +$\mu$ parameter of the distribution and another one for the $\sigma$ parameter. +Each of the sub-families is of type \code{"boost_family"} from package +\pkg{mboost}. Attributes specify the name and the quantile function +(\code{"qfun"}) of the distribution. + +In addition to the families implemented in the \pkg{gamboostLSS} package, there +are many more possible GAMLSS distributions available in the \pkg{gamlss.dist} +package \citep{pkg:gamlss.dist:4.3-0}. In order to make our boosting approach +available for these distributions as well, we provide an interface to +automatically convert available distributions of \pkg{gamlss.dist} to objects of +class \code{"families"} to be usable in the boosting framework via the function +\code{as.families()}. As input, a character string naming the +\code{"gamlss.family"}, or the function itself is required. The function +\code{as.families()} then automatically constructs a \code{"families"} object +for the \pkg{gamboostLSS} package. 
To use for example the gamma family as +parametrized in \pkg{gamlss.dist}, one can simply use \code{as.families("GA")} +and plug this into the fitting algorithms of \pkg{gamboostLSS}: + +\begin{Sinput} +R> gamboostLSS(y ~ x, families = as.families("GA")) +\end{Sinput} + +\begin{landscape} + \begin{table}[h!] + \centering + \begin{tabular}{lllllllp{0.5\textwidth}} + \toprule + & & Name & Response & $\mu$ & $\sigma$ & $\nu$ & Note\\ + \cmidrule{2-8} + + \multicolumn{8}{l}{\textbf{Continuous response}} \\ + & Gaussian & \code{GaussianLSS()} & cont. & id & log & & \\ + & Student's $t$ & \code{StudentTLSS()} & cont. & id & log & log & The 3rd parameter is denoted by \code{df} (degrees of freedom). \\ + \cmidrule{2-8} + + \multicolumn{8}{l}{\textbf{Continuous non-negative response}} \\ + & Gamma & \code{GammaLSS()} & cont. $>0$ & log & log & & \\ + \cmidrule{2-8} + + \multicolumn{8}{l}{\textbf{Fractions and bounded continuous response}} \\ + & Beta & \code{BetaLSS()} & $\in (0,1)$ & logit & log & & The 2nd parameter is denoted by \code{phi}.\\ + \cmidrule{2-8} + + \multicolumn{8}{l}{\textbf{Models for count data}} \\ + & Negative binomial & \code{NBinomialLSS()} & count & log & log & & For over-dispersed count data.\\ + & Zero inflated Poisson & \code{ZIPoLSS()} & count & log & logit & & For zero-inflated count data; the 2nd parameter is the probability parameter of the zero mixture component.\\ + & Zero inflated neg. binomial & \code{ZINBLSS()} & count & log & log & logit & For over-dispersed and zero-inflated count data; the 3rd parameter is the probability parameter of the zero mixture component.\\ + \cmidrule{2-8} + + \multicolumn{8}{l}{\textbf{Survival models} \citep[accelerated failure time models; see, e.g.,][]{klein03}} \\ + & Log-normal & \code{LogNormalLSS()} & cont. $>0$ & id & log && + \multirow{3}{8cm}{All three families assume that the data are subject to + right-censoring. 
Therefore the response must be a \code{Surv()} object.}\\ + & Weibull & \code{WeibullLSS()} & cont. $>0$ & id & log && \\ + & Log-logistic & \code{LogLogLSS()} & cont. $>0$ & id & log &&\\ + \bottomrule + \end{tabular} + \caption{Overview of \code{"families"} that are implemented in + \pkg{gamboostLSS}. For every distribution parameter the corresponding + link-function is displayed (id = identity link).} \label{tab:gamboostlss_families} +\end{table} +\end{landscape} + +With this interface, it is possible to apply boosting for any distribution +implemented in \pkg{gamlss.dist} and for all new distributions that will be +added in the future. Note that one can also fit censored or truncated +distributions by using \code{gen.cens()} \citep[from package +\pkg{gamlss.cens}; see][]{pkg:gamlss.cens:4.2-7} or \code{gen.trun()} \citep[from +package \pkg{gamlss.tr}; see][]{pkg:gamlss.tr:4.2.7}, respectively. An overview of +common GAMLSS distributions is given in Appendix~\ref{sec:additional-families}. +Minor differences in the model fit when applying a pre-specified distribution +(e.g., \code{GaussianLSS()}) and the transformation of the corresponding +distribution from \pkg{gamlss.dist} (e.g., \code{as.families("NO")}) can be +explained by possibly different offset values. + +\subsection{Base-learners}\label{sec:base-learners} + +For the base-learners, which carry out the fitting of the gradient vectors using +the covariates, the \pkg{gamboostLSS} package completely depends on the +infrastructure of \pkg{mboost}. Hence, every base-learner which is available in +\pkg{mboost} can also be applied to fit GAMLSS distributions via +\pkg{gamboostLSS}. The choice of base-learners is crucial for the application of +the \emph{gamboostLSS} algorithm, as they define the type(s) of effect(s) that +covariates will have on the predictors of the GAMLSS distribution parameters. +See \citet{Hofner:mboost:2014} for details and application notes on the +base-learners. 
+ +The available base-learners include simple linear models for \textit{linear} +effects and penalized regression splines \citep[\textit{P}-splines, +][]{Eilers1996} for \textit{non-linear} effects. \textit{Spatial} or other +\textit{bivariate} effects can be incorporated by setting up a bivariate tensor +product extension of P-splines for two continuous variables \citep{kneibetal}. +Another way to include spatial effects is the adaptation of Markov random fields +for modeling a neighborhood structure \citep{sobotka12} or radial basis +functions \citep{Hofner:Dissertation:2011}. \textit{Constrained} effects such as +monotonic or cyclic effects can be specified as well +\citep{Hofner:monotonic:2011,Hofner:constrained:2014}. \textit{Random} effects +can be taken into account by using ridge-penalized base-learners for fitting +categorical grouping variables such as random intercepts or slopes \citep[see +supplementary material of][]{kneibetal}. + +\paragraph{Case study (\textit{cont'd}): Childhood malnutrition in India} +\hfill\newline First, we are going to set up and fit our model. Usually, one +could use \code{bmrf(mcdist, bnd = india.bnd)} to specify the spatial +base-learner using a Markov random field. However, as it is relatively +time-consuming to compute the neighborhood matrix from the boundary file and as +we need it several times, we pre-compute it once. Note that \pkg{R2BayesX} +\citep{pkg:R2BayesX:0.3-1} needs to be loaded in order to use this function: + +<>= +library("R2BayesX") +neighborhood <- bnd2gra(india.bnd) +@ + +The other effects can be directly specified without further care. We use smooth +effects for the age (\code{mage}) and BMI (\code{mbmi}) of the mother and smooth +effects for the age (\code{cage}) and BMI (\code{cbmi}) of the child. Finally, +we specify the spatial effect for the district in India where mother and child +live (\code{mcdist}). 
+ +We set the options + +<>= +ctrl <- boost_control(trace = TRUE, mstop = c(mu = 1269, sigma = 84)) +@ + +and fit the boosting model + +<>= +mod_nonstab <- gamboostLSS(stunting ~ bbs(mage) + bbs(mbmi) + + bbs(cage) + bbs(cbmi) + + bmrf(mcdist, bnd = neighborhood), + data = india, + families = GaussianLSS(), + control = ctrl) +@ + +<>= +## abbreviate output to use less manuscript space +out <- capture.output( +<> +) +out <- out[c(1:5, 33:35)] +out[3:5] <- c("", "(...)", "") +@ +<>= +writeLines(out) +@ + +We specified the initial number of boosting iterations as \code{mstop = c(mu = + \Sexpr{mstop(mod_nonstab)["mu"]}, sigma = + \Sexpr{mstop(mod_nonstab)["sigma"]})}, i.e., we used +$\Sexpr{mstop(mod_nonstab)["mu"]}$ boosting iterations for the $\mu$ parameter +and only $\Sexpr{mstop(mod_nonstab)["sigma"]}$ for the $\sigma$ parameter. This +means that we cycle between the $\mu$ and $\sigma$ parameter until we have +computed $\Sexpr{mstop(mod_nonstab)["sigma"]}$ update steps in both sub-models. +Subsequently, we update only the $\mu$ model and leave the $\sigma$ model +unchanged. The selection of these tuning parameters will be +discussed in the next section. \hfill $\vardiamond$\\ + +Instead of optimizing the gradients per GAMLSS parameter in each boosting +iteration, one can potentially stabilize the estimation further by standardizing +the gradients in each step. Details and an explanation are given in +Appendix~\ref{sec:stab-ngrad}. 
+ +\paragraph{Case study (\textit{cont'd}): Childhood malnutrition in India} +\label{page:stabilization} + +We now refit the model with the built-in median absolute deviation (MAD) +stabilization by setting \code{stabilization = "MAD"} in the definition of the +families: + +<>= +mod <- gamboostLSS(stunting ~ bbs(mage) + bbs(mbmi) + + bbs(cage) + bbs(cbmi) + + bmrf(mcdist, bnd = neighborhood), + data = india, + families = GaussianLSS(stabilization = "MAD"), + control = ctrl) +@ + +<>= +## abbreviate output to use less manuscript space +out <- capture.output( +<> +) +out <- out[c(1:5, 33:35)] +out[3:5] <- c("", "(...)", "") +@ +<>= +writeLines(out) +@ + +One can clearly see that the stabilization changes the model and reduces the +intermediate and final risks. \hfill $\vardiamond$ + +\subsection[Model complexity and diagnostic checks]{Model complexity and + diagnostic checks}\label{sec:model_complexity} + +Measuring the complexity of a GAMLSS is a crucial issue for model building and +parameter tuning, especially with regard to the determination of optimal +stopping iterations for gradient boosting (see next section). In the GAMLSS +framework, valid measures of the complexity of a fitted model are even more +important than in classical regression, since variable selection and model +choice have to be carried out in \emph{several} additive predictors within the +same model. + +In the original work by \cite{rs}, the authors suggested to evaluate AIC-type +information criteria to measure the complexity of a GAMLSS. Regarding the +complexity of a classical boosting fit with one predictor, AIC-type measures are +available for a limited number of distributions +\citep[see][]{BuhlmannHothorn07}. Still, there is no commonly accepted approach +to measure the degrees of freedom of a boosting fit, even in the classical +framework with only one additive predictor. 
This is mostly due to the +algorithmic nature of gradient boosting, which results in regularized model fits +for which complexity is difficult to evaluate \cite{Hast:comment:2007}. As a +consequence, the problem of deriving valid (and easy-to-compute) complexity +measures for boosting remains largely unsolved +\citep[Sec.~4]{Kneib:comment:2014}. + +In view of these considerations, and because it is not possible to use the +original information criteria specified for GAMLSS in the \emph{gamboostLSS} +framework, \cite{mayretal} suggested to use cross-validated estimates of the +empirical risk (i.e., of the predicted log-likelihood) to measure the complexity +of \emph{gamboostLSS} fits. Although this strategy is computationally expensive +and might be affected by the properties of the used cross-validation technique, +it is universally applicable to all \pkg{gamboostLSS} families and does not rely +on possibly biased estimators of the effective degrees of freedom. We therefore +decided to implement various resampling procedures in the function +\code{cvrisk()} to estimate model complexity of a \code{gamboostLSS} fit via +cross-validated empirical risks (see next section). + +A related problem is to derive valid diagnostic checks to compare different +families or link functions. For the original GAMLSS method, \cite{rs} proposed +to base diagnostic checks on normalized quantile residuals. In the boosting +framework, however, residual checks are generally difficult to derive because +boosting algorithms result in regularized fits that reflect the trade-off +between bias and variance of the effect estimators. As a consequence, residuals +obtained from boosting fits usually contain a part of the remaining structure of +the predictor effects, rendering an unbiased comparison of competing model +families via residual checks a highly difficult issue. 
While it is of course +possible to compute residuals from \code{gamboostLSS} models, valid comparisons +of competing models are more conveniently obtained by considering estimates of +the predictive risk. + +\paragraph{Case study (\textit{cont'd}): Childhood malnutrition in India} +To extract the empirical risk in the last boosting iteration (i.e., in the last +step) of the model which was fitted with stabilization (see +Page~\pageref{page:stabilization}) one can use +<<>>= +emp_risk <- risk(mod, merge = TRUE) +tail(emp_risk, n = 1) +@ +and compare it to the risk of the non-stabilized model +<<>>= +emp_risk_nonstab <- risk(mod_nonstab, merge = TRUE) +tail(emp_risk_nonstab, n = 1) +@ +In this case, the stabilized model has a lower (in-bag) risk than the +non-stabilized model. Note that usually both models should be tuned before the +empirical risk is compared. Here it merely shows that the risk of the stabilized +model decreases quicker. + +To compare the risk on new data sets, i.e., the predictive risk, one could +combine all data in one data set and use weights that equal zero for the new +data. Let us fit the model only on a random subset of 2000 observations. To +extract the risk for observations with zero weights, we need to additionally set +\code{risk = "oobag"}. 
+<<>>= +weights <- sample(c(rep(1, 2000), rep(0, 2000))) +mod_subset <- update(mod, weights = weights, risk = "oobag") +@ +Note that we could also specify the model anew via +<>= +mod_subset <- gamboostLSS(stunting ~ bbs(mage) + bbs(mbmi) + + bbs(cage) + bbs(cbmi) + + bmrf(mcdist, bnd = neighborhood), + data = india, + weights = weights, + families = GaussianLSS(), + control = boost_control(mstop = c(mu = 1269, sigma = 84), + risk = "oobag")) +@ +To refit the non-stabilized model we use +<<>>= +mod_nonstab_subset <- update(mod_nonstab, + weights = weights, risk = "oobag") +@ +Now we extract the predictive risks which are now computed on the 2000 ``new'' +observations: +<<>>= +tail(risk(mod_subset, merge = TRUE), 1) +tail(risk(mod_nonstab_subset, merge = TRUE), 1) +@ +Again, the stabilized model has a lower predictive risk.\hfill $\vardiamond$ + +\subsection{Model tuning: Early stopping to prevent overfitting}\label{sec:model_tuning} + +As for other component-wise boosting algorithms, the most important tuning +parameter of the \emph{gamboostLSS} algorithm is the stopping iteration +$\bm{m}_{\text{stop}}$ (here a $K$-dimensional vector). In some low-dimensional +settings it might be convenient to let the algorithm run until convergence +(i.e., use a large number of iterations for each of the $K$ distribution +parameters). In these cases, as they are optimizing the same likelihood, +boosting should converge to the same model as \pkg{gamlss} -- at least when the +same penalties are used for smooth effects. + +However, in most settings, where the application of boosting is favorable, it is +crucial that the algorithm is not run until convergence but some sort of early +stopping is applied \citep{Mayr:mstop:2012}. Early stopping results in shrunken +effect estimates, which has the advantage that predictions become more stable +since the variance of the estimates is reduced. 
Another advantage of early
+stopping is that \emph{gamboostLSS} has an intrinsic mechanism for data-driven
+variable selection, since only the best-fitting base-learner is updated in each
+boosting iteration. Hence, the stopping iteration $m_{\text{stop},k}$ does not
+only control the amount of shrinkage applied to the effect estimates but also
+the complexity of the models for the distribution parameter~$\theta_k$.
+
+To find the optimal complexity, the resulting model should be evaluated
+regarding the predictive risk on a large grid of stopping values by
+cross-validation or resampling methods, using the function \code{cvrisk()}. In
+case of \emph{gamboostLSS}, the predictive risk is computed as the negative log
+likelihood of the out-of-bag sample. The search for the optimal
+$\bm{m}_{\text{stop}}$ based on resampling is far more complex than for standard
+boosting algorithms. Different stopping iterations can be chosen for the
+parameters, thus allowing for different levels of complexity in each sub-model
+(\emph{multi-dimensional} early stopping). In the package \pkg{gamboostLSS} a
+multi-dimensional grid can be easily created utilizing the function
+\code{make.grid()}.
+
+In most of the cases the $\mu$ parameter is of greatest interest in a GAMLSS
+model and thus more care should be taken to accurately model this parameter.
+\citet{pkg:gamlss:4.3-0}, the inventors of GAMLSS, stated on the help page for
+the function \code{gamlss()}: ``Respect the parameter hierarchy when you are
+fitting a model. For example a good model for $\mu$ should be fitted before a
+model for $\sigma$ is fitted.''. Consequently, we provide an option
+\code{dense_mu_grid} in the \code{make.grid()} function that allows one to have a
+finer grid for (a subset of) the $\mu$ parameter. Thus, we can better tune the
+complexity of the model for $\mu$ which helps to avoid over- or underfitting of
+the mean without relying too much on the grid. 
Details and explanations are given +in the following paragraphs. + +\paragraph{Case study (\textit{cont'd}): Childhood malnutrition in India} + +We first set up a grid for \code{mstop} values starting at $20$ and going in +$10$ equidistant steps on a logarithmic scale to $500$: + +<>= +grid <- make.grid(max = c(mu = 500, sigma = 500), min = 20, + length.out = 10, dense_mu_grid = FALSE) +@ + +Additionally, we can use the \code{dense\_mu\_grid} option to create a dense +grid for $\mu$. This means that we compute the risk for all iterations +$m_{\text{stop},\mu}$, if $m_{\text{stop},\mu} \geq m_{\text{stop},\sigma}$ and +do not use the values on the sparse grid only: + +<>= +densegrid <- make.grid(max = c(mu = 500, sigma = 500), min = 20, + length.out = 10, dense_mu_grid = TRUE) +plot(densegrid, pch = 20, cex = 0.2, + xlab = "Number of boosting iterations (mu)", + ylab = "Number of boosting iterations (sigma)") +abline(0,1) +points(grid, pch = 20, col = "red") +@ + +A comparison and an illustration of the sparse and the dense grids can be found +in Figure~\ref{fig:grid} (left). Red dots refer to all possible combinations of +$m_{\text{stop},\mu}$ and $m_{\text{stop},\sigma}$ on the sparse grid, whereas +the black lines refer to the additional combinations when a dense grid is used. +For a given $m_{\text{stop},\sigma}$, all iterations $m_{\text{stop},\mu} \geq +m_{\text{stop},\sigma}$ (i.e., below the bisecting line) can be computed without +additional computing time. For example, if we fit a model with \code{mstop = + c(mu = 30, sigma = 15)}, all $m_{\text{stop}}$ combinations on the red path +(Figure~\ref{fig:grid}, right) are computed. Until the point where +$m_{\text{stop}, \mu} = m_{\text{stop}, \sigma}$, we move along the bisecting +line. Then we stop increasing $m_{\text{stop}, \sigma}$ and increase +$m_{\text{stop}, \mu}$ only, i.e., we start moving along a horizontal line. +Thus, all iterations on this horizontal line are computed anyway. 
Note that it +is quite expensive to move from the computed model to one with \code{mstop = + c(mu = 30, sigma = 16)}. One cannot simply increase $m_{\text{stop},\sigma}$ +by 1 but needs to go along the black dotted path. As the dense grid does not +increase the run time (or only marginally), we recommend to always use this +option, which is also the default. + +\setkeys{Gin}{width = 0.45\textwidth} +\begin{figure}[h!] + \centering +<>= +par(mar = c(4, 4, 0.1, 0.1)) +<> +@ +\hfill +<>= +par(mar = c(4, 4, 0.1, 0.1)) +plot(1:30, type = "n", xlab = "Number of boosting iterations (mu)", + ylab = "Number of boosting iterations (sigma)") +j <- 0 +for (i in 0:29) { + lines(c(i, i + 1), c(j, j), col = "red", lwd = 2) + points(i, j, col = "red", pch = 20, cex = 1.3) + if (j < 15) { + lines(c(i + 1, i + 1), c(j, j + 1), col = "red", lwd = 2) + points(i + 1, j, col = "red", pch = 20, cex = 1.3) + j <- j + 1 + } +} +points(30, 15, col = "red", pch = 20, cex = 1.3) +lines(c(16, 16), c(15, 16), lwd = 2) +lines(c(16, 30), c(16, 16), lwd = 2) +points(c(16:30), rep(16, 15), col = "black", pch = 20, cex = 1.3) +abline(0,1) +lines(c(-1, 15), c(15, 15), lty = "dotted") +lines(c(15, 15), c(-1, 15), lty = "dotted") +legend("topleft", legend = c("mstop = c(mu = 30, sigma = 15)", "mstop = c(mu = 30, sigma = 16)"), + lty = "solid", pch = 20, cex = 1.1, col = c("red", "black"), bty = "n") +@ +\caption{\emph{Left:} Comparison between sparse grid (red) and dense $\mu$ grid + (black horizontal lines in addition to the sparse red grid). \emph{Right:} + Example of the path of the iteration counts.}\label{fig:grid} +\end{figure} + +The \code{dense\_mu\_grid} option also works for asymmetric grids (e.g., +\code{make.grid(max = c(mu = 100, sigma = 200))}) and for more than two +parameters (e.g., \code{make.grid(max = c(mu = 100, sigma = 200, nu = 20))}). +For an example in the latter case see the help file of \code{make.grid()}. 
+ +Now we use the dense grid for cross-validation (or subsampling to be more +precise). The computation of the cross-validated risk using \code{cvrisk()} +takes more than one hour on a 64-bit Ubuntu machine using 2 cores. + +<>= +cores <- ifelse(grepl("linux|apple", R.Version()$platform), 2, 1) +if (!file.exists("cvrisk/cvr_india.Rda")) { + set.seed(1907) + folds <- cv(model.weights(mod), type = "subsampling") + densegrid <- make.grid(max = c(mu = 5000, sigma = 500), min = 20, + length.out = 10, dense_mu_grid = TRUE) + cvr <- cvrisk(mod, grid = densegrid, folds = folds, mc.cores = cores) + save("cvr", file = "cvrisk/cvr_india.Rda", compress = "xz") +} +@ + +By using more computing cores or a larger computer cluster the speed can be +easily increased. The usage of \code{cvrisk()} is practically identical to that +of \code{cvrisk()} from package \pkg{mboost}. See \citet{Hofner:mboost:2014} for +details on parallelization and grid computing. As Windows does not support +addressing multiple cores from \proglang{R}, on Windows we use only one core +whereas on Unix-based systems two cores are used. We then load the pre-computed +results of the cross-validated risk: + +<>= +load("cvrisk/cvr_india.Rda") +@ +\vspace{-2em} \hfill $\vardiamond$ + +\subsection{Methods to extract and display results}\label{sec:methods} + +In order to work with the results, methods to extract information both from +boosting models and the corresponding cross-validation results have been +implemented. Fitted \pkg{gamboostLSS} models (i.e., objects of type +\code{"mboostLSS"}) are lists of \code{"mboost"} objects. The most important +distinction from the methods implemented in \pkg{mboost} is the widespread +occurrence of the additional argument \code{parameter}, which enables the user +to apply the function on all parameters of a fitted GAMLSS model or only on one +(or more) specific parameters. 
+ +Most importantly, one can extract the coefficients of a fitted model +(\code{coef()}) or plot the effects (\code{plot()}). Different versions of both +functions are available for linear GAMLSS models (i.e., models of class +\code{"glmboostLSS"}) and for non-linear GAMLSS models (e.g., models with +P-splines). Additionally, the user can extract the risk for all iterations using +the function \code{risk()}. Selected base-learners can be extracted using +\code{selected()}. Fitted values and predictions can be obtained by +\code{fitted()} and \code{predict()}. For details and examples, see the +corresponding help files and \citet{Hofner:mboost:2014}. Furthermore, a special +function for marginal prediction intervals is available (\code{predint()}) +together with a dedicated plot function (\code{plot.predint()}). + +For cross-validation results (objects of class \code{"cvriskLSS"}), there exists +a function to extract the estimated optimal number of boosting iterations +(\code{mstop()}). The results can also be plotted using a special \code{plot()} +function. Hence, convergence and overfitting behavior can be visually inspected. + +In order to increase or reduce the number of boosting steps to the appropriate +number (as e.g., obtained by cross-validation techniques) one can use the +function \code{mstop}. If we want to reduce our model, for example, to 10 +boosting steps for the \code{mu} parameter and 20 steps for the \code{sigma} +parameter we can use +\begin{Sinput} +R> mstop(mod) <- c(10, 20) +\end{Sinput} +This directly alters the object \code{mod}. Instead of specifying a vector with +separate values for each sub-family one can also use a single value, which then +is used for each sub-family (see Section~\ref{sec:model-fitting}). + +\paragraph{Case study (\textit{cont'd}): Childhood malnutrition in India} + +We first inspect the cross-validation results (see Figure~\ref{fig:cvr}): + +<>= +plot(cvr) +@ + +\setkeys{Gin}{width = 0.5\textwidth} +\begin{figure}[h!] 
+ \centering +<>= +<> +@ +\includegraphics{fig-crossvalidation.pdf} +\caption{Cross-validated risk. Darker color represents higher predictive risks. + The optimal combination of stopping iterations is indicated by dashed red + lines.}\label{fig:cvr} +\end{figure} + +If the optimal stopping iteration is close to the boundary of the grid one +should re-run the cross-validation procedure with different \code{max} values +for the grid and/or more grid points. This is not the case here +(Figure~\ref{fig:cvr}). To extract the optimal stopping iteration one can now +use +\begin{Sinput} +R> mstop(cvr) + mu sigma + 1269 84 +\end{Sinput} + +<>= +## for the purpose of the tutorial we started already with the optimal number of +## boosting steps. Check if this is really true: +if (!isTRUE(all.equal(mstop(mod), mstop(cvr)))) + warning("Check mstop(mod) throughout the manuscript.") +@ + +To use the optimal model, i.e., the model with the iteration number from the +cross-validation, we set the model to these values: + +<>= +mstop(mod) <- mstop(cvr) +@ + +In the next step, the \code{plot()} function can be used to plot the partial +effects. A partial effect is the effect of a certain predictor only, i.e., all +other model components are ignored for the plot. Thus, the reference level of +the plot is arbitrary and even the actual size of the effect might not be +interpretable; only changes and hence the functional form are meaningful. If no +further arguments are specified, all \emph{selected} base-learners are plotted: + +<>= +par(mfrow = c(2, 5)) +plot(mod) +@ + +Special base-learners can be plotted using the argument \code{which} (to specify +the base-learner) and the argument \code{parameter} (to specify the parameter, +e.g., \code{"mu"}). Partial matching is used for \code{which}, i.e., one can +specify a sub-string of the base-learners' names. Consequently, all matching +base-learners are selected. 
Alternatively, one can specify an integer which +indicates the number of the effect in the model formula. Thus + +<>= +par(mfrow = c(2, 4), mar = c(5.1, 4.5, 4.1, 1.1)) +plot(mod, which = "bbs", type = "l") +@ + +plots \emph{all} P-spline base-learners irrespective if they where selected or +not. The partial effects in Figure~\ref{fig:smooth_effects} can be interpreted +as follows: The age of the mother seems to have a minor impact on stunting for +both the mean effect and the effect on the standard deviation. With increasing +BMI of the mother, the stunting score increases, i.e., the child is better +nourished. At the same time the variability increases until a BMI of roughly 25 +and then decreases again. The age of the child has a negative effect until the +age of approximately 1.5 years (18 months). The variability increases over the +complete range of age. The BMI of the child has a negative effect on stunting, +with lowest variability for an BMI of approximately 16. While all other effects +can be interpreted quite easily, this effect is more difficult to interpret. +Usually, one would expect that a child that suffers from malnutrition also has a +small BMI. However, the height of the child enters the calculation of the BMI in +the denominator, which means that a lower stunting score (i.e., small height) +should lead on average to higher BMI values if the weight of a child is fixed. + +\setkeys{Gin}{width = 0.99\textwidth} +\begin{figure}[h!] + \centering +<>= +par(cex.axis = 1.3, cex.lab = 1.3, mar = c(4, 5, 2, 1)) +<> +@ +\caption{Smooth partial effects of the estimated model with the rescaled + outcome. 
The effects for \code{sigma} are estimated and plotted on the + log-scale (see Equation~\ref{Gaussian:sigma}), i.e., we plot the predictor + against $\log(\hat{\sigma})$.}\label{fig:smooth_effects} +\end{figure} + +If we want to plot the effects of all P-spline base-learners for the $\mu$ +parameter, we can use + +<>= +plot(mod, which = "bbs", parameter = "mu") +@ + +Instead of specifying (sub-)strings for the two arguments one could use integer +values in both cases. For example, + +<>= +plot(mod, which = 1:4, parameter = 1) +@ + +results in the same plots. + +Prediction intervals for new observations can be easily constructed by computing +the quantiles of the conditional GAMLSS distribution. This is done by plugging +the estimates of the distribution parameters (e.g., $\hat{\mu}(x_{\text{new}}), +\hat{\sigma}(x_{\text{new}})$ for a new observation $x_{\text{new}}$) into the +quantile function \citep{mayretal}. + +Marginal prediction intervals, which reflect the effect of a single predictor on +the quantiles (keeping all other variables fixed), can be used to illustrate the +combined effect of this variable on various distribution parameters and hence +the shape of the distribution. For illustration purposes we plot the influence +of the children's BMI via \code{predint()}. To obtain marginal prediction +intervals, the function uses a grid for the variable of interest, while fixing +all others at their mean (continuous variables) or modus (categorical +variables). + +<>= +plot(predint(mod, pi = c(0.8, 0.9), which = "cbmi"), + lty = 1:3, lwd = 3, xlab = "BMI (child)", + ylab = "Stunting score") +@ + +To additionally highlight observations from Greater Mumbai, we use +<>= +points(stunting ~ cbmi, data = india, pch = 20, + col = rgb(1, 0, 0, 0.5), subset = mcdist == "381") +@ + + +\setkeys{Gin}{width = 0.45\textwidth} +\begin{figure}[h!] 
+ \centering +<>= +<> +<> +@ +\caption{80\% (dashed) and 90\% (dotted) marginal prediction intervals for the + BMI of the children in the district of Greater Mumbai (which is the region + with the most observations). For all other variables we used average values + (i.e., a child with average age, and a mother with average age and BMI). The + solid line corresponds to the median prediction (which equals the mean for + symmetric distributions such as the Gaussian distribution). Observations from + Greater Mumbai are highlighted in red.}\label{fig:pred_interval} +\end{figure} + +The resulting marginal prediction intervals are displayed in +Figure~\ref{fig:pred_interval}. For the interpretation and evaluation of +prediction intervals, see \cite{MayrPI}. + +For the spatial \code{bmrf()} base-learner we need some extra work to plot the +effect(s). We need to obtain the (partial) predicted values per region using +either \code{fitted()} or \code{predict()}: +<>= +fitted_mu <- fitted(mod, parameter = "mu", which = "mcdist", + type = "response") +fitted_sigma <- fitted(mod, parameter = "sigma", which = "mcdist", + type = "response") +@ + +In case of \code{bmrf()} base-learners we then need to aggregate the data for +multiple observations in one region before we can plot the data. Here, one could +also plot the coefficients, which constitute the effect estimates per region. 
+Note that this interpretation is not possible for other bivariate or spatial
This leads to a smaller size of +prediction intervals for children living in that area. In contrast, the regions +around Bihar in the central north, where India shares border with Nepal, do not +only seem to have larger problems with stunted growth but have a positive +partial effect with respect the scale parameter of the conditional distribution +as well. This leads to larger prediction intervals, which could imply a greater +risk for very small values of the stunting score for an individual child in that +region. On the other hand, the larger size of the interval also offers the +chance for higher values and could reflect higher differences between different +parts of the population. \hfill $\vardiamond$ + +\section{Summary} +\label{sec:summary} + +The GAMLSS model class has developed into one of the most flexible tools in +statistical modeling, as it can tackle nearly any regression setting of practical +relevance. Boosting algorithms, on the other hand, are one of the most flexible +estimation and prediction tools in the toolbox of a modern statistician +\citep{mayr_boosting_part1}. + +In this paper, we have presented the \proglang{R} package \pkg{gamboostLSS}, +which provides the first implementation of a boosting algorithm for GAMLSS. +Hence, beeing a combination of boosting and GAMLSS, \pkg{gamboostLSS} combines a +powerful machine learning tool with the world of statistical modeling +\citep{Breiman2001:TwoCultures}, offering the advantage of intrinsic model +choice and variable selection in potentially high-dimensional data situations. +The package also combines the advantages of both \pkg{mboost} (with a +well-established, well-tested modular structure in the back-end) and +\pkg{gamlss} (which implements a large amount of families that are available via +conversion with the \code{as.families()} function). 
+ +While the implementation in the \proglang{R} package \pkg{gamlss} (provided by +the inventors of GAMLSS) must be seen as the gold standard for fitting GAMLSS, +the \pkg{gamboostLSS} package offers a flexible alternative, which can be +advantageous, amongst others, in following data settings: (i) models with a +large number of coefficients, where classical estimation approaches become +unfeasible; (ii) data situations where variable selection is of great interest; +(iii) models where a greater flexibility regarding the effect types is needed, +e.g., when spatial, smooth, random, or constrained effects should be included +and selected at the same time. + +\section*{Acknowledgments} + +We thank the editors and the two anonymous referees for their valuable comments +that helped to greatly improve the manuscript. We gratefully acknowledge the +help of Nora Fenske and Thomas Kneib, who provided code to prepare the data and +also gave valuable input on the package \pkg{gamboostLSS}. We thank Mikis +Stasinopoulos for his support in implementing \code{as.families} and Thorsten +Hothorn for his great work on \pkg{mboost}. The work of Matthias Schmid and +Andreas Mayr was supported by the Deutsche Forschungsgemeinschaft (DFG), grant +SCHM-2966/1-1, and the Interdisciplinary Center for Clinical Research (IZKF) of +the Friedrich-Alexander University Erlangen-N{\"u}rnberg, project J49. 
+%% Bibliography
+ \end{enumerate} +\item[] \textbf{Boosting in multiple dimensions} + \begin{enumerate} + \item[(3)] \textbf{Start} a new boosting iteration: increase $m$ by 1 and set + $k := 0$. + \item[(4)] + \begin{enumerate} + \item[(a)] Increase $k$ by 1. \\ + \textbf{If} $m > m_{\rm{stop},\emph{k}}$ proceed to + step 4(e).\\ + \textbf{Else} compute the partial derivative $\frac{\partial}{\partial + \eta_{\theta_k}} l(y,\bm{\theta})$ and plug in the current estimates + $\hat{\bm{\theta}}_i^{[m-1]} = \left( \hat{\theta}_{1,i}^{[m-1]}, \ldots, + \hat{\theta}_{K,i}^{[m-1]} \right) = \left( + g^{-1}_1(\hat{\eta}_{\theta_{1,i}}^{[m-1]}), \ldots, + g^{-1}_K(\hat{\eta}_{\theta_{K,i}}^{[m-1]}) \right)$: + \begin{equation*} + u^{[m-1]}_{k,i} = \left. \frac{\partial}{\partial \eta_{\theta_k}} l(y_i, + \bm{\theta})\right|_{\bm{\theta} = \hat{\bm{\theta}}_i^{[m-1]}} ,\, i = 1,\ldots, n. + \end{equation*} + \item[(b)] \textbf{Fit} each of the base-learners contained in the set of + base-learners specified for the parameter $\theta_k$ in step (2) to the + gradient vector $\bm{u}^{[m-1]}_k$. + \item[(c)] \textbf{Select} the base-learner $j^*$ that best fits the + partial-derivative vector according to the least-squares criterion, i.e., + select the base-learner $h_{k,j^*}$ defined by + \begin{equation*} + j^* = \underset{1 \leq j \leq p_k}{\operatorname{argmin}}\sum_{i=1}^n (u_{k,i}^{[m-1]} - h_{k,j}(\cdot))^2 \ . + \end{equation*} + \item[(d)] \textbf{Update} the additive predictor $\eta_{\theta_k}$ as + follows: + \begin{equation*} + \hat{\eta}_{\theta_k}^{[m-1]} := \hat{\eta}_{\theta_k}^{[m-1]} + \nu_{\text{sl}} \cdot h_{k,j^*}(\cdot)\ , + \end{equation*} + where $\nu_{\text{sl}}$ is a small step-length ($0 < + \nu_{\text{sl}} \ll 1$). + \item[(e)] Set $\hat{\eta}_{\theta_k}^{[m]} := + \hat{\eta}_{\theta_k}^{[m-1]}$. + \item[(f)] \textbf{Iterate} steps 4(a) to 4(e) for $k=2,\ldots, K$. 
+ \end{enumerate} + \end{enumerate} + +\item[] \textbf{Iterate} + \begin{enumerate} + \item[(5)] Iterate steps 3 and 4 until $m > m_{\text{stop},k}$ for all + $k=1,\ldots, K$. + \end{enumerate} +\end{enumerate} + +\section{Data pre-processing and stabilization of gradients}\label{sec:stab-ngrad} + +As the \emph{gamboostLSS} algorithm updates the parameter estimates in turn by +optimizing the gradients, it is important that these are comparable for +all GAMLSS parameters. Consider for example the standard Gaussian distribution +where the gradients of the log-likelihood with respect to $\eta_{\mu}$ and +$\eta_{\sigma}$ are +\begin{equation*} + \frac{\partial }{\partial \eta_{\mu}}\, l(y_i, g_{\mu}^{-1}(\eta_{\mu}), \hat{\sigma}) = + \frac{ y_i - \eta_{\mu i}}{\hat{\sigma}_i^2}, +\end{equation*} +with identity link, i.e., $g_{\mu}^{-1}(\eta_{\mu}) = \eta_{\mu}$, and +\begin{equation*} + \frac{\partial }{\partial \eta_{\sigma}}\, l(y_i, \hat{\mu}, g_{\sigma}^{-1}(\eta_{\sigma})) = + -1 + \frac{(y_i - \hat{\mu}_i)^2}{\exp(2\eta_{\sigma i})}, +\end{equation*} +with log link, i.e., $g_{\sigma}^{-1}(\eta_{\sigma}) = \exp(\eta_{\sigma})$. + +For small values of $\hat{\sigma}_i$, the gradient vector for $\mu$ will hence +inevitably become huge, while for large variances it will become very small. As +the base-learners are directly fitted to this gradient vector, this will have a +dramatic effect on convergence speed. Due to imbalances regarding the range of +$\frac{\partial }{\partial \eta_{\mu}} l(y_i, \mu, \sigma)$ and $\frac{\partial +}{\partial \eta_{\sigma}} l(y_i, \mu, \sigma)$, a potential bias might be +induced when the algorithm becomes so unstable that it does not converge to the +optimal solution (or converges very slowly). 
+ +Consequently, one can use standardized gradients, where in \textbf{each step} +the gradient is divided by its median absolute deviation, i.e., it is divided by +\begin{equation} + \label{eq:MAD} + \text{MAD} = \text{median}_i(|u_{k,i} - \text{median}_j(u_{k,j})|), +\end{equation} +where $u_{k,i}$ is the gradient of the $k$th GAMLSS parameter in the current +boosting step $i$. If weights are specified (explicitly or implicitly as for +cross-validation) a weighted median is used. MAD-stabilization can be activated +by setting the argument \code{stabilization} to \code{"MAD"} in the fitting +families (see example on p.~\pageref{page:stabilization}). Using +\code{stabilization = "none"} explicitly switches off the stabilization. As this +is the current default, this is only needed for clarity. + +Another way to improve convergence speed might be to standardize the response +variable (and/or to use a larger step size $\nu_{\text{sl}}$). This is +especially useful if the range of the response differs strongly from the range +of the negative gradients. Both, the built in stabilization and the +standardization of the response are not always advised but need to be carefully +considered given the data at hand. If convergence speed is slow or if the +negative gradient even starts to become unstable, one should consider one or +both options to stabilize the fit. To judge the impact of these methods one can +run the \code{gamboostLSS} algorithm using different options and compare the +results via cross-validated predictive risks (see +Sections~\ref{sec:model_complexity} and~\ref{sec:model_tuning}). + +\pagebreak + +\section{Additional Families}\label{sec:additional-families} + +Table~\ref{tab:gamlss_families} gives an overview of common, additional GAMLSS +distributions and GAMLSS distributions with a different parametrization than in +\pkg{gamboostLSS}. 
For a comprehensive overview see the distribution tables +available at \url{www.gamlss.org} and the documentation of the +\pkg{gamlss.dist} package \citep{pkg:gamlss.dist:4.3-0}. Note that +\pkg{gamboostLSS} works only for more-parametric distributions, while in +\pkg{gamlss.dist} also a few one-parametric distributions are implemented. In +this case the \code{as.families()} function will construct a corresponding +\code{"boost\_family"} which one can use as \code{family} in \pkg{mboost} (a +corresponding advice is given in a warning message). + +\begin{landscape} + \begin{table}[h!] + \centering + \begin{tabular}{llllllllp{0.5\textwidth}} + \toprule + & & Name & Response & $\mu$ & $\sigma$ & $\nu$ & $\tau$ & Note\\ + \cmidrule{2-9} + + \multicolumn{9}{l}{\textbf{Continuous response}} \\ + & Generalized $t$ & \code{GT} & cont. & id & log & log& log & \\ + & Box-Cox $t$ & \code{BCT} & cont. & id & log & id& log& \\ + & Gumbel & \code{GU} & cont. & id & log & & & For moderately skewed data.\\ + & Reverse Gumbel & \code{RG} & cont. & id & log & & & Extreme value distribution.\\ + \cmidrule{2-9} + + \multicolumn{9}{l}{\textbf{Continuous non-negative response} (without censoring)} \\ + & Gamma & \code{GA} & cont. $>0$ & log & log & & & Also implemented as + \code{GammaLSS()}$\,^{a,b}$.\\ + & Inverse Gamma & \code{IGAMMA} & cont. $>0$ & log & log & & & \\ + & Zero-adjusted Gamma & \code{ZAGA} & cont. $\geq 0$ & log & log& logit + & & Gamma, additionally allowing for zeros.\\ + & Inverse Gaussian & \code{IG} & cont. $>0$ & log & log & & & \\ + & Log-normal & \code{LOGNO} & cont. $>0$ & log & log & & & For positively skewed data.\\ + & Box-Cox Cole and Green & \code{BCCG} & cont. $>0$ & id & log & id & & For positively and negatively skewed data.\\ + & Pareto & \code{PARETO2} & cont. $>0$ & log & log & & & \\ + & Box-Cox power exponential & \code{BCPE} & cont. 
$>0$ & id & log & id & log & Recommended for child growth centiles.\\ + \cmidrule{2-9} + + \multicolumn{9}{l}{\textbf{Fractions and bounded continuous response}} \\ + & Beta & \code{BE} & $\in (0,1)$ & logit & logit & & & Also implemented + as \code{BetaLSS()}$\,^{a,c}$.\\ + & Beta inflated & \code{BEINF} & $\in [0,1]$ & logit & logit & log &log + & Beta, additionally allowing for zeros and ones.\\ + \cmidrule{2-9} + + \multicolumn{9}{l}{\textbf{Models for count data}} \\ + & Beta binomial & \code{BB} & count & logit & log & & & \\ + & Negative binomial & \code{NBI} & count & log & log & & & For + over-dispersed count data; also implemented as + \code{NBinomialLSS()}$\,^{a,d}$.\\ + \bottomrule + \end{tabular} + \caption{\label{tab:gamlss_families} Overview of common, additional GAMLSS distributions that can be used via + \code{as.families()} in \pkg{gamboostLSS}. For every modeled + distribution parameter, the corresponding link-function is displayed. $^a$ + The parametrizations of the distribution functions in \pkg{gamboostLSS} + and \pkg{gamlss.dist} differ with respect to the variance. $\,^b$ + \code{GammaLSS(mu, sigma)} has $\VAR(y|x) = + \text{\code{mu}}^2/\text{\code{sigma}}$, and \code{as.families(GA)(mu, + sigma)} has $\VAR(y|x) = \text{\code{sigma}}^2\cdot \text{\code{mu}}^2$. + $\,^c$ \code{BetaLSS(mu, phi)} has $\VAR(y|x) = \text{\code{mu}}\cdot(1- + \text{\code{mu}}) \cdot(1 + \text{\code{phi}})^{-1}$, and + \code{as.families(BE)(mu, sigma)} has $\VAR(y|x) = + \text{\code{mu}}\cdot(1- \text{\code{mu}}) \cdot\text{\code{sigma}}^2$. + $\,^d$ \code{NBinomialLSS(mu, sigma)} has $\VAR(y|x) = \text{\code{mu}}+ + 1/\text{\code{sigma}}\cdot\text{\code{mu}}^2$, + and \code{as.families(NBI)(mu, sigma)} has $\VAR(y|x) = + \text{\code{mu}}+\text{\code{sigma}}\cdot\text{\code{mu}}^2$.} + \end{table} +\end{landscape} + +\end{document}