From d39f293e8d74b77eaae6acf4563240b733c88962 Mon Sep 17 00:00:00 2001 From: Joel Purra Date: Thu, 4 Dec 2014 11:37:18 +0100 Subject: [PATCH] Write about Disconnect's categories --- ...ts.non-failed.disconnect.counts.sorted.tsv | 144 ++++++++-------- report/report.lyx | 156 +++++++++++++++--- 2 files changed, 201 insertions(+), 99 deletions(-) diff --git a/report/datasets.non-failed.disconnect.counts.sorted.tsv b/report/datasets.non-failed.disconnect.counts.sorted.tsv index 5ce9727..72774af 100644 --- a/report/datasets.non-failed.disconnect.counts.sorted.tsv +++ b/report/datasets.non-failed.disconnect.counts.sorted.tsv @@ -1,73 +1,73 @@ Dataset Domains D Requests D Domains D Orgs D Cats DR/d (DR/d)/DO DD/T DO/T -alexa.2014-09-01.random.10000-http 8216 166702 704 481 5 20.289922103213243 0.04218279023537057 0.3271375464684015 0.49081632653061225 -alexa.2014-09-01.random.10000-http-www 8493 169685 704 476 5 19.979394795714118 0.041973518478391 0.3271375464684015 0.4857142857142857 -alexa.2014-09-01.random.10000-https 1135 23599 370 272 5 20.792070484581497 0.07644143560507903 0.17193308550185873 0.27755102040816326 -alexa.2014-09-01.random.10000-https-www 1224 16764 368 276 5 13.696078431372548 0.04962347257743677 0.17100371747211895 0.2816326530612245 -alexa.2014-09-01.top.10000-http 8545 274782 755 505 5 32.157050906963136 0.06367732852863987 0.3508364312267658 0.5153061224489796 -alexa.2014-09-01.top.10000-http-www 8682 276636 760 515 5 31.863165169315828 0.06187022362973947 0.35315985130111527 0.5255102040816326 -alexa.2014-09-01.top.10000-https 2507 67788 542 388 5 27.03948942959713 0.06968940574638435 0.25185873605947956 0.39591836734693875 -alexa.2014-09-01.top.10000-https-www 2957 73239 569 413 5 24.768008116334123 0.059970963962068094 0.26440520446096655 0.42142857142857143 -alexa.2014-09-01.top.dk.10000-http 2263 37832 282 205 5 16.717631462660187 0.08154942176907408 0.13104089219330856 0.20918367346938777 -alexa.2014-09-01.top.dk.10000-http-www 2310 38373 
284 206 5 16.61168831168831 0.08063926364897239 0.13197026022304834 0.21020408163265306 -alexa.2014-09-01.top.dk.10000-https 339 5942 151 109 5 17.52802359882006 0.16080755595247762 0.07016728624535316 0.11122448979591837 -alexa.2014-09-01.top.dk.10000-https-www 441 6901 176 127 5 15.648526077097506 0.12321674076454729 0.08178438661710037 0.12959183673469388 -alexa.2014-09-01.top.se.10000-http 2797 52345 342 245 5 18.714694315337862 0.0763865074095423 0.15892193308550187 0.25 -alexa.2014-09-01.top.se.10000-http-www 2895 52398 351 255 5 18.099481865284975 0.07097836025601952 0.16310408921933087 0.2602040816326531 -alexa.2014-09-01.top.se.10000-https 438 7104 167 114 5 16.21917808219178 0.14227349194905073 0.07760223048327138 0.11632653061224489 -alexa.2014-09-01.top.se.10000-https-www 650 9510 199 146 5 14.63076923076923 0.10021074815595363 0.09247211895910781 0.1489795918367347 -com.2014-08-29.random.10000-http 7775 55666 404 273 5 7.159614147909968 0.026225692849487062 0.18773234200743494 0.2785714285714286 -com.2014-08-29.random.10000-http-www 7811 55955 405 277 5 7.163615414159518 0.0258614274879405 0.18819702602230484 0.2826530612244898 -com.2014-08-29.random.10000-https 50 446 47 26 5 8.92 0.34307692307692306 0.02184014869888476 0.026530612244897958 -com.2014-08-29.random.10000-https-www 55 477 49 28 5 8.672727272727272 0.3097402597402597 0.022769516728624536 0.02857142857142857 -dk.2014-07-23.random.10000-http 7180 36822 278 187 5 5.128412256267409 0.02742466447201823 0.129182156133829 0.19081632653061226 -dk.2014-07-23.random.10000-http-www 7378 35960 275 187 5 4.873949579831932 0.026063901496427445 0.12778810408921934 0.19081632653061226 -dk.2014-07-23.random.10000-https 23 150 26 15 5 6.521739130434782 0.43478260869565216 0.012081784386617101 0.015306122448979591 -dk.2014-07-23.random.10000-https-www 32 257 32 23 5 8.03125 0.3491847826086957 0.01486988847583643 0.02346938775510204 -net.2014-08-29.random.10000-http 7270 48379 412 291 5 6.654607977991747 
0.0228680686528926 0.19144981412639406 0.2969387755102041 -net.2014-08-29.random.10000-http-www 7378 49471 411 293 5 6.705204662510165 0.02288465755122923 0.19098513011152415 0.29897959183673467 -net.2014-08-29.random.10000-https 26 203 21 8 4 7.8076923076923075 0.9759615384615384 0.009758364312267658 0.00816326530612245 -net.2014-08-29.random.10000-https-www 28 291 27 12 5 10.392857142857142 0.8660714285714285 0.01254646840148699 0.012244897959183673 -reach50.2014w35.se-http 43 843 92 66 5 19.6046511627907 0.2970401691331924 0.04275092936802974 0.0673469387755102 -reach50.2014w35.se-http-www 42 801 92 61 5 19.071428571428573 0.3126463700234192 0.04275092936802974 0.06224489795918367 -reach50.2014w35.se-https 18 265 41 24 5 14.722222222222221 0.6134259259259259 0.019052044609665426 0.024489795918367346 -reach50.2014w35.se-https-www 26 303 40 25 5 11.653846153846153 0.46615384615384614 0.01858736059479554 0.025510204081632654 -se.2014-07-10.random.100000-http 73605 395347 496 336 5 5.371197608858094 0.015985707169220518 0.23048327137546468 0.34285714285714286 -se.2014-07-10.random.100000-http-www 77261 406990 502 335 5 5.267728867086888 0.01572456378234892 0.23327137546468402 0.34183673469387754 -se.2014-07-10.random.100000-https 282 1962 94 66 5 6.957446808510638 0.10541586073500966 0.04368029739776952 0.0673469387755102 -se.2014-07-10.random.100000-https-www 328 2451 124 94 5 7.472560975609756 0.07949532952776336 0.05762081784386617 0.09591836734693877 -se.healthstatus.2013.counties-http 18 105 10 6 4 5.833333333333333 0.9722222222222222 0.004646840148698885 0.006122448979591836 -se.healthstatus.2013.counties-http-www 21 133 11 6 4 6.333333333333333 1.0555555555555556 0.005111524163568773 0.006122448979591836 -se.healthstatus.2013.counties-https 3 7 2 1 2 2.3333333333333335 2.3333333333333335 0.0009293680297397769 0.0010204081632653062 -se.healthstatus.2013.counties-https-www 6 20 4 1 2 3.3333333333333335 3.3333333333333335 0.0018587360594795538 
0.0010204081632653062 -se.healthstatus.2013.domain-registrars-http 127 886 66 49 5 6.9763779527559056 0.1423750602603246 0.03066914498141264 0.05 -se.healthstatus.2013.domain-registrars-http-www 134 872 62 45 5 6.507462686567164 0.14461028192371475 0.028810408921933085 0.04591836734693878 -se.healthstatus.2013.domain-registrars-https 40 430 46 32 4 10.75 0.3359375 0.02137546468401487 0.0326530612244898 -se.healthstatus.2013.domain-registrars-https-www 42 327 40 25 4 7.785714285714286 0.31142857142857144 0.01858736059479554 0.025510204081632654 -se.healthstatus.2013.financial-services-http 67 378 49 36 5 5.641791044776119 0.15671641791044777 0.022769516728624536 0.036734693877551024 -se.healthstatus.2013.financial-services-http-www 72 415 50 35 5 5.763888888888889 0.16468253968253968 0.023234200743494422 0.03571428571428571 -se.healthstatus.2013.financial-services-https 16 95 24 15 5 5.9375 0.3958333333333333 0.011152416356877323 0.015306122448979591 -se.healthstatus.2013.financial-services-https-www 31 228 32 19 5 7.354838709677419 0.3870967741935484 0.01486988847583643 0.019387755102040816 -se.healthstatus.2013.gocs-http 49 501 45 28 5 10.224489795918368 0.3651603498542274 0.020910780669144983 0.02857142857142857 -se.healthstatus.2013.gocs-http-www 57 577 47 30 5 10.12280701754386 0.3374269005847953 0.02184014869888476 0.030612244897959183 -se.healthstatus.2013.gocs-https 4 64 21 11 4 16 1.4545454545454546 0.009758364312267658 0.011224489795918367 -se.healthstatus.2013.gocs-https-www 9 91 27 16 5 10.11111111111111 0.6319444444444444 0.01254646840148699 0.0163265306122449 -se.healthstatus.2013.higher-education-http 40 270 24 12 4 6.75 0.5625 0.011152416356877323 0.012244897959183673 -se.healthstatus.2013.higher-education-http-www 47 308 26 12 4 6.553191489361702 0.5460992907801419 0.012081784386617101 0.012244897959183673 -se.healthstatus.2013.higher-education-https 9 104 16 7 4 11.555555555555555 1.6507936507936507 0.007434944237918215 0.007142857142857143 
-se.healthstatus.2013.higher-education-https-www 24 182 22 11 4 7.583333333333333 0.6893939393939393 0.010223048327137546 0.011224489795918367 -se.healthstatus.2013.isps-http 18 271 47 37 5 15.055555555555555 0.4069069069069069 0.02184014869888476 0.03775510204081633 -se.healthstatus.2013.isps-http-www 19 317 55 45 5 16.68421052631579 0.3707602339181287 0.025557620817843865 0.04591836734693878 -se.healthstatus.2013.isps-https 6 152 41 35 5 25.333333333333332 0.7238095238095238 0.019052044609665426 0.03571428571428571 -se.healthstatus.2013.isps-https-www 10 163 43 34 5 16.3 0.47941176470588237 0.019981412639405203 0.03469387755102041 -se.healthstatus.2013.media-http 26 1101 81 57 5 42.34615384615385 0.742914979757085 0.03763940520446097 0.058163265306122446 -se.healthstatus.2013.media-http-www 28 1234 79 57 5 44.07142857142857 0.7731829573934836 0.03671003717472119 0.058163265306122446 -se.healthstatus.2013.media-https 4 186 24 15 4 46.5 3.1 0.011152416356877323 0.015306122448979591 -se.healthstatus.2013.media-https-www 5 204 28 17 4 40.8 2.4 0.013011152416356878 0.017346938775510204 -se.healthstatus.2013.municipalities-http 249 2367 39 19 5 9.506024096385541 0.5003170577045022 0.01812267657992565 0.019387755102040816 -se.healthstatus.2013.municipalities-http-www 271 2447 39 19 5 9.029520295202952 0.47523791027383955 0.01812267657992565 0.019387755102040816 -se.healthstatus.2013.municipalities-https 44 305 18 9 4 6.931818181818182 0.7702020202020202 0.008364312267657992 0.009183673469387756 -se.healthstatus.2013.municipalities-https-www 54 394 18 9 4 7.296296296296297 0.8106995884773663 0.008364312267657992 0.009183673469387756 -se.healthstatus.2013.public-authorities-http 170 935 48 31 5 5.5 0.1774193548387097 0.022304832713754646 0.03163265306122449 -se.healthstatus.2013.public-authorities-http-www 203 945 48 31 5 4.655172413793103 0.15016685205784203 0.022304832713754646 0.03163265306122449 -se.healthstatus.2013.public-authorities-https 18 64 9 6 5 
3.5555555555555554 0.5925925925925926 0.004182156133828996 0.006122448979591836 -se.healthstatus.2013.public-authorities-https-www 37 200 23 14 5 5.405405405405405 0.3861003861003861 0.010687732342007435 0.014285714285714285 +alexa.2014-09-01.random.10000-http 8216 166702 704 481 5 20.289922103213243 0.04218279023537057 0.3272896327289633 0.49081632653061225 +alexa.2014-09-01.random.10000-http-www 8493 169685 704 476 5 19.979394795714118 0.041973518478391 0.3272896327289633 0.4857142857142857 +alexa.2014-09-01.random.10000-https 1135 23599 370 272 5 20.792070484581497 0.07644143560507903 0.17201301720130172 0.27755102040816326 +alexa.2014-09-01.random.10000-https-www 1224 16764 368 276 5 13.696078431372548 0.04962347257743677 0.17108321710832172 0.2816326530612245 +alexa.2014-09-01.top.10000-http 8545 274782 755 505 5 32.157050906963136 0.06367732852863987 0.3509995350999535 0.5153061224489796 +alexa.2014-09-01.top.10000-http-www 8682 276636 760 515 5 31.863165169315828 0.06187022362973947 0.35332403533240353 0.5255102040816326 +alexa.2014-09-01.top.10000-https 2507 67788 542 388 5 27.03948942959713 0.06968940574638435 0.2519758251975825 0.39591836734693875 +alexa.2014-09-01.top.10000-https-www 2957 73239 569 413 5 24.768008116334123 0.059970963962068094 0.26452812645281265 0.42142857142857143 +alexa.2014-09-01.top.dk.10000-http 2263 37832 282 205 5 16.717631462660187 0.08154942176907408 0.13110181311018132 0.20918367346938777 +alexa.2014-09-01.top.dk.10000-http-www 2310 38373 284 206 5 16.61168831168831 0.08063926364897239 0.1320316132031613 0.21020408163265306 +alexa.2014-09-01.top.dk.10000-https 339 5942 151 109 5 17.52802359882006 0.16080755595247762 0.0701999070199907 0.11122448979591837 +alexa.2014-09-01.top.dk.10000-https-www 441 6901 176 127 5 15.648526077097506 0.12321674076454729 0.08182240818224082 0.12959183673469388 +alexa.2014-09-01.top.se.10000-http 2797 52345 342 245 5 18.714694315337862 0.0763865074095423 0.1589958158995816 0.25 
+alexa.2014-09-01.top.se.10000-http-www 2895 52398 351 255 5 18.099481865284975 0.07097836025601952 0.16317991631799164 0.2602040816326531 +alexa.2014-09-01.top.se.10000-https 438 7104 167 114 5 16.21917808219178 0.14227349194905073 0.07763830776383078 0.11632653061224489 +alexa.2014-09-01.top.se.10000-https-www 650 9510 199 146 5 14.63076923076923 0.10021074815595363 0.09251510925151092 0.1489795918367347 +com.2014-08-29.random.10000-http 7775 55666 404 273 5 7.159614147909968 0.026225692849487062 0.18781961878196188 0.2785714285714286 +com.2014-08-29.random.10000-http-www 7811 55955 405 277 5 7.163615414159518 0.0258614274879405 0.18828451882845187 0.2826530612244898 +com.2014-08-29.random.10000-https 50 446 47 26 5 8.92 0.34307692307692306 0.02185030218503022 0.026530612244897958 +com.2014-08-29.random.10000-https-www 55 477 49 28 5 8.672727272727272 0.3097402597402597 0.02278010227801023 0.02857142857142857 +dk.2014-07-23.random.10000-http 7180 36822 278 187 5 5.128412256267409 0.02742466447201823 0.1292422129242213 0.19081632653061226 +dk.2014-07-23.random.10000-http-www 7378 35960 275 187 5 4.873949579831932 0.026063901496427445 0.1278475127847513 0.19081632653061226 +dk.2014-07-23.random.10000-https 23 150 26 15 5 6.521739130434782 0.43478260869565216 0.01208740120874012 0.015306122448979591 +dk.2014-07-23.random.10000-https-www 32 257 32 23 5 8.03125 0.3491847826086957 0.014876801487680148 0.02346938775510204 +net.2014-08-29.random.10000-http 7270 48379 412 291 5 6.654607977991747 0.0228680686528926 0.1915388191538819 0.2969387755102041 +net.2014-08-29.random.10000-http-www 7378 49471 411 293 5 6.705204662510165 0.02288465755122923 0.1910739191073919 0.29897959183673467 +net.2014-08-29.random.10000-https 26 203 21 8 4 7.8076923076923075 0.9759615384615384 0.009762900976290097 0.00816326530612245 +net.2014-08-29.random.10000-https-www 28 291 27 12 5 10.392857142857142 0.8660714285714285 0.012552301255230125 0.012244897959183673 +reach50.2014w35.se-http 43 
843 92 66 5 19.6046511627907 0.2970401691331924 0.04277080427708043 0.0673469387755102 +reach50.2014w35.se-http-www 42 801 92 61 5 19.071428571428573 0.3126463700234192 0.04277080427708043 0.06224489795918367 +reach50.2014w35.se-https 18 265 41 24 5 14.722222222222221 0.6134259259259259 0.01906090190609019 0.024489795918367346 +reach50.2014w35.se-https-www 26 303 40 25 5 11.653846153846153 0.46615384615384614 0.018596001859600187 0.025510204081632654 +se.2014-07-10.random.100000-http 73605 395347 496 336 5 5.371197608858094 0.015985707169220518 0.23059042305904232 0.34285714285714286 +se.2014-07-10.random.100000-http-www 77261 406990 502 335 5 5.267728867086888 0.01572456378234892 0.23337982333798232 0.34183673469387754 +se.2014-07-10.random.100000-https 282 1962 94 66 5 6.957446808510638 0.10541586073500966 0.04370060437006044 0.0673469387755102 +se.2014-07-10.random.100000-https-www 328 2451 124 94 5 7.472560975609756 0.07949532952776336 0.05764760576476058 0.09591836734693877 +se.healthstatus.2013.counties-http 18 105 10 6 4 5.833333333333333 0.9722222222222222 0.004649000464900047 0.006122448979591836 +se.healthstatus.2013.counties-http-www 21 133 11 6 4 6.333333333333333 1.0555555555555556 0.005113900511390051 0.006122448979591836 +se.healthstatus.2013.counties-https 3 7 2 1 2 2.3333333333333335 2.3333333333333335 0.0009298000929800093 0.0010204081632653062 +se.healthstatus.2013.counties-https-www 6 20 4 1 2 3.3333333333333335 3.3333333333333335 0.0018596001859600185 0.0010204081632653062 +se.healthstatus.2013.domain-registrars-http 127 886 66 49 5 6.9763779527559056 0.1423750602603246 0.030683403068340307 0.05 +se.healthstatus.2013.domain-registrars-http-www 134 872 62 45 5 6.507462686567164 0.14461028192371475 0.02882380288238029 0.04591836734693878 +se.healthstatus.2013.domain-registrars-https 40 430 46 32 4 10.75 0.3359375 0.021385402138540215 0.0326530612244898 +se.healthstatus.2013.domain-registrars-https-www 42 327 40 25 4 7.785714285714286 
0.31142857142857144 0.018596001859600187 0.025510204081632654 +se.healthstatus.2013.financial-services-http 67 378 49 36 5 5.641791044776119 0.15671641791044777 0.02278010227801023 0.036734693877551024 +se.healthstatus.2013.financial-services-http-www 72 415 50 35 5 5.763888888888889 0.16468253968253968 0.023245002324500233 0.03571428571428571 +se.healthstatus.2013.financial-services-https 16 95 24 15 5 5.9375 0.3958333333333333 0.011157601115760111 0.015306122448979591 +se.healthstatus.2013.financial-services-https-www 31 228 32 19 5 7.354838709677419 0.3870967741935484 0.014876801487680148 0.019387755102040816 +se.healthstatus.2013.gocs-http 49 501 45 28 5 10.224489795918368 0.3651603498542274 0.02092050209205021 0.02857142857142857 +se.healthstatus.2013.gocs-http-www 57 577 47 30 5 10.12280701754386 0.3374269005847953 0.02185030218503022 0.030612244897959183 +se.healthstatus.2013.gocs-https 4 64 21 11 4 16 1.4545454545454546 0.009762900976290097 0.011224489795918367 +se.healthstatus.2013.gocs-https-www 9 91 27 16 5 10.11111111111111 0.6319444444444444 0.012552301255230125 0.0163265306122449 +se.healthstatus.2013.higher-education-http 40 270 24 12 4 6.75 0.5625 0.011157601115760111 0.012244897959183673 +se.healthstatus.2013.higher-education-http-www 47 308 26 12 4 6.553191489361702 0.5460992907801419 0.01208740120874012 0.012244897959183673 +se.healthstatus.2013.higher-education-https 9 104 16 7 4 11.555555555555555 1.6507936507936507 0.007438400743840074 0.007142857142857143 +se.healthstatus.2013.higher-education-https-www 24 182 22 11 4 7.583333333333333 0.6893939393939393 0.010227801022780102 0.011224489795918367 +se.healthstatus.2013.isps-http 18 271 47 37 5 15.055555555555555 0.4069069069069069 0.02185030218503022 0.03775510204081633 +se.healthstatus.2013.isps-http-www 19 317 55 45 5 16.68421052631579 0.3707602339181287 0.025569502556950254 0.04591836734693878 +se.healthstatus.2013.isps-https 6 152 41 35 5 25.333333333333332 0.7238095238095238 
0.01906090190609019 0.03571428571428571 +se.healthstatus.2013.isps-https-www 10 163 43 34 5 16.3 0.47941176470588237 0.0199907019990702 0.03469387755102041 +se.healthstatus.2013.media-http 26 1101 81 57 5 42.34615384615385 0.742914979757085 0.03765690376569038 0.058163265306122446 +se.healthstatus.2013.media-http-www 28 1234 79 57 5 44.07142857142857 0.7731829573934836 0.03672710367271037 0.058163265306122446 +se.healthstatus.2013.media-https 4 186 24 15 4 46.5 3.1 0.011157601115760111 0.015306122448979591 +se.healthstatus.2013.media-https-www 5 204 28 17 4 40.8 2.4 0.01301720130172013 0.017346938775510204 +se.healthstatus.2013.municipalities-http 249 2367 39 19 5 9.506024096385541 0.5003170577045022 0.01813110181311018 0.019387755102040816 +se.healthstatus.2013.municipalities-http-www 271 2447 39 19 5 9.029520295202952 0.47523791027383955 0.01813110181311018 0.019387755102040816 +se.healthstatus.2013.municipalities-https 44 305 18 9 4 6.931818181818182 0.7702020202020202 0.008368200836820083 0.009183673469387756 +se.healthstatus.2013.municipalities-https-www 54 394 18 9 4 7.296296296296297 0.8106995884773663 0.008368200836820083 0.009183673469387756 +se.healthstatus.2013.public-authorities-http 170 935 48 31 5 5.5 0.1774193548387097 0.022315202231520222 0.03163265306122449 +se.healthstatus.2013.public-authorities-http-www 203 945 48 31 5 4.655172413793103 0.15016685205784203 0.022315202231520222 0.03163265306122449 +se.healthstatus.2013.public-authorities-https 18 64 9 6 5 3.5555555555555554 0.5925925925925926 0.0041841004184100415 0.006122448979591836 +se.healthstatus.2013.public-authorities-https-www 37 200 23 14 5 5.405405405405405 0.3861003861003861 0.010692701069270108 0.014285714285714285 diff --git a/report/report.lyx b/report/report.lyx index 9c79920..396b1a7 100644 --- a/report/report.lyx +++ b/report/report.lyx @@ -3546,6 +3546,13 @@ name "tab:SE-Health-Status-HTTPS-coverage-2008-2013" \begin_layout Section Cat and Mouse +\begin_inset CommandInset label 
+LatexCommand label +name "sec:Cat-and-Mouse" + +\end_inset + + \end_layout \begin_layout Standard @@ -3612,11 +3619,11 @@ numprint{1326} \end_layout \begin_layout Standard -The paper also discusses serving content alongside advertisments as a way +The paper also discusses serving content alongside advertisements as a way to avoid blocking of trackers \begin_inset CommandInset ref LatexCommand eqref -reference "sub:Top-categories" +reference "sub:Disconnect-categories-coverage" \end_inset @@ -15607,7 +15614,7 @@ reference "sub:Disconnect-Organizations-in-more-than-one-category" \end_inset , so their total organization coverage is higher. - This first table excludes top Google domains, which dominates the top list + This first table excludes top Google domains, which dominate the top list for all datasets. \end_layout @@ -15644,7 +15651,7 @@ reference "sub:Disconnect-category" , and are not reflected in the social category aggregates \begin_inset CommandInset ref LatexCommand eqref -reference "sub:Top-categories" +reference "sub:Disconnect-categories-coverage" \end_inset @@ -15668,7 +15675,17 @@ reference "sub:Public-suffix-list" blocking list -- because of the variety of services being hosted on subdomains, it cannot be blocked as for example advertisement even if it is being hosted there. - It can be seen as a flaw in the Disconnect way of blocking. + It can be seen as a flaw in the Disconnect way of blocking, even though + listing individual subdomains in other categories might be a way to override + the content bypass -- but as these non-branded domains can be seen as throw-awa +y domains, it can become a game of cat and mouse +\begin_inset CommandInset ref +LatexCommand eqref +reference "sec:Cat-and-Mouse" + +\end_inset + +. \end_layout \begin_layout Standard @@ -15781,7 +15798,7 @@ Here we see DoubleClick's, one of Google's ad services, coverage. 
and it skews the numbers for the advertisement category \begin_inset CommandInset ref LatexCommand eqref -reference "sub:Top-categories" +reference "sub:Disconnect-categories-coverage" \end_inset @@ -15870,10 +15887,10 @@ gle.sorted.tsv}{Top Disconnect Google domain match coverage}{}{} \end_layout \begin_layout Subsection -Top categories +Categories \begin_inset CommandInset label LatexCommand label -name "sub:Top-categories" +name "sub:Disconnect-categories-coverage" \end_inset @@ -15881,20 +15898,94 @@ name "sub:Top-categories" \end_layout \begin_layout Standard -Categories and their coverage across different datasets. +Disconnect's categories and their coverage across different datasets are + shown in the table below. + As mentioned earlier, the special Disconnect category contains major Facebook, + Google and Twitter domains -- domains that could also have been listed + as advertising, analytics or social domains +\begin_inset CommandInset ref +LatexCommand eqref +reference "sub:Disconnect-category" + +\end_inset + +. + The content category, which bypasses Disconnect's blocking by default, + can for this reason be seen as the most accurate in terms of coverage, + as domains have presumably been added as content in a manual process of + whitelisting +\begin_inset CommandInset ref +LatexCommand eqref +reference "sub:Disconnect-Content" + +\end_inset + +. \end_layout \begin_layout Standard -\begin_inset Note Greyedout -status open +The highest coverage being connected with the Disconnect category explains + the low coverage of advertising, analytics and social. + If, for example, the two domains facebook.com and twitter.com were included + in the social category, coverage would be 35-56 percentage points higher + for top domains and 9-11 percentage points higher for random domains +\begin_inset CommandInset ref +LatexCommand eqref +reference "sub:Top-domains-General" -\begin_layout Plain Layout -Write more. +\end_inset + + -- and more accurate. 
+ The same goes for advertising and doubleclick.net (30-50%, 7-36%) and analytics + and google-analytics.com (63-76%, 24-32%) +\begin_inset CommandInset ref +LatexCommand eqref +reference "sub:Top-domains-Google" + +\end_inset + +. \end_layout +\begin_layout Standard +What is surprising is the high coverage of content from known trackers. + While the Disconnect category has the highest coverage overall, the content + category is the second largest -- significantly larger than the advertising, + analytics and social categories in most datasets. + While a large portion of this is due to extensive usage of Google's hosted + services +\begin_inset CommandInset ref +LatexCommand eqref +reference "sub:Top-domains-Google" + +\end_inset + +, all organizations with only content domains as well as those with +\begin_inset Quotes eld +\end_inset + +mixed +\begin_inset Quotes erd \end_inset + domains (Table +\begin_inset CommandInset ref +LatexCommand ref +reference "tab:Organizations-in-more-than-one-category" +\end_inset + +) are let through to 67-78% of top domains and 38-56% of random domains. + Mixing advertisement, or in this case tracking in general, with content + has previously been discussed as a way for organizations to avoid in-browser + blocking +\begin_inset CommandInset ref +LatexCommand eqref +reference "sec:Cat-and-Mouse" + +\end_inset + + -- and it seems prevalent. \end_layout \begin_layout Standard @@ -15911,11 +16002,15 @@ Add column showing all and any Disconnect coverage. \end_layout \begin_layout Standard -\begin_inset Note Greyedout +\begin_inset ERT status open \begin_layout Plain Layout -Write about high content trackers. + + +\backslash +tsvtablewidedatasets{datasets.non-failed.disconnect.categories.coverage.external.sorte +d.tsv}{Disconnect category match coverage}{}{} \end_layout \end_inset @@ -15924,11 +16019,14 @@ Write about high content trackers. 
\end_layout \begin_layout Standard -\begin_inset Note Greyedout +\begin_inset ERT status open \begin_layout Plain Layout -Write about low advertisement, compare to DoubleClick. + + +\backslash +begin{futurework} \end_layout \end_inset @@ -15937,16 +16035,21 @@ Write about low advertisement, compare to DoubleClick. \end_layout \begin_layout Standard -\begin_inset Note Greyedout -status open - -\begin_layout Plain Layout -Write about low analytics, compare to Google Analytics. -\end_layout +The current analysis performed for this thesis is built in such a way that + the Disconnect blocking list used for matching can easily be replaced with + an updated version. + This also opens up the possibility of using a locally modified blocking + list, re-categorizing each of the Disconnect category's domains as either + advertising, analytics or social. + The per-organization aggregate analysis would still produce the same numbers + +\begin_inset CommandInset ref +LatexCommand eqref +reference "sub:Top-organizations" \end_inset - +. \end_layout \begin_layout Standard @@ -15957,8 +16060,7 @@ status open \backslash -tsvtablewidedatasets{datasets.non-failed.disconnect.categories.coverage.external.sorte -d.tsv}{Disconnect category match coverage}{}{} +end{futurework} \end_layout \end_inset @@ -16311,7 +16413,7 @@ x has been added to show their collective Disconnect category coverage \begin_inset CommandInset ref LatexCommand eqref -reference "sub:Top-categories" +reference "sub:Disconnect-categories-coverage" \end_inset