From 725754b6234a4e2c970dfd678ea0bb8f80b31f37 Mon Sep 17 00:00:00 2001
From: Joel Purra
Date: Wed, 17 Sep 2014 16:52:38 +0200
Subject: [PATCH] Some more multiset table extractions

---
Review notes (text in this section is not part of the commit message):
- Restored the line structure of the patch.
- domain-scope: mapData no longer selects "requests", a key the extraction
  step never produces (it was always null and unused in the TSV).
- url.counts: ratios go through a jq helper that yields null instead of
  failing on a zero or missing count. NOTE(review): confirm that
  util/array-of-objects-to-tsv.sh handles null values.
- Added explanatory comments to all four scripts.
- The blob ids on the index lines are those of the original submission.

 ...ed.classification.domain-scope.coverage.sh | 68 ++++++++++++++++
 ...n-failed.classification.secure.coverage.sh | 62 +++++++++++++++
 .../non-failed.disconnect.categories.sh       | 64 +++++++++++++++
 src/multiset/non-failed.url.counts.sh         | 79 +++++++++++++++++++
 4 files changed, 273 insertions(+)
 create mode 100755 src/multiset/non-failed.classification.domain-scope.coverage.sh
 create mode 100755 src/multiset/non-failed.classification.secure.coverage.sh
 create mode 100755 src/multiset/non-failed.disconnect.categories.sh
 create mode 100755 src/multiset/non-failed.url.counts.sh

diff --git a/src/multiset/non-failed.classification.domain-scope.coverage.sh b/src/multiset/non-failed.classification.domain-scope.coverage.sh
new file mode 100755
index 0000000..ca7276b
--- /dev/null
+++ b/src/multiset/non-failed.classification.domain-scope.coverage.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+#
+# Extracts the domain scope classification coverage (same domain, subdomain,
+# superdomain, internal domain, external domain) of the distinct requested URLs
+# for non-failed origins from aggregates.analysis.json, then writes JSON,
+# sorted JSON and TSV files to the current working directory.
+#
+# All arguments are passed on to util/dataset-query.sh.
+# NOTE(review): the helper presumably runs the extraction command inside each
+# dataset directory, which is why "$PWD" is passed unexpanded -- confirm.
+set -e
+
+aggregatesAnalysisJson="aggregates.analysis.json"
+
+# jq: the dataset path, the non-failed origin count and the coverage values.
+read -d '' getNonFailedClassificationDomainScope <<-'EOF' || true
+{
+	path: $path,
+	"non-failed-domains": .successfulOrigin.origin.counts.count,
+	"is-same-domain": .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-same-domain",
+	"is-subdomain": .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-subdomain",
+	"is-superdomain": .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-superdomain",
+	"is-internal-domain": .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-internal-domain",
+	"is-external-domain": .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-external-domain",
+}
+EOF
+
+# jq: name the dataset after the last two segments of its path, and keep only
+# the keys produced by the extraction above.
+read -d '' mapData <<-'EOF' || true
+{
+	dataset: (.path | split("/")[-2:] | join("/")),
+	"non-failed-domains",
+	"is-same-domain",
+	"is-subdomain",
+	"is-superdomain",
+	"is-internal-domain",
+	"is-external-domain",
+}
+EOF
+
+# jq: numbered column names; presumably the prefixes fix the TSV column order
+# and are removed by clean-tsv-sorted-header.sh -- confirm against the helpers.
+read -d '' renameForTsvColumnOrdering <<-'EOF' || true
+map(
+	{
+		"01--Dataset": .dataset,
+		"02--Non-failed domains": ."non-failed-domains",
+		"03--Same domain": ."is-same-domain",
+		"04--Subdomain": ."is-subdomain",
+		"05--Superdomain": ."is-superdomain",
+		"06--Internal domain": ."is-internal-domain",
+		"07--External domain": ."is-external-domain",
+	}
+)
+EOF
+
+read -d '' sortObjects <<-'EOF' || true
+sort_by(.dataset)
+EOF
+
+# The operators and "$PWD" are quoted so that they reach dataset-query.sh as
+# literal arguments; presumably the helper evaluates them per dataset -- confirm.
+"${BASH_SOURCE%/*}/../util/dataset-query.sh" "$@" -- test -e "$aggregatesAnalysisJson" '&&' cat "$aggregatesAnalysisJson" '|' jq --arg path '"$PWD"' "'$getNonFailedClassificationDomainScope'" >"datasets.non-failed.classification.domain-scope.coverage.json"
+
+<"datasets.non-failed.classification.domain-scope.coverage.json" jq "$mapData" | "${BASH_SOURCE%/*}/../util/to-array.sh" | jq "$sortObjects" >"datasets.non-failed.classification.domain-scope.coverage.sorted.json"
+
+<"datasets.non-failed.classification.domain-scope.coverage.sorted.json" jq "$renameForTsvColumnOrdering" | "${BASH_SOURCE%/*}/../util/array-of-objects-to-tsv.sh" | "${BASH_SOURCE%/*}/../util/clean-tsv-sorted-header.sh" >"datasets.non-failed.classification.domain-scope.coverage.sorted.tsv"
diff --git a/src/multiset/non-failed.classification.secure.coverage.sh b/src/multiset/non-failed.classification.secure.coverage.sh
new file mode 100755
index 0000000..d5bfa2d
--- /dev/null
+++ b/src/multiset/non-failed.classification.secure.coverage.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+#
+# Extracts the secure request classification coverage of the distinct requested
+# URLs (unfiltered, internal and external) for non-failed origins from
+# aggregates.analysis.json, then writes JSON, sorted JSON and TSV files to the
+# current working directory.
+#
+# All arguments are passed on to util/dataset-query.sh.
+# NOTE(review): the helper presumably runs the extraction command inside each
+# dataset directory, which is why "$PWD" is passed unexpanded -- confirm.
+set -e
+
+aggregatesAnalysisJson="aggregates.analysis.json"
+
+# jq: the dataset path, the non-failed origin count and the coverage values.
+read -d '' getNonFailedClassificationSecure <<-'EOF' || true
+{
+	path: $path,
+	"non-failed-domains": .successfulOrigin.origin.counts.count,
+	requests: .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-secure-request",
+	"internal-secure-coverage": .successfulOrigin.internalUrls.requestedUrlsDistinct.coverage.classification."is-secure-request",
+	"external-secure-coverage": .successfulOrigin.externalUrls.requestedUrlsDistinct.coverage.classification."is-secure-request",
+}
+EOF
+
+# jq: name the dataset after the last two segments of its path, and keep the
+# keys produced by the extraction above.
+read -d '' mapData <<-'EOF' || true
+{
+	dataset: (.path | split("/")[-2:] | join("/")),
+	"non-failed-domains",
+	requests,
+	"internal-secure-coverage",
+	"external-secure-coverage",
+}
+EOF
+
+# jq: numbered column names; presumably the prefixes fix the TSV column order
+# and are removed by clean-tsv-sorted-header.sh -- confirm against the helpers.
+read -d '' renameForTsvColumnOrdering <<-'EOF' || true
+map(
+	{
+		"01--Dataset": .dataset,
+		"02--Non-failed domains": ."non-failed-domains",
+		"03--All secure": .requests,
+		"04--Internal secure": ."internal-secure-coverage",
+		"05--External secure": ."external-secure-coverage",
+	}
+)
+EOF
+
+read -d '' sortObjects <<-'EOF' || true
+sort_by(.dataset)
+EOF
+
+# The operators and "$PWD" are quoted so that they reach dataset-query.sh as
+# literal arguments; presumably the helper evaluates them per dataset -- confirm.
+"${BASH_SOURCE%/*}/../util/dataset-query.sh" "$@" -- test -e "$aggregatesAnalysisJson" '&&' cat "$aggregatesAnalysisJson" '|' jq --arg path '"$PWD"' "'$getNonFailedClassificationSecure'" >"datasets.non-failed.classification.secure.coverage.json"
+
+<"datasets.non-failed.classification.secure.coverage.json" jq "$mapData" | "${BASH_SOURCE%/*}/../util/to-array.sh" | jq "$sortObjects" >"datasets.non-failed.classification.secure.coverage.sorted.json"
+
+<"datasets.non-failed.classification.secure.coverage.sorted.json" jq "$renameForTsvColumnOrdering" | "${BASH_SOURCE%/*}/../util/array-of-objects-to-tsv.sh" | "${BASH_SOURCE%/*}/../util/clean-tsv-sorted-header.sh" >"datasets.non-failed.classification.secure.coverage.sorted.tsv"
diff --git a/src/multiset/non-failed.disconnect.categories.sh b/src/multiset/non-failed.disconnect.categories.sh
new file mode 100755
index 0000000..69c18f8
--- /dev/null
+++ b/src/multiset/non-failed.disconnect.categories.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+#
+# Extracts the per-category values found under coverage.blocks.categories of
+# the distinct requested external URLs for non-failed origins from
+# aggregates.analysis.json, then writes JSON, sorted JSON and TSV files to the
+# current working directory. Categories missing from a dataset become 0.
+#
+# All arguments are passed on to util/dataset-query.sh.
+# NOTE(review): the helper presumably runs the extraction command inside each
+# dataset directory, which is why "$PWD" is passed unexpanded -- confirm.
+set -e
+
+aggregatesAnalysisJson="aggregates.analysis.json"
+
+# jq: the dataset path, the non-failed origin count and the categories object.
+read -d '' getNonFailedDisconnectCategoryCounts <<-'EOF' || true
+{
+	path: $path,
+	"non-failed-domains": .successfulOrigin.origin.counts.count,
+	categories: .successfulOrigin.externalUrls.requestedUrlsDistinct.coverage.blocks.categories
+}
+EOF
+
+# jq: name the dataset after the last two segments of its path, and flatten the
+# five known categories, substituting 0 for any that is null or absent.
+read -d '' mapData <<-'EOF' || true
+{
+	dataset: (.path | split("/")[-2:] | join("/")),
+	"non-failed-domains",
+	"Disconnect": (.categories.Disconnect // 0),
+	"Content": (.categories.Content // 0),
+	"Advertising": (.categories.Advertising // 0),
+	"Analytics": (.categories.Analytics // 0),
+	"Social": (.categories.Social // 0)
+}
+EOF
+
+# jq: numbered column names; presumably the prefixes fix the TSV column order
+# and are removed by clean-tsv-sorted-header.sh -- confirm against the helpers.
+read -d '' renameForTsvColumnOrdering <<-'EOF' || true
+map(
+	{
+		"01--Dataset": .dataset,
+		"02--Non-failed domains": ."non-failed-domains",
+		"03--Disconnect": .Disconnect,
+		"04--Content": .Content,
+		"05--Advertising": .Advertising,
+		"06--Analytics": .Analytics,
+		"07--Social": .Social
+	}
+)
+EOF
+
+read -d '' sortObjects <<-'EOF' || true
+sort_by(.dataset)
+EOF
+
+# The operators and "$PWD" are quoted so that they reach dataset-query.sh as
+# literal arguments; presumably the helper evaluates them per dataset -- confirm.
+"${BASH_SOURCE%/*}/../util/dataset-query.sh" "$@" -- test -e "$aggregatesAnalysisJson" '&&' cat "$aggregatesAnalysisJson" '|' jq --arg path '"$PWD"' "'$getNonFailedDisconnectCategoryCounts'" >"datasets.non-failed.disconnect.categories.json"
+
+<"datasets.non-failed.disconnect.categories.json" jq "$mapData" | "${BASH_SOURCE%/*}/../util/to-array.sh" | jq "$sortObjects" >"datasets.non-failed.disconnect.categories.sorted.json"
+
+<"datasets.non-failed.disconnect.categories.sorted.json" jq "$renameForTsvColumnOrdering" | "${BASH_SOURCE%/*}/../util/array-of-objects-to-tsv.sh" | "${BASH_SOURCE%/*}/../util/clean-tsv-sorted-header.sh" >"datasets.non-failed.disconnect.categories.sorted.tsv"
diff --git a/src/multiset/non-failed.url.counts.sh b/src/multiset/non-failed.url.counts.sh
new file mode 100755
index 0000000..cf602f2
--- /dev/null
+++ b/src/multiset/non-failed.url.counts.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+#
+# Extracts the requested URL counts (unfiltered, internal and external) for
+# non-failed origins from aggregates.analysis.json, derives request ratios and
+# requests per domain, then writes JSON, sorted JSON and TSV files to the
+# current working directory.
+#
+# All arguments are passed on to util/dataset-query.sh.
+# NOTE(review): the helper presumably runs the extraction command inside each
+# dataset directory, which is why "$PWD" is passed unexpanded -- confirm.
+set -e
+
+aggregatesAnalysisJson="aggregates.analysis.json"
+
+# jq: the dataset path, the non-failed origin count and the request counts.
+read -d '' getNonFailedUrlCounts <<-'EOF' || true
+{
+	path: $path,
+	"non-failed-domains": .successfulOrigin.origin.counts.count,
+	requests: .successfulOrigin.unfilteredUrls.requestedUrls.counts.count,
+	"internal-requests": .successfulOrigin.internalUrls.requestedUrls.counts.count,
+	"external-requests": .successfulOrigin.externalUrls.requestedUrls.counts.count,
+}
+EOF
+
+# jq: name the dataset after the last two segments of its path and derive the
+# ratios. ratio/2 yields null, instead of raising a jq error, when an operand
+# is not a number or the denominator is zero.
+read -d '' mapData <<-'EOF' || true
+def ratio(numerator; denominator):
+	if (numerator | type) == "number" and (denominator | type) == "number" and denominator != 0
+	then numerator / denominator
+	else null
+	end;
+
+{
+	dataset: (.path | split("/")[-2:] | join("/")),
+	"non-failed-domains",
+	requests,
+	"internal-requests",
+	"external-requests",
+	#"internal-requests-ratio": ratio(."internal-requests"; .requests),
+	"external-requests-ratio": ratio(."external-requests"; .requests),
+	"requests-per-domain": ratio(.requests; ."non-failed-domains"),
+	#"internal-requests-per-domain": ratio(."internal-requests"; ."non-failed-domains"),
+	"external-requests-per-domain": ratio(."external-requests"; ."non-failed-domains"),
+}
+EOF
+
+# jq: numbered column names; presumably the prefixes fix the TSV column order
+# and are removed by clean-tsv-sorted-header.sh -- confirm against the helpers.
+read -d '' renameForTsvColumnOrdering <<-'EOF' || true
+map(
+	{
+		"01--Dataset": .dataset,
+		"02--Non-failed domains": ."non-failed-domains",
+		"03--Requests": .requests,
+		"04--Internal requests": ."internal-requests",
+		"05--External requests": ."external-requests",
+		#"06--Internal ratio": ."internal-requests-ratio",
+		"07--External ratio": ."external-requests-ratio",
+		"08--Requests per domain": ."requests-per-domain",
+		#"09--Internal requests per domain": ."internal-requests-per-domain",
+		"10--External requests per domain": ."external-requests-per-domain",
+	}
+)
+EOF
+
+read -d '' sortObjects <<-'EOF' || true
+sort_by(.dataset)
+EOF
+
+# The operators and "$PWD" are quoted so that they reach dataset-query.sh as
+# literal arguments; presumably the helper evaluates them per dataset -- confirm.
+"${BASH_SOURCE%/*}/../util/dataset-query.sh" "$@" -- test -e "$aggregatesAnalysisJson" '&&' cat "$aggregatesAnalysisJson" '|' jq --arg path '"$PWD"' "'$getNonFailedUrlCounts'" >"datasets.non-failed.url.counts.json"
+
+<"datasets.non-failed.url.counts.json" jq "$mapData" | "${BASH_SOURCE%/*}/../util/to-array.sh" | jq "$sortObjects" >"datasets.non-failed.url.counts.sorted.json"
+
+<"datasets.non-failed.url.counts.sorted.json" jq "$renameForTsvColumnOrdering" | "${BASH_SOURCE%/*}/../util/array-of-objects-to-tsv.sh" | "${BASH_SOURCE%/*}/../util/clean-tsv-sorted-header.sh" >"datasets.non-failed.url.counts.sorted.tsv"