Skip to content
This repository has been archived by the owner on Jun 1, 2021. It is now read-only.

Commit

Permalink
Some more multiset table extractions
Browse files Browse the repository at this point in the history
  • Loading branch information
joelpurra committed Sep 17, 2014
1 parent 4f8c2cf commit 725754b
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 0 deletions.
53 changes: 53 additions & 0 deletions src/multiset/non-failed.classification.domain-scope.coverage.sh
@@ -0,0 +1,53 @@
#!/usr/bin/env bash
#
# Builds a per-dataset table of domain-scope classification coverage for
# non-failed origins, read from each dataset's aggregates.analysis.json.
#
# Usage: all arguments are forwarded unchanged to ../util/dataset-query.sh.
#
# Outputs (written to the current directory):
#   datasets.non-failed.classification.domain-scope.coverage.json
#   datasets.non-failed.classification.domain-scope.coverage.sorted.json
#   datasets.non-failed.classification.domain-scope.coverage.sorted.tsv
set -e
# With plain `set -e` only the last stage of a pipeline is checked; pipefail
# makes a failing jq in an earlier stage abort the script instead of leaving
# a truncated output file behind.
set -o pipefail

# Directory holding this script. `${BASH_SOURCE%/*}` returns the script name
# itself when the script was invoked without any slash in its path, so fall
# back to the current directory in that case.
scriptDir="${BASH_SOURCE[0]%/*}"
if [[ "$scriptDir" == "${BASH_SOURCE[0]}" ]]; then
	scriptDir="."
fi
utilDir="$scriptDir/../util"

aggregatesAnalysisJson="aggregates.analysis.json"
outputPrefix="datasets.non-failed.classification.domain-scope.coverage"

# `read -d ''` reads the whole here-document and returns non-zero at end of
# input, hence the `|| true` guards below.

# jq program run once per dataset: picks the origin count and the domain-scope
# coverage values, tagged with the dataset path.
read -r -d '' getNonFailedClassificationDomainScope <<-'EOF' || true
{
path: $path,
"non-failed-domains": .successfulOrigin.origin.counts.count,
"is-same-domain": .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-same-domain",
"is-subdomain": .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-subdomain",
"is-superdomain": .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-superdomain",
"is-internal-domain": .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-internal-domain",
"is-external-domain": .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-external-domain",
}
EOF

# jq program that names each record after the last two path components.
# Only keys that the query above actually emits are selected here.
read -r -d '' mapData <<-'EOF' || true
{
dataset: (.path | split("/")[-2:] | join("/")),
"non-failed-domains",
"is-same-domain",
"is-subdomain",
"is-superdomain",
"is-internal-domain",
"is-external-domain",
}
EOF

# jq program that renames keys with numeric prefixes for the TSV stage.
read -r -d '' renameForTsvColumnOrdering <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Non-failed domains": ."non-failed-domains",
"03--Same domain": ."is-same-domain",
"04--Subdomain": ."is-subdomain",
"05--Superdomain": ."is-superdomain",
"06--Internal domain": ."is-internal-domain",
"07--External domain": ."is-external-domain",
}
)
EOF

read -r -d '' sortObjects <<-'EOF' || true
sort_by(.dataset)
EOF

# Step 1: collect one JSON object per dataset.
# '&&', '|' and '"$PWD"' are passed as literal arguments and the jq program is
# wrapped in single quotes; presumably dataset-query.sh re-evaluates the
# command inside each dataset directory -- confirm against that helper.
"$utilDir/dataset-query.sh" "$@" -- \
	test -e "$aggregatesAnalysisJson" '&&' \
	cat "$aggregatesAnalysisJson" '|' \
	jq --arg path '"$PWD"' "'$getNonFailedClassificationDomainScope'" \
	>"$outputPrefix.json"

# Step 2: reshape the records, gather them into one array, sort by dataset.
jq "$mapData" <"$outputPrefix.json" \
	| "$utilDir/to-array.sh" \
	| jq "$sortObjects" \
	>"$outputPrefix.sorted.json"

# Step 3: rename the columns and convert the sorted array to TSV.
jq "$renameForTsvColumnOrdering" <"$outputPrefix.sorted.json" \
	| "$utilDir/array-of-objects-to-tsv.sh" \
	| "$utilDir/clean-tsv-sorted-header.sh" \
	>"$outputPrefix.sorted.tsv"
46 changes: 46 additions & 0 deletions src/multiset/non-failed.classification.secure.coverage.sh
@@ -0,0 +1,46 @@
#!/usr/bin/env bash
#
# Builds a per-dataset table of secure-request classification coverage
# (all, internal and external URLs) for non-failed origins, read from each
# dataset's aggregates.analysis.json.
#
# Usage: all arguments are forwarded unchanged to ../util/dataset-query.sh.
#
# Outputs (written to the current directory):
#   datasets.non-failed.classification.secure.coverage.json
#   datasets.non-failed.classification.secure.coverage.sorted.json
#   datasets.non-failed.classification.secure.coverage.sorted.tsv
set -e
# With plain `set -e` only the last stage of a pipeline is checked; pipefail
# makes a failing jq in an earlier stage abort the script instead of leaving
# a truncated output file behind.
set -o pipefail

# Directory holding this script. `${BASH_SOURCE%/*}` returns the script name
# itself when the script was invoked without any slash in its path, so fall
# back to the current directory in that case.
scriptDir="${BASH_SOURCE[0]%/*}"
if [[ "$scriptDir" == "${BASH_SOURCE[0]}" ]]; then
	scriptDir="."
fi
utilDir="$scriptDir/../util"

aggregatesAnalysisJson="aggregates.analysis.json"
outputPrefix="datasets.non-failed.classification.secure.coverage"

# `read -d ''` reads the whole here-document and returns non-zero at end of
# input, hence the `|| true` guards below.

# jq program run once per dataset: picks the origin count and the
# "is-secure-request" coverage for unfiltered, internal and external URLs.
read -r -d '' getNonFailedClassificationSecure <<-'EOF' || true
{
path: $path,
"non-failed-domains": .successfulOrigin.origin.counts.count,
requests: .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-secure-request",
"internal-secure-coverage": .successfulOrigin.internalUrls.requestedUrlsDistinct.coverage.classification."is-secure-request",
"external-secure-coverage": .successfulOrigin.externalUrls.requestedUrlsDistinct.coverage.classification."is-secure-request",
}
EOF

# jq program that names each record after the last two path components.
read -r -d '' mapData <<-'EOF' || true
{
dataset: (.path | split("/")[-2:] | join("/")),
"non-failed-domains",
requests,
"internal-secure-coverage",
"external-secure-coverage",
}
EOF

# jq program that renames keys with numeric prefixes for the TSV stage.
read -r -d '' renameForTsvColumnOrdering <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Non-failed domains": ."non-failed-domains",
"03--All secure": .requests,
"04--Internal secure": ."internal-secure-coverage",
"05--External secure": ."external-secure-coverage",
}
)
EOF

read -r -d '' sortObjects <<-'EOF' || true
sort_by(.dataset)
EOF

# Step 1: collect one JSON object per dataset.
# '&&', '|' and '"$PWD"' are passed as literal arguments and the jq program is
# wrapped in single quotes; presumably dataset-query.sh re-evaluates the
# command inside each dataset directory -- confirm against that helper.
"$utilDir/dataset-query.sh" "$@" -- \
	test -e "$aggregatesAnalysisJson" '&&' \
	cat "$aggregatesAnalysisJson" '|' \
	jq --arg path '"$PWD"' "'$getNonFailedClassificationSecure'" \
	>"$outputPrefix.json"

# Step 2: reshape the records, gather them into one array, sort by dataset.
jq "$mapData" <"$outputPrefix.json" \
	| "$utilDir/to-array.sh" \
	| jq "$sortObjects" \
	>"$outputPrefix.sorted.json"

# Step 3: rename the columns and convert the sorted array to TSV.
jq "$renameForTsvColumnOrdering" <"$outputPrefix.sorted.json" \
	| "$utilDir/array-of-objects-to-tsv.sh" \
	| "$utilDir/clean-tsv-sorted-header.sh" \
	>"$outputPrefix.sorted.tsv"
48 changes: 48 additions & 0 deletions src/multiset/non-failed.disconnect.categories.sh
@@ -0,0 +1,48 @@
#!/usr/bin/env bash
#
# Builds a per-dataset table of the `blocks.categories` coverage values
# (Disconnect, Content, Advertising, Analytics, Social) for external URLs of
# non-failed origins, read from each dataset's aggregates.analysis.json.
#
# Usage: all arguments are forwarded unchanged to ../util/dataset-query.sh.
#
# Outputs (written to the current directory):
#   datasets.non-failed.disconnect.categories.json
#   datasets.non-failed.disconnect.categories.sorted.json
#   datasets.non-failed.disconnect.categories.sorted.tsv
set -e
# With plain `set -e` only the last stage of a pipeline is checked; pipefail
# makes a failing jq in an earlier stage abort the script instead of leaving
# a truncated output file behind.
set -o pipefail

# Directory holding this script. `${BASH_SOURCE%/*}` returns the script name
# itself when the script was invoked without any slash in its path, so fall
# back to the current directory in that case.
scriptDir="${BASH_SOURCE[0]%/*}"
if [[ "$scriptDir" == "${BASH_SOURCE[0]}" ]]; then
	scriptDir="."
fi
utilDir="$scriptDir/../util"

aggregatesAnalysisJson="aggregates.analysis.json"
outputPrefix="datasets.non-failed.disconnect.categories"

# `read -d ''` reads the whole here-document and returns non-zero at end of
# input, hence the `|| true` guards below.

# jq program run once per dataset: picks the origin count and the whole
# categories object, tagged with the dataset path.
read -r -d '' getNonFailedDisconnectCategoryCounts <<-'EOF' || true
{
path: $path,
"non-failed-domains": .successfulOrigin.origin.counts.count,
categories: .successfulOrigin.externalUrls.requestedUrlsDistinct.coverage.blocks.categories
}
EOF

# jq program that flattens the categories into fixed columns; a category that
# is absent from a dataset is reported as 0.
read -r -d '' mapData <<-'EOF' || true
{
dataset: (.path | split("/")[-2:] | join("/")),
"non-failed-domains",
"Disconnect": (.categories.Disconnect // 0),
"Content": (.categories.Content // 0),
"Advertising": (.categories.Advertising // 0),
"Analytics": (.categories.Analytics // 0),
"Social": (.categories.Social // 0)
}
EOF

# jq program that renames keys with numeric prefixes for the TSV stage.
read -r -d '' renameForTsvColumnOrdering <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Non-failed domains": ."non-failed-domains",
"03--Disconnect": .Disconnect,
"04--Content": .Content,
"05--Advertising": .Advertising,
"06--Analytics": .Analytics,
"07--Social": .Social
}
)
EOF

read -r -d '' sortObjects <<-'EOF' || true
sort_by(.dataset)
EOF

# Step 1: collect one JSON object per dataset.
# '&&', '|' and '"$PWD"' are passed as literal arguments and the jq program is
# wrapped in single quotes; presumably dataset-query.sh re-evaluates the
# command inside each dataset directory -- confirm against that helper.
"$utilDir/dataset-query.sh" "$@" -- \
	test -e "$aggregatesAnalysisJson" '&&' \
	cat "$aggregatesAnalysisJson" '|' \
	jq --arg path '"$PWD"' "'$getNonFailedDisconnectCategoryCounts'" \
	>"$outputPrefix.json"

# Step 2: reshape the records, gather them into one array, sort by dataset.
jq "$mapData" <"$outputPrefix.json" \
	| "$utilDir/to-array.sh" \
	| jq "$sortObjects" \
	>"$outputPrefix.sorted.json"

# Step 3: rename the columns and convert the sorted array to TSV.
jq "$renameForTsvColumnOrdering" <"$outputPrefix.sorted.json" \
	| "$utilDir/array-of-objects-to-tsv.sh" \
	| "$utilDir/clean-tsv-sorted-header.sh" \
	>"$outputPrefix.sorted.tsv"
56 changes: 56 additions & 0 deletions src/multiset/non-failed.url.counts.sh
@@ -0,0 +1,56 @@
#!/usr/bin/env bash
#
# Builds a per-dataset table of request counts (all, internal, external) for
# non-failed origins, plus derived ratios, read from each dataset's
# aggregates.analysis.json.
#
# Usage: all arguments are forwarded unchanged to ../util/dataset-query.sh.
#
# Outputs (written to the current directory):
#   datasets.non-failed.url.counts.json
#   datasets.non-failed.url.counts.sorted.json
#   datasets.non-failed.url.counts.sorted.tsv
set -e
# With plain `set -e` only the last stage of a pipeline is checked; pipefail
# makes a failing jq in an earlier stage abort the script instead of leaving
# a truncated output file behind.
set -o pipefail

# Directory holding this script. `${BASH_SOURCE%/*}` returns the script name
# itself when the script was invoked without any slash in its path, so fall
# back to the current directory in that case.
scriptDir="${BASH_SOURCE[0]%/*}"
if [[ "$scriptDir" == "${BASH_SOURCE[0]}" ]]; then
	scriptDir="."
fi
utilDir="$scriptDir/../util"

aggregatesAnalysisJson="aggregates.analysis.json"
outputPrefix="datasets.non-failed.url.counts"

# `read -d ''` reads the whole here-document and returns non-zero at end of
# input, hence the `|| true` guards below.

# jq program run once per dataset: picks the origin count and the request
# counts for unfiltered, internal and external URLs.
read -r -d '' getNonFailedUrlCounts <<-'EOF' || true
{
path: $path,
"non-failed-domains": .successfulOrigin.origin.counts.count,
requests: .successfulOrigin.unfilteredUrls.requestedUrls.counts.count,
"internal-requests": .successfulOrigin.internalUrls.requestedUrls.counts.count,
"external-requests": .successfulOrigin.externalUrls.requestedUrls.counts.count,
}
EOF

# jq program that adds the derived ratio columns.
# jq raises an error when dividing by zero or dividing null values, which
# would drop (or, with pipefail, abort on) a dataset that has no requests or
# no non-failed domains. `ratio` yields null for those records instead.
read -r -d '' mapData <<-'EOF' || true
def ratio(numerator; denominator):
if (numerator == null) or ((denominator // 0) == 0)
then null
else (numerator / denominator)
end;
{
dataset: (.path | split("/")[-2:] | join("/")),
"non-failed-domains",
requests,
"internal-requests",
"external-requests",
#"internal-requests-ratio": ratio(."internal-requests"; .requests),
"external-requests-ratio": ratio(."external-requests"; .requests),
"requests-per-domain": ratio(.requests; ."non-failed-domains"),
#"internal-requests-per-domain": ratio(."internal-requests"; ."non-failed-domains"),
"external-requests-per-domain": ratio(."external-requests"; ."non-failed-domains"),
}
EOF

# jq program that renames keys with numeric prefixes for the TSV stage.
# The commented-out entries mirror the disabled columns in mapData.
read -r -d '' renameForTsvColumnOrdering <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Non-failed domains": ."non-failed-domains",
"03--Requests": .requests,
"04--Internal requests": ."internal-requests",
"05--External requests": ."external-requests",
#"06--Internal ratio": ."internal-requests-ratio",
"07--External ratio": ."external-requests-ratio",
"08--Requests per domain": ."requests-per-domain",
#"09--Internal requests per domain": ."internal-requests-per-domain",
"10--External requests per domain": ."external-requests-per-domain",
}
)
EOF

read -r -d '' sortObjects <<-'EOF' || true
sort_by(.dataset)
EOF

# Step 1: collect one JSON object per dataset.
# '&&', '|' and '"$PWD"' are passed as literal arguments and the jq program is
# wrapped in single quotes; presumably dataset-query.sh re-evaluates the
# command inside each dataset directory -- confirm against that helper.
"$utilDir/dataset-query.sh" "$@" -- \
	test -e "$aggregatesAnalysisJson" '&&' \
	cat "$aggregatesAnalysisJson" '|' \
	jq --arg path '"$PWD"' "'$getNonFailedUrlCounts'" \
	>"$outputPrefix.json"

# Step 2: reshape the records, gather them into one array, sort by dataset.
jq "$mapData" <"$outputPrefix.json" \
	| "$utilDir/to-array.sh" \
	| jq "$sortObjects" \
	>"$outputPrefix.sorted.json"

# Step 3: rename the columns and convert the sorted array to TSV.
jq "$renameForTsvColumnOrdering" <"$outputPrefix.sorted.json" \
	| "$utilDir/array-of-objects-to-tsv.sh" \
	| "$utilDir/clean-tsv-sorted-header.sh" \
	>"$outputPrefix.sorted.tsv"

0 comments on commit 725754b

Please sign in to comment.