Skip to content
This repository has been archived by the owner on Jun 1, 2021. It is now read-only.

Commit

Permalink
Use only domains with internal/external requests in domain count columns
Browse files Browse the repository at this point in the history
  • Loading branch information
joelpurra committed Dec 12, 2014
1 parent 824c170 commit 4e5d166
Show file tree
Hide file tree
Showing 14 changed files with 79 additions and 50 deletions.
8 changes: 4 additions & 4 deletions src/multiset/non-failed.classification.disconnect.coverage.sh
Expand Up @@ -7,27 +7,27 @@ read -d '' getNonFailedClassificationDisconnect <<-'EOF' || true
{
path: $path,
"non-failed-domains": .successfulOrigin.origin.counts.count,
"non-failed-domains-with-internal-requests": .successfulOrigin.internalUrls.requestedUrlsDistinct.counts.countDistinct,
"non-failed-domains-with-external-requests": .successfulOrigin.externalUrls.requestedUrlsDistinct.counts.countDistinct,
"all-disconnect-coverage": .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-disconnect-match",
"all-not-disconnect-coverage": .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-not-disconnect-match",
"internal-disconnect-coverage": .successfulOrigin.internalUrls.requestedUrlsDistinct.coverage.classification."is-disconnect-match",
"internal-not-disconnect-coverage": .successfulOrigin.internalUrls.requestedUrlsDistinct.coverage.classification."is-not-disconnect-match",
"non-failed-domains-with-internal-requests": .successfulOrigin.internalUrls.requestedUrlsDistinct.counts.countDistinct,
"external-disconnect-coverage": .successfulOrigin.externalUrls.requestedUrlsDistinct.coverage.classification."is-disconnect-match",
"external-not-disconnect-coverage": .successfulOrigin.externalUrls.requestedUrlsDistinct.coverage.classification."is-not-disconnect-match",
"non-failed-domains-with-external-requests": .successfulOrigin.externalUrls.requestedUrlsDistinct.counts.countDistinct,
}
EOF

read -d '' mapData <<-'EOF' || true
{
dataset: (.path | split("/")[-1:][0]),
"non-failed-domains",
"non-failed-domains-with-internal-requests",
"non-failed-domains-with-external-requests",
"all-disconnect-coverage",
"all-not-disconnect-coverage",
"non-failed-domains-with-internal-requests",
"internal-disconnect-coverage",
"internal-not-disconnect-coverage",
"non-failed-domains-with-external-requests",
"external-disconnect-coverage",
"external-not-disconnect-coverage",
"all-mixed-coverage": (1 - ."all-disconnect-coverage" - ."all-not-disconnect-coverage"),
Expand Down
Expand Up @@ -6,15 +6,15 @@ aggregatesAnalysisJson="aggregates.analysis.json"
# Stores a jq object-construction filter in $getNonFailedDisconnectCategoryCounts.
# The filter picks, per dataset: the jq variable $path, the count of successful origins,
# the distinct count under externalUrls.requestedUrlsDistinct, and the Disconnect
# category coverage of those external URLs.
# `$path` must be provided by the jq invocation (not visible here — presumably via --arg).
# `read -d ''` hits end of input without finding a NUL delimiter and returns non-zero;
# `|| true` keeps that from counting as a failure.
read -d '' getNonFailedDisconnectCategoryCounts <<-'EOF' || true
{
path: $path,
"non-failed-domains": .successfulOrigin.origin.counts.count,
"non-failed-domains-with-external-requests": .successfulOrigin.externalUrls.requestedUrlsDistinct.counts.countDistinct,
categories: .successfulOrigin.externalUrls.requestedUrlsDistinct.coverage.blocks.categories
}
EOF

read -d '' mapData <<-'EOF' || true
{
dataset: (.path | split("/")[-1:][0]),
"non-failed-domains",
"non-failed-domains-with-external-requests",
"Disconnect": (.categories.Disconnect // 0),
"Content": (.categories.Content // 0),
"Advertising": (.categories.Advertising // 0),
Expand All @@ -27,7 +27,7 @@ read -d '' renameForTsvColumnOrdering <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Domains": ."non-failed-domains",
"02--Domains": ."non-failed-domains-with-external-requests",
"03--Disconnect": .Disconnect,
"04--Content": .Content,
"05--Advertising": .Advertising,
Expand Down
8 changes: 4 additions & 4 deletions src/multiset/non-failed.disconnect.counts.sh
Expand Up @@ -7,7 +7,7 @@ disconnectAnalysisFile="prepared.disconnect.services.analysis.json"
read -d '' getNonFailedDisconnectCount <<-'EOF' || true
{
path: $path,
"non-failed-domains": .successfulOrigin.origin.counts.count,
"non-failed-domains-with-external-requests": .successfulOrigin.externalUrls.requestedUrlsDistinct.counts.countDistinct,
disconnectRequests: (.successfulOrigin.externalUrls.requestedUrls.counts.blocks.disconnect.domains | add),
Expand All @@ -22,10 +22,10 @@ read -d '' getNonFailedDisconnectCount <<-'EOF' || true
EOF

read -d '' mapData <<-'EOF' || true
(.disconnectRequests / ."non-failed-domains") as $disconnectRequestsPerDomain
(.disconnectRequests / ."non-failed-domains-with-external-requests") as $disconnectRequestsPerDomain
| {
dataset: (.path | split("/")[-1:][0]),
"non-failed-domains",
"non-failed-domains-with-external-requests",
disconnectRequests,
disconnectDomainCount,
Expand All @@ -48,7 +48,7 @@ read -d '' renameForTsvColumnOrdering <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Domains": ."non-failed-domains",
"02--Domains": ."non-failed-domains-with-external-requests",
"03--D Requests": .disconnectRequests,
"04--D Domains": .disconnectDomainCount,
Expand Down
Expand Up @@ -6,15 +6,15 @@ aggregatesAnalysisJson="aggregates.analysis.json"
# Stores a jq object-construction filter in $getNonFailedDisconnectCategoryCounts.
# The filter picks, per dataset: the jq variable $path, the count of successful origins,
# the distinct count under externalUrls.requestedUrlsDistinct, and the per-domain
# coverage map (`coverage.blocks.domains`) of those external URLs.
# NOTE(review): the variable is named "...CategoryCounts" but this filter extracts
# `domains`, not categories — the name looks copied from the categories script.
# `read -d ''` hits end of input without finding a NUL delimiter and returns non-zero;
# `|| true` keeps that from counting as a failure.
read -d '' getNonFailedDisconnectCategoryCounts <<-'EOF' || true
{
path: $path,
"non-failed-domains": .successfulOrigin.origin.counts.count,
"non-failed-domains-with-external-requests": .successfulOrigin.externalUrls.requestedUrlsDistinct.counts.countDistinct,
domains: .successfulOrigin.externalUrls.requestedUrlsDistinct.coverage.blocks.domains
}
EOF

read -d '' mapData <<-'EOF' || true
{
dataset: (.path | split("/")[-1:][0]),
"non-failed-domains",
"non-failed-domains-with-external-requests",
"www.google.com": (.domains."www.google.com" // 0),
"doubleclick.net": (.domains."doubleclick.net" // 0),
"google-analytics.com": (.domains."google-analytics.com" // 0),
Expand All @@ -30,7 +30,7 @@ read -d '' renameForTsvColumnOrdering <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Domains": ."non-failed-domains",
"02--Domains w/ ext": ."non-failed-domains-with-external-requests",
"03--www.google.com": ."www.google.com",
"04--doubleclick.net": ."doubleclick.net",
"05--google-analytics.com": ."google-analytics.com",
Expand Down
Expand Up @@ -6,15 +6,15 @@ aggregatesAnalysisJson="aggregates.analysis.json"
# Stores a jq object-construction filter in $getNonFailedDisconnectCategoryCounts.
# The filter picks, per dataset: the jq variable $path, the count of successful origins,
# the distinct count under externalUrls.requestedUrlsDistinct, and the per-domain
# coverage map (`coverage.blocks.domains`) of those external URLs.
# NOTE(review): the variable is named "...CategoryCounts" but this filter extracts
# `domains`, not categories — the name looks copied from the categories script.
# `read -d ''` hits end of input without finding a NUL delimiter and returns non-zero;
# `|| true` keeps that from counting as a failure.
read -d '' getNonFailedDisconnectCategoryCounts <<-'EOF' || true
{
path: $path,
"non-failed-domains": .successfulOrigin.origin.counts.count,
"non-failed-domains-with-external-requests": .successfulOrigin.externalUrls.requestedUrlsDistinct.counts.countDistinct,
domains: .successfulOrigin.externalUrls.requestedUrlsDistinct.coverage.blocks.domains
}
EOF

read -d '' mapData <<-'EOF' || true
{
dataset: (.path | split("/")[-1:][0]),
"non-failed-domains",
"non-failed-domains-with-external-requests",
"facebook.com": (.domains."facebook.com" // 0),
"twitter.com": (.domains."twitter.com" // 0),
"cloudfront.net": (.domains."cloudfront.net" // 0),
Expand All @@ -30,7 +30,7 @@ read -d '' renameForTsvColumnOrdering <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Domains": ."non-failed-domains",
"02--Domains w/ ext": ."non-failed-domains-with-external-requests",
"03--facebook.com": ."facebook.com",
"04--twitter.com": ."twitter.com",
"05--cloudfront.net": ."cloudfront.net",
Expand Down
Expand Up @@ -6,15 +6,15 @@ aggregatesAnalysisJson="aggregates.analysis.json"
# Stores a jq object-construction filter in $getNonFailedDisconnectCategoryCounts.
# The filter picks, per dataset: the jq variable $path, the count of successful origins,
# the distinct count under externalUrls.requestedUrlsDistinct, and the per-organization
# coverage map (`coverage.blocks.organizations`) of those external URLs.
# NOTE(review): the variable is named "...CategoryCounts" but this filter extracts
# `organizations`, not categories — the name looks copied from the categories script.
# `read -d ''` hits end of input without finding a NUL delimiter and returns non-zero;
# `|| true` keeps that from counting as a failure.
read -d '' getNonFailedDisconnectCategoryCounts <<-'EOF' || true
{
path: $path,
"non-failed-domains": .successfulOrigin.origin.counts.count,
"non-failed-domains-with-external-requests": .successfulOrigin.externalUrls.requestedUrlsDistinct.counts.countDistinct,
organizations: .successfulOrigin.externalUrls.requestedUrlsDistinct.coverage.blocks.organizations
}
EOF

read -d '' mapData <<-'EOF' || true
{
dataset: (.path | split("/")[-1:][0]),
"non-failed-domains",
"non-failed-domains-with-external-requests",
"Google": (.organizations."Google" // 0),
"Facebook": (.organizations."Facebook" // 0),
"Twitter": (.organizations."Twitter" // 0),
Expand All @@ -38,7 +38,7 @@ read -d '' renameForTsvColumnOrdering <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Domains": ."non-failed-domains",
"02--Domains w/ ext": ."non-failed-domains-with-external-requests",
"03--Google": ."Google",
"04--Facebook": ."Facebook",
"05--Twitter": ."Twitter",
Expand Down
Expand Up @@ -6,15 +6,15 @@ aggregatesAnalysisJson="aggregates.analysis.json"
# Stores a jq object-construction filter in $getNonFailedMimeTypes.
# The filter picks, per dataset: the jq variable $path, the count of successful origins,
# the distinct count under externalUrls.requestedUrlsDistinct, and the resource-kind
# group coverage (`coverage."kinds-resource".groups`) of those external URLs.
# `$path` must be provided by the jq invocation (not visible here — presumably via --arg).
# `read -d ''` hits end of input without finding a NUL delimiter and returns non-zero;
# `|| true` keeps that from counting as a failure.
read -d '' getNonFailedMimeTypes <<-'EOF' || true
{
path: $path,
"non-failed-domains": .successfulOrigin.origin.counts.count,
"non-failed-domains-with-external-requests": .successfulOrigin.externalUrls.requestedUrlsDistinct.counts.countDistinct,
"kinds-resource-groups": .successfulOrigin.externalUrls.requestedUrlsDistinct.coverage."kinds-resource".groups
}
EOF

read -d '' mapData <<-'EOF' || true
{
dataset: (.path | split("/")[-1:][0]),
"non-failed-domains",
"non-failed-domains-with-external-requests",
"html": (."kinds-resource-groups"."html" // 0),
"script": (."kinds-resource-groups"."script" // 0),
"style": (."kinds-resource-groups"."style" // 0),
Expand All @@ -33,7 +33,7 @@ read -d '' renameForTsvColumnOrdering <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Domains": ."non-failed-domains",
"02--Domains w/ ext": ."non-failed-domains-with-external-requests",
"03--html": ."html",
"04--script": ."script",
"05--style": ."style",
Expand Down
Expand Up @@ -6,15 +6,15 @@ aggregatesAnalysisJson="aggregates.analysis.json"
# Stores a jq object-construction filter in $getNonFailedMimeTypes for the
# internal-request resource-kind coverage table.
# The filter picks, per dataset: the jq variable $path, the count of successful origins,
# the distinct count under internalUrls.requestedUrlsDistinct, and the resource-kind
# group coverage (`coverage."kinds-resource".groups`) of those internal URLs.
# The "with-internal-requests" count must come from internalUrls so that it describes
# the same URL set as the coverage groups it is reported next to; reading it from
# externalUrls would pair internal coverage with the external domain count.
# `$path` must be provided by the jq invocation (not visible here — presumably via --arg).
# `read -d ''` hits end of input without finding a NUL delimiter and returns non-zero;
# `|| true` keeps that from counting as a failure.
read -d '' getNonFailedMimeTypes <<-'EOF' || true
{
path: $path,
"non-failed-domains": .successfulOrigin.origin.counts.count,
"non-failed-domains-with-internal-requests": .successfulOrigin.internalUrls.requestedUrlsDistinct.counts.countDistinct,
"kinds-resource-groups": .successfulOrigin.internalUrls.requestedUrlsDistinct.coverage."kinds-resource".groups
}
EOF

read -d '' mapData <<-'EOF' || true
{
dataset: (.path | split("/")[-1:][0]),
"non-failed-domains",
"non-failed-domains-with-internal-requests",
"html": (."kinds-resource-groups"."html" // 0),
"script": (."kinds-resource-groups"."script" // 0),
"style": (."kinds-resource-groups"."style" // 0),
Expand All @@ -33,7 +33,7 @@ read -d '' renameForTsvColumnOrdering <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Domains": ."non-failed-domains",
"02--Domains w/ int": ."non-failed-domains-with-internal-requests",
"03--html": ."html",
"04--script": ."script",
"05--style": ."style",
Expand Down
6 changes: 3 additions & 3 deletions src/multiset/non-failed.public-suffix.coverage.external.sh
Expand Up @@ -6,15 +6,15 @@ aggregatesAnalysisJson="aggregates.analysis.json"
# Stores a jq object-construction filter in $getNonFailedDisconnectCategoryCounts.
# The filter picks, per dataset: the jq variable $path, the count of successful origins,
# the distinct count under externalUrls.requestedUrlsDistinct, and the public-suffix
# coverage map (`coverage.urls."public-suffixes"`) of those external URLs.
# NOTE(review): the variable is named "...DisconnectCategoryCounts" but this filter
# extracts public suffixes — the name looks copied from the categories script.
# `read -d ''` hits end of input without finding a NUL delimiter and returns non-zero;
# `|| true` keeps that from counting as a failure.
read -d '' getNonFailedDisconnectCategoryCounts <<-'EOF' || true
{
path: $path,
"non-failed-domains": .successfulOrigin.origin.counts.count,
"non-failed-domains-with-external-requests": .successfulOrigin.externalUrls.requestedUrlsDistinct.counts.countDistinct,
"public-suffixes": .successfulOrigin.externalUrls.requestedUrlsDistinct.coverage.urls."public-suffixes"
}
EOF

read -d '' mapData <<-'EOF' || true
{
dataset: (.path | split("/")[-1:][0]),
"non-failed-domains",
"non-failed-domains-with-external-requests",
"se": (."public-suffixes"."se" // 0),
"dk": (."public-suffixes"."dk" // 0),
"com": (."public-suffixes"."com" // 0),
Expand All @@ -36,7 +36,7 @@ read -d '' renameForTsvColumnOrdering <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Domains": ."non-failed-domains",
"02--Domains w/ ext": ."non-failed-domains-with-external-requests",
"03--se": ."se",
"04--dk": ."dk",
"05--com": ."com",
Expand Down
58 changes: 41 additions & 17 deletions src/multiset/non-failed.requests.counts.sh
Expand Up @@ -7,9 +7,12 @@ read -d '' getNonFailedUrlCounts <<-'EOF' || true
{
path: $path,
"non-failed-domains": .successfulOrigin.origin.counts.count,
"non-failed-domains-with-internal-requests": .successfulOrigin.internalUrls.requestedUrlsDistinct.counts.countDistinct,
"non-failed-domains-with-external-requests": .successfulOrigin.externalUrls.requestedUrlsDistinct.counts.countDistinct,
unfilteredRequests: .successfulOrigin.unfilteredUrls.requestedUrls.counts.count,
internalRequests: .successfulOrigin.internalUrls.requestedUrls.counts.count,
externalRequests: .successfulOrigin.externalUrls.requestedUrls.counts.count,
# TODO: use a proper count instead of adding domain counts.
externalRequestsDisconnectMatches: (.successfulOrigin.externalUrls.requestedUrls.counts.blocks.disconnect.domains | add),
}
EOF
Expand All @@ -18,38 +21,57 @@ read -d '' mapData <<-'EOF' || true
{
dataset: (.path | split("/")[-1:][0]),
"non-failed-domains",
"non-failed-domains-with-internal-requests",
"non-failed-domains-with-external-requests",
withInternalRatio: (."non-failed-domains-with-internal-requests" / ."non-failed-domains"),
withExternalRatio: (."non-failed-domains-with-external-requests" / ."non-failed-domains"),
unfilteredRequests,
internalRequests,
externalRequests,
externalRequestsDisconnectMatches,
unfilteredRequestsPerDomain: (.unfilteredRequests / ."non-failed-domains"),
internalRequestsPerDomain: (.internalRequests / ."non-failed-domains"),
externalRequestsPerDomain: (.externalRequests / ."non-failed-domains"),
externalRequestsDisconnectMatchesPerDomain: (.externalRequestsDisconnectMatches / ."non-failed-domains"),
internalRequestsPerDomain: (.internalRequests / ."non-failed-domains-with-internal-requests"),
externalRequestsPerDomain: (.externalRequests / ."non-failed-domains-with-external-requests"),
externalRequestsDisconnectMatchesPerDomain: (.externalRequestsDisconnectMatches / ."non-failed-domains-with-external-requests"),
internalRequestsRatio: (.internalRequests / .unfilteredRequests),
externalRequestsRatio: (.externalRequests / .unfilteredRequests),
externalRequestsDisconnectMatchesRatio: (.externalRequestsDisconnectMatches / .unfilteredRequests),
internalRequestsPerInternalRequests: (.externalRequests / .internalRequests),
externalRequestsDisconnectMatchesPerExternalRequests: (.externalRequestsDisconnectMatches / .externalRequests),
}
EOF

read -d '' renameForTsvColumnOrdering <<-'EOF' || true
read -d '' renameForTsvColumnOrderingCounts <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Domains": ."non-failed-domains",
"03--All": ."unfilteredRequests",
"04--Ext": ."externalRequests",
"05--Int": ."internalRequests",
"06--Disco.": ."externalRequestsDisconnectMatches",
"07--A/d": ."unfilteredRequestsPerDomain",
"08--I/d": ."internalRequestsPerDomain",
"09--E/d": ."externalRequestsPerDomain",
"10--D/d": ."externalRequestsDisconnectMatchesPerDomain",
"11--I/A": ."internalRequestsRatio",
"12--E/A": ."externalRequestsRatio",
"13--D/A": ."externalRequestsDisconnectMatchesRatio",
"14--D/E": ."externalRequestsDisconnectMatchesPerExternalRequests",
"03--w/ int": ."non-failed-domains-with-internal-requests",
"04--w/ ext": ."non-failed-domains-with-external-requests",
"05--All requests": .unfilteredRequests,
"06--Int": .internalRequests,
"07--Ext": .externalRequests,
"08--Disco.": .externalRequestsDisconnectMatches,
}
)
EOF

# Stores a jq filter in $renameForTsvColumnOrderingRatios that maps every object of the
# input array to a new object whose keys are "NN--Label" column names for the ratios
# table (written to datasets.non-failed.requests.ratios.sorted.tsv further down).
# The numeric prefixes presumably fix the column order before the header is cleaned by
# clean-tsv-sorted-header.sh — that helper is not visible here, confirm.
# NOTE(review): `.internalRequestsPerInternalRequests` is computed in mapData as
# `.externalRequests / .internalRequests`, i.e. external requests per internal request.
# That matches the "E/I" label used below, but the field name itself is misleading.
# `read -d ''` hits end of input without finding a NUL delimiter and returns non-zero;
# `|| true` keeps that from counting as a failure.
read -d '' renameForTsvColumnOrderingRatios <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Domains": ."non-failed-domains",
"03--w/ int": .withInternalRatio,
"04--w/ ext": .withExternalRatio,
"05--A/d": .unfilteredRequestsPerDomain,
"06--I/di": .internalRequestsPerDomain,
"07--E/de": .externalRequestsPerDomain,
"08--D/de": .externalRequestsDisconnectMatchesPerDomain,
"09--I/A": .internalRequestsRatio,
"10--E/A": .externalRequestsRatio,
"11--D/A": .externalRequestsDisconnectMatchesRatio,
"12--E/I": .internalRequestsPerInternalRequests,
"13--D/E": .externalRequestsDisconnectMatchesPerExternalRequests,
}
)
EOF
Expand All @@ -62,4 +84,6 @@ EOF

<"datasets.non-failed.requests.counts.json" jq "$mapData" | "${BASH_SOURCE%/*}/../util/to-array.sh" | jq "$sortObjects" >"datasets.non-failed.requests.counts.sorted.json"

<"datasets.non-failed.requests.counts.sorted.json" jq "$renameForTsvColumnOrdering" | "${BASH_SOURCE%/*}/../util/array-of-objects-to-tsv.sh" | "${BASH_SOURCE%/*}/../util/clean-tsv-sorted-header.sh" >"datasets.non-failed.requests.counts.sorted.tsv"
# Two output files.
<"datasets.non-failed.requests.counts.sorted.json" jq "$renameForTsvColumnOrderingCounts" | "${BASH_SOURCE%/*}/../util/array-of-objects-to-tsv.sh" | "${BASH_SOURCE%/*}/../util/clean-tsv-sorted-header.sh" >"datasets.non-failed.requests.counts.sorted.tsv"
<"datasets.non-failed.requests.counts.sorted.json" jq "$renameForTsvColumnOrderingRatios" | "${BASH_SOURCE%/*}/../util/array-of-objects-to-tsv.sh" | "${BASH_SOURCE%/*}/../util/clean-tsv-sorted-header.sh" >"datasets.non-failed.requests.ratios.sorted.tsv"
2 changes: 1 addition & 1 deletion src/multiset/origin-redirects.sh
Expand Up @@ -58,7 +58,7 @@ map(
{
"01--Dataset": .dataset,
"02--Domains": .nonFailedDomainCount,
"03--With R": .domainWithRedirectCount,
"03--w/ R": .domainWithRedirectCount,
"04--DWR/D": ."domains-with-redirect-ratio",
# "xxxxxxx--Redirects": .redirectCount,
"05--R/DWR": ."redirects-per-domain",
Expand Down
4 changes: 3 additions & 1 deletion src/multiset/ratio-buckets.sh
Expand Up @@ -8,6 +8,7 @@ read -d '' getOriginRedirectAggregates <<-'EOF' || true
path: $path,
domainCount,
nonFailedDomainCount,
nonFailedDomainWithRequestCount,
requestCount,
# Lowercase to allow simple file name generation.
"is-same-domain": .ratios.isSameDomain.normalized.cumulative,
Expand Down Expand Up @@ -94,7 +95,8 @@ map(
| {
"001--Dataset": .dataset,
# "002--Domains": .nonFailedDomainCount,
# "003--Requests": .requestCount,
# "003--Domains with requests": .nonFailedDomainWithRequestCount,
# "004--Requests": .requestCount,
}
+ (
# 101 buckets because it's [0,100].
Expand Down
3 changes: 2 additions & 1 deletion src/one-shot/multiset.sh
Expand Up @@ -10,9 +10,10 @@ set -e
"${BASH_SOURCE%/*}/../multiset/non-failed.mime-types.groups.coverage.origin.sh" "$@"
"${BASH_SOURCE%/*}/../multiset/non-failed.mime-types.groups.coverage.internal.sh" "$@"
"${BASH_SOURCE%/*}/../multiset/non-failed.mime-types.groups.coverage.external.sh" "$@"
"${BASH_SOURCE%/*}/../multiset/non-failed.public-suffix.coverage.external.sh" "$@"
"${BASH_SOURCE%/*}/../multiset/non-failed.classification.domain-scope.coverage.sh" "$@"
"${BASH_SOURCE%/*}/../multiset/non-failed.classification.secure.coverage.sh" "$@"
"${BASH_SOURCE%/*}/../multiset/non-failed.public-suffix.coverage.external.sh" "$@"
"${BASH_SOURCE%/*}/../multiset/non-failed.classification.disconnect.coverage.sh" "$@"
"${BASH_SOURCE%/*}/../multiset/non-failed.disconnect.categories.coverage.external.sh" "$@"
"${BASH_SOURCE%/*}/../multiset/non-failed.disconnect.domains.coverage.external.sh" "$@"
"${BASH_SOURCE%/*}/../multiset/non-failed.disconnect.domains.coverage.external.google.sh" "$@"
Expand Down

0 comments on commit 4e5d166

Please sign in to comment.