Skip to content
This repository has been archived by the owner on Jun 1, 2021. It is now read-only.

Commit

Permalink
Merge branch 'is-disconnect-match'
Browse files Browse the repository at this point in the history
  • Loading branch information
joelpurra committed Dec 8, 2014
2 parents a1c7309 + 84e8341 commit f3fb44d
Show file tree
Hide file tree
Showing 10 changed files with 111 additions and 11 deletions.
2 changes: 2 additions & 0 deletions src/aggregate/analysis.sh
Expand Up @@ -126,6 +126,8 @@ def mangleShared:
"is-same-primary-domain": .classification.isSamePrimaryDomain,
"is-internal-domain": .classification.isInternalDomain,
"is-external-domain": .classification.isExternalDomain,
"is-disconnect-match": .classification.isDisconnectMatch,
"is-not-disconnect-match": .classification.isNotDisconnectMatch,
"is-successful-request": .classification.isSuccessful,
"is-unsuccessful-request": .classification.isUnsuccessful,
"is-failed-request": .classification.isFailed,
Expand Down
4 changes: 4 additions & 0 deletions src/aggregate/prepare.sh
Expand Up @@ -166,6 +166,8 @@ def distinctMangle:
isSamePrimaryDomain: (.[0].classification.isSamePrimaryDomain // false),
isInternalDomain: (.[0].classification.isInternalDomain // false),
isExternalDomain: (.[0].classification.isExternalDomain // false),
isDisconnectMatch: (.[0].classification.isDisconnectMatch // false),
isNotDisconnectMatch: (.[0].classification.isNotDisconnectMatch // false),
isSuccessful: (.[0].classification.isSuccessful // false),
isUnsuccessful: (.[0].classification.isUnsuccessful // false),
isFailed: (.[0].classification.isFailed // false),
Expand Down Expand Up @@ -196,6 +198,8 @@ def distinctMangle:
| .classification.isSamePrimaryDomain = (.classification.isSamePrimaryDomain and $request.classification.isSamePrimaryDomain)
| .classification.isInternalDomain = (.classification.isInternalDomain and $request.classification.isInternalDomain)
| .classification.isExternalDomain = (.classification.isExternalDomain and $request.classification.isExternalDomain)
| .classification.isDisconnectMatch = (.classification.isDisconnectMatch and $request.classification.isDisconnectMatch)
| .classification.isNotDisconnectMatch = (.classification.isNotDisconnectMatch and $request.classification.isNotDisconnectMatch)
| .classification.isSuccessful = (.classification.isSuccessful and $request.classification.isSuccessful)
| .classification.isUnsuccessful = (.classification.isUnsuccessful and $request.classification.isUnsuccessful)
| .classification.isFailed = (.classification.isFailed and $request.classification.isFailed)
Expand Down
8 changes: 8 additions & 0 deletions src/aggregate/prepare2.sh
Expand Up @@ -136,6 +136,8 @@ def base:
isSamePrimaryDomain: 0,
isInternalDomain: 0,
isExternalDomain: 0,
isDisconnectMatch: 0,
isNotDisconnectMatch: 0,
isSuccessful: 0,
isUnsuccessful: 0,
isFailed: 0,
Expand Down Expand Up @@ -179,6 +181,8 @@ def mangleClassification(request):
| .classification.isSamePrimaryDomain += ($request.classification.isSamePrimaryDomain | boolToInt)
| .classification.isInternalDomain += ($request.classification.isInternalDomain | boolToInt)
| .classification.isExternalDomain += ($request.classification.isExternalDomain | boolToInt)
| .classification.isDisconnectMatch += ($request.classification.isDisconnectMatch | boolToInt)
| .classification.isNotDisconnectMatch += ($request.classification.isNotDisconnectMatch | boolToInt)
| .classification.isSuccessful += ($request.classification.isSuccessful | boolToInt)
| .classification.isUnsuccessful += ($request.classification.isUnsuccessful | boolToInt)
| .classification.isFailed += ($request.classification.isFailed | boolToInt)
Expand Down Expand Up @@ -236,6 +240,8 @@ def distinctBase:
isSamePrimaryDomain: 0,
isInternalDomain: 0,
isExternalDomain: 0,
isDisconnectMatch: 0,
isNotDisconnectMatch: 0,
isSuccessful: 0,
isUnsuccessful: 0,
isFailed: 0,
Expand Down Expand Up @@ -279,6 +285,8 @@ def distinctMangleClassification(request):
| .classification.isSamePrimaryDomain += ($request.classification.isSamePrimaryDomain | boolToInt)
| .classification.isInternalDomain += ($request.classification.isInternalDomain | boolToInt)
| .classification.isExternalDomain += ($request.classification.isExternalDomain | boolToInt)
| .classification.isDisconnectMatch += ($request.classification.isDisconnectMatch | boolToInt)
| .classification.isNotDisconnectMatch += ($request.classification.isNotDisconnectMatch | boolToInt)
| .classification.isSuccessful += ($request.classification.isSuccessful | boolToInt)
| .classification.isUnsuccessful += ($request.classification.isUnsuccessful | boolToInt)
| .classification.isFailed += ($request.classification.isFailed | boolToInt)
Expand Down
6 changes: 4 additions & 2 deletions src/classification/disconnect/add.sh
Expand Up @@ -76,10 +76,12 @@ def mangle:
| deleteNullKey("blocks")
else
.
end;
end
| .classification.isDisconnectMatch = (.blocks and .blocks.disconnect and ((.blocks.disconnect | length) > 0))
| .classification.isNotDisconnectMatch = (.classification.isDisconnectMatch | not);
.origin |= mangle
| .requestedUrls[] |= mangle
| .requestedUrls |= map(mangle)
EOF

cat | jq "$classifyExpandedParts" --argfile "disconnect" "$disconnectClassificationFile"
73 changes: 73 additions & 0 deletions src/multiset/non-failed.classification.disconnect.coverage.sh
@@ -0,0 +1,73 @@
#!/usr/bin/env bash
# Collects Disconnect-classification coverage figures from each dataset's
# aggregates analysis file and writes three files to the current directory:
#   datasets.non-failed.classification.disconnect.coverage.json         (raw per-dataset objects)
#   datasets.non-failed.classification.disconnect.coverage.sorted.json  (derived fields added, sorted array)
#   datasets.non-failed.classification.disconnect.coverage.sorted.tsv   (tabular export)
# All script arguments ("$@") are forwarded to ../util/dataset-query.sh.
set -e

# Name of the per-dataset input file that the query command tests for and reads.
aggregatesAnalysisJson="aggregates.analysis.json"

# NOTE on the `read -d '' VAR <<-'EOF' || true` pattern used below:
# `read -d ''` reads the whole here-document into VAR but returns non-zero
# because it hits end-of-input before finding a NUL delimiter; `|| true` keeps
# `set -e` from aborting the script. The quoted 'EOF' disables shell expansion,
# so `$path` inside the jq programs is a jq variable, not a shell variable.

# jq program: extract the dataset path (supplied via `--arg path`), the origin
# count, and the is-disconnect-match / is-not-disconnect-match coverage values
# for unfiltered, internal and external distinct requested URLs, plus the
# distinct counts for the internal and external groups.
read -d '' getNonFailedClassificationDisconnect <<-'EOF' || true
{
path: $path,
"non-failed-domains": .successfulOrigin.origin.counts.count,
"all-disconnect-coverage": .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-disconnect-match",
"all-not-disconnect-coverage": .successfulOrigin.unfilteredUrls.requestedUrlsDistinct.coverage.classification."is-not-disconnect-match",
"internal-disconnect-coverage": .successfulOrigin.internalUrls.requestedUrlsDistinct.coverage.classification."is-disconnect-match",
"internal-not-disconnect-coverage": .successfulOrigin.internalUrls.requestedUrlsDistinct.coverage.classification."is-not-disconnect-match",
"non-failed-domains-with-internal-requests": .successfulOrigin.internalUrls.requestedUrlsDistinct.counts.countDistinct,
"external-disconnect-coverage": .successfulOrigin.externalUrls.requestedUrlsDistinct.coverage.classification."is-disconnect-match",
"external-not-disconnect-coverage": .successfulOrigin.externalUrls.requestedUrlsDistinct.coverage.classification."is-not-disconnect-match",
"non-failed-domains-with-external-requests": .successfulOrigin.externalUrls.requestedUrlsDistinct.counts.countDistinct,
}
EOF

# jq program: take the last "/"-separated segment of .path as the dataset name,
# copy the extracted fields through unchanged, and derive two extra values per
# group (all / internal / external):
#   *-mixed-coverage = 1 - disconnect-coverage - not-disconnect-coverage
#   *-some-coverage  = 1 - not-disconnect-coverage
# NOTE(review): the subtractions assume every coverage value is a number;
# confirm the analysis file always provides them.
read -d '' mapData <<-'EOF' || true
{
dataset: (.path | split("/")[-1:][0]),
"non-failed-domains",
"all-disconnect-coverage",
"all-not-disconnect-coverage",
"non-failed-domains-with-internal-requests",
"internal-disconnect-coverage",
"internal-not-disconnect-coverage",
"non-failed-domains-with-external-requests",
"external-disconnect-coverage",
"external-not-disconnect-coverage",
"all-mixed-coverage": (1 - ."all-disconnect-coverage" - ."all-not-disconnect-coverage"),
"internal-mixed-coverage": (1 - ."internal-disconnect-coverage" - ."internal-not-disconnect-coverage"),
"external-mixed-coverage": (1 - ."external-disconnect-coverage" - ."external-not-disconnect-coverage"),
"all-some-coverage": (1 - ."all-not-disconnect-coverage"),
"internal-some-coverage": (1 - ."internal-not-disconnect-coverage"),
"external-some-coverage": (1 - ."external-not-disconnect-coverage"),
}
EOF

# jq program: map every object in the array to one whose keys carry an
# "NN--" numeric prefix. Only the numbered entries are emitted; the entries
# commented out inside the program are not part of the output.
# NOTE(review): presumably the prefixes fix the TSV column order and are
# stripped again by clean-tsv-sorted-header.sh; verify against that helper.
read -d '' renameForTsvColumnOrdering <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Domains": ."non-failed-domains",
"03--Domains w/ int": ."non-failed-domains-with-internal-requests",
"04--Int non-D": ."internal-not-disconnect-coverage",
# "xxxxxxxxx--Mix int D": ."internal-mixed-coverage",
# "xxxxxxxxx--Int D": ."internal-disconnect-coverage",
"05--Some int D": ."internal-some-coverage",
"06--Domains w/ ext": ."non-failed-domains-with-external-requests",
"07--Ext non-D": ."external-not-disconnect-coverage",
# "xxxxxxxxx--Mix ext D": ."external-mixed-coverage",
# "xxxxxxxxx--Ext D": ."external-disconnect-coverage",
"08--Some ext D": ."external-some-coverage",
"09--All non-D": ."all-not-disconnect-coverage",
# "xxxxxxxxx--Mix D": ."all-mixed-coverage",
# "xxxxxxxxx--All D": ."all-disconnect-coverage",
"10--Some D": ."all-some-coverage",
}
)
EOF

# jq program: order the array of dataset objects by dataset name.
read -d '' sortObjects <<-'EOF' || true
sort_by(.dataset)
EOF

# Step 1: hand a command line to dataset-query.sh. The '&&', '|', '"$PWD"' and
# the single-quote-wrapped jq program are passed as literal text, not
# interpreted by this shell.
# NOTE(review): presumably dataset-query.sh evaluates this command once per
# dataset directory, so "$PWD" resolves to that directory; confirm in the helper.
"${BASH_SOURCE%/*}/../util/dataset-query.sh" "$@" -- test -e "$aggregatesAnalysisJson" '&&' cat "$aggregatesAnalysisJson" '|' jq --arg path '"$PWD"' "'$getNonFailedClassificationDisconnect'" >"datasets.non-failed.classification.disconnect.coverage.json"

# Step 2: add the derived fields to each object, collect the objects through
# to-array.sh, and sort the result by dataset name.
<"datasets.non-failed.classification.disconnect.coverage.json" jq "$mapData" | "${BASH_SOURCE%/*}/../util/to-array.sh" | jq "$sortObjects" >"datasets.non-failed.classification.disconnect.coverage.sorted.json"

# Step 3: rename the keys to their prefixed column names, convert the array of
# objects to TSV, and post-process the header row.
<"datasets.non-failed.classification.disconnect.coverage.sorted.json" jq "$renameForTsvColumnOrdering" | "${BASH_SOURCE%/*}/../util/array-of-objects-to-tsv.sh" | "${BASH_SOURCE%/*}/../util/clean-tsv-sorted-header.sh" >"datasets.non-failed.classification.disconnect.coverage.sorted.tsv"
6 changes: 6 additions & 0 deletions src/multiset/origin-redirects.sh
Expand Up @@ -16,6 +16,8 @@ read -d '' getOriginRedirectAggregates <<-'EOF' || true
isSamePrimaryDomain: .all."per-domain-with-redirect-coverage".isSamePrimaryDomain,
isInternalDomain: .all."per-domain-with-redirect-coverage".isInternalDomain,
isExternalDomain: .all."per-domain-with-redirect-coverage".isExternalDomain,
isDisconnectMatch: .all."per-domain-with-redirect-coverage".isDisconnectMatch,
isNotDisconnectMatch: .all."per-domain-with-redirect-coverage".isNotDisconnectMatch,
isInsecure: .all."per-domain-with-redirect-coverage".isInsecure,
isSecure: .all."per-domain-with-redirect-coverage".isSecure,
hasMissingClassification: .coverage."per-domain-with-redirect-coverage".hasMissingClassification,
Expand All @@ -40,6 +42,8 @@ read -d '' mapData <<-'EOF' || true
isSamePrimaryDomain,
isInternalDomain,
isExternalDomain,
isDisconnectMatch,
isNotDisconnectMatch,
isInsecure,
isSecure,
hasMissingClassification,
Expand All @@ -65,6 +69,8 @@ map(
"06--I": .isInternalDomain,
"07--Mix I+E": .mixedInternalAndExternal,
"08--E": .isExternalDomain,
# "xxxxxxx--D": .isDisconnectMatch,
# "xxxxxxx--NotD": .isNotDisconnectMatch,
"09--Insec": .isInsecure,
"10--Mix sec": .mixedSecurity,
"11--Sec": .isSecure,
Expand Down
11 changes: 6 additions & 5 deletions src/multiset/ratio-buckets.sh
Expand Up @@ -16,13 +16,14 @@ read -d '' getOriginRedirectAggregates <<-'EOF' || true
"is-same-primary-domain": .ratios.isSamePrimaryDomain.normalized.cumulative,
"is-internal-domain": .ratios.isInternalDomain.normalized.cumulative,
"is-external-domain": .ratios.isExternalDomain.normalized.cumulative,
"is-disconnect-match": .ratios.isDisconnectMatch.normalized.cumulative,
"is-not-disconnect-match": .ratios.isNotDisconnectMatch.normalized.cumulative,
"is-insecure": .ratios.isInsecure.normalized.cumulative,
"is-secure": .ratios.isSecure.normalized.cumulative,
"is-disconnect": .ratios.isDisconnect.normalized.cumulative,
"disconnect-domains": .occurrences.disonnectDomains.values.values,
"disconnect-organizations": .occurrences.disonnectOrganizations.values.values,
"disconnect-categories": .occurrences.disonnectCategories.values.values,
"disconnect-domains": .occurrences.disonnectDomains.normalized.cumulative,
"disconnect-organizations": .occurrences.disonnectOrganizations.normalized.cumulative,
"disconnect-categories": .occurrences.disonnectCategories.normalized.cumulative,
}
EOF

Expand Down Expand Up @@ -138,5 +139,5 @@ splitIntoFilesPerBucket() {

"${BASH_SOURCE%/*}/../util/dataset-query.sh" "$@" -- test -e "$ratioBucketsAggregateJson" '&&' cat "$ratioBucketsAggregateJson" '|' jq --arg path '"$PWD"' "'$getOriginRedirectAggregates'" >"datasets.non-failed.ratio-buckets.normalized.cumulative.json"

splitIntoFilesPerBucket "ratio" "is-secure" "is-internal-domain"
splitIntoFilesPerBucket "ratio" "is-secure" "is-internal-domain" "is-disconnect-match"
splitIntoFilesPerBucket "occurrences" "disconnect-organizations"
4 changes: 4 additions & 0 deletions src/questions/origin-redirects.sh
Expand Up @@ -189,6 +189,8 @@ def getFinalIsSecure:
| .counts.isSamePrimaryDomain = redirectClassificationCount(.isSamePrimaryDomain)
| .counts.isInternalDomain = redirectClassificationCount(.isInternalDomain)
| .counts.isExternalDomain = redirectClassificationCount(.isExternalDomain)
| .counts.isDisconnectMatch = redirectClassificationCount(.isDisconnectMatch)
| .counts.isNotDisconnectMatch = redirectClassificationCount(.isNotDisconnectMatch)
| .counts.isSecure = redirectClassificationCount(.isSecure)
| .counts.isInsecure = redirectClassificationCount(.isInsecure)
# hasMissingClassification is a debugging counter, to check if any redirects didn't have a matching subsequent request.
Expand All @@ -205,6 +207,8 @@ def getFinalIsSecure:
| .all.isSamePrimaryDomain = redirectClassificationCoverage(.isSamePrimaryDomain)
| .all.isInternalDomain = redirectClassificationCoverage(.isInternalDomain)
| .all.isExternalDomain = redirectClassificationCoverage(.isExternalDomain)
| .all.isDisconnectMatch = redirectClassificationCoverage(.isDisconnectMatch)
| .all.isNotDisconnectMatch = redirectClassificationCoverage(.isNotDisconnectMatch)
| .all.isSecure = redirectClassificationCoverage(.isSecure)
| .all.isInsecure = redirectClassificationCoverage(.isInsecure)
| .all.hasMissingClassification = ((.counts.hasMissingClassification == .count) | boolToInt)
Expand Down
4 changes: 2 additions & 2 deletions src/questions/ratio-buckets.aggregate.sh
Expand Up @@ -131,10 +131,10 @@ def ratioBucketsBase:
isSamePrimaryDomain: ratioBucket,
isInternalDomain: ratioBucket,
isExternalDomain: ratioBucket,
isDisconnectMatch: ratioBucket,
isNotDisconnectMatch: ratioBucket,
isSecure: ratioBucket,
isInsecure: ratioBucket,
isDisconnect: ratioBucket,
};
def occurrencesBucketsBase:
Expand Down
4 changes: 2 additions & 2 deletions src/questions/ratio-buckets.sh
Expand Up @@ -51,10 +51,10 @@ def requestDisconnectCount(prop):
isSamePrimaryDomain: requestClassificationCount(.isSamePrimaryDomain),
isInternalDomain: requestClassificationCount(.isInternalDomain),
isExternalDomain: requestClassificationCount(.isExternalDomain),
isDisconnectMatch: requestClassificationCount(.isDisconnectMatch),
isNotDisconnectMatch: requestClassificationCount(.isNotDisconnectMatch),
isSecure: requestClassificationCount(.isSecure),
isInsecure: requestClassificationCount(.isInsecure),
isDisconnect: ($disconnectUrls | length),
},
uniqueCounts: {
disonnectDomains: ($disconnectUrlEntries | requestDisconnectCount(.domain)),
Expand Down

0 comments on commit f3fb44d

Please sign in to comment.