Skip to content
This repository has been archived by the owner on Jun 1, 2021. It is now read-only.

Commit

Permalink
Calculate differences in failed versus non-failed downloads in subseq…
Browse files Browse the repository at this point in the history
…uent datasets where failed domains have been retried.
  • Loading branch information
joelpurra committed Aug 27, 2014
1 parent 3270bf0 commit 1d8b7ed
Showing 1 changed file with 92 additions and 0 deletions.
92 changes: 92 additions & 0 deletions src/multiset/download-retries.sh
@@ -0,0 +1,92 @@
#!/usr/bin/env bash
# Abort on the first failing command. `pipefail` extends that to every stage of
# a pipeline; without it only the exit status of the last stage is checked, so a
# failing `jq` in the middle of a pipeline would silently produce truncated or
# empty output files while the script still exits successfully.
set -e
set -o pipefail

# Calculate differences in failed versus non-failed downloads in subsequent datasets where failed domains have been retried.
#
# USAGE:
# "$0" <folder(s)>
#
# Each folder must contain the file "$aggregatesAnalysisJson".
#
# OUTPUT (written to the current working directory):
# "datasets.retries.json" Retry counts and coverage.
# "datasets.retries.rates.json" Rates and rate of change calculated.
# "datasets.retries.rates.csv" CSV version.

# Name of the per-folder input file that the counts are read from.
aggregatesAnalysisJson="aggregates.analysis.json"

# jq program: build the retry counts object for a single dataset.
#
# Input: the parsed contents of one "$aggregatesAnalysisJson" file.
# Requires the jq variable $path (supplied with `--arg path` by the caller).
# Output: { path, domains: { counts: { all, failed, "not-failed" },
#           coverage: { all, failed, "not-failed" } } }
# where each coverage value is the matching count divided by counts.all
# (coverage.all is the constant 1).
#
# NOTE(review): the divisions raise a jq error if counts.all is 0 or missing;
# confirm that every dataset has at least one domain.
#
# `read -d ''` reads the whole here-document into the variable; it returns a
# non-zero status because it reaches end of input without finding a delimiter,
# hence the `|| true` to keep `set -e` from aborting the script.
read -d '' getRetriesCountsQueries <<-'EOF' || true
{
path: $path,
domains: {
counts: {
all: .unfiltered.origin.counts.count,
failed: .unfiltered.origin.counts.classification."is-failed-request",
"not-failed": .successfulOrigin.origin.counts.count
}
}
}
| .domains.coverage = {
all: 1,
failed: (.domains.counts.failed / .domains.counts.all),
"not-failed": (.domains.counts."not-failed" / .domains.counts.all)
}
EOF

# jq program: flatten one retry counts object into a single table row.
#
# Output: { dataset, domains, failed, rate } where
#   dataset  is the last two "/"-separated components of .path,
#   domains  is .domains.counts.all,
#   failed   is .domains.counts.failed,
#   rate     is .domains.coverage.failed.
#
# `|| true` is needed because `read -d ''` returns non-zero at end of input.
read -d '' mapData <<-'EOF' || true
{
dataset: (.path | split("/")[-2:] | join("/")),
domains: .domains.counts.all,
failed: .domains.counts.failed,
rate: .domains.coverage.failed
}
EOF

# jq program: add a "rateOfChange" field to every row of an array of rows.
#
# Input: an array of objects, each with at least "dataset" and "rate".
# Output: the same rows sorted by "dataset", each extended with "rateOfChange":
#   - "-" for the first row (there is no previous row to compare with),
#   - "-" when the previous row's rate is 0 (relative change is undefined),
#   - otherwise (rate - previousRate) / previousRate.
#
# An empty input array is returned unchanged. Without the `length == 0` guard
# the assignment `.[0].rateOfChange = "-"` would create element 0 and turn `[]`
# into `[{"rateOfChange":"-"}]`, which ends up as a bogus row in the output.
#
# The previous rate is taken from the last element accumulated so far
# (`$current[-1:][0]`), which is always the row just before $item.
#
# `|| true` is needed because `read -d ''` returns non-zero at end of input.
read -d '' calculateRate <<-'EOF' || true
def rateOfChange(previous; current):
previous as $previous
| current as $current
| ($current - $previous) as $delta
| ($delta/$previous);
sort_by(.dataset)
| if length == 0 then
.
else
.[0].rateOfChange = "-"
| reduce .[1:][] as $item (
[
.[0]
];
. as $current
| $current[-1:][0].rate as $prevRate
| $current
+ [
$item
+ {
rateOfChange: (
if $prevRate == 0 then
"-"
else
rateOfChange($prevRate; $item.rate)
end
)
}
]
)
end
EOF

# jq program: rename the fields of every row to human-readable column titles
# carrying a two-digit "NN--" prefix.
#
# The prefix is removed again from the first line of the CSV by the `sed`
# expression at the end of the final pipeline in this script.
# NOTE(review): presumably the prefix exists because the CSV converter orders
# columns by key name; confirm against util/array-of-objects-to-csv.sh.
#
# `|| true` is needed because `read -d ''` returns non-zero at end of input.
read -d '' renameForCsvColumnOrdering <<-'EOF' || true
map(
{
"01--Dataset": .dataset,
"02--Domains": .domains,
"03--Failed": .failed,
"04--Failure Rate": .rate,
"05--Rate of Change": .rateOfChange
}
)
EOF

"${BASH_SOURCE%/*}/../util/dataset-query.sh" "$@" -- test -e "$aggregatesAnalysisJson" '&&' cat "$aggregatesAnalysisJson" '|' jq --arg path '"$PWD"' "'$getRetriesCountsQueries'" >"datasets.retries.json"

<"datasets.retries.json" jq "$mapData" | "${BASH_SOURCE%/*}/../util/to-array.sh" | jq "$calculateRate" >"datasets.retries.rates.json"

<"datasets.retries.rates.json" jq "$renameForCsvColumnOrdering" | "${BASH_SOURCE%/*}/../util/array-of-objects-to-csv.sh" | sed '1s/"[[:digit:]][[:digit:]]--/"/g' >"datasets.retries.rates.csv"

0 comments on commit 1d8b7ed

Please sign in to comment.