# Patch overview (commentary only; it sits ahead of the first "diff --git" header).
#
# NOTE(review): the second hunk header declares 57 added lines ("@@ -0,0 +1,57 @@"),
# yet the patch text below occupies only two physical lines, so its line breaks
# appear to have been lost -- confirm against the original commit before applying.
#
# 1. src/one-shot/questions.sh (existing file, mode 100755)
#    Adds a TODO comment and one step: "ratio-buckets.aggregate.json" is fed on
#    stdin to ../questions/ratio-buckets.aggregate.analysis.sh (path built from
#    ${BASH_SOURCE%/*}) and its stdout is written to
#    "ratio-buckets.aggregate.analysis.json".
#
# 2. src/questions/ratio-buckets.aggregate.analysis.sh (new file, mode 100755)
#    Under `set -e`, stores a jq program from a quoted heredoc in the shell
#    variable getRatioBucketAggregateAnalysis, then calls `jq` with that program
#    and no file argument.
#    The `|| true` after `read -d ''` presumably guards against `read` returning
#    non-zero at end of input, which `set -e` would treat as fatal -- verify.
#
#    jq definitions in the program:
#    - averageIndices: pairs each element of the input array with its index,
#      sets .diff to the absolute value of (value - 0.5), and outputs the indices
#      of every element whose .diff equals the smallest .diff (ties are all kept).
#    - averageIndexMinimum / averageIndexMiddle / averageIndexMaximum: pick the
#      element at position 0, floor((length - 1) / 2), and length - 1 of the
#      input array, respectively.
#    - getAverageIndiceRange: runs averageIndices and builds
#      {minimum, middle, maximum} from the three pickers above.
#    - getAverageIndiceRanges: with_entries over the input; for each entry it
#      assigns {} to .value.analysis (replacing whatever was there), assigns {}
#      to .value.analysis.index, and sets .value.analysis.index.average from
#      .value.normalized.cumulative piped through getAverageIndiceRange.
#    The main filter applies getAverageIndiceRanges to .ratios and .occurrences
#    via `|=`.
#    Presumably .value.normalized.cumulative is an array of cumulative fractions
#    in [0, 1], which would make 0.5 the halfway mark -- TODO confirm against
#    ratio-buckets.aggregate.sh.
diff --git a/src/one-shot/questions.sh b/src/one-shot/questions.sh index 969eef1..20e24fa 100755 --- a/src/one-shot/questions.sh +++ b/src/one-shot/questions.sh @@ -15,6 +15,8 @@ set -e <"domains.parts.expanded.public-suffix.classified.disconnect.json" "${BASH_SOURCE%/*}/../util/parallel-chunks.sh" "${BASH_SOURCE%/*}/../questions/ratio-buckets.sh" > "ratio-buckets.json" # TODO: parallelize question aggregation? <"ratio-buckets.json" "${BASH_SOURCE%/*}/../questions/ratio-buckets.aggregate.sh" > "ratio-buckets.aggregate.json" +# TODO: parallelize question analysis? +<"ratio-buckets.aggregate.json" "${BASH_SOURCE%/*}/../questions/ratio-buckets.aggregate.analysis.sh" > "ratio-buckets.aggregate.analysis.json" # Regroup disconnect's categories and organizations <"aggregates.analysis.json" "${BASH_SOURCE%/*}/../questions/disconnect.categories.organizations.sh" "prepared.disconnect.services.json" > "aggregate.disconnect.categories.organizations.json" \ No newline at end of file diff --git a/src/questions/ratio-buckets.aggregate.analysis.sh b/src/questions/ratio-buckets.aggregate.analysis.sh new file mode 100755 index 0000000..dd60544 --- /dev/null +++ b/src/questions/ratio-buckets.aggregate.analysis.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +set -e + +read -d '' getRatioBucketAggregateAnalysis <<-'EOF' || true +def averageIndices: + . as $array + | reduce range(0; length) as $index( + []; + . + [ + { + index: $index, + value: $array[$index] + } + ] + ) + | map( + .diff = ( + .value - 0.5 + | if . < 0 then . * -1 else . 
end + ) + ) + | (map(.diff) | min) as $min + | map(select(.diff == $min)) + | map(.index); + +def averageIndexMinimum: + 0 as $min + | .[$min]; + +def averageIndexMiddle: + (((length - 1) / 2) | floor) as $mid + | .[$mid]; + +def averageIndexMaximum: + (length - 1) as $max + | .[$max]; + +def getAverageIndiceRange: + averageIndices + | { + minimum: averageIndexMinimum, + middle: averageIndexMiddle, + maximum: averageIndexMaximum, + }; + +def getAverageIndiceRanges: + with_entries( + .value.analysis = {} + | .value.analysis.index = {} + | .value.analysis.index.average = (.value.normalized.cumulative | getAverageIndiceRange) + ); + +.ratios |= getAverageIndiceRanges +| .occurrences |= getAverageIndiceRanges +EOF + +jq "$getRatioBucketAggregateAnalysis"