Skip to content

Commit

Permalink
Scripts to generate data tables
Browse files Browse the repository at this point in the history
  • Loading branch information
joelpurra committed Sep 12, 2014
1 parent f720710 commit 6e6d717
Show file tree
Hide file tree
Showing 3 changed files with 206 additions and 0 deletions.
93 changes: 93 additions & 0 deletions data/extract/disconnect.categories.sh
@@ -0,0 +1,93 @@
#!/usr/bin/env bash
#
# Extracts a per-category table from Disconnect analysis JSON.
#
# Input (stdin):  JSON containing "domains-per-category" and
#                 "organizations-per-category".count objects.
# Output (stdout): whatever har-dulcify's array-of-objects-to-tsv.sh and
#                 clean-tsv-sorted-header.sh produce (presumably TSV, going
#                 by the helper names).
#
# Abort on the first command that fails.
set -o errexit

#######################################
# Locate the sibling har-heedless and har-dulcify checkouts, which are
# expected to live next to this script's git repository.
# Globals:   heedlessBaseDir, dulcifyBaseDir (written)
# Arguments: none
# Outputs:   a diagnostic on stderr when resolution fails
# Returns:   0 on success; 1 if the repository root or its parent directory
#            cannot be resolved (the globals are left untouched in that case)
#######################################
setThesisBaseDirs() {
  # TODO: share this function between scripts.
  local scriptDir projectRoot thesisBaseDirAbsolute

  # BASH_SOURCE contains no slash when the script is started as
  # `bash script.sh` from its own directory; use "." then, rather than
  # trying to cd into the file name.
  case "${BASH_SOURCE[0]}" in
    */*) scriptDir="${BASH_SOURCE[0]%/*}" ;;
    *) scriptDir="." ;;
  esac
  # A script located directly in "/" leaves an empty prefix.
  scriptDir="${scriptDir:-/}"

  # Assignment is kept apart from `local` so a failing cd/git is not masked.
  # --show-toplevel yields the working tree root even for worktrees and
  # submodules, where "<git-dir>/.." points somewhere else.
  if ! projectRoot="$(cd -- "$scriptDir" && git rev-parse --show-toplevel)" ||
    [[ -z "$projectRoot" ]]; then
    printf '%s\n' "setThesisBaseDirs: cannot find the git repository root from '$scriptDir'" >&2
    return 1
  fi

  # TODO: use http://gitslave.sourceforge.net/ instead?
  if ! thesisBaseDirAbsolute="$(cd -- "$projectRoot/.." && printf '%s\n' "$PWD")"; then
    printf '%s\n' "setThesisBaseDirs: cannot enter the parent directory of '$projectRoot'" >&2
    return 1
  fi

  heedlessBaseDir="$thesisBaseDirAbsolute/har-heedless/src"
  dulcifyBaseDir="$thesisBaseDirAbsolute/har-dulcify/src"
}

setThesisBaseDirs

# jq program: wraps each category's value from "domains-per-category" and
# from "organizations-per-category".count under a key of the same name,
# merges the two per category, and lists the categories sorted by name
# without regard to ASCII letter case.
#
# `read -d ''` reaches end-of-input without seeing a NUL delimiter and so
# returns non-zero; `|| true` keeps `set -e` from aborting. `-r` keeps any
# backslash in the program literal.
# NOTE: ascii_downcase requires jq 1.5 or later.
read -r -d '' mapData <<-'EOF' || true
. as $root
| $root."domains-per-category"
| with_entries(.value |= { "domains-per-category": . })
| . * (
    $root."organizations-per-category".count
    | with_entries(.value |= { "organizations-per-category": . })
  )
| to_entries
| sort_by(.key | ascii_downcase)
EOF

# jq program: turns each { key, value } entry into an object whose keys carry
# an "NN--" prefix; the prefix appears intended to fix the column order for
# the downstream header-sorting helper (see the variable name).
# `read -d ''` returns non-zero at end-of-input, hence `|| true`; `-r` keeps
# any backslash in the program literal.
read -r -d '' renameForTsvColumnOrdering <<-'EOF' || true
map(
  {
    "01--Category": .key,
    "02--Domains": .value."domains-per-category",
    "03--Organizations": .value."organizations-per-category"
  }
)
EOF

# "prepared.disconnect.services.analysis.json"
# Transform stdin with the two jq programs, then hand the result to the
# har-dulcify helpers. jq reads this script's stdin directly, so no leading
# `cat` is needed.
jq "$mapData" |
  jq "$renameForTsvColumnOrdering" |
  "$dulcifyBaseDir/util/array-of-objects-to-tsv.sh" |
  "$dulcifyBaseDir/util/clean-tsv-sorted-header.sh"
32 changes: 32 additions & 0 deletions data/extract/disconnect.domains-per-organization.sh
@@ -0,0 +1,32 @@
#!/usr/bin/env bash
#
# Extracts the "domains per organization" grouping from Disconnect analysis
# JSON.
#
# Input (stdin):  JSON containing ."domains-per-organization"."group-by-count".
# Output (stdout): whatever har-dulcify's array-of-objects-to-tsv.sh and
#                 clean-tsv-sorted-header.sh produce (presumably TSV, going
#                 by the helper names).
#
# Abort on the first command that fails.
set -o errexit

#######################################
# Locate the sibling har-heedless and har-dulcify checkouts, which are
# expected to live next to this script's git repository.
# Globals:   heedlessBaseDir, dulcifyBaseDir (written)
# Arguments: none
# Outputs:   a diagnostic on stderr when resolution fails
# Returns:   0 on success; 1 if the repository root or its parent directory
#            cannot be resolved (the globals are left untouched in that case)
#######################################
setThesisBaseDirs() {
  # TODO: share this function between scripts.
  local scriptDir projectRoot thesisBaseDirAbsolute

  # BASH_SOURCE contains no slash when the script is started as
  # `bash script.sh` from its own directory; use "." then, rather than
  # trying to cd into the file name.
  case "${BASH_SOURCE[0]}" in
    */*) scriptDir="${BASH_SOURCE[0]%/*}" ;;
    *) scriptDir="." ;;
  esac
  # A script located directly in "/" leaves an empty prefix.
  scriptDir="${scriptDir:-/}"

  # Assignment is kept apart from `local` so a failing cd/git is not masked.
  # --show-toplevel yields the working tree root even for worktrees and
  # submodules, where "<git-dir>/.." points somewhere else.
  if ! projectRoot="$(cd -- "$scriptDir" && git rev-parse --show-toplevel)" ||
    [[ -z "$projectRoot" ]]; then
    printf '%s\n' "setThesisBaseDirs: cannot find the git repository root from '$scriptDir'" >&2
    return 1
  fi

  # TODO: use http://gitslave.sourceforge.net/ instead?
  if ! thesisBaseDirAbsolute="$(cd -- "$projectRoot/.." && printf '%s\n' "$PWD")"; then
    printf '%s\n' "setThesisBaseDirs: cannot enter the parent directory of '$projectRoot'" >&2
    return 1
  fi

  heedlessBaseDir="$thesisBaseDirAbsolute/har-heedless/src"
  dulcifyBaseDir="$thesisBaseDirAbsolute/har-dulcify/src"
}

setThesisBaseDirs

# jq program: selects the array at ."domains-per-organization"."group-by-count"
# and sorts it by each element's .domains value.
# `read -d ''` reaches end-of-input without seeing a NUL delimiter and so
# returns non-zero; `|| true` keeps `set -e` from aborting. `-r` keeps any
# backslash in the program literal.
read -r -d '' mapData <<-'EOF' || true
."domains-per-organization"."group-by-count"
| sort_by(.domains)
EOF

# jq program: maps each element to an object whose keys carry an "NN--"
# prefix; the prefix appears intended to fix the column order for the
# downstream header-sorting helper (see the variable name).
# `read -d ''` returns non-zero at end-of-input, hence `|| true`; `-r` keeps
# any backslash in the program literal.
read -r -d '' renameForTsvColumnOrdering <<-'EOF' || true
map(
  {
    "01--Domains per organization": .domains,
    "02--Organizations": .organizations
  }
)
EOF

# "prepared.disconnect.services.analysis.json"
# Transform stdin with the two jq programs, then hand the result to the
# har-dulcify helpers. jq reads this script's stdin directly, so no leading
# `cat` is needed.
jq "$mapData" |
  jq "$renameForTsvColumnOrdering" |
  "$dulcifyBaseDir/util/array-of-objects-to-tsv.sh" |
  "$dulcifyBaseDir/util/clean-tsv-sorted-header.sh"
81 changes: 81 additions & 0 deletions data/extract/disconnect.organizations-in-more-than-one-category.sh
@@ -0,0 +1,81 @@
#!/usr/bin/env bash
#
# Lists the organizations found under
# ."organizations-per-category"."more-than-one".values in Disconnect analysis
# JSON, together with the value stored for each.
#
# Input (stdin):  the analysis JSON.
# Output (stdout): whatever har-dulcify's to-array.sh,
#                 array-of-objects-to-tsv.sh and clean-tsv-sorted-header.sh
#                 produce (presumably TSV, going by the helper names).
#
# Abort on the first command that fails.
set -o errexit

#######################################
# Locate the sibling har-heedless and har-dulcify checkouts, which are
# expected to live next to this script's git repository.
# Globals:   heedlessBaseDir, dulcifyBaseDir (written)
# Arguments: none
# Outputs:   a diagnostic on stderr when resolution fails
# Returns:   0 on success; 1 if the repository root or its parent directory
#            cannot be resolved (the globals are left untouched in that case)
#######################################
setThesisBaseDirs() {
  # TODO: share this function between scripts.
  local scriptDir projectRoot thesisBaseDirAbsolute

  # BASH_SOURCE contains no slash when the script is started as
  # `bash script.sh` from its own directory; use "." then, rather than
  # trying to cd into the file name.
  case "${BASH_SOURCE[0]}" in
    */*) scriptDir="${BASH_SOURCE[0]%/*}" ;;
    *) scriptDir="." ;;
  esac
  # A script located directly in "/" leaves an empty prefix.
  scriptDir="${scriptDir:-/}"

  # Assignment is kept apart from `local` so a failing cd/git is not masked.
  # --show-toplevel yields the working tree root even for worktrees and
  # submodules, where "<git-dir>/.." points somewhere else.
  if ! projectRoot="$(cd -- "$scriptDir" && git rev-parse --show-toplevel)" ||
    [[ -z "$projectRoot" ]]; then
    printf '%s\n' "setThesisBaseDirs: cannot find the git repository root from '$scriptDir'" >&2
    return 1
  fi

  # TODO: use http://gitslave.sourceforge.net/ instead?
  if ! thesisBaseDirAbsolute="$(cd -- "$projectRoot/.." && printf '%s\n' "$PWD")"; then
    printf '%s\n' "setThesisBaseDirs: cannot enter the parent directory of '$projectRoot'" >&2
    return 1
  fi

  heedlessBaseDir="$thesisBaseDirAbsolute/har-heedless/src"
  dulcifyBaseDir="$thesisBaseDirAbsolute/har-dulcify/src"
}

setThesisBaseDirs

# jq program: takes the object at
# ."organizations-per-category"."more-than-one".values, groups its entries by
# value, puts the groups in descending value order, sorts each group by key
# without regard to ASCII letter case, and emits the entries one by one.
#
# `read -d ''` reaches end-of-input without seeing a NUL delimiter and so
# returns non-zero; `|| true` keeps `set -e` from aborting. `-r` keeps any
# backslash in the program literal.
# NOTE: ascii_downcase requires jq 1.5 or later.
read -r -d '' mapData <<-'EOF' || true
."organizations-per-category"."more-than-one".values
| to_entries
| group_by(.value)
| reverse
| map(sort_by(.key | ascii_downcase))
| .[][]
EOF

# jq program: turns one { key, value } entry into an object whose keys carry
# an "NN--" prefix; the prefix appears intended to fix the column order for
# the downstream header-sorting helper (see the variable name).
# `read -d ''` returns non-zero at end-of-input, hence `|| true`; `-r` keeps
# any backslash in the program literal.
read -r -d '' renameForTsvColumnOrdering <<-'EOF' || true
{
  "01--Organization": .key,
  "02--Categories": .value
}
EOF

# "prepared.disconnect.services.analysis.json"
# Transform stdin with the two jq programs (the first emits a stream of
# entries, which to-array.sh receives after renaming), then hand the result
# to the har-dulcify TSV helpers. jq reads this script's stdin directly, so
# no leading `cat` is needed.
jq "$mapData" |
  jq "$renameForTsvColumnOrdering" |
  "$dulcifyBaseDir/util/to-array.sh" |
  "$dulcifyBaseDir/util/array-of-objects-to-tsv.sh" |
  "$dulcifyBaseDir/util/clean-tsv-sorted-header.sh"

0 comments on commit 6e6d717

Please sign in to comment.