Skip to content
This repository has been archived by the owner on Jun 1, 2021. It is now read-only.

Commit

Permalink
Re-focus code on downloading URLs, albeit only domain root URLs at th…
Browse files Browse the repository at this point in the history
…is point in time
  • Loading branch information
joelpurra committed Sep 1, 2014
1 parent d0a3609 commit cf23c15
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 33 deletions.
10 changes: 3 additions & 7 deletions src/domain/parallel.sh
@@ -1,12 +1,8 @@
#!/usr/bin/env bash
set -e

[[ -z `which parallel` || ! ($(parallel --version 2>/dev/null) =~ ^GNU.parallel.*) ]] && { echo "GNU parallel is required"; exit 1; }
prefix="$1"
shift

parallelLimit="${1:-10}"

enableScreenshot=false
[[ ("$2" == "--screenshot") && ("$3" == "true") ]] && enableScreenshot=true

cat | parallel --jobs "$parallelLimit" --load "80%" --line-buffer "echo \"{}\"; \"${BASH_SOURCE%/*}/single.sh\" \"{}\" --screenshot \"$enableScreenshot\";"
cat | sed "s_.*_$prefix&/_" | "${BASH_SOURCE%/*}/../url/parallel.sh" "$@"

10 changes: 4 additions & 6 deletions src/domain/serial.sh
@@ -1,10 +1,8 @@
#!/usr/bin/env bash
set -e

enableScreenshot=false
[[ ("$1" == "--screenshot") && ("$2" == "true") ]] && enableScreenshot=true
prefix="$1"
shift

cat | sed "s_.*_$prefix&/_" | "${BASH_SOURCE%/*}/../url/serial.sh" "$@"

while read domain; do
echo "$domain"
"${BASH_SOURCE%/*}/single.sh" "$domain" --screenshot "$enableScreenshot"
done
23 changes: 3 additions & 20 deletions src/domain/single.sh
@@ -1,25 +1,8 @@
#!/usr/bin/env bash
set -e

domain="$1"
timestamp=$(date -u +%FT%TZ | tr -d ':')
url="http://$domain/"
prefix="$1"
shift

enableScreenshot=false
[[ ("$2" == "--screenshot") && ("$3" == "true") ]] && enableScreenshot=true
cat | sed "s_.*_$prefix&/_" | "${BASH_SOURCE%/*}/../url/single.sh" "$@"

outdir="./$domain"
outfilebase="$domain.$timestamp"
outpathhar="$outdir/$outfilebase.har"
outpathpng="$outdir/$outfilebase.png"

mkdir -p "$outdir"

result=$("${BASH_SOURCE%/*}/../get/har.sh" "$url" --screenshot "$enableScreenshot")

if [[ $enableScreenshot == true ]]
then
echo "$result" | jq --raw-output '.screenshot' | base64 --decode > "$outpathpng"
fi

echo "$result" | jq 'del(.screenshot)' > "$outpathhar"
12 changes: 12 additions & 0 deletions src/url/parallel.sh
@@ -0,0 +1,12 @@
#!/usr/bin/env bash
#
# Process every line read from stdin by running single.sh (located next to
# this script) on it, with several jobs at once through GNU parallel.
#
# Usage: parallel.sh [jobs] [--screenshot true] < urls
#   $1     value passed to parallel's --jobs (default: 10)
#   $2 $3  the literal pair "--screenshot true" turns screenshots on;
#          anything else leaves them off
set -e

# A binary named "parallel" is not enough: its --version output must start
# with "GNU parallel". The diagnostic goes to stderr so it never mixes with
# the per-URL output on stdout.
if ! command -v parallel >/dev/null 2>&1 \
  || ! [[ "$(parallel --version 2>/dev/null)" =~ ^GNU.parallel.* ]]; then
  echo "GNU parallel is required" >&2
  exit 1
fi

parallelLimit="${1:-10}"

enableScreenshot=false
if [[ "$2" == "--screenshot" && "$3" == "true" ]]; then
  enableScreenshot=true
fi

# parallel reads its input lines from this script's stdin directly; each job
# first echoes the input line and then hands it to single.sh.
parallel --jobs "$parallelLimit" --load "80%" --line-buffer "echo \"{}\"; \"${BASH_SOURCE%/*}/single.sh\" \"{}\" --screenshot \"$enableScreenshot\";"

10 changes: 10 additions & 0 deletions src/url/serial.sh
@@ -0,0 +1,10 @@
#!/usr/bin/env bash
#
# Process every line read from stdin, one at a time, by echoing it and then
# running single.sh (located next to this script) on it.
#
# Usage: serial.sh [--screenshot true] < urls
#   $1 $2  the literal pair "--screenshot true" turns screenshots on;
#          anything else leaves them off
set -e

enableScreenshot=false
if [[ "$1" == "--screenshot" && "$2" == "true" ]]; then
  enableScreenshot=true
fi

# -r keeps backslashes in the input intact; the "|| [[ -n ... ]]" clause makes
# sure a final line without a trailing newline is still processed.
# NOTE(review): single.sh inherits this loop's stdin; if it (or anything it
# calls) reads stdin it would swallow the remaining lines - confirm.
while read -r url || [[ -n "$url" ]]; do
  printf '%s\n' "$url"
  "${BASH_SOURCE%/*}/single.sh" "$url" --screenshot "$enableScreenshot"
done
30 changes: 30 additions & 0 deletions src/url/single.sh
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
#
# Usage: single.sh <url> [--screenshot true]
set -e

# $1: the URL to process.
url="$1"

# UTC timestamp without colons (e.g. 2014-09-01T123456Z); it becomes part of
# the output file names.
timestamp=$(date -u +%FT%H%M%SZ)

# Screenshots are enabled only by the exact argument pair "--screenshot true"
# in positions 2 and 3.
if [[ "$2" == "--screenshot" && "$3" == "true" ]]; then
  enableScreenshot=true
else
  enableScreenshot=false
fi

# Filter: reads URLs on stdin (one per line) and writes the host name of each
# to stdout, without userinfo ("user:password@") and without the port.
getDomain(){
  # Splitting "scheme://authority/path" on "/" puts the authority in field 3.
  # Any userinfo is stripped before cutting at ":" - otherwise the user name
  # would be returned instead of the host.
  cut -d'/' -f 3 |
    sed 's/^.*@//' |
    cut -d':' -f 1
}

# Output goes to ./<host>/<host>.<timestamp>.har (and .png when screenshots
# are enabled); the host is extracted from the URL by getDomain.
host="$(getDomain <<<"$url")"
targetDir="./$host"
stem="$host.$timestamp"
harFile="$targetDir/$stem.har"
pngFile="$targetDir/$stem.png"

mkdir -p "$targetDir"

# Capture everything har.sh prints; the jq filters below treat it as JSON
# with an optional .screenshot member.
capture=$("${BASH_SOURCE%/*}/../get/har.sh" "$url" --screenshot "$enableScreenshot")

# The screenshot is stored base64-encoded inside the JSON; decode it into
# its own file.
if [[ $enableScreenshot == true ]]; then
  echo "$capture" | jq --raw-output '.screenshot' | base64 --decode > "$pngFile"
fi

# The stored HAR is the captured JSON with the .screenshot member removed.
echo "$capture" | jq 'del(.screenshot)' > "$harFile"

0 comments on commit cf23c15

Please sign in to comment.