Skip to content

Commit

Permalink
.functions: Improve httpcompression and move it to its own file
Browse files Browse the repository at this point in the history
  • Loading branch information
alrra authored and dndhm committed Nov 11, 2021
1 parent 926829b commit e2a2028
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 6 deletions.
6 changes: 0 additions & 6 deletions .functions
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,6 @@ function gz() {
printf "gzip: %d bytes (%2.2f%%)\n" "$gzipsize" "$ratio"
}

# Test if HTTP compression (RFC 2616 + SDCH) is enabled for a given URL.
# Send a fake UA string for sites that sniff it instead of using the Accept-Encoding header. (Looking at you, ajax.googleapis.com!)
function httpcompression() {
local encoding="$(curl -LIs -H 'User-Agent: Mozilla/5 Gecko' -H 'Accept-Encoding: gzip,deflate,compress,sdch' "$1" | grep '^Content-Encoding:')" && echo "$1 is encoded using ${encoding#* }" || echo "$1 is not using any encoding"
}

# Syntax-highlight JSON strings or files
# Usage: `json '{"foo":42}'` or `echo '{"foo":42}' | json`
function json() {
Expand Down
127 changes: 127 additions & 0 deletions bin/httpcompression
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#!/usr/bin/env bash

# Test if HTTP compression (RFC 2616 + SDCH) is enabled for a given URL

declare -r hUA="Mozilla/5.0 Gecko"
declare -r hAE="Accept-Encoding: gzip, deflate, sdch"
declare -r maxConTime=15
declare -r maxTime=30

declare availDicts="" dict="" dictClientID="" dicts="" headers="" i="" \
indent="" url="" encoding="" urlHeaders=""

headers="$( curl --connect-timeout $maxConTime \
-A "$hUA" `# Send a fake UA string for sites
# that sniff it instead of using
# the Accept-Encoding header` \
-D - `# Get response headers` \
-H "$hAE" \
-L `# If the page was moved to a different
# location, redo the request` \
-m $maxTime \
-s `# Don\'t show the progress meter` \
-S `# Show error messages` \
-o /dev/null `# Ignore content` \
"$1" )" \
&& ( \

url="$1"

# Iterate over the headers of all redirects
while [ -n "$headers" ]; do

# Get headers for the "current" URL
urlHeaders="$( printf "%s" "$headers" |
sed -n '1,/^HTTP/p' )"

# Remove the headers for the "current" URL
headers="${headers/"$urlHeaders"/}"

# ----------------------------------------------------------------------
# | SDCH |
# ----------------------------------------------------------------------

# SDCH Specification:
# - www.blogs.zeenor.com/wp-content/uploads/2011/01/Shared_Dictionary_Compression_over_HTTP.pdf

# Check if the server advertised any dictionaries
dicts="$( printf "%s" "$urlHeaders" |
grep -i 'Get-Dictionary:' |
cut -d':' -f2 |
sed s/,/\ /g )"

if [ -n "$dicts" ]; then

availDicts=""
dict=""

for i in $dicts; do

# Check If the dictionary location is specified as a path,
# and if so, construct it's URL from the host name of the
# referrer URL
[[ "$i" != http* ]] \
&& dict="$(printf "$url" |
sed -En 's/([^/]*\/\/)?([^/]*)\/?.*/\1\2/p')"

dict="$dict$i"

# Request the dictionaries from the server and
# construct the `Avail-Dictionary` header value
#
# [ The user agent identifier for a dictionary is defined
# as the URL-safe base64 encoding (as described in RFC
# 3548, section 4 [RFC3548]) of the first 48 bits (bits
# 0..47) of the dictionary's SHA-256 digest ]
#
dictClientID="$( curl --connect-timeout $maxConTime \
-A "$hUA" -LsS -m $maxTime "$dict" |
openssl dgst -sha256 -binary |
openssl base64 |
cut -c 1-8 |
sed -e 's/\+/-/' -e 's/\//_/' )"

[ -n $availDicts ] && availDicts="$adics,$dictClientID" \
|| availDicts="$dictClientID"

done

# Redo the request (advertising the available dictionaries)
# and replace the old resulted headers with the new ones
urlHeaders="$( curl --connect-timeout $maxConTime \
-A "$hUA" -D - -H "$hAE" \
-H "Avail-Dictionary: $availDicts" \
-m $maxTime -o /dev/null -sS "$1" )"
fi

# ----------------------------------------------------------------------

# Get the content encoding header values
encoding="$( printf "%s" "$urlHeaders" |
grep -i 'Content-Encoding:' |
cut -d' ' -f2 |
tr "\r" "," |
tr -d "\n" |
sed 's/,$//' )"

[ -n "$encoding" ] && encoding="[$encoding]"

# Print the output for the "current" URL
if [ "$url" != "$1" ]; then
printf "%s\n" "$indent$url $encoding"
indent=" "$indent
else
printf "\n%s\n" " $1 $encoding"
indent=""
fi

# Get the next URL value
url="$( printf "%s" "$urlHeaders" |
grep -i 'Location' |
sed -e 's/Location://' |
tr -d '\r' )"

done
printf "\n"

) || printf ""

0 comments on commit e2a2028

Please sign in to comment.