Skip to content

Commit

Permalink
Added option to remove redundant subdomains. Implements #68
Browse files Browse the repository at this point in the history
  • Loading branch information
hectorm committed Jan 17, 2021
1 parent 261850d commit 21b4c3d
Show file tree
Hide file tree
Showing 52 changed files with 427 additions and 39 deletions.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -34,7 +34,7 @@ The latest available version can also be installed manually by running the follo

```sh
curl -o /tmp/hblock 'https://raw.githubusercontent.com/hectorm/hblock/v3.1.3/hblock' \
&& echo '95dc0cd1c38ad97c1f929a7d18587a07b184b9357aad35d2c4d509ae0efb038b /tmp/hblock' | shasum -c \
&& echo '7e074fdbffd22fd7ad69aa21246840ef091089cfe62b37cb14de6fe8c700bda3 /tmp/hblock' | shasum -c \
&& sudo mv /tmp/hblock /usr/local/bin/hblock \
&& sudo chown 0:0 /usr/local/bin/hblock \
&& sudo chmod 755 /usr/local/bin/hblock
Expand Down
35 changes: 35 additions & 0 deletions hblock
Expand Up @@ -96,6 +96,7 @@ optParse() {
'-C'*|'--comment') optArgStr "${@-}"; comment="${optArg?}"; shift "${optShift:?}" ;;
'-l' |'--lenient'|'--no-lenient') optArgBool "${@-}"; lenient="${optArg:?}" ;;
'-r' |'--regex'|'--no-regex') optArgBool "${@-}"; regex="${optArg:?}" ;;
'-f' |'--filter-subdomains'|'--no-filter-subdomains') optArgBool "${@-}"; filterSubdomains="${optArg:?}" ;;
'-c' |'--continue'|'--no-continue') optArgBool "${@-}"; continue="${optArg:?}" ;;
'-q' |'--quiet'|'--no-quiet') optArgBool "${@-}"; quiet="${optArg:?}" ;;
'-x'*|'--color') optArgStr "${@-}"; color="${optArg?}"; shift "${optShift:?}" ;;
Expand Down Expand Up @@ -215,6 +216,11 @@ showHelp() {
-r, --[no-]regex, \${HBLOCK_REGEX}%NL
Use POSIX BREs in the allowlist instead of fixed strings.%NL
(default: ${regex?})%NL
-f, --[no-]filter-subdomains, \${HBLOCK_FILTER_SUBDOMAINS}%NL
Do not include subdomains when the parent domain is also blocked.
Useful for reducing the blocklist size in cases such as when DNS blocking
makes these subdomains redundant.%NL
(default: ${filterSubdomains?})%NL
-c, --[no-]continue, \${HBLOCK_CONTINUE}%NL
Do not abort if a download error occurs.%NL
(default: ${continue?})%NL
Expand Down Expand Up @@ -400,6 +406,9 @@ main() {
# Use POSIX BREs instead of fixed strings.
regex="${HBLOCK_REGEX-"false"}"

# Do not include subdomains when the parent domain is also blocked.
filterSubdomains="${HBLOCK_FILTER_SUBDOMAINS-"false"}"

# Abort if a download error occurs.
continue="${HBLOCK_CONTINUE-"false"}"

Expand Down Expand Up @@ -578,6 +587,32 @@ main() {
-- "${blocklistFile:?}" > "${blocklistFile:?}.aux" \
&& mv -f -- "${blocklistFile:?}.aux" "${blocklistFile:?}"

# Optional pass: drop every entry whose parent domain is also present in the
# blocklist (e.g. "sub.example.com" when "example.com" is listed).
if [ "${filterSubdomains:?}" = 'true' ]; then
  printList 'Removing redundant subdomains'

  # Rewrites each entry with its dot-separated labels in reverse order
  # ("sub.example.com" -> "com.example.sub"). Running it a second time
  # restores the original label order.
  awkReverseScript='
    BEGIN { FS = "." }
    {
      for (i = NF; i > 0; i--) {
        printf("%s%s", $i, (i > 1 ? FS : RS))
      }
    }
  '

  # Expects label-reversed entries ordered so that a parent is immediately
  # followed by all of its descendants. "p" holds the last printed entry plus
  # a trailing dot; any line starting with "p" is a descendant and is skipped.
  awkFilterScript='
    BEGIN { p = "." }
    {
      if (index($0, p) != 1) {
        print($0); p = $0"."
      }
    }
  '

  # The label separator is swapped for a space while sorting. With "." as the
  # separator, "-" collates before it, so a sibling such as "com.example-foo"
  # would land between "com.example" and "com.example.sub", reset the prefix
  # and let the redundant subdomain through. A space collates before every
  # hostname character in the C locale, which keeps each parent adjacent to
  # its descendants. LC_ALL=C makes the ordering byte-wise regardless of the
  # caller's locale.
  # NOTE(review): assumes entries contain no spaces at this stage, since any
  # space would be turned into a dot - confirm against the preceding steps.
  awk "${awkReverseScript:?}" < "${blocklistFile:?}" \
    | sed -e 's/\./ /g' | LC_ALL=C sort | sed -e 's/ /./g' \
    | awk "${awkFilterScript:?}" \
    | awk "${awkReverseScript:?}" > "${blocklistFile:?}.aux" \
    && mv -f -- "${blocklistFile:?}.aux" "${blocklistFile:?}"
fi

printList 'Sorting entries'
sort -- "${blocklistFile:?}" | uniq | sed -e '/^$/d' > "${blocklistFile:?}.aux" \
&& mv -f -- "${blocklistFile:?}.aux" "${blocklistFile:?}"
Expand Down
8 changes: 8 additions & 0 deletions hblock.1
Expand Up @@ -148,6 +148,14 @@ Use POSIX BREs in the allowlist instead of fixed strings.
.IP
(default: false)
.HP
\fB\-f\fR, \fB\-\-[no\-]filter\-subdomains\fR, ${HBLOCK_FILTER_SUBDOMAINS}
.IP
Do not include subdomains when the parent domain is also blocked.
Useful for reducing the blocklist size in cases such as when DNS blocking
makes these subdomains redundant.
.IP
(default: false)
.HP
\fB\-c\fR, \fB\-\-[no\-]continue\fR, ${HBLOCK_CONTINUE}
.IP
Do not abort if a download error occurs.
Expand Down
8 changes: 8 additions & 0 deletions hblock.1.md
Expand Up @@ -162,6 +162,14 @@ ${HBLOCK\_DENYLIST\_FILE}
>
> (default: false)
**-f**, **--\[no-\]filter-subdomains**, ${HBLOCK\_FILTER\_SUBDOMAINS}

> Do not include subdomains when the parent domain is also blocked.
> Useful for reducing the blocklist size in cases such as when DNS
> blocking makes these subdomains redundant.
>
> (default: false)
**-c**, **--\[no-\]continue**, ${HBLOCK\_CONTINUE}

> Do not abort if a download error occurs.
Expand Down
2 changes: 1 addition & 1 deletion hblock.sha256
@@ -1 +1 @@
95dc0cd1c38ad97c1f929a7d18587a07b184b9357aad35d2c4d509ae0efb038b hblock
7e074fdbffd22fd7ad69aa21246840ef091089cfe62b37cb14de6fe8c700bda3 hblock
1 change: 1 addition & 0 deletions resources/alt-formats/adblock.txt.sh
Expand Up @@ -25,6 +25,7 @@ main() {

export HBLOCK_LENIENT='false'
export HBLOCK_REGEX='false'
export HBLOCK_FILTER_SUBDOMAINS='false'
export HBLOCK_CONTINUE='false'

"${hblock:?}" -qO "${target:?}"
Expand Down
1 change: 1 addition & 0 deletions resources/alt-formats/dnsmasq.conf.sh
Expand Up @@ -25,6 +25,7 @@ main() {

export HBLOCK_LENIENT='false'
export HBLOCK_REGEX='false'
export HBLOCK_FILTER_SUBDOMAINS='true'
export HBLOCK_CONTINUE='false'

"${hblock:?}" -qO "${target:?}"
Expand Down
1 change: 1 addition & 0 deletions resources/alt-formats/domains.txt.sh
Expand Up @@ -25,6 +25,7 @@ main() {

export HBLOCK_LENIENT='false'
export HBLOCK_REGEX='false'
export HBLOCK_FILTER_SUBDOMAINS='false'
export HBLOCK_CONTINUE='false'

"${hblock:?}" -qO "${target:?}"
Expand Down
1 change: 1 addition & 0 deletions resources/alt-formats/rpz.txt.sh
Expand Up @@ -31,6 +31,7 @@ main() {

export HBLOCK_LENIENT='false'
export HBLOCK_REGEX='false'
export HBLOCK_FILTER_SUBDOMAINS='false'
export HBLOCK_CONTINUE='false'

"${hblock:?}" -qO "${target:?}"
Expand Down
1 change: 1 addition & 0 deletions resources/alt-formats/unbound.conf.sh
Expand Up @@ -25,6 +25,7 @@ main() {

export HBLOCK_LENIENT='false'
export HBLOCK_REGEX='false'
export HBLOCK_FILTER_SUBDOMAINS='true'
export HBLOCK_CONTINUE='false'

"${hblock:?}" -qO "${target:?}"
Expand Down
1 change: 1 addition & 0 deletions resources/alt-formats/windows.ps1.sh
Expand Up @@ -71,6 +71,7 @@ main() {

export HBLOCK_LENIENT='false'
export HBLOCK_REGEX='false'
export HBLOCK_FILTER_SUBDOMAINS='false'
export HBLOCK_CONTINUE='false'

CR="$(printf '\rx')"; CR="${CR%x}"
Expand Down
Expand Up @@ -18,6 +18,15 @@ single-entry-013.localdomain
single-entry-014.localhost
single-entry-015.test
single-entry-016.com single-entry-016.com
sub-001.single-entry-017.com
sub-000.single-entry-017.com
sub-000.sub-000.sub-000.single-entry-017.com
single-entry-017.com
sub-000.sub-000.single-entry-017.com
sub-000.single-entry-018.com
sub-000.sub-000.single-entry-018.com
sub-001.single-entry-018.com
sub-000.sub-000.sub-000.single-entry-019.com

# Entries with prefix
0.0.0.0 entry-with-prefix-000.com
Expand Down
File renamed without changes.
11 changes: 10 additions & 1 deletion resources/tests/test-main-argument-double-dash-arg.out
@@ -1,5 +1,5 @@
# Generated with: https://github.com/hectorm/hblock
# Blocked domains: 17
# Blocked domains: 26

# BEGIN HEADER
127.0.0.1 localhost hblock
Expand Down Expand Up @@ -31,4 +31,13 @@ ff02::3 ip6-allhosts
0.0.0.0 single-entry-001.com
0.0.0.0 single-entry-002.com
0.0.0.0 single-entry-003.com
0.0.0.0 single-entry-017.com
0.0.0.0 sub-000.single-entry-017.com
0.0.0.0 sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-019.com
0.0.0.0 sub-001.single-entry-017.com
0.0.0.0 sub-001.single-entry-018.com
# END BLOCKLIST
2 changes: 1 addition & 1 deletion resources/tests/test-main-argument-double-dash-arg.sh
Expand Up @@ -13,7 +13,7 @@ SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${0:?}")" && pwd -P)"
. "${SCRIPT_DIR:?}"/env.sh

main() {
export HBLOCK_SOURCES="file://${SCRIPT_DIR:?}/sources.txt"
export HBLOCK_SOURCES="file://${SCRIPT_DIR:?}/test-domains-source.txt"

printf -- 'Test - Main - Argument: Double dash argument\n'
actual="$(runInTestShell "${SCRIPT_DIR:?}/../../hblock" -qO- -- -v)"
Expand Down
2 changes: 1 addition & 1 deletion resources/tests/test-main-argument-invalid-long-opt.sh
Expand Up @@ -13,7 +13,7 @@ SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${0:?}")" && pwd -P)"
. "${SCRIPT_DIR:?}"/env.sh

main() {
export HBLOCK_SOURCES="file://${SCRIPT_DIR:?}/sources.txt"
export HBLOCK_SOURCES="file://${SCRIPT_DIR:?}/test-domains-source.txt"

printf -- 'Test - Main - Argument: Invalid long option\n'
actual="$(runInTestShell "${SCRIPT_DIR:?}/../../hblock" -qO- --invalid='VALUE')"
Expand Down
2 changes: 1 addition & 1 deletion resources/tests/test-main-argument-invalid-short-opt.sh
Expand Up @@ -13,7 +13,7 @@ SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${0:?}")" && pwd -P)"
. "${SCRIPT_DIR:?}"/env.sh

main() {
export HBLOCK_SOURCES="file://${SCRIPT_DIR:?}/sources.txt"
export HBLOCK_SOURCES="file://${SCRIPT_DIR:?}/test-domains-source.txt"

printf -- 'Test - Main - Argument: Invalid short option\n'
actual="$(runInTestShell "${SCRIPT_DIR:?}/../../hblock" -qO- -i 'VALUE')"
Expand Down
11 changes: 10 additions & 1 deletion resources/tests/test-main-feature-allowlist-builtin.out
@@ -1,5 +1,5 @@
# Generated with: https://github.com/hectorm/hblock
# Blocked domains: 17
# Blocked domains: 26

# BEGIN HEADER
127.0.0.1 localhost hblock
Expand Down Expand Up @@ -31,4 +31,13 @@ ff02::3 ip6-allhosts
0.0.0.0 single-entry-001.com
0.0.0.0 single-entry-002.com
0.0.0.0 single-entry-003.com
0.0.0.0 single-entry-017.com
0.0.0.0 sub-000.single-entry-017.com
0.0.0.0 sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-019.com
0.0.0.0 sub-001.single-entry-017.com
0.0.0.0 sub-001.single-entry-018.com
# END BLOCKLIST
11 changes: 10 additions & 1 deletion resources/tests/test-main-feature-allowlist-none.out
@@ -1,5 +1,5 @@
# Generated with: https://github.com/hectorm/hblock
# Blocked domains: 17
# Blocked domains: 26

# BEGIN HEADER
127.0.0.1 localhost hblock
Expand Down Expand Up @@ -31,4 +31,13 @@ ff02::3 ip6-allhosts
0.0.0.0 single-entry-001.com
0.0.0.0 single-entry-002.com
0.0.0.0 single-entry-003.com
0.0.0.0 single-entry-017.com
0.0.0.0 sub-000.single-entry-017.com
0.0.0.0 sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-019.com
0.0.0.0 sub-001.single-entry-017.com
0.0.0.0 sub-001.single-entry-018.com
# END BLOCKLIST
11 changes: 10 additions & 1 deletion resources/tests/test-main-feature-allowlist.out
@@ -1,5 +1,5 @@
# Generated with: https://github.com/hectorm/hblock
# Blocked domains: 15
# Blocked domains: 24

# BEGIN HEADER
127.0.0.1 localhost hblock
Expand Down Expand Up @@ -29,4 +29,13 @@ ff02::3 ip6-allhosts
0.0.0.0 single-entry-001.com
0.0.0.0 single-entry-002.com
0.0.0.0 single-entry-003.com
0.0.0.0 single-entry-017.com
0.0.0.0 sub-000.single-entry-017.com
0.0.0.0 sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-019.com
0.0.0.0 sub-001.single-entry-017.com
0.0.0.0 sub-001.single-entry-018.com
# END BLOCKLIST
2 changes: 1 addition & 1 deletion resources/tests/test-main-feature-allowlist.sh
Expand Up @@ -13,7 +13,7 @@ SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${0:?}")" && pwd -P)"
. "${SCRIPT_DIR:?}"/env.sh

main() {
export HBLOCK_SOURCES="file://${SCRIPT_DIR:?}/sources.txt"
export HBLOCK_SOURCES="file://${SCRIPT_DIR:?}/test-domains-source.txt"
export HBLOCK_ALLOWLIST_FILE=''

printf -- 'Test - Main - Allowlist: "-A" short option\n'
Expand Down
11 changes: 10 additions & 1 deletion resources/tests/test-main-feature-comment.out
@@ -1,5 +1,5 @@
# % Generated with: https://github.com/hectorm/hblock
# % Blocked domains: 17
# % Blocked domains: 26

# % BEGIN HEADER
127.0.0.1 localhost hblock
Expand Down Expand Up @@ -31,4 +31,13 @@ ff02::3 ip6-allhosts
0.0.0.0 single-entry-001.com
0.0.0.0 single-entry-002.com
0.0.0.0 single-entry-003.com
0.0.0.0 single-entry-017.com
0.0.0.0 sub-000.single-entry-017.com
0.0.0.0 sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-019.com
0.0.0.0 sub-001.single-entry-017.com
0.0.0.0 sub-001.single-entry-018.com
# % END BLOCKLIST
2 changes: 1 addition & 1 deletion resources/tests/test-main-feature-comment.sh
Expand Up @@ -13,7 +13,7 @@ SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${0:?}")" && pwd -P)"
. "${SCRIPT_DIR:?}"/env.sh

main() {
export HBLOCK_SOURCES="file://${SCRIPT_DIR:?}/sources.txt"
export HBLOCK_SOURCES="file://${SCRIPT_DIR:?}/test-domains-source.txt"
export HBLOCK_COMMENT='#'

printf -- 'Test - Main - Comment: "-C" short option\n'
Expand Down
11 changes: 10 additions & 1 deletion resources/tests/test-main-feature-denylist-builtin.out
@@ -1,5 +1,5 @@
# Generated with: https://github.com/hectorm/hblock
# Blocked domains: 17
# Blocked domains: 26

# BEGIN HEADER
127.0.0.1 localhost hblock
Expand Down Expand Up @@ -31,4 +31,13 @@ ff02::3 ip6-allhosts
0.0.0.0 single-entry-001.com
0.0.0.0 single-entry-002.com
0.0.0.0 single-entry-003.com
0.0.0.0 single-entry-017.com
0.0.0.0 sub-000.single-entry-017.com
0.0.0.0 sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-019.com
0.0.0.0 sub-001.single-entry-017.com
0.0.0.0 sub-001.single-entry-018.com
# END BLOCKLIST
11 changes: 10 additions & 1 deletion resources/tests/test-main-feature-denylist-none.out
@@ -1,5 +1,5 @@
# Generated with: https://github.com/hectorm/hblock
# Blocked domains: 16
# Blocked domains: 25

# BEGIN HEADER
127.0.0.1 localhost hblock
Expand Down Expand Up @@ -30,4 +30,13 @@ ff02::3 ip6-allhosts
0.0.0.0 single-entry-001.com
0.0.0.0 single-entry-002.com
0.0.0.0 single-entry-003.com
0.0.0.0 single-entry-017.com
0.0.0.0 sub-000.single-entry-017.com
0.0.0.0 sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-019.com
0.0.0.0 sub-001.single-entry-017.com
0.0.0.0 sub-001.single-entry-018.com
# END BLOCKLIST
11 changes: 10 additions & 1 deletion resources/tests/test-main-feature-denylist.out
@@ -1,5 +1,5 @@
# Generated with: https://github.com/hectorm/hblock
# Blocked domains: 18
# Blocked domains: 27

# BEGIN HEADER
127.0.0.1 localhost hblock
Expand Down Expand Up @@ -32,4 +32,13 @@ ff02::3 ip6-allhosts
0.0.0.0 single-entry-001.com
0.0.0.0 single-entry-002.com
0.0.0.0 single-entry-003.com
0.0.0.0 single-entry-017.com
0.0.0.0 sub-000.single-entry-017.com
0.0.0.0 sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.single-entry-018.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-017.com
0.0.0.0 sub-000.sub-000.sub-000.single-entry-019.com
0.0.0.0 sub-001.single-entry-017.com
0.0.0.0 sub-001.single-entry-018.com
# END BLOCKLIST
2 changes: 1 addition & 1 deletion resources/tests/test-main-feature-denylist.sh
Expand Up @@ -13,7 +13,7 @@ SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${0:?}")" && pwd -P)"
. "${SCRIPT_DIR:?}"/env.sh

main() {
export HBLOCK_SOURCES="file://${SCRIPT_DIR:?}/sources.txt"
export HBLOCK_SOURCES="file://${SCRIPT_DIR:?}/test-domains-source.txt"
export HBLOCK_DENYLIST_FILE=''

printf -- 'Test - Main - Denylist: "-D" short option\n'
Expand Down

0 comments on commit 21b4c3d

Please sign in to comment.