diff --git a/src/classification/basic.sh b/src/classification/basic.sh index 37fc1bc..5f9e23b 100755 --- a/src/classification/basic.sh +++ b/src/classification/basic.sh @@ -16,22 +16,22 @@ def isSecure: def classifyUrl(origin): origin as $origin | { - isSameDomain: (.domain | isSameDomain($origin.domain)), - isSubdomain: (.domain | isSubdomain($origin.domain)), + # TODO: work on .domain.parts, not .domain.original? + isSameDomain: (.domain.original | isSameDomain($origin.domain.original)), + isSubdomain: (.domain.original | isSubdomain($origin.domain.original)), isSecure: (.protocol | isSecure) }; def mangle(origin): origin as $origin - | .url as $urlParts | . + { - classification : $urlParts | classifyUrl($origin) + classification : .url | classifyUrl($origin) }; .origin.url as $origin | { origin: .origin | mangle($origin), - requestedUrls: .requestedUrls | map(mangle(($origin))) + requestedUrls: .requestedUrls | map(mangle($origin)) } EOF diff --git a/src/classification/disconnect/add.sh b/src/classification/disconnect/add.sh index debdcfd..851307e 100755 --- a/src/classification/disconnect/add.sh +++ b/src/classification/disconnect/add.sh @@ -35,13 +35,8 @@ def deleteEmptyArrayKey(key): def matchDisconnect: # Match the domain to disconnect's list. # If the domain is a subdomain of a domain in disconnect's list, include it too. - . as $domain - | ($domain | split(".")) as $domainParts - # Negative range to build the domain from parts from the right. - | [ range((($domainParts | length) * -1); -1) ] - | map( - # Assemble the domain, longest domain combination first. - ($domainParts[.:] | join(".")) as $subdomain + map( + . as $subdomain | if $disconnect | has($subdomain) then ( # Inject the matched service domain into the returned object. @@ -59,7 +54,7 @@ def matchDisconnect: def mangle: .blocks += ({ - disconnect: .url.domain | matchDisconnect + disconnect: .url.domain.parts | matchDisconnect } | deleteEmptyArrayKey("disconnect")) | deleteNullKey("blocks"); diff --git a/src/extract/request/expand-parts.sh b/src/extract/request/expand-parts.sh index 43211d2..55ddde1 100755 --- a/src/extract/request/expand-parts.sh +++ b/src/extract/request/expand-parts.sh @@ -2,6 +2,24 @@ set -e read -d '' expandParts <<-'EOF' || true +def splitDomainToPartsArray: + split(".") as $domainParts + # Negative range to build the domain from parts from the right. + | [ range((($domainParts | length) * -1); 0) ] + | map( + # Assemble the domain, longest domain combination first. + $domainParts[.:] | join(".") + ); + +def splitDomainToParts: + . as $domain + | splitDomainToPartsArray as $domainParts + | { + original: $domain, + parts: $domainParts, + tld: $domainParts[-1:][0] + }; + def splitUrlToParts: split("://") as $protocolParts | if ($protocolParts | length) == 1 then @@ -12,18 +30,10 @@ def splitUrlToParts: { original: ., protocol: $protocolParts[0], - domain: ($protocolParts[1] | split("/")[0]) + domain: ($protocolParts[1] | split("/")[0] | splitDomainToParts) } end; -def classifyUrl(origin): - origin as $origin - | { - isSameDomain: (.domain == $origin.domain), - isSubdomain: ((.domain // "") | endswith("." + $origin.domain)), - isSecure: (.protocol == "https") - }; - def trim(str): str as $str | ltrimstr($str) | rtrimstr($str); @@ -52,9 +62,8 @@ def splitMime: } | deleteNullKeys; -def mangle(origin): - origin as $origin - | (.url | splitUrlToParts) as $urlParts +def mangle: + (.url | splitUrlToParts) as $urlParts | { url: $urlParts, status: .status, @@ -64,10 +73,9 @@ def mangle(origin): } | deleteNullKeys; -(.origin.url | splitUrlToParts) as $origin -| { - origin: .origin | mangle($origin), - requestedUrls: .requestedUrls | map(mangle(($origin))) +{ + origin: .origin | mangle, + requestedUrls: .requestedUrls | map(mangle) } EOF