Skip to content
This repository has been archived by the owner on Jun 1, 2021. It is now read-only.

Commit

Permalink
Split domains to parts
Browse files Browse the repository at this point in the history
  • Loading branch information
joelpurra committed Jun 27, 2014
1 parent a4034e7 commit ab91940
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 29 deletions.
10 changes: 5 additions & 5 deletions src/classification/basic.sh
Expand Up @@ -16,22 +16,22 @@ def isSecure:
def classifyUrl(origin):
origin as $origin
| {
isSameDomain: (.domain | isSameDomain($origin.domain)),
isSubdomain: (.domain | isSubdomain($origin.domain)),
# TODO: work on .domain.parts, not .domain.original?
isSameDomain: (.domain.original | isSameDomain($origin.domain.original)),
isSubdomain: (.domain.original | isSubdomain($origin.domain.original)),
isSecure: (.protocol | isSecure)
};
def mangle(origin):
origin as $origin
| .url as $urlParts
| . + {
classification : $urlParts | classifyUrl($origin)
classification : .url | classifyUrl($origin)
};
.origin.url as $origin
| {
origin: .origin | mangle($origin),
requestedUrls: .requestedUrls | map(mangle(($origin)))
requestedUrls: .requestedUrls | map(mangle($origin))
}
EOF

Expand Down
11 changes: 3 additions & 8 deletions src/classification/disconnect/add.sh
Expand Up @@ -35,13 +35,8 @@ def deleteEmptyArrayKey(key):
def matchDisconnect:
# Match the domain to disconnect's list.
# If the domain is a subdomain of a domain in disconnect's list, include it too.
. as $domain
| ($domain | split(".")) as $domainParts
# Negative range to build the domain from parts from the right.
| [ range((($domainParts | length) * -1); -1) ]
| map(
# Assemble the domain, longest domain combination first.
($domainParts[.:] | join(".")) as $subdomain
map(
. as $subdomain
| if $disconnect | has($subdomain) then
(
# Inject the matched service domain into the returned object.
Expand All @@ -59,7 +54,7 @@ def matchDisconnect:
def mangle:
.blocks += ({
disconnect: .url.domain | matchDisconnect
disconnect: .url.domain.parts | matchDisconnect
}
| deleteEmptyArrayKey("disconnect"))
| deleteNullKey("blocks");
Expand Down
40 changes: 24 additions & 16 deletions src/extract/request/expand-parts.sh
Expand Up @@ -2,6 +2,24 @@
set -e

read -d '' expandParts <<-'EOF' || true
def splitDomainToPartsArray:
    # Input: a domain string. Output: an array of dot-joined suffixes of its
    # labels, ordered from the longest (the full domain) to the shortest
    # (the last label alone).
    split(".") as $labels
    | [
        # Walk the start offset from the leftmost label to the rightmost one;
        # each slice keeps everything from that label to the end.
        range(0; $labels | length)
        | $labels[.:]
        | join(".")
    ];
def splitDomainToParts:
    # Input: a domain string. Output: an object holding the untouched input
    # (original), the array produced by splitDomainToPartsArray (parts), and
    # the last entry of that array (tld).
    splitDomainToPartsArray as $suffixes
    | {
        # The variable binding above leaves the input unchanged, so "." is
        # still the domain string here.
        original: .,
        parts: $suffixes,
        # Slice off the final element, then unwrap it; yields null when the
        # parts array is empty.
        tld: ($suffixes[-1:] | .[0])
    };
def splitUrlToParts:
split("://") as $protocolParts
| if ($protocolParts | length) == 1 then
Expand All @@ -12,18 +30,10 @@ def splitUrlToParts:
{
original: .,
protocol: $protocolParts[0],
domain: ($protocolParts[1] | split("/")[0])
domain: ($protocolParts[1] | split("/")[0] | splitDomainToParts)
}
end;
def classifyUrl(origin):
origin as $origin
| {
isSameDomain: (.domain == $origin.domain),
isSubdomain: ((.domain // "") | endswith("." + $origin.domain)),
isSecure: (.protocol == "https")
};
def trim(str):
    # Strip the given string from the start of the input (ltrimstr) and then
    # from the end of the result (rtrimstr).
    # The argument is bound to a variable first, so it is evaluated against
    # the original input and the same value is used for both steps.
    str as $affix
    | ltrimstr($affix)
    | rtrimstr($affix);
Expand Down Expand Up @@ -52,9 +62,8 @@ def splitMime:
}
| deleteNullKeys;
def mangle(origin):
origin as $origin
| (.url | splitUrlToParts) as $urlParts
def mangle:
(.url | splitUrlToParts) as $urlParts
| {
url: $urlParts,
status: .status,
Expand All @@ -64,10 +73,9 @@ def mangle(origin):
}
| deleteNullKeys;
(.origin.url | splitUrlToParts) as $origin
| {
origin: .origin | mangle($origin),
requestedUrls: .requestedUrls | map(mangle(($origin)))
{
origin: .origin | mangle,
requestedUrls: .requestedUrls | map(mangle)
}
EOF

Expand Down

0 comments on commit ab91940

Please sign in to comment.