Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FormatURL parsing safelinks #28032

Merged
merged 9 commits into from
Jul 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions Packs/CommonScripts/ReleaseNotes/1_11_99.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@

#### Scripts

##### FormatURL
- Updated the Docker image to: *demisto/python3:3.10.12.63474*.

- Updated the regex for URL wrappers to allow safelinks without a scheme (e.g., without a leading `https://`).
31 changes: 13 additions & 18 deletions Packs/CommonScripts/Scripts/FormatURL/FormatURL.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
import tldextract
import urllib.parse
from CommonServerPython import *
from typing import Match
from re import Match


class URLError(Exception):
pass


class URLType(object):
class URLType:
"""
A class to represent an url and its parts
"""
Expand All @@ -30,7 +30,7 @@ def __str__(self):
f'Path = {self.path}\nQuery = {self.query}\nFragment = {self.fragment}')


class URLCheck(object):
class URLCheck:
"""
This class will build and validate a URL based on "URL Living Standard" (https://url.spec.whatwg.org)
"""
Expand Down Expand Up @@ -268,7 +268,7 @@ def host_check(self):
elif self.modified_url[index] == "]":

if not self.inside_brackets:
if self.check_domain(host) and all([char in self.brackets for char in self.modified_url[index:]]):
if self.check_domain(host) and all(char in self.brackets for char in self.modified_url[index:]):
# Domain is valid with trailing "]" and brackets, the formatter will remove the extra chars
self.done = True
return
Expand All @@ -290,8 +290,7 @@ def host_check(self):
self.inside_brackets = False
break

else:
raise URLError(f"Only IPv6 is allowed within square brackets, not {host}")
raise URLError(f"Only IPv6 is allowed within square brackets, not {host}")

else:
self.output += self.modified_url[index]
Expand Down Expand Up @@ -485,11 +484,7 @@ def check_codepoint_validity(char: str) -> bool:
elif char in url_code_points:
return True

elif unicode_code_points["start"] <= char <= unicode_code_points["end"]:
return True

else:
return False
return unicode_code_points['start'] <= char <= unicode_code_points['end']

def check_domain(self, host: str) -> bool:
"""
Expand Down Expand Up @@ -597,14 +592,15 @@ def remove_leading_chars(self):
self.modified_url = self.modified_url[beginning:end + 1]


class URLFormatter(object):
class URLFormatter:

# URL Security Wrappers
ATP_regex = re.compile('https://.*?\.safelinks\.protection\.outlook\.com/\?url=(.*?)&', re.I)
ATP_regex = re.compile('.*?\.safelinks\.protection\.outlook\.com/\?url=(.*?)&', re.I)
fireeye_regex = re.compile('.*?fireeye[.]com.*?&u=(.*)', re.I)
proofpoint_regex = re.compile('(?i)(?:proofpoint.com/v[1-2]/(?:url\?u=)?(.+?)(?:&amp|&d|$)|'
'https?(?::|%3A)//urldefense[.]\w{2,3}/v3/__(.+?)(?:__;|$))')
trendmicro_regex = re.compile('https://.*?trendmicro\.com(?::443)?/wis/clicktime/.*?/?url==3d(.*?)&', re.I)
trendmicro_regex = re.compile('.*?trendmicro\.com(?::443)?/wis/clicktime/.*?/?url==3d(.*?)&', # disable-secrets-detection
re.I)

# Scheme slash fixer
scheme_fix = re.compile("https?(:[/|\\\]*)")
Expand Down Expand Up @@ -675,8 +671,7 @@ def strip_wrappers(url: str) -> str:
else:
wrapper = False

else:
return url
return url

@staticmethod
def extract_url_proofpoint(url: str) -> str:
Expand All @@ -692,7 +687,7 @@ def extract_url_proofpoint(url: str) -> str:

if url[0]:
# Proofpoint v1 and v2
return urllib.parse.unquote((url[0].replace("-", "%").replace("_", "/")))
return urllib.parse.unquote(url[0].replace("-", "%").replace("_", "/"))

else:
# Proofpoint v3
Expand All @@ -714,7 +709,7 @@ def correct_and_refang_url(url: str) -> str:
url = url.replace("[.]", ".")
url = url.replace("[:]", ":")
lower_url = url.lower()
if lower_url.startswith("hxxp") or lower_url.startswith('meow'):
if lower_url.startswith(('hxxp', 'meow')):
url = re.sub(schemas, "http", url, count=1)

def fix_scheme(match: Match) -> str:
Expand Down
2 changes: 1 addition & 1 deletion Packs/CommonScripts/pack_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "Common Scripts",
"description": "Frequently used scripts pack.",
"support": "xsoar",
"currentVersion": "1.11.98",
"currentVersion": "1.11.99",
"author": "Cortex XSOAR",
"url": "https://www.paloaltonetworks.com/cortex",
"email": "",
Expand Down