Skip to content

Commit

Permalink
Introduction of the --no-special|-ns argument and logic.
Browse files Browse the repository at this point in the history
Indeed, before this patch the end-user could not disable the usage of the SPECIAL rule.

This patch fixes #20
  • Loading branch information
funilrys committed Jan 22, 2019
1 parent 2bfe7e9 commit 7a5b64f
Show file tree
Hide file tree
Showing 6 changed files with 173 additions and 118 deletions.
2 changes: 2 additions & 0 deletions .PyFunceble_production.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ local: False
logs: True
# Enable / Disable the generation of any file(s).
no_files: False
# Enable / Disable the usage of the SPECIAL rule(s).
no_special: False
# Enable / Disable the usage of whois in the tests.
no_whois: False
# Enable / Disable the URL/domain mining.
Expand Down
19 changes: 17 additions & 2 deletions PyFunceble/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
# We set our project name.
NAME = "PyFunceble"
# We set out project version.
VERSION = "1.8.0.dev (Blue Bontebok: Maggot)"
VERSION = "1.9.0.dev (Blue Bontebok: Maggot)"

if "PYFUNCEBLE_OUTPUT_DIR" in environ: # pragma: no cover
# We handle the case that the `PYFUNCEBLE_OUTPUT_DIR` environment variable is set.
Expand Down Expand Up @@ -771,6 +771,18 @@ def _command_line(): # pragma: no cover pylint: disable=too-many-branches,too-m
),
)

PARSER.add_argument(
"-ns",
"--no-special",
action="store_true",
help="Switch the value of the usage of the SPECIAL rules. %s"
% (
CURRENT_VALUE_FORMAT
+ repr(CONFIGURATION["no_special"])
+ Style.RESET_ALL
),
)

PARSER.add_argument(
"-nu",
"--no-unified",
Expand All @@ -779,7 +791,7 @@ def _command_line(): # pragma: no cover pylint: disable=too-many-branches,too-m
"under the output directory. %s"
% (
CURRENT_VALUE_FORMAT
+ repr(not CONFIGURATION["unified"])
+ repr(CONFIGURATION["unified"])
+ Style.RESET_ALL
),
)
Expand Down Expand Up @@ -1087,6 +1099,9 @@ def _command_line(): # pragma: no cover pylint: disable=too-many-branches,too-m
if ARGS.no_logs:
CONFIGURATION.update({"logs": Core.switch("logs")})

if ARGS.no_special:
CONFIGURATION.update({"no_special": Core.switch("no_special")})

if ARGS.no_unified:
CONFIGURATION.update({"unified": Core.switch("unified")})

Expand Down
214 changes: 115 additions & 99 deletions PyFunceble/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,50 +517,97 @@ def _special_blogspot(self):
Handle the blogspot SPECIAL case.
"""

# We initiate a variable with a regex which will match all blogspot domains.
regex_blogspot = ".blogspot."
if not PyFunceble.CONFIGURATION["no_special"]:
# We can run/check the special rule.

# We initiate a list of elements in the HTML which will tell us more about
# the status of the domain.
regex_blogger = ["create-blog.g?", "87065", "doesn’t exist"]
# We initiate a variable with a regex which will match all blogspot domains.
regex_blogspot = ".blogspot."

if PyFunceble.INTERN["to_test_type"] == "domain":
# The element we are testing is a domain.
# We initiate a list of elements in the HTML which will tell us more about
# the status of the domain.
regex_blogger = ["create-blog.g?", "87065", "doesn’t exist"]

# We construct the url to get.
url_to_get = "http://%s" % self.tested
elif PyFunceble.INTERN["to_test_type"] == "url":
# The element we are testing is a URL.
if PyFunceble.INTERN["to_test_type"] == "domain":
# The element we are testing is a domain.

# We construct the url to get.
url_to_get = self.tested
else:
raise Exception("Unknow test type.")
# We construct the url to get.
url_to_get = "http://%s" % self.tested
elif PyFunceble.INTERN["to_test_type"] == "url":
# The element we are testing is a URL.

if Regex(self.tested, regex_blogspot, return_data=False, escape=True).match():
# The element we are testing is a blogspot subdomain.
# We construct the url to get.
url_to_get = self.tested
else:
raise Exception("Unknow test type.")

if Regex(
self.tested, regex_blogspot, return_data=False, escape=True
).match():
# The element we are testing is a blogspot subdomain.

# We get the HTML of the home page.
blogger_content_request = requests.get(url_to_get, headers=self.headers)

for regx in regex_blogger:
# We loop through the list of regex to match.

if (
regx in blogger_content_request.text
or Regex(
blogger_content_request.text,
regx,
return_data=False,
escape=False,
).match()
):
# The content match the currently read regex.

# We update the source.
self.source = "SPECIAL"

# We update the domain status.
self.domain_status = PyFunceble.STATUS["official"]["down"]

# We update the output file.
self.output = (
self.output_parent_dir
+ PyFunceble.OUTPUTS["splited"]["directory"]
+ self.domain_status
)

# And we break the loop as we matched something.
break

# We get the HTML of the home page.
blogger_content_request = requests.get(url_to_get, headers=self.headers)
def _special_wordpress_com(self):
"""
Handle the wordpress.com special case.
"""

for regx in regex_blogger:
# We loop through the list of regex to match.
if not PyFunceble.CONFIGURATION["no_special"]:
# We can run/check the special rule.

if (
regx in blogger_content_request.text
or Regex(
blogger_content_request.text,
regx,
return_data=False,
escape=False,
).match()
):
# The content match the currently read regex.
# We initiate the domain to match.
wordpress_com = ".wordpress.com"

# We initiate a marker which must be present in the HTML
# for the domain to be considered as inactive.
does_not_exist = "doesn’t exist"

if self.tested.endswith(wordpress_com):
# The currently tested element ends with wordpress.com.

# We get the content of the page.
wordpress_com_content = requests.get(
"http://%s:80" % self.tested, headers=self.headers
)

if does_not_exist in wordpress_com_content.text:
# The marker is into the page content.

# We update the source.
self.source = "SPECIAL"

# We update the domain status.
# We update the status.
self.domain_status = PyFunceble.STATUS["official"]["down"]

# We update the output file.
Expand All @@ -570,45 +617,6 @@ def _special_blogspot(self):
+ self.domain_status
)

# And we break the loop as we matched something.
break

def _special_wordpress_com(self):
"""
Handle the wordpress.com special case.
"""

# We initiate the domain to match.
wordpress_com = ".wordpress.com"

# We initiate a marker which must be present in the HTML
# for the domain to be considered as inactive.
does_not_exist = "doesn’t exist"

if self.tested.endswith(wordpress_com):
# The currently tested element ends with wordpress.com.

# We get the content of the page.
wordpress_com_content = requests.get(
"http://%s:80" % self.tested, headers=self.headers
)

if does_not_exist in wordpress_com_content.text:
# The marker is into the page content.

# We update the source.
self.source = "SPECIAL"

# We update the status.
self.domain_status = PyFunceble.STATUS["official"]["down"]

# We update the output file.
self.output = (
self.output_parent_dir
+ PyFunceble.OUTPUTS["splited"]["directory"]
+ self.domain_status
)

def up_status_file(self):
"""
Logic behind the up status when generating the status file.
Expand All @@ -633,38 +641,41 @@ def up_status_file(self):
# We generate the analytics files.
self.analytic_file("potentially_down", self.domain_status)

# We initiate a list of domains which are actually
# down if they return, for example, 404 as status code.
domain_to_match = [
".canalblog.com",
".doubleclick.net",
".liveadvert.com",
".skyrock.com",
".tumblr.com",
]
if not PyFunceble.CONFIGURATION["no_special"]:
# We can run/check the special rule.

# We initiate a list of domains which are actually
# down if they return, for example, 404 as status code.
domain_to_match = [
".canalblog.com",
".doubleclick.net",
".liveadvert.com",
".skyrock.com",
".tumblr.com",
]

for domain_to_handle in domain_to_match:
# We loop through the list of domains to handle.
for domain_to_handle in domain_to_match:
# We loop through the list of domains to handle.

if self.tested.endswith(domain_to_handle):
# The currently tested domain ends with
# the currently read domain to handle.
if self.tested.endswith(domain_to_handle):
# The currently tested domain ends with
# the currently read domain to handle.

# We update the source.
self.source = "SPECIAL"
# We update the source.
self.source = "SPECIAL"

# We update the status.
self.domain_status = PyFunceble.STATUS["official"]["down"]
# We update the status.
self.domain_status = PyFunceble.STATUS["official"]["down"]

# We update the output file.
self.output = (
self.output_parent_dir
+ PyFunceble.OUTPUTS["splited"]["directory"]
+ self.domain_status
)
# We update the output file.
self.output = (
self.output_parent_dir
+ PyFunceble.OUTPUTS["splited"]["directory"]
+ self.domain_status
)

# We check again the special blogspot case.
self._special_blogspot()
# We check again the special blogspot case.
self._special_blogspot()
elif (
PyFunceble.HTTP_CODE["active"]
and PyFunceble.INTERN["http_code"]
Expand Down Expand Up @@ -754,8 +765,13 @@ def down_status_file(self):
# We generate the analytic file(s).
self.analytic_file("potentially_up", self.domain_status)

if Check(self.tested).is_ip_range():
# The element we are currently testing is an IPv4 with range.
if (
not PyFunceble.CONFIGURATION["no_special"]
and Check(self.tested).is_ip_range()
):
# * We can run/check the special rule.
# and
# * The element we are currently testing is an IPv4 with range.

# We update the source.
self.source = "SPECIAL"
Expand Down
9 changes: 9 additions & 0 deletions docs/configuration/core.rst
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,15 @@

**Description:** Enable / Disable the generation of any file(s).

:code:`no_special`
------------------

**Type:** :code:`boolean`

**Default value:** :code:`False`

**Description:** Enable / Disable the usage of the SPECIAL rules - which are described in the source column section.

:code:`no_whois`
----------------

Expand Down

0 comments on commit 7a5b64f

Please sign in to comment.