Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Added client check failure handling

  • Loading branch information...
commit c13d88587d73f1cd470d307194242e75e0cb2209 1 parent beb4e17
@desbma desbma authored
Showing with 21 additions and 5 deletions.
  1. +13 −5 ddc_client.py
  2. +4 −0 ddc_process.py
  3. +4 −0 ddc_server.py
View
18 ddc_client.py
@@ -71,18 +71,26 @@ def start(self,):
# check domains
logging.getLogger().info("Got %d domains to check from server" % (domain_count) )
- domains_state = [ False for i in range(domain_count) ]
+ spam_domain_indexes = set()
+ failed_domain_indexes = set()
for (i, xml_domain) in enumerate(xml_domains):
domain = xml_domain.get("name")
logging.getLogger().debug("Checking domain '%s'" % (domain) )
- domains_state[i] = ddc_process.is_spam(domain)
- # TODO should add a special XML attribute for when a domain check fails (network, etc.)
+ try:
+ if ddc_process.is_spam(domain):
+ spam_domain_indexes.add(i)
+ except ddc_process.FailedAnalysis:
+ failed_domain_indexes.add(i)
# prepare POST request content
xml_root = xml.etree.ElementTree.Element("ddc")
xml_domain_list = xml_response.find("domainlist") # reuse the previous XML domain list
- for (xml_domain, is_spam) in zip(xml_domain_list.iterfind("domain"),domains_state):
- xml_domain.set("spam",str(int(is_spam)))
+ for (i, xml_domain) in enumerate(xml_domain_list.iterfind("domain")):
+ if i in failed_domain_indexes:
+ xml_domain.set("failed","1")
+ else:
+ is_spam = (i in spam_domain_indexes)
+ xml_domain.set("spam",str(int(is_spam)))
xml_root.append(xml_domain_list)
# send POST request
View
4 ddc_process.py
@@ -7,6 +7,10 @@
VERSION = 1
+class FailedAnalysis(Exception):
+ pass
+
+
def is_spam(domain):
# returns dummy result, but consistent for a domain
hasher = hashlib.md5()
View
4 ddc_server.py
@@ -248,6 +248,10 @@ def do_POST(self):
# read domain analysis results
for xml_domain in xml_post_data.iterfind("domainlist/domain"):
domain = xml_domain.get("name")
+ if xml_domain.get("failed") == "1":
+ logging.getLogger().warning("Client failed to check domain '%s'" % (domain) )
+ # TODO exclude domain if too many clients have failed to check it?
+ continue
logging.getLogger().debug("Got client analysis for domain '%s'" % (domain) )
is_spam = (xml_domain.get("spam") == "1")
if domain in DistributedCrawlerServer.checked_domains:
Please sign in to comment.
Something went wrong with that request. Please try again.