Skip to content
This repository has been archived by the owner on Jan 10, 2019. It is now read-only.

Commit

Permalink
Added client check failure handling
Browse files Browse the repository at this point in the history
  • Loading branch information
desbma committed Feb 16, 2012
1 parent beb4e17 commit c13d885
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 5 deletions.
18 changes: 13 additions & 5 deletions ddc_client.py
Expand Up @@ -71,18 +71,26 @@ def start(self,):

# check domains
logging.getLogger().info("Got %d domains to check from server" % (domain_count) )
domains_state = [ False for i in range(domain_count) ]
spam_domain_indexes = set()
failed_domain_indexes = set()
for (i, xml_domain) in enumerate(xml_domains):
domain = xml_domain.get("name")
logging.getLogger().debug("Checking domain '%s'" % (domain) )
domains_state[i] = ddc_process.is_spam(domain)
# TODO should add a special XML attribute for when a domain check fails (network, etc.)
try:
if ddc_process.is_spam(domain):
spam_domain_indexes.add(i)
except ddc_process.FailedAnalysis:
failed_domain_indexes.add(i)

# prepare POST request content
xml_root = xml.etree.ElementTree.Element("ddc")
xml_domain_list = xml_response.find("domainlist") # reuse the previous XML domain list
for (xml_domain, is_spam) in zip(xml_domain_list.iterfind("domain"),domains_state):
xml_domain.set("spam",str(int(is_spam)))
for (i, xml_domain) in enumerate(xml_domain_list.iterfind("domain")):
if i in failed_domain_indexes:
xml_domain.set("failed","1")
else:
is_spam = (i in spam_domain_indexes)
xml_domain.set("spam",str(int(is_spam)))
xml_root.append(xml_domain_list)

# send POST request
Expand Down
4 changes: 4 additions & 0 deletions ddc_process.py
Expand Up @@ -7,6 +7,10 @@
VERSION = 1


class FailedAnalysis(Exception):
    """Signal that the spam analysis of a domain could not be completed.

    Callers of ``is_spam`` catch this exception to tell "the check itself
    failed" apart from a regular spam / not-spam verdict, so that a failed
    domain can be reported as such instead of being given a result.
    """


def is_spam(domain):
# returns dummy result, but consistent for a domain
hasher = hashlib.md5()
Expand Down
4 changes: 4 additions & 0 deletions ddc_server.py
Expand Up @@ -248,6 +248,10 @@ def do_POST(self):
# read domain analysis results
for xml_domain in xml_post_data.iterfind("domainlist/domain"):
domain = xml_domain.get("name")
if xml_domain.get("failed") == "1":
logging.getLogger().warning("Client failed to check domain '%s'" % (domain) )
# TODO exclude domain if too many clients have failed to check it?
continue
logging.getLogger().debug("Got client analysis for domain '%s'" % (domain) )
is_spam = (xml_domain.get("spam") == "1")
if domain in DistributedCrawlerServer.checked_domains:
Expand Down

0 comments on commit c13d885

Please sign in to comment.