Permalink
Browse files

Initial client/server HTTP protocol

  • Loading branch information...
1 parent 4d6a87a commit c08c4e0c00d93a7b99f388554fb0f93a48c8aee5 @desbma desbma committed Jan 8, 2012
Showing with 145 additions and 0 deletions.
  1. +1 −0 .gitignore
  2. +60 −0 ddc_client.py
  3. +6 −0 ddc_process.py
  4. +60 −0 ddc_server.py
  5. +18 −0 test.sh
View
@@ -0,0 +1 @@
+/__pycache__
View
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import argparse, logging, urllib.parse, urllib.request
+import ddc_process
+
+
class DistributedCrawlerClient():

  """Client side of the distributed crawling HTTP protocol.

  Fetches work (domains to check) from a DistributedCrawlerServer
  through plain HTTP GET requests on its /rest endpoint.
  """

  PROTOCOL_VERSION = 1           # version of the client/server exchange protocol
  PROCESS_COMPONENT_VERSION = 1  # version of the domain-checking component (ddc_process)

  def __init__(self, server, port):
    # base REST endpoint of the server to talk to
    self.base_url = "http://%s:%d/rest" % (server, port)

  def start(self):
    """Ask the server for domains to check and print the raw response."""
    # see ddc_server.py for params description
    response = self.request({'action': 'getdomains',
                             'version': str(self.PROTOCOL_VERSION),
                             'pc_version': str(self.PROCESS_COMPONENT_VERSION)}).decode("utf-8")
    print(response)

  def request(self, params):
    """Send a GET request with the given query parameters.

    params: dict of query-string parameters.
    Returns the raw response body as bytes.
    """
    # construct url
    url = "%s?%s" % (self.base_url, urllib.parse.urlencode(params))
    # send request
    logging.getLogger().debug("Fetching '%s' ..." % (url))
    # the context manager guarantees the connection is closed even if
    # read() raises (the original leaked the response object in that case)
    with urllib.request.urlopen(url) as response:
      return response.read()
+
+
if __name__ == '__main__':

  # make the root logger verbose so debug traces of the HTTP exchanges show up
  logging.getLogger().setLevel(logging.DEBUG)

  # command-line interface
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument("-s", "--server",
                          action="store",
                          required=True,
                          dest="server",
                          help="Server IP or domain to connect to")
  arg_parser.add_argument("-p", "--port",
                          action="store",
                          required=True,
                          type=int,
                          dest="port",
                          help="Network port to use to communicate with server")
  args = arg_parser.parse_args()

  # create the client and start fetching work from the server
  DistributedCrawlerClient(args.server, args.port).start()
View
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+
def is_spam(domain):
  """Return True if *domain* is considered spam.

  Placeholder heuristic: a domain is flagged as spam when its
  whitespace-stripped name has an odd number of characters.
  """
  # bug fix: the original called strip(domain), but strip is not a builtin
  # (NameError at runtime); str.strip is a method of the string itself
  return len(domain.strip()) % 2 > 0
View
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import argparse, http.server, logging, urllib.parse
+
+
class DistributedCrawlerServer(http.server.HTTPServer):

  """HTTP server distributing domain-checking work to DDC clients."""

  PROTOCOL_VERSION = 1  # version of the client/server exchange protocol

  def __init__(self, port, address="127.0.0.1"):
    # generalized: the bind address is now a parameter; the default keeps the
    # original loopback-only behavior, pass "" to listen on all interfaces
    super().__init__((address, port), RequestHandler)

  def start(self):
    """Serve client requests until the process is killed."""
    self.serve_forever()
+
+
class RequestHandler(http.server.BaseHTTPRequestHandler):

  """Handles HTTP GET requests coming from DDC clients."""

  server_version = "DDC Server"  # overrides http.server.BaseHTTPRequestHandler.server_version

  def do_GET(self):
    """Dispatch a GET request according to its URL path."""
    # parse request url
    parsed_url = urllib.parse.urlsplit(self.path)

    if parsed_url.path == "/upgrade":
      # serve file (might short-circuit that part with an Apache/Nginx URL redirection directly to the static content)
      pass
    elif parsed_url.path == "/rest":
      # handle parameters
      params = urllib.parse.parse_qs(parsed_url.query)
      if "action" in params and params["action"] and params["action"][0] == "getdomains":
        # TODO
        pass
      self.send_response(200, "I'm the server")
      # bug fix: without end_headers() the blank line terminating the header
      # section is never sent, so the client may block waiting for it
      self.end_headers()
    else:
      # buggy client, crawler, or someone else we don't care about...
      self.send_error(404)
+
+
if __name__ == "__main__":

  # make the root logger verbose so every request is traced
  logging.getLogger().setLevel(logging.DEBUG)

  # command-line interface
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument("-p", "--port",
                          action="store",
                          required=True,
                          type=int,
                          dest="port",
                          help="Network port to use to communicate with clients")
  args = arg_parser.parse_args()

  # create the server and serve clients forever
  DistributedCrawlerServer(args.port).start()
View
18 test.sh
@@ -0,0 +1,18 @@
#!/bin/bash

# Integration smoke test: start the DDC server, run a client against it,
# then shut the server down after a fixed delay.

# run from the directory containing this script ($0 quoted to survive spaces)
cd "$(dirname -- "$0")"

# start server
./ddc_server.py -p 10001 &
# bug fix: $! is the PID of the last background job; $? (used originally)
# is only the exit status of launching it, so kill would have targeted the
# wrong process
server_pid=$!

# wait a bit to be sure the server is ready
sleep 1s

# start client
./ddc_client.py -s 127.0.0.1 -p 10001 &
client_pid=$!

# kill server after X s
sleep 10s
kill $server_pid

0 comments on commit c08c4e0

Please sign in to comment.