Commit fad959e: "removed whitespaces"

jmg committed Nov 14, 2011
1 parent a14ee46

Showing 27 changed files with 236 additions and 237 deletions.

crawley/crawlers/base.py (2 additions, 2 deletions)

@@ -113,8 +113,8 @@ def _initialize_scrapers(self):
        """
        Instanciates all the scraper classes
        """
        self.scrapers = [scraper_class(settings=self.settings) for scraper_class in self.scrapers]

    def _make_request(self, url, data=None):
        """

crawley/crawlers/fast.py (2 additions, 2 deletions)

@@ -2,8 +2,8 @@
from crawley.http.managers import FastRequestManager

class FastCrawler(BaseCrawler):

    def __init__(self, *args, **kwargs):

        BaseCrawler.__init__(self, *args, **kwargs)
        self.request_manager = FastRequestManager()

crawley/crawlers/offline.py (16 additions, 16 deletions)

@@ -4,40 +4,40 @@
from StringIO import StringIO

class OffLineCrawler(BaseCrawler):

    def __init__(self, *args, **kwargs):

        BaseCrawler.__init__(self, *args, **kwargs)

    def _get_response(self, url, data=None):

        response = BaseCrawler._get_response(self, url, data)

        fixer = HTMLFixer(self._url_regex, url, response.raw_html)
        html = fixer.get_fixed_html()

        return html


class HTMLFixer(object):

    def __init__(self, url_regex, url, html):

        self._url_regex = url_regex
        self.url = url
        self.html_tree = XPathExtractor().get_object(html)

    def get_fixed_html(self):

        self._fix_tags("link", "href")
        self._fix_tags("img", "src")

        return etree.tostring(self.html_tree.getroot(), pretty_print=True, method="html")

    def _fix_tags(self, tag, attrib):

        tags = self.html_tree.xpath("//%s" % tag)

        for tag in tags:
            if not self._url_regex.match(tag.attrib[attrib]):
                tag.attrib[attrib] = "%s/%s" % (self.url, tag.attrib[attrib])
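
What _fix_tags accomplishes is easier to see on a toy document. Here is a minimal sketch of the same rewriting using lxml directly, with an invented base URL and regex (crawley's XPathExtractor wrapper is bypassed):

    import re
    from lxml import etree

    url_regex = re.compile(r"^https?://")   # absolute URLs are left alone
    base_url = "http://example.com"         # hypothetical page URL

    tree = etree.HTML('<html><body><img src="logo.png"/></body></html>')
    for img in tree.xpath("//img"):
        if not url_regex.match(img.attrib["src"]):
            # relative src is rewritten to http://example.com/logo.png
            img.attrib["src"] = "%s/%s" % (base_url, img.attrib["src"])

    print etree.tostring(tree, pretty_print=True, method="html")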

crawley/exceptions.py (10 additions, 10 deletions)

@@ -6,38 +6,38 @@ class AuthenticationError(Exception):
    """
    Raised when a login error occurs
    """

    def __init__(self, *args, **kwargs):

        Exception.__init__(self, *args, **kwargs)


class TemplateSyntaxError(Exception):
    """
    DSL Template sintax error
    """

    def __init__(self, line=0, *args, **kwargs):

        self.line = line
        Exception.__init__(self, *args, **kwargs)


class ScraperCantParseError(Exception):
    """
    Raised when a scraper can't parse an html page
    """

    def __init__(self, *args, **kwargs):

        Exception.__init__(self, *args, **kwargs)


class InvalidProjectError(Exception):
    """
    Raised when the user opens a invalid directory with the browser
    """

    def __init__(self, *args, **kwargs):

        Exception.__init__(self, *args, **kwargs)
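
Of these, only TemplateSyntaxError carries extra state: the template line where the DSL parser failed. A brief usage sketch (the line number and message are invented):

    try:
        raise TemplateSyntaxError(7, "unexpected token")
    except TemplateSyntaxError, e:
        print "syntax error at line %d: %s" % (e.line, e)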

crawley/http/cookies.py (14 additions, 14 deletions)

@@ -2,38 +2,38 @@
import urllib2
import cookielib
import tempfile

class CookieHandler(urllib2.HTTPCookieProcessor):
    """
    Cookie jar wrapper for save and load cookie from a file
    """

    COOKIES_FILE = "crawley_cookies"

    def _make_temp_file(self):

        tmp = tempfile.gettempdir()
        self.cookie_file = os.path.join(tmp, self.COOKIES_FILE)

    def __init__(self, *args, **kwargs):

        self._make_temp_file()

        self._jar = cookielib.LWPCookieJar(self.cookie_file)
        urllib2.HTTPCookieProcessor.__init__(self, self._jar, *args, **kwargs)

    def load_cookies(self):
        """
        Load cookies from the file
        """

        if os.path.isfile(self.cookie_file):
            self._jar.load()

    def save_cookies(self):
        """
        Save cookies if the jar is not empty
        """

        if self._jar is not None:
            self._jar.save()
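
A short round-trip sketch of how this handler is meant to be used, assuming it is installed into an opener the way crawley's request managers do (the URL is a placeholder):

    import urllib2

    handler = CookieHandler()
    opener = urllib2.build_opener(handler)

    opener.open("http://example.com")   # any Set-Cookie headers land in the jar
    handler.save_cookies()              # written to e.g. /tmp/crawley_cookies

    # later, possibly in another process:
    handler2 = CookieHandler()
    handler2.load_cookies()             # jar repopulated from the same temp file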

crawley/http/managers.py (9 additions, 9 deletions)

@@ -42,25 +42,25 @@ def __init__(self, settings=None, delay=None, deviation=None):
        self.delay = delay
        self.deviation = deviation
        self.settings = settings

        self._install_opener()

    def _install_opener(self):

        if has_valid_attr(self.settings, 'PROXY_HOST') and has_valid_attr(self.settings, 'PROXY_PORT'):

            proxy_info = { #proxy information
                'user' : getattr(self.settings, 'PROXY_USER', ''),
                'pass' : getattr(self.settings, 'PROXY_PASS', ''),
                'host' : getattr(self.settings, 'PROXY_HOST', ''), #localhost
                'port' : getattr(self.settings, 'PROXY_PORT', 80)
            }

            # build a new opener that uses a proxy requiring authorization
            proxy = urllib2.ProxyHandler({"http" : "http://%(user)s:%(pass)s@%(host)s:%(port)d" % proxy_info})
            self.opener = urllib2.build_opener(proxy, self.cookie_handler)

        else:
            self.opener = urllib2.build_opener(self.cookie_handler)

    def _get_request(self, url):
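
The proxy branch above boils down to %-formatting proxy_info into a single authenticated proxy URL. The same construction in isolation, with invented credentials:

    import urllib2

    proxy_info = {'user': 'bob', 'pass': 'secret', 'host': '127.0.0.1', 'port': 8080}

    # expands to "http://bob:secret@127.0.0.1:8080"
    proxy = urllib2.ProxyHandler({"http": "http://%(user)s:%(pass)s@%(host)s:%(port)d" % proxy_info})
    opener = urllib2.build_opener(proxy)
    # opener.open(...) now routes plain-http requests through the proxy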

crawley/http/request.py (6 additions, 6 deletions)

@@ -32,17 +32,17 @@ def get_response(self, data=None, delay_factor=1):
        Returns the response object from a request.
        Cookies are supported via a CookieHandler object
        """

        """The proxy settings is used as the following dictionary"""

        self._normalize_url()

        request = urllib2.Request(self.url, data, self.headers)

        args = {}
        if config.REQUEST_TIMEOUT is not None:
            args["timeout"] = config.REQUEST_TIMEOUT

        response = self.opener.open(request, **args)
        self.cookie_handler.save_cookies()

@@ -68,14 +68,14 @@ def __init__(self, delay=0, deviation=0, **kwargs):
        deviation = deviation * FACTOR
        randomize = random.randint(-deviation, deviation) / FACTOR

        self.delay = delay + randomize
        Request.__init__(self, **kwargs)

    def get_response(self, data=None, delay_factor=1):
        """
        Waits [delay] miliseconds and then make the request
        """

        delay = self.delay * delay_factor
        time.sleep(delay)
        return Request.get_response(self, data)
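
The FACTOR scaling in DelayedRequest exists because random.randint wants integer bounds while the deviation is naturally fractional. A sketch of just that jitter computation; FACTOR's actual value lives in crawley, 100 is assumed here:

    import random

    FACTOR = 100                    # assumed; the real constant is defined in crawley
    delay, deviation = 2.0, 0.5     # base delay and jitter, in seconds

    deviation = int(deviation * FACTOR)   # 50
    # float() avoids Python 2 integer division truncating the result
    randomize = random.randint(-deviation, deviation) / float(FACTOR)
    effective_delay = delay + randomize   # somewhere in [1.5, 2.5]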

crawley/http/response.py (2 additions, 2 deletions)

@@ -11,8 +11,8 @@ def __init__(self, raw_html=None, extracted_html=None, url=None, response=None):

        self.raw_html = raw_html
        self.html = extracted_html
        self.url = url

        if response is not None:
            self.headers = response.headers
            self.code = response.getcode()

crawley/manager/__init__.py (6 additions, 6 deletions)

@@ -7,20 +7,20 @@ def run_cmd(args):
    """
    Runs a crawley's command
    """

    if len(args) <= 1:
        exit_with_error("Subcommand not specified")

    cmd_name = args[1]
    cmd_args = args[2:]

    cmd = commands[cmd_name](cmd_args)
    cmd.checked_execute()


def manage():
    """
    Called when using crawley command from cmd line
    """

    run_cmd(sys.argv)

crawley/manager/commands/__init__.py (3 additions, 3 deletions)

@@ -10,14 +10,14 @@
from syncdb import SyncDbCommand

class CommandsDict(dict):

    def __getitem__(self, key):

        if key in self:
            return dict.__getitem__(self, key)
        else:
            exit_with_error("[%s] Subcommand not valid" % (key))


commands = CommandsDict()

crawley/manager/commands/browser.py (7 additions, 7 deletions)

@@ -9,17 +9,17 @@ class BrowserCommand(BaseCommand):
    """
    Runs a browser
    """

    name = "browser"

    def validations(self):

        return [(len(self.args) >= 1, "No given url")]

    def execute(self):

        app = QtGui.QApplication(sys.argv)
        main = Browser(self.args[0])
        main.show()
        sys.exit(app.exec_())
