Merge branch 'master' of github.com:jmg/crawley

commit 142341b3a1b26d7b6119899f8dfc2e385bf98cdd
2 parents: aa96a2b + 9933cb8
@jmg authored
Showing with 248 additions and 240 deletions.
  1. +5 −4 .gitignore
  2. +8 −0 build_deb.sh
  3. +2 −2 crawley/crawlers/base.py
  4. +2 −2 crawley/crawlers/fast.py
  5. +16 −16 crawley/crawlers/offline.py
  6. +10 −10 crawley/exceptions.py
  7. +14 −14 crawley/http/cookies.py
  8. +9 −9 crawley/http/managers.py
  9. +6 −6 crawley/http/request.py
  10. +2 −2 crawley/http/response.py
  11. +5 −5 crawley/manager/__init__.py
  12. +3 −3 crawley/manager/commands/__init__.py
  13. +7 −7 crawley/manager/commands/browser.py
  14. +15 −15 crawley/manager/commands/command.py
  15. +2 −2 crawley/manager/commands/run.py
  16. +9 −9 crawley/manager/commands/shell.py
  17. +18 −18 crawley/manager/commands/startproject.py
  18. +7 −7 crawley/manager/commands/syncdb.py
  19. +5 −5 crawley/manager/projects/base.py
  20. +22 −22 crawley/persistance/connectors.py
  21. +6 −6 crawley/persistance/databases.py
  22. +18 −18 crawley/simple_parser/compilers.py
  23. +15 −15 crawley/simple_parser/config_parser.py
  24. +28 −28 crawley/simple_parser/parsers.py
  25. +1 −1  crawley/utils/collections/custom_dict.py
  26. +1 −1  crawley/utils/collections/ordered_dict.py
  27. +0 −1  crawley/utils/matchers.py
  28. +8 −8 run_tests.py
  29. +4 −4 setup.py
9 .gitignore
@@ -6,9 +6,10 @@
*.json
*.xml
*.csv
-build
-dist
-docs/build
-env
+build/
+dist/
+docs/build/
+env/
url.html
pip-log.txt
+deb_dist/
8 build_deb.sh
@@ -0,0 +1,8 @@
+NAME=crawley-0.2.1
+
+python setup.py sdist
+cd dist
+py2dsc $NAME.tar.gz
+cd deb_dist/$NAME
+sed -i 's/current/>=2.6/g' debian/control
+dpkg-buildpackage -rfakeroot -uc -us
4 crawley/crawlers/base.py
@@ -123,8 +123,8 @@ def _initialize_scrapers(self):
"""
Instanciates all the scraper classes
"""
-
- self.scrapers = [scraper_class(settings=self.settings) for scraper_class in self.scrapers]
+
+ self.scrapers = [scraper_class(settings=self.settings) for scraper_class in self.scrapers]
def _make_request(self, url, data=None):
"""
4 crawley/crawlers/fast.py
@@ -2,8 +2,8 @@
from crawley.http.managers import FastRequestManager
class FastCrawler(BaseCrawler):
-
+
def __init__(self, *args, **kwargs):
-
+
BaseCrawler.__init__(self, *args, **kwargs)
self.request_manager = FastRequestManager()
32 crawley/crawlers/offline.py
@@ -4,40 +4,40 @@
from StringIO import StringIO
class OffLineCrawler(BaseCrawler):
-
+
def __init__(self, *args, **kwargs):
-
+
BaseCrawler.__init__(self, *args, **kwargs)
-
+
def _get_response(self, url, data=None):
-
+
response = BaseCrawler._get_response(self, url, data)
-
- fixer = HTMLFixer(self._url_regex, url, response.raw_html)
+
+ fixer = HTMLFixer(self._url_regex, url, response.raw_html)
html = fixer.get_fixed_html()
-
+
return html
-
+
class HTMLFixer(object):
-
+
def __init__(self, url_regex, url, html):
-
+
self._url_regex = url_regex
self.url = url
self.html_tree = XPathExtractor().get_object(html)
-
+
def get_fixed_html(self):
-
+
self._fix_tags("link", "href")
self._fix_tags("img", "src")
-
+
return etree.tostring(self.html_tree.getroot(), pretty_print=True, method="html")
-
+
def _fix_tags(self, tag, attrib):
-
+
tags = self.html_tree.xpath("//%s" % tag)
-
+
for tag in tags:
if not self._url_regex.match(tag.attrib[attrib]):
tag.attrib[attrib] = "%s/%s" % (self.url, tag.attrib[attrib])
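
The HTMLFixer above rewrites relative link/img references against the page URL. A minimal standalone sketch of the same idea using lxml directly (XPathExtractor is the project's own wrapper and is not used here; the sample HTML and base URL are invented):

    import re
    from lxml import etree

    url_regex = re.compile(r"^https?://")
    base_url = "http://example.com"
    tree = etree.HTML('<html><body><img src="logo.png"/>'
                      '<img src="http://cdn.example.com/a.png"/></body></html>')

    for tag in tree.xpath("//img"):
        if not url_regex.match(tag.attrib["src"]):
            # prefix relative references with the page url, as _fix_tags does
            tag.attrib["src"] = "%s/%s" % (base_url, tag.attrib["src"])

    print etree.tostring(tree, pretty_print=True, method="html")
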
20 crawley/exceptions.py
@@ -6,9 +6,9 @@ class AuthenticationError(Exception):
"""
Raised when a login error occurs
"""
-
+
def __init__(self, *args, **kwargs):
-
+
Exception.__init__(self, *args, **kwargs)
@@ -16,20 +16,20 @@ class TemplateSyntaxError(Exception):
"""
DSL Template sintax error
"""
-
+
def __init__(self, line=0, *args, **kwargs):
-
+
self.line = line
Exception.__init__(self, *args, **kwargs)
-
-
+
+
class ScraperCantParseError(Exception):
"""
Raised when a scraper can't parse an html page
"""
-
+
def __init__(self, *args, **kwargs):
-
+
Exception.__init__(self, *args, **kwargs)
@@ -37,7 +37,7 @@ class InvalidProjectError(Exception):
"""
Raised when the user opens a invalid directory with the browser
"""
-
+
def __init__(self, *args, **kwargs):
-
+
Exception.__init__(self, *args, **kwargs)
28 crawley/http/cookies.py
@@ -2,38 +2,38 @@
import urllib2
import cookielib
import tempfile
-
+
class CookieHandler(urllib2.HTTPCookieProcessor):
"""
Cookie jar wrapper for save and load cookie from a file
"""
-
+
COOKIES_FILE = "crawley_cookies"
-
+
def _make_temp_file(self):
-
- tmp = tempfile.gettempdir()
+
+ tmp = tempfile.gettempdir()
self.cookie_file = os.path.join(tmp, self.COOKIES_FILE)
-
- def __init__(self, *args, **kwargs):
-
+
+ def __init__(self, *args, **kwargs):
+
self._make_temp_file()
-
+
self._jar = cookielib.LWPCookieJar(self.cookie_file)
urllib2.HTTPCookieProcessor.__init__(self, self._jar, *args, **kwargs)
-
+
def load_cookies(self):
"""
Load cookies from the file
"""
-
+
if os.path.isfile(self.cookie_file):
self._jar.load()
-
+
def save_cookies(self):
"""
Save cookies if the jar is not empty
"""
-
- if self._jar is not None:
+
+ if self._jar is not None:
self._jar.save()
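
CookieHandler persists its jar in a temp file between runs. A minimal standalone sketch of that load/save cycle (Python 2, like the codebase; the file name is a placeholder):

    import os
    import tempfile
    import cookielib

    cookie_file = os.path.join(tempfile.gettempdir(), "crawley_cookies")
    jar = cookielib.LWPCookieJar(cookie_file)

    if os.path.isfile(cookie_file):
        jar.load()    # restore cookies saved by a previous run
    # ... make requests through an opener built with urllib2.HTTPCookieProcessor(jar) ...
    jar.save()        # write cookies back for the next run
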
18 crawley/http/managers.py
@@ -42,25 +42,25 @@ def __init__(self, settings=None, delay=None, deviation=None):
self.delay = delay
self.deviation = deviation
self.settings = settings
-
+
self._install_opener()
-
+
def _install_opener(self):
-
+
if has_valid_attr(self.settings,'PROXY_HOST') and has_valid_attr(self.settings,'PROXY_PORT'):
-
+
proxy_info = { #proxy information
'user' : getattr(self.settings, 'PROXY_USER', ''),
'pass' : getattr(self.settings, 'PROXY_PASS', ''),
'host' : getattr(self.settings, 'PROXY_HOST', ''), #localhost
'port' : getattr(self.settings, 'PROXY_PORT', 80)
- }
-
+ }
+
# build a new opener that uses a proxy requiring authorization
- proxy = urllib2.ProxyHandler({"http" :"http://%(user)s:%(pass)s@%(host)s:%(port)d" % proxy_info})
+ proxy = urllib2.ProxyHandler({"http" :"http://%(user)s:%(pass)s@%(host)s:%(port)d" % proxy_info})
self.opener = urllib2.build_opener(proxy, self.cookie_handler)
-
- else:
+
+ else:
self.opener = urllib2.build_opener(self.cookie_handler)
def _get_request(self, url):
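
The opener built in _install_opener either routes through an authenticated proxy or falls back to a plain cookie-aware opener. A rough standalone sketch of the proxy branch (credentials, host and port are placeholders, not project settings):

    import urllib2
    import cookielib

    cookie_handler = urllib2.HTTPCookieProcessor(cookielib.LWPCookieJar())

    proxy_info = {'user': 'alice', 'pass': 'secret', 'host': 'localhost', 'port': 8080}
    proxy = urllib2.ProxyHandler(
        {"http": "http://%(user)s:%(pass)s@%(host)s:%(port)d" % proxy_info})

    opener = urllib2.build_opener(proxy, cookie_handler)
    # opener.open("http://example.com")  # would route the request through the proxy
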
12 crawley/http/request.py
@@ -32,17 +32,17 @@ def get_response(self, data=None, delay_factor=1):
Returns the response object from a request.
Cookies are supported via a CookieHandler object
"""
-
+
"""The proxy settings is used as the following dictionary"""
-
+
self._normalize_url()
request = urllib2.Request(self.url, data, self.headers)
-
+
args = {}
if config.REQUEST_TIMEOUT is not None:
args["timeout"] = config.REQUEST_TIMEOUT
-
+
response = self.opener.open(request, **args)
self.cookie_handler.save_cookies()
@@ -68,14 +68,14 @@ def __init__(self, delay=0, deviation=0, **kwargs):
deviation = deviation * FACTOR
randomize = random.randint(-deviation, deviation) / FACTOR
- self.delay = delay + randomize
+ self.delay = delay + randomize
Request.__init__(self, **kwargs)
def get_response(self, data=None, delay_factor=1):
"""
Waits [delay] miliseconds and then make the request
"""
-
+
delay = self.delay * delay_factor
time.sleep(delay)
return Request.get_response(self, data)
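
DelayedRequest adds a random jitter to the configured delay before sleeping. A small worked sketch of that arithmetic, assuming a scaling constant of 100 (the project defines its own FACTOR, which may differ):

    import random
    import time

    FACTOR = 100.0            # assumed value; crawley defines its own constant
    delay, deviation = 2.0, 0.5

    jitter = random.randint(int(-deviation * FACTOR), int(deviation * FACTOR)) / FACTOR
    effective_delay = delay + jitter   # uniformly distributed in [1.5, 2.5]
    time.sleep(effective_delay * 1)    # delay_factor of 1, as in get_response
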
4 crawley/http/response.py
@@ -11,8 +11,8 @@ def __init__(self, raw_html=None, extracted_html=None, url=None, response=None):
self.raw_html = raw_html
self.html = extracted_html
- self.url = url
-
+ self.url = url
+
if response is not None:
self.headers = response.headers
self.code = response.getcode()
10 crawley/manager/__init__.py
@@ -7,20 +7,20 @@ def run_cmd(args):
"""
Runs a crawley's command
"""
-
+
if len(args) <= 1:
exit_with_error("Subcommand not specified")
-
+
cmd_name = args[1]
cmd_args = args[2:]
-
+
cmd = commands[cmd_name](cmd_args)
cmd.checked_execute()
-
+
def manage():
"""
Called when using crawley command from cmd line
- """
+ """
run_cmd(sys.argv)
6 crawley/manager/commands/__init__.py
@@ -11,14 +11,14 @@
from browser import BrowserCommand
class CommandsDict(dict):
-
+
def __getitem__(self, key):
-
+
if key in self:
return dict.__getitem__(self, key)
else:
exit_with_error("[%s] Subcommand not valid" % (key))
-
+
commands = CommandsDict()
14 crawley/manager/commands/browser.py
@@ -9,17 +9,17 @@ class BrowserCommand(BaseCommand):
"""
Runs a browser
"""
-
+
name = "browser"
-
+
def validations(self):
-
+
return [(len(self.args) >= 1, "No given url")]
-
- def execute(self):
-
+
+ def execute(self):
+
app = QtGui.QApplication(sys.argv)
main = Browser(self.args[0])
main.show()
sys.exit(app.exec_())
-
+
30 crawley/manager/commands/command.py
@@ -54,24 +54,24 @@ class ProjectCommand(BaseCommand):
"""
A command that requires a settings.py file to run
"""
-
+
def __init__(self, args=None, settings=None):
-
- self.settings = settings
-
+
+ self.settings = settings
+
BaseCommand.__init__(self, args)
def checked_execute(self):
"""
Checks for settings before run
"""
-
+
if self.settings is None:
self._add_options()
self.settings = self._check_for_settings()
else:
sys.path.insert(0, self.settings.PROJECT_ROOT)
-
+
self._check_setttings_errors()
self._check_project_type()
BaseCommand.checked_execute(self)
@@ -103,7 +103,7 @@ def _check_for_settings(self):
settings_file = "settings"
settings = import_user_module(settings_file)
-
+
sys.path.append(settings.PROJECT_ROOT)
return settings
@@ -115,24 +115,24 @@ def _check_setttings_errors(self):
if hasattr(self.settings, 'DATABASE_ENGINE'):
if self.settings.DATABASE_ENGINE == 'sqlite':
self.settings.DATABASE_NAME = fix_file_extension(self.settings.DATABASE_NAME, 'sqlite')
-
+
if hasattr(self.settings, 'JSON_DOCUMENT'):
self.settings.JSON_DOCUMENT = fix_file_extension(self.settings.JSON_DOCUMENT, 'json')
-
+
if hasattr(self.settings, 'XML_DOCUMENT'):
self.settings.XML_DOCUMENT = fix_file_extension(self.settings.XML_DOCUMENT, 'xml')
-
+
def _check_project_type(self):
"""
- Check for the project's type [code based project
+ Check for the project's type [code based project
or dsl templates based project]
"""
-
+
if check_for_file(self.settings, "config.ini") and check_for_file(self.settings, "template.crw"):
self.project_type = TemplateProject()
-
- elif import_user_module("models", exit=False) is not None:
+
+ elif import_user_module("models", exit=False) is not None:
self.project_type = CodeProject()
-
+
else:
exit_with_error("Unrecognized crawley project")
4 crawley/manager/commands/run.py
@@ -13,8 +13,8 @@ class RunCommand(ProjectCommand):
name = "run"
def execute(self):
-
+
self.syncdb = SyncDbCommand(args=self.args, settings=self.settings)
- self.syncdb.checked_execute()
+ self.syncdb.checked_execute()
self.project_type.run(self)
18 crawley/manager/commands/shell.py
@@ -10,25 +10,25 @@ class ShellCommand(BaseCommand):
Shows an url data in a console like the XPathExtractor see it.
So users can interactive scrape the data.
"""
-
+
name = "shell"
-
+
def validations(self):
-
+
return [(len(self.args) >= 1, "No given url")]
-
- def execute(self):
-
+
+ def execute(self):
+
try:
import IPython
except ImportError:
exit_with_error("Please install the ipython console")
-
+
url = self.args[0]
crawler = BaseCrawler()
-
+
response = crawler._get_data(url)
html = XPathExtractor().get_object(response)
-
+
shell = IPython.Shell.IPShellEmbed(argv=[], user_ns={ 'response' : response })
shell()
36 crawley/manager/commands/startproject.py
@@ -6,43 +6,43 @@
class StartProjectCommand(BaseCommand):
"""
- Starts a new crawley project.
-
- Copies the files inside conf/project_template in order
+ Starts a new crawley project.
+
+ Copies the files inside conf/project_template in order
to generate a new project
"""
-
+
name = "startproject"
-
+
def __init__(self, args=None, project_type=None, project_name=None):
-
+
self.project_type = project_type
self.project_name = project_name
-
+
BaseCommand.__init__(self, args)
-
+
def validations(self):
-
+
return [(len(self.args) >= 1, "No given project name")]
- def execute(self):
-
+ def execute(self):
+
if self.project_type is None:
-
+
self.parser = OptionParser()
self.parser.add_option("-t", "--type", help="Type can be 'code' or 'template'")
-
+
(options, args) = self.parser.parse_args(self.args)
-
+
if options.type is None:
-
+
options.type = CodeProject.name
self.project_name = self.args[0]
-
+
else:
self.project_name = self.args[2]
-
+
self.project_type = options.type
-
+
project = project_types[self.project_type]()
project.set_up(self.project_name)
14 crawley/manager/commands/syncdb.py
@@ -3,14 +3,14 @@
class SyncDbCommand(ProjectCommand):
"""
- Build up the DataBase.
-
- Reads the models.py user's file and generate a database from it.
+ Build up the DataBase.
+
+ Reads the models.py user's file and generate a database from it.
"""
-
+
name = "syncdb"
-
+
def execute(self):
-
+
self.project_type.syncdb(self)
-
+
10 crawley/manager/projects/base.py
@@ -63,13 +63,13 @@ def syncdb(self, syncb_command):
self.connector = None
syncb_command.sessions = []
-
- documents_sessions = { 'JSON_DOCUMENT' : json_session,
- 'XML_DOCUMENT' : xml_session,
+
+ documents_sessions = { 'JSON_DOCUMENT' : json_session,
+ 'XML_DOCUMENT' : xml_session,
'CSV_DOCUMENT' : csv_session, }
-
+
for document_name, session in documents_sessions.iteritems():
-
+
if has_valid_attr(syncb_command.settings, document_name):
session.file_name = getattr(syncb_command.settings, document_name)
44 crawley/persistance/connectors.py
@@ -8,25 +8,25 @@ class Connector(object):
A Connector represents an object that can provide the
database connection to the elixir framework.
"""
-
+
def __init__(self, settings):
-
+
self.settings = settings
-
+
def get_connection_string(self):
"""
Returns the connection string to the corresponding database
"""
pass
-
+
class SimpleConnector(Connector):
"""
A simple connector for a database without host and user. I.E: sqlite
"""
-
- def get_connection_string(self):
-
+
+ def get_connection_string(self):
+
return "%s:///%s" % (self.settings.DATABASE_ENGINE, self.settings.DATABASE_NAME)
@@ -36,10 +36,10 @@ class HostConnector(Connector):
"""
def get_connection_string(self):
-
+
user_pass = "%s:%s" % (self.settings.DATABASE_USER, self.settings.DATABASE_PASSWORD)
host_port = "%s:%s" % (self.settings.DATABASE_HOST, self.settings.DATABASE_PORT)
- return "%s://%s@%s/%s" % (self.settings.DATABASE_ENGINE, user_pass, host_port, self.settings.DATABASE_NAME)
+ return "%s://%s@%s/%s" % (self.settings.DATABASE_ENGINE, user_pass, host_port, self.settings.DATABASE_NAME)
@@ -47,43 +47,43 @@ class SqliteConnector(SimpleConnector):
"""
Sqlite3 Engine connector
"""
-
+
name = "sqlite"
-
+
class MySqlConnector(HostConnector):
"""
Mysql Engine connector
"""
-
+
name = "mysql"
-
-
+
+
class OracleConnector(HostConnector):
"""
Oracle Engine connector
"""
-
+
name = "oracle"
-
-
+
+
class PostgreConnector(HostConnector):
"""
Postgre Engine connector
"""
-
+
name = "postgres"
-
+
class ConnectorsDict(dict):
-
+
def __getitem__(self, key):
-
+
if key in self:
return dict.__getitem__(self, key)
else:
exit_with_error("No recognized database Engine")
-
+
connectors = ConnectorsDict()
connectors.update({ PostgreConnector.name : PostgreConnector,
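
The connectors above differ only in the connection string they hand to elixir. The two format strings, shown with placeholder values rather than real project settings:

    # SimpleConnector (sqlite): engine and file name only
    print "%s:///%s" % ("sqlite", "crawley.sqlite")
    # -> sqlite:///crawley.sqlite

    # HostConnector (mysql/oracle/postgres): credentials, host and port as well
    user_pass = "%s:%s" % ("root", "secret")
    host_port = "%s:%s" % ("localhost", "3306")
    print "%s://%s@%s/%s" % ("mysql", user_pass, host_port, "crawley")
    # -> mysql://root:secret@localhost:3306/crawley
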
12 crawley/persistance/databases.py
@@ -7,10 +7,10 @@
class Entity(elixir.EntityBase):
"""
Base Entity.
-
+
Every Crawley's Entity must Inherit from this class
"""
-
+
__metaclass__ = elixir.EntityMeta
@@ -18,17 +18,17 @@ class UrlEntity(elixir.EntityBase):
"""
Entity intended to save urls
"""
-
+
href = Field(Unicode(255))
parent = Field(Unicode(255))
-
+
__metaclass__ = elixir.EntityMeta
-
+
def setup(entities):
"""
Setup the database based on a list of user's entities
"""
-
+
elixir.setup_entities(entities)
elixir.create_all()
36 crawley/simple_parser/compilers.py
@@ -53,20 +53,20 @@ def gen_entities(self):
descriptors = {}
fields = [line.field for lines in self.code_blocks for line in lines if not line.is_header]
-
+
for field in fields:
-
+
table = field["table"]
column = field["column"]
-
+
if table not in descriptors:
descriptors[table] = [column, ]
else:
if column not in descriptors[table]:
descriptors[table].append(column)
-
+
for entity_name, fields in descriptors.iteritems():
-
+
attrs_dict = dict([(field, Field(Unicode(255))) for field in fields])
entity = self._gen_class(entity_name, (Entity, ), attrs_dict)
@@ -80,7 +80,7 @@ def _gen_scrape_method(self, sentences):
Returns a dictionary containing methods and attributes for the
scraper class.
"""
-
+
entities = self.entities
def scrape(self, response):
@@ -89,25 +89,25 @@ def scrape(self, response):
"""
fields = {}
-
+
for sentence in sentences:
-
+
nodes = response.html.xpath(sentence.xpath)
-
+
column = sentence.field["column"]
table = sentence.field["table"]
-
+
if nodes:
-
+
value = _get_text_recursive(nodes[0])
-
+
if table not in fields:
fields[table] = {column : value}
else:
fields[table][column] = value
-
- for table, attrs_dict in fields.iteritems():
-
+
+ for table, attrs_dict in fields.iteritems():
+
entities[table](**attrs_dict)
session.commit()
@@ -131,9 +131,9 @@ class CrawlerCompiler(object):
def __init__(self, scrapers, settings):
- self.scrapers = scrapers
- self.config = ConfigApp(settings.PROJECT_ROOT)
-
+ self.scrapers = scrapers
+ self.config = ConfigApp(settings.PROJECT_ROOT)
+
def compile(self):
attrs_dict = {}
30 crawley/simple_parser/config_parser.py
@@ -8,22 +8,22 @@ class ConfigObj(object):
"""
def __init__(self):
-
+
self._config_parser = ConfigParser()
self.config = {}
def _update_dictionary(self):
-
+
for sect in self._config_parser.sections():
for item_name, value in self._config_parser.items(sect):
self.config[(sect, item_name)] = value
def __getitem__(self, key):
-
+
return self.config.get(key, None)
def __setitem__(self, key, value):
-
+
if value is None:
value = ''
self.config[key] = value
@@ -33,11 +33,11 @@ def __setitem__(self, key, value):
self._config_parser.set(section, item, value)
def __str__(self):
-
+
return str(self.config)
def save(self, filename):
-
+
self._config_parser.write(open(filename,'wb'))
@@ -49,23 +49,23 @@ class ConfigApp(ConfigObj):
config = ConfigApp()
value = config[('section', 'item')]
"""
-
+
CONFIG_FILE = 'config.ini'
def __init__(self, ini_dir):
-
+
ConfigObj.__init__(self)
-
+
self.ini_dir = ini_dir
- config = open(self._get_path(), 'rb')
-
+ config = open(self._get_path(), 'rb')
+
self._config_parser.readfp(config)
self._update_dictionary()
-
+
def _get_path(self):
-
+
return os.path.join(self.ini_dir, self.CONFIG_FILE)
-
+
def save(self):
-
+
ConfigObj.save(self, self._get_path())
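
ConfigObj flattens an INI file into a dict keyed by (section, item) tuples. A self-contained sketch of that flattening with the stdlib ConfigParser and an invented config (ConfigApp itself reads config.ini from the project directory):

    from ConfigParser import ConfigParser
    from StringIO import StringIO

    parser = ConfigParser()
    parser.readfp(StringIO("[crawler]\nstart_url = http://example.com\n"))

    config = {}
    for sect in parser.sections():
        for item, value in parser.items(sect):
            config[(sect, item)] = value

    print config[("crawler", "start_url")]   # http://example.com
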
56 crawley/simple_parser/parsers.py
@@ -4,38 +4,38 @@ class DSLAnalizer(object):
"""
Analizes the DSL written by users
"""
-
+
def __init__(self, dsl):
-
+
self.dsl = dsl
-
+
def is_header(self, line):
-
+
return DSLHeaderLine.SEPARATOR in line
-
+
def parse(self):
-
+
blocks = []
lines = []
-
+
for n, line in enumerate(self.dsl.split("\n")):
-
+
line = line.strip()
-
+
if not line:
continue
-
+
if self.is_header(line):
-
+
if lines:
blocks.append(lines)
-
+
lines = []
lines.append(DSLHeaderLine(line, n))
-
+
else:
lines.append(DSLLine(line, n))
-
+
blocks.append(lines)
return blocks
@@ -44,39 +44,39 @@ class DSLLine(object):
"""
A DSL line abstraction
"""
-
- SEPARATOR = "->"
+
+ SEPARATOR = "->"
is_header = False
-
+
def __init__(self, content, number):
-
+
self.number = number
self.content = content
self._parse()
-
+
def _parse(self):
-
- parts = self.content.split(self.SEPARATOR)
-
+
+ parts = self.content.split(self.SEPARATOR)
+
if len(parts) > 2:
raise TemplateSyntaxError(self.number, "More than one '%s' token found in the same line" % self.SEPARATOR)
elif len(parts) < 2:
raise TemplateSyntaxError(self.number, "Missed separator token '%s'" % self.SEPARATOR)
-
+
self.field = self._parse_attribs(parts[0])
self.xpath = parts[1].strip()
-
+
def _parse_attribs(self, parmas):
-
+
table, column = parmas.split(".")
return {"table" : table.strip(), "column" : column.strip()}
class DSLHeaderLine(DSLLine):
-
+
SEPARATOR = "=>"
is_header = True
-
+
def _parse_attribs(self, field):
-
+
return field
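
DSLAnalizer splits a template into blocks: lines containing "=>" start a new header block, and the remaining lines are "table.column -> xpath" pairs. A tiny sketch of that line format with an invented template (the real header semantics live elsewhere in the parser):

    dsl = """example_header => http://example.com/list
    posts.title -> //h1/text()
    posts.body -> //div[@id='body']/text()"""

    for n, line in enumerate(dsl.split("\n")):
        line = line.strip()
        if not line:
            continue
        if "=>" in line:
            print n, "header:", line
        else:
            field, xpath = line.split("->")
            table, column = field.strip().split(".")
            print n, "field:", table, column, xpath.strip()
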
2  crawley/utils/collections/custom_dict.py
@@ -1,6 +1,6 @@
from crawley.utils.common import exit_with_error
-class CustomDict(dict):
+class CustomDict(dict):
def __init__(self, error="[%s] Not valid argument", *args, **kwargs):
2  crawley/utils/collections/ordered_dict.py
@@ -10,7 +10,7 @@
from _abcoll import KeysView, ValuesView, ItemsView
except ImportError:
pass
-
+
class OrderedDict(dict):
'Dictionary that remembers insertion order'
1  crawley/utils/matchers.py
@@ -35,7 +35,6 @@ def complex_matcher(pattern, url, strict=True):
if match is None:
return url in pattern
-
group = match.group(0)
if strict:
16 run_tests.py
@@ -7,7 +7,7 @@
from optparse import OptionParser
from tests.crawler_test import CrawlerTest
-from tests.utils_test import UtilsTest
+from tests.utils_test import UtilsTest
from tests.commands_test import CommandsTest
from tests.simple_parser_test import ParserTest
from tests.persistance_test import PersistanceTest
@@ -15,28 +15,28 @@
def load_tests(tests):
-
+
suite = unittest.TestSuite()
for test_class in tests:
tests = unittest.defaultTestLoader.loadTestsFromTestCase(test_class)
suite.addTests(tests)
return suite
-
+
def suite(options):
unit = [UtilsTest, ParserTest, PersistanceTest, HTTPTest]
integration = [CommandsTest, CrawlerTest]
-
+
if options.all is not None:
return load_tests(unit + integration)
elif options.unittests is not None:
return load_tests(unit)
elif options.integration is not None:
return load_tests(integration)
- else:
+ else:
return None
-
+
if __name__ == "__main__":
-
+
parser = OptionParser()
parser.add_option("-i", "--integration", help="run integration tests", nargs=0)
parser.add_option("-u", "--unittests", help="run unit tests", nargs=0)
@@ -47,4 +47,4 @@ def suite(options):
print parser.print_help()
else:
unittest.TextTestRunner(verbosity=2).run(test_suite)
-
+
8 setup.py
@@ -15,17 +15,17 @@
author_email = "jmg.utn@gmail.com",
license = "GPL v3",
keywords = "Scraping Crawling Framework Python",
- packages=find_packages(exclude=["tests"]),
+ packages=find_packages(exclude=["tests"]),
data_files=[
(templates_dir, templates_files)
],
- include_package_data=True,
- scripts=['crawley/bin/crawley'],
+ include_package_data=True,
+ scripts=['crawley/bin/crawley'],
install_requires=[
'lxml',
'eventlet',
'elixir',
- 'pyquery',
+ 'pyquery',
],
url='http://crawley-project.com.ar/',
)