Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
oiwn committed Apr 17, 2012
1 parent e095dfb commit 6a07eb6
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 11 deletions.
15 changes: 13 additions & 2 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,28 @@
"""
Configs for default spider
"""
from models import init_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine

from models import Base

# Spider settings read by default_spider_params() and the spiders.
# MAX_THREADS: passed as 'thread_number' in default_spider_params().
MAX_THREADS = 3
# USE_CACHE: when true, default_spider_params() adds the cache options.
USE_CACHE = True
# SAVE_TO_DB: mirrors USE_CACHE; BaseHubSpider.save() returns early when false.
SAVE_TO_DB = USE_CACHE
# CACHE_DB: passed as 'cache_db' when USE_CACHE is enabled.
CACHE_DB = 'default_project'


def init_engine():
    """Build the SQLite engine and run ``create_all`` for the models.

    Returns the engine object produced by ``create_engine`` after
    ``Base.metadata.create_all`` has been called with it.
    """
    database_url = 'sqlite+pysqlite:///data.sqlite'
    engine = create_engine(database_url, encoding='utf-8')
    # Emit table creation for everything registered on Base before
    # handing the engine back to the caller.
    Base.metadata.create_all(engine)
    return engine


# Built once when this module is imported, so importing config calls
# init_engine() (engine creation plus create_all) as a side effect.
db_engine = init_engine()
# Session factory bound to the engine above; call Session() to get a session.
Session = sessionmaker(bind=db_engine)


def default_spider_params():
params = {
'thread_number': MAX_THREADS,
Expand All @@ -22,6 +32,7 @@ def default_spider_params():
}
if USE_CACHE:
params.update({
'thread_number': 3,
'use_cache': USE_CACHE,
'cache_db': CACHE_DB,
'debug_error' :True,
Expand Down
Binary file modified data.sqlite
Binary file not shown.
7 changes: 0 additions & 7 deletions models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,13 @@
"""
import datetime

from sqlalchemy import create_engine
from sqlalchemy.orm import relationship, backref
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import (Column, Integer, Text, String, ForeignKey,
DateTime, PickleType, Table)

Base = declarative_base()

def init_engine():
    """Create the SQLite engine and call ``create_all`` on the model metadata.

    Returns the engine created for ``data.sqlite``.
    """
    engine = create_engine(
        'sqlite+pysqlite:///data.sqlite',
        encoding='utf-8',
    )
    # Run table creation for the declarative Base against the new engine.
    Base.metadata.create_all(engine)
    return engine


class Item(Base):
__tablename__ = 'item'
Expand Down
7 changes: 5 additions & 2 deletions spiders/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from grab.spider import Spider

from models import Item
from config import Session
from config import Session, SAVE_TO_DB


class BaseHubSpider(Spider):
Expand All @@ -11,6 +11,9 @@ class BaseHubSpider(Spider):
items_total = 0

def save(self, data):
if not SAVE_TO_DB:
return

session = Session()

if not session.query(Item).filter_by(title=data['title']).first():
Expand All @@ -20,4 +23,4 @@ def save(self, data):

# Increment the scraped-item counter and print a progress line.
# NOTE(review): the parameter name `str` shadows the builtin inside this method.
def log_progress(self, str):
self.items_total += 1
# NOTE(review): this listing is a rendered diff; the first print below appears
# to be the removed line and the second the added one. Confirm against the
# committed file before treating both as live code.
print "Item scraped: %s" % str
print "(%d) Item scraped: %s" % (self.items_total, str)
1 change: 1 addition & 0 deletions spiders/explore.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from spiders.base import BaseHubSpider


class ExploreSpider(BaseHubSpider):
initial_urls = ['http://github.com/explore']

Expand Down

0 comments on commit 6a07eb6

Please sign in to comment.