Skip to content
This repository has been archived by the owner on Jun 10, 2024. It is now read-only.

Commit

Permalink
add mongo projectdb
Browse files Browse the repository at this point in the history
  • Loading branch information
binux committed Oct 12, 2014
1 parent a4c089d commit 6a823ab
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 2 deletions.
3 changes: 3 additions & 0 deletions database/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ def connect_database(url):
if dbtype == 'taskdb':
from .mongodb.taskdb import TaskDB
return TaskDB(url, **parames)
elif dbtype == 'projectdb':
from .mongodb.projectdb import ProjectDB
return ProjectDB(url, **parames)
else:
raise Exception('unknow database type: %s' % dbtype)
else:
Expand Down
53 changes: 53 additions & 0 deletions database/mongodb/projectdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
# Author: Binux<i@binux.me>
# http://binux.me
# Created on 2014-10-12 12:22:42

import json
import time
from pymongo import MongoClient

from database.base.projectdb import ProjectDB as BaseProjectDB

class ProjectDB(BaseProjectDB):
    """MongoDB-backed project store.

    Every project is one document in a single collection, identified by its
    ``name`` field. The MongoDB-internal ``_id`` field is removed from every
    document handed back to callers.
    """

    __collection_name__ = 'projectdb'

    def __init__(self, url, database='projectdb'):
        """Connect to MongoDB at ``url`` and select the project collection.

        :param url: connection URL passed straight to ``MongoClient``.
        :param database: name of the database holding the collection.
        """
        self.conn = MongoClient(url)
        self.database = self.conn[database]
        self.collection = self.database[self.__collection_name__]

        # insert() upserts by name, so names must be unique in the collection.
        self.collection.ensure_index('name', unique=True)

    @staticmethod
    def _drop_id(doc):
        """Remove the ``_id`` key from ``doc`` in place (if any) and return it."""
        if doc and '_id' in doc:
            del doc['_id']
        return doc

    def insert(self, name, obj=None):
        """Create the project ``name``, or overwrite its fields if it exists.

        :param name: project name (the lookup key).
        :param obj: mapping of fields to store; it is copied, never mutated.
        :return: whatever ``collection.update`` returns.
        """
        # None instead of a shared ``{}`` default; copy so the caller's
        # mapping is left untouched.
        doc = dict(obj) if obj is not None else {}
        doc['name'] = name
        doc['updatetime'] = time.time()
        return self.collection.update({'name': name}, {'$set': doc}, upsert=True)

    def update(self, name, obj=None, **kwargs):
        """Set fields on the existing project ``name`` (no upsert).

        :param name: project name (the lookup key).
        :param obj: mapping of fields to set; it is copied, never mutated.
        :param kwargs: additional fields, taking precedence over ``obj``.
        :return: whatever ``collection.update`` returns.
        """
        doc = dict(obj) if obj is not None else {}
        doc.update(kwargs)
        doc['updatetime'] = time.time()
        return self.collection.update({'name': name}, {'$set': doc})

    def get_all(self, fields=None):
        """Yield every project document, restricted to ``fields`` if given."""
        for doc in self.collection.find({}, fields=fields):
            yield self._drop_id(doc)

    def get(self, name, fields=None):
        """Return the project ``name`` (restricted to ``fields``) or ``None``."""
        doc = self.collection.find_one({'name': name}, fields=fields)
        return self._drop_id(doc)

    def check_update(self, timestamp, fields=None):
        """Return the projects whose ``updatetime`` is greater than ``timestamp``.

        The comparison is done by the server in a single query rather than
        listing every project and re-fetching each changed one by name.
        Documents without an ``updatetime`` field are not matched.

        :param timestamp: numeric lower bound (exclusive), as from ``time.time()``.
        :param fields: fields to include in each returned document.
        :return: list of project documents.
        """
        changed = self.collection.find(
            {'updatetime': {'$gt': timestamp}}, fields=fields)
        return [self._drop_id(doc) for doc in changed]
8 changes: 7 additions & 1 deletion database/mongodb/taskdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
# http://binux.me
# Created on 2014-10-11 23:54:50

import re
import json
import time
from pymongo import MongoClient
Expand All @@ -18,7 +17,12 @@ class TaskDB(BaseTaskDB):
def __init__(self, url, database='taskdb'):
    """Connect to MongoDB at ``url`` and index every known project collection.

    After the project list is loaded via ``_list_project()``, an index on
    ``status`` is ensured on each project's collection.
    """
    client = MongoClient(url)
    self.conn = client
    self.database = client[database]
    self.projects = set()

    self._list_project()
    for name in self.projects:
        per_project = self.database[self._collection_name(name)]
        per_project.ensure_index('status')

def _list_project(self):
self.projects = set()
Expand All @@ -37,6 +41,8 @@ def _collection_name(self, project):
return project

def _parse(self, data):
if '_id' in data:
del data['_id']
for each in ('schedule', 'fetch', 'process', 'track'):
if each in data:
if data[each]:
Expand Down
12 changes: 11 additions & 1 deletion test/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def tearDownClass(self):
self.projectdb._execute('DROP DATABASE pyspider_test_projectdb')

@unittest.skipUnless(os.environ.get('TEST_MONGODB'), 'no mongodb server for test.')
class TestMysqlTaskDB(TestTaskDB, unittest.TestCase):
class TestMongoDBTaskDB(TestTaskDB, unittest.TestCase):
@classmethod
def setUpClass(self):
self.taskdb = database.connect_database('mongodb+taskdb://localhost/pyspider_test_taskdb')
Expand All @@ -251,5 +251,15 @@ def setUpClass(self):
def tearDownClass(self):
self.taskdb.conn.drop_database(self.taskdb.database.name)

@unittest.skipUnless(os.environ.get('TEST_MONGODB'), 'no mongodb server for test.')
class TestMongoDBProjectDB(TestProjectDB, unittest.TestCase):
    """Run the shared ProjectDB test-suite against the MongoDB backend.

    Named ``TestMongoDBProjectDB`` so that it does not rebind the module-level
    name of the MongoDB TaskDB test class, which would keep that class from
    being collected.
    """

    @classmethod
    def setUpClass(cls):
        """Connect to the throw-away MongoDB project database."""
        cls.projectdb = database.connect_database(
            'mongodb+projectdb://localhost/pyspider_test_projectdb')

    @classmethod
    def tearDownClass(cls):
        """Drop the test database so runs do not leak state."""
        cls.projectdb.conn.drop_database(cls.projectdb.database.name)

if __name__ == '__main__':
unittest.main()

0 comments on commit 6a823ab

Please sign in to comment.