Skip to content

Commit

Permalink
Various fixes in the new solr code
Browse files (browse the repository at this point in the history)
  • Loading branch information
lalinsky committed Oct 23, 2012
1 parent a4e2640 commit 21a5d97
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 39 deletions.
2 changes: 1 addition & 1 deletion mbslave-solr-export.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from mbslave.search import fetch_all

cfg = Config(os.path.join(os.path.dirname(__file__), 'mbslave.conf'))
db = connect_db(cfg)
db = connect_db(cfg, True)

print '<add>'
for doc in fetch_all(cfg, db):
Expand Down
7 changes: 5 additions & 2 deletions mbslave/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
from mbslave.replication import ReplicationHook


def connect_db(cfg):
return psycopg2.connect(**cfg.make_psql_args())
def connect_db(cfg, set_search_path=False):
    """Open a PostgreSQL connection using the settings in *cfg*.

    Args:
        cfg: Configuration object. ``cfg.make_psql_args()`` supplies the
            keyword arguments passed to ``psycopg2.connect``. When
            *set_search_path* is true, ``cfg.schema.name('musicbrainz')``
            supplies the schema name.
        set_search_path: When true, run ``SET search_path`` on the new
            connection so later queries resolve against that schema.
            Defaults to ``False``, which returns the connection untouched.

    Returns:
        The open psycopg2 connection.
    """
    db = psycopg2.connect(**cfg.make_psql_args())
    if set_search_path:
        cursor = db.cursor()
        try:
            cursor.execute("SET search_path TO %s", (cfg.schema.name('musicbrainz'),))
        finally:
            # Close the one-off cursor explicitly rather than leaving it to
            # garbage collection.
            cursor.close()
    return db


def parse_name(config, table):
Expand Down
84 changes: 48 additions & 36 deletions mbslave/search.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import itertools
import urllib2
import psycopg2.extras
from contextlib import closing
from collections import namedtuple
from lxml import etree as ET
from lxml.builder import E
Expand Down Expand Up @@ -99,6 +100,7 @@ def __init__(self, table, name, foreign=None, null=False):
ForeignColumn('release_group_secondary_type_join', 'secondary_type',
ForeignColumn('release_group_secondary_type', 'name'))),
Field('artist', Column('artist_credit', ForeignColumn('artist_credit', 'name', ForeignColumn('artist_name', 'name')))),
MultiField('alias', ForeignColumn('release', 'name', ForeignColumn('release_name', 'name'))),
]),
Entity('release', [
Field('id', Column('gid')),
Expand All @@ -110,12 +112,14 @@ def __init__(self, table, name, foreign=None, null=False):
Field('artist', Column('artist_credit', ForeignColumn('artist_credit', 'name', ForeignColumn('artist_name', 'name')))),
MultiField('catno', ForeignColumn('release_label', 'catalog_number')),
MultiField('label', ForeignColumn('release_label', 'label', ForeignColumn('label', 'name', ForeignColumn('label_name', 'name')))),
Field('alias', Column('release_group', ForeignColumn('release_group', 'name', ForeignColumn('release_name', 'name')))),
]),
Entity('recording', [
Field('id', Column('gid')),
Field('disambiguation', Column('comment')),
Field('name', Column('name', ForeignColumn('recording_name', 'name'))),
Field('name', Column('name', ForeignColumn('track_name', 'name'))),
Field('artist', Column('artist_credit', ForeignColumn('artist_credit', 'name', ForeignColumn('artist_name', 'name')))),
MultiField('alias', ForeignColumn('track', 'name', ForeignColumn('track_name', 'name'))),
]),
])

Expand Down Expand Up @@ -158,20 +162,24 @@ def iter_main(db, kind, ids=()):
names.append(field.name)

query = generate_iter_query(columns, joins, ids)
#print query

cursor = db.cursor()
cursor.execute(query, ids)

for row in cursor:
id = row[0]
fields = [E.field(kind, name='kind')]
for name, value in zip(names, row[1:]):
if isinstance(value, str):
value = value.decode('utf8')
if value:
fields.append(E.field(value, name=name))
yield id, fields
with closing(db.cursor('cursor_' + kind)) as cursor:
cursor.itersize = 100 * 1000
cursor.execute(query, ids)
for row in cursor:
id = row[0]
fields = [E.field(kind, name='kind')]
for name, value in zip(names, row[1:]):
if not value:
continue
if isinstance(value, str):
value = value.decode('utf8')
elif not isinstance(value, unicode):
value = unicode(value)
try:
fields.append(E.field(value, name=name))
except ValueError:
continue # XXX
yield id, fields


def iter_sub(db, kind, subtable, ids=()):
Expand Down Expand Up @@ -207,27 +215,31 @@ def iter_sub(db, kind, subtable, ids=()):
names.append(field.name)

query = generate_iter_query(columns, joins, ids)
#print query

cursor = db.cursor()
cursor.execute(query, ids)

fields = []
last_id = None
for row in cursor:
id = row[0]
if last_id != id:
if fields:
yield last_id, fields
last_id = id
fields = []
for name, value in zip(names, row[1:]):
if isinstance(value, str):
value = value.decode('utf8')
if value:
fields.append(E.field(value, name=name))
if fields:
yield last_id, fields
with closing(db.cursor('cursor_' + kind + '_' + subtable)) as cursor:
cursor.itersize = 100 * 1000
cursor.execute(query, ids)
fields = []
last_id = None
for row in cursor:
id = row[0]
if last_id != id:
if fields:
yield last_id, fields
last_id = id
fields = []
for name, value in zip(names, row[1:]):
if not value:
continue
if isinstance(value, str):
value = value.decode('utf8')
elif not isinstance(value, unicode):
value = unicode(value)
try:
fields.append(E.field(value, name=name))
except ValueError:
continue # XXX
if fields:
yield last_id, fields


def placeholders(ids):
Expand Down

0 comments on commit 21a5d97

Please sign in to comment.