Skip to content


Subversion checkout URL

You can clone with
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

executable file 254 lines (208 sloc) 9.123 kB
#!/usr/bin/env python
import collections
import glob
import logging
import os
import re
import sqlite3
import subprocess
import sys
import memcacheConstants
from pump import EndPoint, Source, Batch
# Data-file schema version this module targets. MBFSource.version() reads the
# value from a file with "PRAGMA user_version"; can_handle() and check()
# compare that value against the literal 2.
MBF_VERSION = 2 # sqlite pragma user version for Couchbase 1.8.
class MBFSource(Source):
"""Can read 1.8 server master and *.mb data files."""
def __init__(self, opts, spec, source_bucket, source_node,
             source_map, sink_map, ctl, cur):
    """Initialise the source and its provide_batch() iteration state.

    Every argument is forwarded unchanged to the superclass constructor.
    """
    super(MBFSource, self).__init__(opts, spec, source_bucket, source_node,
                                    source_map, sink_map, ctl, cur)

    # State carried between provide_batch() calls: cursor_todo holds the
    # (db, sql, db_kv_names, cursor) tuple still to be processed, and
    # cursor_done is set once there is nothing more to read.
    self.cursor_todo = None
    self.cursor_done = False

    # Two-pass str.format() template. The first pass fills `{0}` (db that
    # holds vbucket_states) and `{1}` (vbucket state pattern) and turns the
    # doubled braces into `{0}`/`{1}`; the second pass fills those with the
    # attached db name and the kv table name.
    #
    # The version predicate joins kv rows against the vbucket's own
    # vb_version. It previously compared kv.vb_version with itself, which
    # is always true for non-NULL values and therefore filtered nothing.
    self.s = """SELECT vbid, k, flags, exptime, cas, v
FROM `{{0}}`.`{{1}}` as kv,
`{0}`.vbucket_states as vb
WHERE kv.vbucket = vb.vbid
AND kv.vb_version = vb.vb_version
AND vb.state like '{1}'"""
def can_handle(opts, spec):
    """Return True when `spec` is an existing file at the supported version.

    `opts` is accepted but not used. The version is only queried when
    `spec` is a regular file.
    """
    # NOTE(review): takes no `self` and siblings are invoked as
    # MBFSource.<name>(...), so this is presumably a @staticmethod; the
    # decorator is not visible in this copy of the file -- confirm.
    if not os.path.isfile(spec):
        return False
    # Compare against the module constant rather than a repeated literal 2.
    return MBFSource.version(spec) == MBF_VERSION
def check_base(opts, spec):
    """Run only EndPoint's base check on `opts` and `spec`."""
    # Source.check_base() -- the immediate superclass version -- is bypassed
    # on purpose, since MBFSource can handle different vbucket states.
    base_result = EndPoint.check_base(opts, spec)
    return base_result
def check(opts, spec):
# Validate `spec` and describe the data files found next to it.
# Returns a 2-tuple: (error string, None) on failure, otherwise (0, dict)
# where the dict carries 'spec' and the collected vbucket states.
# NOTE(review): this block appears truncated in this copy of the file -- the
# "%" operand for the error message, the `try:` that the `except` below
# belongs to, and several closing brackets are missing. Takes no `self`, so
# presumably a @staticmethod. Confirm against the upstream source.
spec = os.path.normpath(spec)
if not os.path.isfile(spec):
return "error: backup_dir is not a file: " + spec, None
# `spec` itself plus every "<spec>-*.mb" file that glob finds.
db_files = MBFSource.db_files(spec)
versions = MBFSource.db_file_versions(db_files)
logging.debug(" MBFSource check db file versions: %s" % (versions))
# Fail when not even one of the files reports a user_version of 2 or more.
if max(versions.values()) < 2:
err = ("error: wrong backup/db file versions;\n" +
" either the metadata db file is not specified\n" +
" or the backup files need upgrading to version %s;\n" +
" please use cbdbupgrade or contact support.") \
return err, None
# Map of state string (e.g., 'active') to map of vbucket_id to info.
vbucket_states = collections.defaultdict(dict)
sql = """SELECT vbid, vb_version, state, checkpoint_id
FROM vbucket_states"""
# Only the files ending in ".mb" are queried for vbucket states.
for db_file in [f for f in db_files if f.endswith(".mb")]:
db = sqlite3.connect(db_file)
cur = db.cursor()
# Row layout follows the SELECT above: vbid, vb_version, state, checkpoint_id.
for row in cur.execute(sql):
vbucket_id = row[0]
state = row[2]
vbucket_states[state][vbucket_id] = {
'vbucket_id': vbucket_id,
'vb_version': row[1],
'state': state,
'checkpoint_id': row[3]
except sqlite3.DatabaseError, e:
pass # A missing vbucket_states table is expected.
return 0, {'spec': spec,
[{'name': os.path.basename(spec),
'nodes': [{'hostname': 'N/A',
'vbucket_states': vbucket_states
def db_file_versions(db_files):
    """Map each path in `db_files` to what MBFSource.version() reports."""
    return dict((path, MBFSource.version(path)) for path in db_files)
def version(db_file):
    """Return the sqlite ``user_version`` of `db_file` as an int.

    When sqlite raises a DatabaseError while reading the file, the error
    is logged and 0 is returned instead.
    """
    # The `except` clause had no matching `try:`; the query is the only
    # statement it can guard.
    try:
        row = MBFSource.run_sql(db_file, "PRAGMA user_version;")
        return int(row[0])
    except sqlite3.DatabaseError as e:
        logging.error("error: could not access user_version from: %s" +
                      "; exception: %s" +
                      "; perhaps it is being used by another program" +
                      " like couchbase-server", db_file, e)
        return 0
def db_files(spec):
    """List `spec` followed by every path matching ``<spec>-*.mb``."""
    paths = [spec]
    paths.extend(glob.glob(spec + "-*.mb"))
    return paths
def run_sql(db_file, sql):
    """Execute `sql` against `db_file` and return the first result row.

    Returns the row as a tuple, or None when the statement yields no rows.
    sqlite3 errors propagate to the caller. The cursor and connection are
    always closed before returning.
    """
    db = sqlite3.connect(db_file)
    try:
        cur = db.cursor()
        try:
            # Without this call the cursor has no result set and
            # fetchone() returns None for every query.
            cur.execute(sql)
            return cur.fetchone()
        finally:
            cur.close()
    finally:
        db.close()
def provide_design(opts, source_spec, source_bucket, source_map):
    """Report success with no design data; all arguments are ignored."""
    design = None
    return 0, design
def provide_batch(self):
# Produce the next batch of items read from the attached kv tables.
# Returns (0, batch) with data, (0, None) once everything has been read,
# or (error string, None) on failure.
# NOTE(review): this block appears truncated in this copy of the file --
# `try:` lines, parts of the kv_name filter conditions and the start of the
# call that ends at "len(val))" are missing. Confirm against the upstream
# source before changing any logic here.
if self.cursor_done:
return 0, None
batch = Batch(self)
# Batch limits come from opts.extra.
batch_max_size = self.opts.extra['batch_max_size']
batch_max_bytes = self.opts.extra['batch_max_bytes']
# Which vbucket state to read; 'active' when opts does not specify one.
source_vbucket_state = \
getattr(self.opts, 'source_vbucket_state', 'active')
# No saved state yet: connect and work out which tables to scan.
if self.cursor_todo is None:
rv, db, attached_dbs, table_dbs = self.connect_db()
if rv != 0:
return rv, None
# Determine which db the state table is in.
# The 1-tuple unpacking raises ValueError unless exactly one attached db
# contains the vbucket_states table.
(state_db,) = table_dbs[u'vbucket_states']
except ValueError:
return "error: no unique vbucket_states table", None
# First format pass: fills in the state db and state pattern; the doubled
# braces in self.s become `{0}`/`{1}` for the per-table pass further down.
sql = self.s.format(state_db, source_vbucket_state)
kv_names = []
for kv_name, db_name in table_dbs.iteritems():
if ( is None and
not kv_name.startswith('kv_')):
if ( is not None and
kv_name != "kv_{0}".format(
# Order kv tables by their numeric suffix, and dbs by name within each.
db_kv_names = []
for kv_name in sorted(kv_names,
key=lambda x: int(x.split('_')[-1])):
for db_name in sorted(table_dbs[kv_name]):
db_kv_names.append((db_name, kv_name))
self.cursor_todo = (db, sql, db_kv_names, None)
# Resume from the state saved by this or an earlier call.
db, sql, db_kv_names, cursor = self.cursor_todo
if not db:
self.cursor_done = True
self.cursor_todo = None
return 0, None
# Keep reading rows until the batch hits its size or byte limit.
while (not self.cursor_done and
batch.size() < batch_max_size and
batch.bytes < batch_max_bytes):
if not cursor:
if not db_kv_names:
self.cursor_done = True
self.cursor_todo = None
# pop() takes from the end of the list, so tables are visited in the
# reverse of the order they were appended above.
db_name, kv_name = db_kv_names.pop()
logging.debug(" MBFSource db/kv table: %s/%s" %
(db_name, kv_name))
cursor = db.cursor()
# Second format pass: fills in the attached db name and kv table name.
cursor.execute(sql.format(db_name, kv_name))
self.cursor_todo = (db, sql, db_kv_names, cursor)
row = cursor.fetchone()
if row:
# Column order follows the SELECT in self.s: vbid, k, flags, exptime, cas, v.
vbucket_id = row[0]
key = row[1]
flg = row[2]
exp = row[3]
cas = row[4]
val = row[5]
if self.skip(key, vbucket_id):
meta = ''
vbucket_id, key, flg, exp, cas, meta, val), len(val))
self.cursor_todo = (db, sql, db_kv_names, None)
break # Close the batch; next pass hits new db_name/kv_name.
except Exception, e:
self.cursor_done = True
self.cursor_todo = None
return "error: MBFSource exception: " + str(e), None
return 0, batch
def connect_db(self):
    """Attach every data file for self.spec to one in-memory connection.

    Returns a 4-tuple ``(rv, db, attached_dbs, table_dbs)``:

    * on success rv is 0, db is the sqlite3 connection, attached_dbs is
      the list of schema names ("db0", "db1", ...) in the same order as
      MBFSource.db_files(self.spec), and table_dbs maps each table name
      to the list of attached dbs containing a table of that name;
    * when no table name starts with "kv_", rv is an error string and
      the other three items are None.
    """
    db = sqlite3.connect(':memory:')
    logging.debug(" MBFSource connect_db: %s" % self.spec)

    db_files = MBFSource.db_files(self.spec)
    logging.debug(" MBFSource db_files: %s" % db_files)

    attached_dbs = ["db{0}".format(i) for i in range(len(db_files))]
    # One execute() per file: newer sqlite3 modules reject non-DML
    # statements such as ATTACH when passed to executemany().
    for db_file, db_name in zip(db_files, attached_dbs):
        db.execute("attach ? as ?", (db_file, db_name))

    # Find all tables, filling a table_name => db_name map.
    table_dbs = {}
    for db_name in attached_dbs:
        cursor = db.cursor()
        cursor.execute("SELECT name FROM %s.sqlite_master"
                       " WHERE type = 'table'" % db_name)
        for (table_name,) in cursor:
            table_dbs.setdefault(table_name, []).append(db_name)

    # The original filter(...) call was cut off before its iterable;
    # table_dbs is the only collection of table names in scope.
    if not any(name.startswith("kv_") for name in table_dbs):
        # The connection is not handed back on this path, so release it.
        db.close()
        return "error: no kv data was found;" + \
            " check if db files are correct", None, None, None

    logging.debug(" MBFSource total # tables: %s" % len(table_dbs))
    return 0, db, attached_dbs, table_dbs
Jump to Line
Something went wrong with that request. Please try again.