This repository has been archived by the owner on Jun 15, 2018. It is now read-only.

Commit bdcbb38

Merge pull request #16 from esby/master

Ian Weller committed Jun 28, 2012
2 parents d80faea + d4d9692

Showing 2 changed files with 100 additions and 26 deletions.
53 changes: 41 additions & 12 deletions src/mw/clicommands.py
@@ -77,12 +77,15 @@ def _die_if_no_init(self):
         if self.metadir.config is None:
             print '%s: not a mw repo' % self.me
             sys.exit(1)
+        self.api_setup = False
 
     def _api_setup(self):
-        cookie_filename = os.path.join(self.metadir.location, 'cookies')
-        self.api_url = self.metadir.config.get('remote', 'api_url')
-        self.api = simplemediawiki.MediaWiki(self.api_url,
-                cookie_file=cookie_filename)
+        if not self.api_setup:  # do not call _api_setup twice
+            cookie_filename = os.path.join(self.metadir.location, 'cookies')
+            self.api_url = self.metadir.config.get('remote', 'api_url')
+            self.api = simplemediawiki.MediaWiki(self.api_url,
+                    cookie_file=cookie_filename)
+            self.api_setup = True
 
 
 class InitCommand(CommandBase):
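
The hunk above makes API setup lazy and idempotent: commands may call _api_setup() more than once, and only the first call builds the client. A minimal sketch of the guard-flag pattern, with illustrative names that are not part of mw itself:

    class LazyClient(object):

        def __init__(self):
            self.api_setup = False

        def _api_setup(self):
            if not self.api_setup:  # real work happens only on the first call
                self.api = self._connect()
                self.api_setup = True

        def _connect(self):
            return object()  # stands in for simplemediawiki.MediaWiki(url, ...)
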
@@ -129,6 +132,7 @@ def __init__(self):
         usage = '[options] PAGENAME ...'
         CommandBase.__init__(self, 'pull_commandat', 'add remote pages to repo '
                 'belonging to the given category', usage)
+        self.query_continue = ''
 
     def _do_command(self):
         self._die_if_no_init()
@@ -142,14 +146,31 @@ def _do_command(self):
                 'generator': 'categorymembers',
                 'gcmlimit': 500
             }
-            response = self.api.call(data)['query']['pages']
-            for pageid in response.keys():
-                pagename = response[pageid]['title']
-                print pagename
-                pull_command = PullCommand()
-                pull_command.args = [pagename.encode('utf-8')]
-                pull_command._do_command()
+            if self.query_continue != '':
+                data['gcmcontinue'] = self.query_continue
+
+            api_call = self.api.call(data)
+            if 'query-continue' in api_call:
+                self.query_continue = api_call['query-continue']['categorymembers']['gcmcontinue']
+            else:
+                self.query_continue = ''
+            if api_call != []:
+
+                response = api_call['query']['pages']
+                pull_command = PullCommand()
+                pull_command.args = []
+
+                for pageid in response.keys():
+                    pagename = response[pageid]['title']
+                    pull_command.args += [pagename.encode('utf-8')]
+
+                pull_command._do_command()
+
+        if self.query_continue != '':
+            print 'query continue detected - continuing the query'
+            self._do_command()
+
 
 
 class PullCommand(CommandBase):
 
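For context, gcmcontinue is the continuation token MediaWiki (pre-1.21 "query-continue" era) returns for the categorymembers generator: each response may carry a token that must be fed back into the next request. The command above follows it recursively; a minimal iterative sketch of the same loop, where api stands in for a simplemediawiki.MediaWiki instance:

    def iter_category_members(api, data):
        while True:
            result = api.call(dict(data))
            for page in result['query']['pages'].values():
                yield page['title']
            # follow the continuation token, if any
            cont = result.get('query-continue', {}) \
                         .get('categorymembers', {}) \
                         .get('gcmcontinue')
            if cont is None:
                break
            data['gcmcontinue'] = cont
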
@@ -283,7 +304,7 @@ def _do_command(self):
             os.rename(full_filename, full_filename + '.local')
             # pull wiki copy
             pull_command = PullCommand()
-            pull_command.args = [pagename.encode('utf-8')]
+            pull_command.args = [pagename]  #.encode('utf-8')] #assuming the file is already using utf-8 - esby
             pull_command._do_command()
             # mv remote to filename.wiki.remote
             os.rename(full_filename, full_filename + '.remote')
@@ -297,7 +318,7 @@ def _do_command(self):
             os.remove(full_filename + '.remote')
             # mw ci pagename
             commit_command = CommitCommand()
-            commit_command.args = [pagename.encode('utf-8')]
+            commit_command.args = [pagename]  #.encode('utf-8')] #assuming the file is already using utf-8 - esby
             commit_command._do_command()


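Both hunks sit inside the merge command's pull-and-merge sequence: the working copy is set aside as .local, a fresh remote copy is pulled and renamed .remote, and the configured merge tool recombines them. A condensed sketch of that flow (illustrative, not the verbatim mw code):

    import os
    import subprocess

    def merge_page(full_filename, pull_command, merge_tool):
        # set the working copy aside, then pull a fresh remote copy
        os.rename(full_filename, full_filename + '.local')
        pull_command._do_command()
        os.rename(full_filename, full_filename + '.remote')
        # merge_tool is the 'merge.tool' config value with three %s slots
        subprocess.call(merge_tool % (full_filename + '.local',
                                      full_filename + '.remote',
                                      full_filename), shell=True)
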
@@ -334,6 +355,7 @@ def _do_command(self):
         edit_summary = self.options.edit_summary
         for filename in status:
             if status[filename] in ['M']:
+                start_time = time.time()
                 files_to_commit -= 1
                 # get edit token
                 data = {
@@ -406,8 +428,15 @@ def _do_command(self):
                     data = data.encode('utf-8')
                     fd.write(data)
                     if files_to_commit:
-                        print 'waiting 3s before processing the next file'
-                        time.sleep(3)
+                        end_time = time.time()
+                        print time.strftime("%Y-%m-%d - %H:%M:%S", time.gmtime(time.time())) \
+                                + " - Committed - " + mw.metadir.filename_to_pagename(filename[:-5]) \
+                                + " - Files left: " + str(files_to_commit)
+                        time_inc = end_time - start_time
+                        delay = 10 - time_inc
+                        if delay > 0:
+                            print "adjusting throttle - waiting for %.2fs" % delay
+                            time.sleep(delay)
                 else:
                     print 'error: committing %s failed: %s' % \
                             (filename, response['edit']['result'])
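
The fixed 3-second pause becomes an adaptive throttle: each committed page gets a 10-second budget, and only the unused remainder of that budget is slept away, so slow server round-trips are not penalized twice. Distilled to its core (the constant matches the hard-coded 10 above):

    import time

    MIN_SECONDS_PER_EDIT = 10

    def throttle(start_time):
        delay = MIN_SECONDS_PER_EDIT - (time.time() - start_time)
        if delay > 0:
            time.sleep(delay)
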
73 changes: 59 additions & 14 deletions src/mw/metadir.py
@@ -23,6 +23,7 @@
 import os
 from StringIO import StringIO
 import sys
+import hashlib
 
 
 class Metadir(object):
@@ -45,6 +46,13 @@ def __init__(self):
                 os.path.isfile(self.config_loc):
             self.config = ConfigParser.RawConfigParser()
             self.config.read(self.config_loc)
+            self.use_md5 = False
+            if self.config.has_option('index', 'use_md5'):
+                self.use_md5 = (self.config.get('index', 'use_md5') == 'on')
+            md5path = os.path.join(self.location, 'cache', 'md5index')
+            if self.use_md5 and not os.path.exists(md5path):
+                os.mkdir(md5path, 0755)
+            self.pagedict_loaded = False
         else:
             self.config = None

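With this change the repo config gains an [index] section, read via ConfigParser; use_md5 is compared against the literal string 'on'. An illustrative .mw/config (the remote and merge values here are examples, not mandated by the commit):

    [remote]
    api_url = http://example.org/w/api.php

    [merge]
    tool = kidff3 %s %s -o %s

    [index]
    use_md5 = on
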
@@ -69,13 +77,20 @@ def create(self, api_url):
         self.config.set('remote', 'api_url', api_url)
         self.config.add_section('merge')
         self.config.set('merge', 'tool', 'kidff3 %s %s -o %s')
+        self.config.add_section('index')
+        self.config.set('index', 'use_md5', 'on')
         self.save_config()
         # create cache/
         os.mkdir(os.path.join(self.location, 'cache'))
         # create cache/pagedict
         fd = file(os.path.join(self.location, 'cache', 'pagedict'), 'w')
         fd.write(json.dumps({}))
         fd.close()
+
+        # structure replacement for pagedict
+        # will also be created if use_md5 is turned on with an existing project
+        os.mkdir(os.path.join(self.location, 'cache', 'md5index'), 0755)
+
         # create cache/pages/
         os.mkdir(os.path.join(self.location, 'cache', 'pages'), 0755)

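The metadata layout under .mw/ after create(), reconstructed from the calls above (pagedict is still written, for repos that keep use_md5 off):

    .mw/
    |-- config
    `-- cache/
        |-- pagedict     # one JSON dict mapping pagename -> {id, currentrv}
        |-- md5index/    # one JSON file per page, named by the md5 of the pagename
        `-- pages/       # cached revisions, one file per page id
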
@@ -88,23 +103,53 @@ def clean_page(self, pagename):
         fd.write(cur_content.encode('utf-8'))
         fd.close()
 
+    def pagedict_load(self):
+        if not self.pagedict_loaded:
+            fd = file(os.path.join(self.location, 'cache', 'pagedict'), 'r+')
+            self.pagedict = json.loads(fd.read())
+            fd.close()
+            self.pagedict_loaded = True
+
+    def get_md5_from_pagename(self, pagename):
+        m = hashlib.md5()
+        name = pagename.encode('unicode_escape')
+        m.update(name)
+        return os.path.join(self.location, 'cache', 'md5index', m.hexdigest())
+
     def pagedict_add(self, pagename, pageid, currentrv):
-        fd = file(os.path.join(self.location, 'cache', 'pagedict'), 'r+')
-        pagedict = json.loads(fd.read())
-        pagedict[pagename] = {'id': int(pageid), 'currentrv': int(currentrv)}
-        fd.seek(0)
-        fd.write(json.dumps(pagedict))
-        fd.truncate()
-        fd.close()
+        if not self.use_md5:
+            self.pagedict_load()
+            self.pagedict[pagename] = {'id': int(pageid), 'currentrv': int(currentrv)}
+            fd = file(os.path.join(self.location, 'cache', 'pagedict'), 'w')
+            fd.write(json.dumps(self.pagedict))
+            fd.truncate()
+            fd.close()
+        else:  # feeding the new index structure
+            md5pagename = self.get_md5_from_pagename(pagename)
+            page = {}
+            page[pagename] = {'id': int(pageid), 'currentrv': int(currentrv)}
+            fd = file(md5pagename, 'w')
+            fd.write(json.dumps(page))
+            fd.truncate()
+            fd.close()
 
     def get_pageid_from_pagename(self, pagename):
-        fd = file(os.path.join(self.location, 'cache', 'pagedict'), 'r')
-        pagedict = json.loads(fd.read())
-        pagename = pagename.decode('utf-8')
-        if pagename in pagedict.keys():
-            return pagedict[pagename]
-        else:
-            return None
+        if not self.use_md5:
+            self.pagedict_load()
+            pagename = pagename.decode('utf-8')
+            if pagename in self.pagedict.keys():
+                return self.pagedict[pagename]
+            else:
+                return None
+        else:  # feeding the new index structure
+            pagename = pagename.decode('utf-8')
+            md5pagename = self.get_md5_from_pagename(pagename)
+            if os.path.isfile(md5pagename):
+                fd = file(md5pagename, 'r+')
+                page = json.loads(fd.read())
+                return page[pagename]
+            else:
+                return None
 
     def pages_add_rv(self, pageid, rv):
         pagefile = os.path.join(self.location, 'cache', 'pages', str(pageid))
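A short usage sketch of the new index path (hypothetical values; assumes the working directory is inside a mw checkout with use_md5 = on, and that Python 2 is in use, as in the codebase):

    from mw.metadir import Metadir

    m = Metadir()
    m.pagedict_add(u'Main Page', 1, 42)  # writes cache/md5index/<md5 hexdigest>
    print m.get_pageid_from_pagename('Main Page')  # -> {'id': 1, 'currentrv': 42}
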
