diff --git a/.gitignore b/.gitignore index 07379dd..47686e6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,7 @@ config.py +config-chevah.py +*.psql +*.db3 # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/README.rst b/README.rst index d6dd924..d94b650 100644 --- a/README.rst +++ b/README.rst @@ -1,24 +1,36 @@ trac-to-github ============== -Hacks used to migrate from Trac sqlite to GitHub. +Hacks used to migrate from Trac SQLite or PostgreSQL dump to GitHub. Works with Python 3.8. For wiki migration, you will need git available in your dev environment. +This is a 2 stage process: + +1. Convert the wiki pages using native Trac wiki content. + This is done to have better diffs. + +2. Convert the last version of each wiki page to ReStructuredText, + or any other format. + + +Convert to git repo +=================== + Create a virtualenv:: virtualenv build . build/bin/activate mv config.py.sample config.py +Modify the config.py values. -For wiki migration. All pages are generated into a flat file structure. Spaces are used instead of path separators:: - python wiki_migrate.py PATH/TO/Trac.DB PATH/TO/GIT-REPO + python wiki_migrate.py PATH/TO/Trac.db3 PATH/TO/GIT-REPO You might want to add a `_Sidebar.rst` file in the root with:: @@ -31,6 +43,10 @@ You might want to add a `_Sidebar.rst` file in the root with:: * ``_ + +Convert the content to RST +========================== + For wiki content conversion:: python wiki_trac_rst_convert.py PATH/TO/GIT-REPO diff --git a/config.py.sample b/config.py.sample index 87ee7f7..d8bbf09 100644 --- a/config.py.sample +++ b/config.py.sample @@ -4,3 +4,9 @@ USER_MAPPING = { } TRAC_TICKET_PREFIX = 'https://trac.chevah.com/ticket/' + +# None or a tuple of (name, email) +# This is used for Trac users that don't have a GitHub mapping. 
+DEFAULT_GITHUB_USER = None + +FILE_EXTENSION = '.mediawiki' diff --git a/wiki_migrate.py b/wiki_migrate.py index 65a7266..a66f720 100644 --- a/wiki_migrate.py +++ b/wiki_migrate.py @@ -1,39 +1,56 @@ -# Script to migrate the wiki to a repo. +""" +Generate local wiki files based on a Trac DB file. + +Accepted DB formats: +* SQLite3 DB (.db3) +* PSQL dump (.psql) +""" import os import sqlite3 import subprocess import sys from datetime import datetime -from config import USER_MAPPING +from config import USER_MAPPING, DEFAULT_GITHUB_USER, FILE_EXTENSION # Set to True to not commit. DRY_RUN = False -DEFAULT_GITHUB_USER = USER_MAPPING['adi'] - # Wiki names to file names. PAGE_NAME_MAPPING = { 'WikiStart': 'Home', -} + } -FILE_EXTENSION = '.rst' -def main(): +def main(args): """ Do the job. """ - if len(sys.argv) != 3: - print("Need to pass the path to Trac DB and git repo as arguments.") + if len(args) != 2: + print("Need to pass the path to DB file and git repo as arguments.") sys.exit(1) - db = sqlite3.connect(sys.argv[1]) + db_file = args[0] + target_repo = args[1] + + if db_file.endswith('.db3'): + return _migrate_sqlite(db_file, target_repo) + + if db_file.endswith('.psql'): + return _migrate_pq_dump(db_file, target_repo) + + +def _migrate_sqlite(db_file, target_repo): + """ + Generate files based on SQLite3 db file. + """ + db = sqlite3.connect(db_file) start_dir = os.getcwd() try: - os.chdir(sys.argv[2]) + os.chdir(target_repo) for row in db.execute('SELECT * FROM wiki ORDER BY time'): name, version, timestamp, author, ipnr, text, comment, ro = row @@ -47,6 +64,81 @@ def main(): write_file(name, text) commit_change(name, author, comment, timestamp / 1000000) + finally: + os.chdir(start_dir) + + +def _migrate_pq_dump(db_file, target_repo): + """ + Generate files based on pg_dump file. 
+ + pg_dump --no-owner --data-only --file=trac-wiki.psql --table=wiki trac + """ + + copy_started = False + # The dump is not sorted by timestamp, so we need to manually sort it + # and keep all pages in memory. + changes = [] + + with open(db_file, 'rb') as stream: + while True: + line = stream.readline() + + if not line: + # End of stream. + # Most likely this is not hit. + break + + if line == b'\\.\n': + # End of COPY dump. + break + + if line.startswith(b'COPY '): + # We can start to process the next line. + copy_started = True + continue + + if not copy_started: + # We are still in the header. + continue + + line = line.decode('utf-8') + + name, version, timestamp, author, ipnr, rest = line.split('\t', 5) + text, comment, ro = rest.rsplit('\t', 2) + + if author == 'trac': + # This is an internal Trac update. + continue + + timestamp = int(timestamp) + name = get_page_name(name) + + text = text.replace('\\r\\n', '\r\n') + text = text.replace('\\n', '\n') + changes.append({ + 'name': name, + 'timestamp': timestamp, + 'author': author, + 'text': text, + 'comment': comment, + }) + + start_dir = os.getcwd() + try: + os.chdir(target_repo) + + for change in sorted(changes, key=lambda k: k['timestamp']): + + print("Adding", change['name']) + + write_file(change['name'], change['text']) + commit_change( + change['name'], + change['author'], + change['comment'], + change['timestamp'] / 1000000, + ) finally: os.chdir(start_dir) @@ -75,15 +167,17 @@ def commit_change(path, author, comment, timestamp): """ Commit the current file. """ - try: - git_user, git_author = USER_MAPPING.get(author, DEFAULT_GITHUB_USER) - except: - import pdb; import sys; sys.stdout = sys.__stdout__; pdb.set_trace() + default_user = DEFAULT_GITHUB_USER + if not default_user: + # Create a default git user on the fly if one is not configured. 
+ default_user = (author, '{} '.format(author)) + + git_user, git_author = USER_MAPPING.get(author, default_user) name = path.rsplit(' ', 1)[-1] if comment: - message = comment + ' ' + name + ' modified by ' + git_user + message = comment + ' ' + name + ' modified by ' + git_user else: message = name + ' modified by ' + git_user @@ -101,5 +195,6 @@ def commit_change(path, author, comment, timestamp): '--date=' + git_date, ]) + if __name__ == '__main__': - main() + main(sys.argv[1:])