-
Notifications
You must be signed in to change notification settings - Fork 0
Add support to migrate wiki from PostgreSQL dump files. #12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,7 @@ | ||
| config.py | ||
| config-chevah.py | ||
| *.psql | ||
| *.db3 | ||
|
|
||
| # Byte-compiled / optimized / DLL files | ||
| __pycache__/ | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,39 +1,56 @@ | ||
| # Script to migrate the wiki to a repo. | ||
| """ | ||
| Generate local wiki files based on a Trac DB file. | ||
|
|
||
| Accepted DB formats: | ||
| * SQLite3 DB (.db3) | ||
| * PSQL dump (.psql) | ||
| """ | ||
| import os | ||
| import sqlite3 | ||
| import subprocess | ||
| import sys | ||
| from datetime import datetime | ||
|
|
||
| from config import USER_MAPPING | ||
| from config import USER_MAPPING, DEFAULT_GITHUB_USER, FILE_EXTENSION | ||
|
|
||
| # Set to True to not commit. | ||
| DRY_RUN = False | ||
|
|
||
| DEFAULT_GITHUB_USER = USER_MAPPING['adi'] | ||
|
|
||
|
|
||
| # Wiki names to file names. | ||
| PAGE_NAME_MAPPING = { | ||
| 'WikiStart': 'Home', | ||
| } | ||
| } | ||
|
|
||
| FILE_EXTENSION = '.rst' | ||
|
|
||
| def main(): | ||
| def main(args): | ||
| """ | ||
| Do the job. | ||
| """ | ||
|
|
||
| if len(sys.argv) != 3: | ||
| print("Need to pass the path to Trac DB and git repo as arguments.") | ||
| if len(args) != 2: | ||
| print("Need to pass the path to DB file and git repo as arguments.") | ||
| sys.exit(1) | ||
|
|
||
| db = sqlite3.connect(sys.argv[1]) | ||
| db_file = args[0] | ||
| target_repo = args[1] | ||
|
|
||
| if db_file.endswith('.db3'): | ||
| return _migrate_sqlite(db_file, target_repo) | ||
|
|
||
| if db_file.endswith('.psql'): | ||
| return _migrate_pq_dump(db_file, target_repo) | ||
|
|
||
|
|
||
| def _migrate_sqlite(db_file, target_repo): | ||
| """ | ||
| Generate files based on an SQLite3 DB file. | ||
| """ | ||
| db = sqlite3.connect(db_file) | ||
|
|
||
| start_dir = os.getcwd() | ||
| try: | ||
| os.chdir(sys.argv[2]) | ||
| os.chdir(target_repo) | ||
|
|
||
| for row in db.execute('SELECT * FROM wiki ORDER BY time'): | ||
| name, version, timestamp, author, ipnr, text, comment, ro = row | ||
|
|
@@ -47,6 +64,81 @@ def main(): | |
| write_file(name, text) | ||
| commit_change(name, author, comment, timestamp / 1000000) | ||
|
|
||
| finally: | ||
| os.chdir(start_dir) | ||
|
|
||
|
|
||
| def _migrate_pq_dump(db_file, target_repo): | ||
| """ | ||
| Generate files based on pg_dump file. | ||
|
|
||
| pg_dump --no-owner --data-only --file=trac-wiki.psql --table=wiki trac | ||
| """ | ||
|
|
||
| copy_started = False | ||
| # The dump is not sorted by timestamp, so we need to manually sort it | ||
| # and keep all pages in memory. | ||
| changes = [] | ||
|
|
||
| with open(db_file, 'rb') as stream: | ||
| while True: | ||
| line = stream.readline() | ||
|
|
||
| if not line: | ||
| # End of stream. | ||
| # Most likely this is not hit, since the end-of-COPY marker is seen first. | ||
| break | ||
|
|
||
| if line == b'\\.\n': | ||
| # End of COPY dump. | ||
| break | ||
|
|
||
| if line.startswith(b'COPY '): | ||
| # We can start to process the next line. | ||
| copy_started = True | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If this works, then it's fine! But I couldn't try it out or see the output, because I don't have a dump. |
||
| continue | ||
|
|
||
| if not copy_started: | ||
| # We are still in the header | ||
| continue | ||
|
|
||
| line = line.decode('utf-8') | ||
|
|
||
| name, version, timestamp, author, ipnr, rest = line.split('\t', 5) | ||
| text, comment, ro = rest.rsplit('\t', 2) | ||
|
|
||
| if author == 'trac': | ||
| # This is an internal Trac update. | ||
| continue | ||
|
|
||
| timestamp = int(timestamp) | ||
| name = get_page_name(name) | ||
|
|
||
| text = text.replace('\\r\\n', '\r\n') | ||
| text = text.replace('\\n', '\n') | ||
| changes.append({ | ||
| 'name': name, | ||
| 'timestamp': timestamp, | ||
| 'author': author, | ||
| 'text': text, | ||
| 'comment': comment, | ||
| }) | ||
|
|
||
| start_dir = os.getcwd() | ||
| try: | ||
| os.chdir(target_repo) | ||
|
|
||
| for change in sorted(changes, key=lambda k: k['timestamp']): | ||
|
|
||
| print("Adding", change['name']) | ||
|
|
||
| write_file(change['name'], change['text']) | ||
| commit_change( | ||
| change['name'], | ||
| change['author'], | ||
| change['comment'], | ||
| change['timestamp'] / 1000000, | ||
| ) | ||
|
|
||
| finally: | ||
| os.chdir(start_dir) | ||
|
|
@@ -75,15 +167,17 @@ def commit_change(path, author, comment, timestamp): | |
| """ | ||
| Commit the current file. | ||
| """ | ||
| try: | ||
| git_user, git_author = USER_MAPPING.get(author, DEFAULT_GITHUB_USER) | ||
| except: | ||
| import pdb; import sys; sys.stdout = sys.__stdout__; pdb.set_trace() | ||
| default_user = DEFAULT_GITHUB_USER | ||
| if not default_user: | ||
| # Create a default git user on the fly if one is not configured. | ||
| default_user = (author, '{} <anonymous@example.com>'.format(author)) | ||
|
|
||
| git_user, git_author = USER_MAPPING.get(author, default_user) | ||
|
|
||
| name = path.rsplit(' ', 1)[-1] | ||
|
|
||
| if comment: | ||
| message = comment + ' ' + name + ' modified by ' + git_user | ||
| message = comment + ' ' + name + ' modified by ' + git_user | ||
| else: | ||
| message = name + ' modified by ' + git_user | ||
|
|
||
|
|
@@ -101,5 +195,6 @@ def commit_change(path, author, comment, timestamp): | |
| '--date=' + git_date, | ||
| ]) | ||
|
|
||
|
|
||
| if __name__ == '__main__': | ||
| main() | ||
| main(sys.argv[1:]) | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't quite get this.
But I think step 1 does something different (the title below is "Convert to git repo"). Maybe step 1 should be "Convert the Trac wiki as-is into a Git repository with history."