forked from redmine-gitlab-migrator/redmine-gitlab-migrator
/
wiki.py
173 lines (137 loc) · 7.71 KB
/
wiki.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
from git import Repo, Actor
import pypandoc
import logging
import re
import unicodedata
log = logging.getLogger(__name__)
class TextileConverter():
    """
    Convert Redmine-flavoured Textile to Markdown.

    The bulk of the work is done by pandoc (through pypandoc). Constructs
    pandoc does not handle are rewritten with regular expressions, either
    before the pandoc run (indented code blocks, ``{{collapse(...)}}`` macros,
    ``p.`` paragraph markers) or after it (wiki links, note macros,
    attachment references, ``<code>`` highlighting).
    """

    # Oldest pandoc release accepted by __init__, as a numeric tuple.
    _MIN_PANDOC_VERSION = (1, 17)

    # Characters transliterated by normalize() before accents are stripped.
    _TRANSLITERATION = str.maketrans({"ß": "ss", "ä": "ae", "ö": "oe", "ü": "ue"})

    # Precompiled regular expressions. They are class attributes so they are
    # compiled once; they stay reachable as ``self.regex...`` on instances.
    regexWikiLinkWithText = re.compile(r'\\\[\\\[\s*([^\]]*?)\s*\|\s*([^\]]*?)\s*\\\]\\\]')
    regexWikiLinkWithoutText = re.compile(r'\\\[\\\[\s*([^\]]*?)\s*\\\]\\\]')
    regexTipMacro = re.compile(r'\{\{tip\((.*?)\)\}\}')
    regexNoteMacro = re.compile(r'\{\{note\((.*?)\)\}\}')
    regexWarningMacro = re.compile(r'\{\{warning\((.*?)\)\}\}')
    regexImportantMacro = re.compile(r'\{\{important\((.*?)\)\}\}')
    regexAnyMacro = re.compile(r'\{\{(.*)\}\}')
    regexCodeBlock = re.compile(r'\A ((.|\n)*)', re.MULTILINE)
    regexCollapse = re.compile(r'({{collapse\s?\(([^)]+)\))(.*)(}})', re.MULTILINE | re.DOTALL)
    # Anchored to the start of a line and followed by a blank: without the
    # anchor every "p." inside ordinary words ("help.html") was deleted.
    regexParagraph = re.compile(r'^p(\(+|(\)+)?>?|=)?\.(?=[ \t])', re.MULTILINE | re.DOTALL)
    # Non-greedy, and the class value may not contain a quote: otherwise two
    # <code class="..."> blocks on one page are captured as a single "language".
    regexCodeHighlight = re.compile(r'(<code\s?(class=\"([^\"]*)\")?>).*?(</code>)', re.MULTILINE | re.DOTALL)
    regexAttachment = re.compile(r'attachment:[\'\"“”‘’„”«»](.*)[\'\"“”‘’„”«»]', re.MULTILINE | re.DOTALL)
    # Opening / closing macro delimiters, used to find the end of a collapse block.
    _regexMacroBrace = re.compile(r'\{\{|\}\}')

    def __init__(self):
        """
        Check that the installed pandoc is recent enough.

        Raises:
            SystemExit: with status 1 when pandoc is older than 1.17.
        """
        # make sure we use at least version 1.17 of pandoc
        version = pypandoc.get_pandoc_version()
        if self._version_tuple(version) < self._MIN_PANDOC_VERSION:
            log.error('You need at least pandoc 1.17.0, download from http://pandoc.org/installing.html')
            raise SystemExit(1)

    @staticmethod
    def _version_tuple(version):
        """
        Turn a dotted version string into a tuple of ints for comparison.

        Comparing the raw strings orders "1.2" after "1.17" and "1.100"
        before it. Parsing stops at the first component that does not start
        with a digit, so "1.17.0.2" gives ``(1, 17, 0, 2)``.
        """
        parts = []
        for piece in str(version).split('.'):
            digits = re.match(r'\d+', piece)
            if digits is None:
                break
            parts.append(int(digits.group()))
        return tuple(parts)

    def wiki_link(self, match):
        """
        Build a Markdown link from a wiki-link regex match.

        Group 1 is the page name; group 2, when the pattern has one, is the
        link text (otherwise the page name doubles as the text). The target is
        the normalized page name with spaces replaced by underscores.
        """
        name = match.group(1)
        text = match.group(2) if len(match.groups()) > 1 else name
        target = self.normalize(name).replace(' ', '_')
        return '[{}]({})'.format(text, target)

    def normalize(self, title):
        """
        Reduce a page title to ASCII.

        ß/ä/ö/ü are transliterated to ss/ae/oe/ue; every other character is
        NFD-decomposed and whatever is still non-ASCII is dropped.
        """
        title = title.translate(self._TRANSLITERATION)
        return unicodedata.normalize('NFD', title).encode('ascii', 'ignore').decode('ascii')

    def _find_macro_end(self, text, start):
        """
        Return the index of the ``}}`` closing a macro whose body starts at
        ``start``, counting nested ``{{ ... }}`` pairs, or -1 if it is never
        closed.
        """
        depth = 1
        for brace in self._regexMacroBrace.finditer(text, start):
            depth += 1 if brace.group() == '{{' else -1
            if depth == 0:
                return brace.start()
        return -1

    def _expand_collapse(self, text):
        """
        Rewrite ``{{collapse(title) body }}`` macros as ``<details>`` blocks.

        regexCollapse is only used to locate the opener and its title; the
        closing ``}}`` is found by brace counting, because the pattern's
        greedy body runs to the last ``}}`` of the page. Other macros inside
        or around the block keep their ``}}``, empty bodies are handled, and
        nested collapse macros are expanded recursively. An opener without a
        matching close is left untouched.
        """
        pieces = []
        copied = 0  # everything before this index has already been emitted
        scan = 0    # where the next opener search starts
        while True:
            opener = self.regexCollapse.search(text, scan)
            if opener is None:
                break
            body_start = opener.end(1)
            body_end = self._find_macro_end(text, body_start)
            if body_end < 0:
                # unbalanced: keep this opener verbatim, look for later ones
                scan = body_start
                continue
            pieces.append(text[copied:opener.start()])
            body = self._expand_collapse(text[body_start:body_end])
            pieces.append("<details><summary>{}</summary> \n\n{}</details>".format(opener.group(2), body))
            copied = scan = body_end + 2
        pieces.append(text[copied:])
        return ''.join(pieces)

    def _postprocess(self, text):
        """
        Fix up pandoc's Markdown output for constructs pandoc leaves alone.

        All substitutions go through the compiled patterns' own ``sub``:
        passing ``re.MULTILINE | re.DOTALL`` as the fourth positional argument
        of ``re.sub`` sets ``count`` (to 24), not the flags, which silently
        limited every rewrite to the first 24 occurrences.
        """
        # pandoc does not convert everything, notably the [[link|text]] syntax
        # is not handled. So let's fix that.
        # [[ wikipage | link_text ]] -> [link_text](wikipage)
        text = self.regexWikiLinkWithText.sub(self.wiki_link, text)
        # [[ link_url ]] -> [link_url](link_url)
        text = self.regexWikiLinkWithoutText.sub(self.wiki_link, text)
        # nested lists, fix at least the common issues
        text = text.replace(" \\#\\*", " -")
        text = text.replace(" \\*\\#", " 1.")
        # Redmine is using '>' for blockquote, which is not textile
        # NOTE(review): as written this drops the space after every '>' in the
        # converted text, not only after blockquote markers - confirm intent.
        text = text.replace("> ", ">")
        # wiki note macros
        text = self.regexTipMacro.sub(r'---\n**TIP**: \1\n---\n', text)
        text = self.regexNoteMacro.sub(r'---\n**NOTE**: \1\n---\n', text)
        text = self.regexWarningMacro.sub(r'---\n**WARNING**: \1\n---\n', text)
        text = self.regexImportantMacro.sub(r'---\n**IMPORTANT**: \1\n---\n', text)
        # all other macros
        text = self.regexAnyMacro.sub(r'\1', text)
        # attachments in notes
        text = self.regexAttachment.sub(r"\n\n*(Merged from Redmine, please check first note for attachment named **\1**)*", text)
        # code highlight: <code class="X"> ... </code> -> fenced block tagged x
        for open_tag, _attribute, language, close_tag in self.regexCodeHighlight.findall(text):
            text = text.replace(open_tag, "\n```{}".format(language.lower()))
            text = text.replace(close_tag, "\n```")
        return text

    def convert(self, text):
        """
        Convert Textile ``text`` to Markdown.

        Returns:
            The converted Markdown string, or ``False`` when pandoc fails
            (the failure is logged).
        """
        # a blank-line separated block that starts with a space is code
        text = '\n\n'.join(
            self.regexCodeBlock.sub(r'<pre>\1</pre>', block) for block in text.split('\n\n'))
        text = self._expand_collapse(text)
        text = self.regexParagraph.sub("", text)
        # convert from textile to markdown; only the pandoc call is guarded
        try:
            text = pypandoc.convert_text(text, 'markdown_strict', format='textile')
        except RuntimeError as err:
            log.error('pandoc could not convert textile to markdown: %s', err)
            return False
        return self._postprocess(text)
class WikiPageConverter():
    """
    Convert Redmine wiki page versions to Markdown files and commit each
    version to a local git repository.

    TODO:
    * set author email address in git commit
    * adjust issue numbers in links in case they do not match ("#123")
    * check links to commits ("commit:01234abc") or changesets ("r123")
    * make all wiki pages filenames lower-case and fix all links
    * tables are sometimes not converted correctly.
    * fix anything else that pandoc does not convert correctly.

    NOTE: This was tested with pandoc 1.17.0.2 - it may not work as nice
    (or badly? :-)) with other versions.

    Redmine's Textile:
    http://www.redmine.org/projects/redmine/wiki/RedmineTextFormattingTextile
    """

    def __init__(self, local_repo_path):
        """
        Open the git repository at ``local_repo_path`` and set up the
        Textile converter.
        """
        self.repo_path = local_repo_path
        self.repo = Repo(local_repo_path)
        # TextileConverter() checks the pandoc version itself (logging an
        # error and exiting with status 1 when it is too old), so the check
        # is not repeated here.
        self.textile_converter = TextileConverter()

    def _write_file(self, file_name, text):
        """Write ``text`` plus a trailing newline to ``file_name`` in the repository directory."""
        with open(self.repo_path + "/" + file_name, mode='wt', encoding='utf-8') as fd:
            print(text, file=fd)

    def convert(self, redmine_page):
        """
        Convert one version of a Redmine wiki page and commit it.

        ``redmine_page`` is a dict; the keys read here are "title",
        "version", "updated_on", "author" (itself a dict with "name") and,
        optionally, "text" and "comments".

        Two files are written to the repository directory: ``<title>.textile``
        with the original text (kept for comparison, not committed) and
        ``<title>.md`` with the converted text, which is added and committed
        with the page author and timestamp. If the Textile conversion fails,
        the failure is logged and the unconverted text is committed instead.
        """
        title = self.textile_converter.normalize(redmine_page["title"])
        if title == 'Wiki':
            title = 'home'
        print("Converting {} ({} version {})".format(title, redmine_page["title"], redmine_page["version"]))

        # a page without text (missing key or None) is treated as empty
        text = redmine_page.get('text') or ""

        # create a copy of the original page (for comparison, will not be committed)
        self._write_file(title + ".textile", text)

        # replace some contents
        text = text.replace("{{lastupdated_at}}", redmine_page["updated_on"])
        text = text.replace("{{lastupdated_by}}", redmine_page["author"]["name"])
        text = text.replace("[[PageOutline]]", "")
        text = text.replace("{{>toc}}", "")

        converted = self.textile_converter.convert(text)
        if converted is False:
            # TextileConverter.convert() returns False when pandoc fails;
            # keep the page version in the history with its unconverted text
            log.error('Could not convert "%s" version %s, committing unconverted text',
                      title, redmine_page["version"])
            converted = text

        # save file with author/date
        file_name = title + ".md"
        self._write_file(file_name, converted)

        # todo: check for attachments
        # todo: upload attachments

        version = str(redmine_page["version"])
        comments = redmine_page.get("comments")
        if comments:
            commit_msg = comments + " (" + title + " v" + version + ")"
        else:
            commit_msg = title + ", version " + version

        author = Actor(redmine_page["author"]["name"], "")
        # "2016-01-31T12:00:00Z" -> "2016-01-31 12:00:00 +0000"
        timestamp = redmine_page["updated_on"].replace("T", " ").replace("Z", " +0000")

        self.repo.index.add([file_name])
        self.repo.index.commit(commit_msg, author=author, committer=author,
                               author_date=timestamp, commit_date=timestamp)