forked from SirComputer1/SCBot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
unlink.py
202 lines (182 loc) · 7.44 KB
/
unlink.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This bot unlinks a page on every page that links to it.
This script understands this command-line argument:
-namespace:n - Number of namespace to process. The parameter can be used
multiple times. It works in combination with all other
parameters, except for the -start parameter. If you e.g.
want to iterate over all user pages starting at User:M, use
-start:User:M.
All other parameters will be regarded as part of the title of the page that
should be unlinked.
Example:
python unlink.py Foo bar -namespace:0 -namespace:6
Removes links to the page [[Foo bar]] in articles and image descriptions.
"""
#
# (C) Pywikibot team, 2007-2013
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id$'
#
import re
import wikipedia as pywikibot
import pagegenerators
import editarticle
from pywikibot import i18n
class UnlinkBot:
    """Bot that removes wikilinks pointing at one given page.

    Iterates over every page that links to ``pageToUnlink`` (optionally
    restricted to certain namespaces) and replaces each matching
    ``[[...]]`` link with its plain-text label, either interactively or
    -- when ``always`` is set -- without asking the user.
    """

    def __init__(self, pageToUnlink, namespaces, always):
        """Build the page generator and the link-matching regex.

        pageToUnlink -- the Page whose incoming links will be removed
        namespaces   -- namespaces to restrict the run to; an empty list
                        means all namespaces are processed
        always       -- if True, unlink every match without prompting
        """
        self.pageToUnlink = pageToUnlink
        gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        # Preload page texts in batches to cut down on server round trips.
        self.generator = pagegenerators.PreloadingGenerator(gen)
        # Language-dependent pattern for letters that may trail a link and
        # still belong to the linked word (e.g. plural endings).
        linktrail = pywikibot.getSite().linktrail()
        # The regular expression which finds links. Results consist of four
        # groups:
        #
        # group title is the target page title, that is, everything
        # before | or ].
        #
        # group section is the page section.
        # It'll include the # to make life easier for us.
        #
        # group label is the alternative link title, that's everything
        # between | and ].
        #
        # group linktrail is the link trail, that's letters after ]] which are
        # part of the word.
        # note that the definition of 'letter' varies from language to language.
        self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
                                % linktrail)
        self.always = always
        # Set to True when the user chooses 'quit'; checked by run().
        self.done = False
        # Localized edit summary used for every unlinking edit.
        self.comment = i18n.twtranslate(pywikibot.getSite(), 'unlink-unlinking',
                                        self.pageToUnlink.title())

    def handleNextLink(self, text, match, context=100):
        """
        Process one regex match; returns a tuple (text, jumpToBeginning).

        text is the unicode string after the current link has been processed.
        jumpToBeginning is a boolean which specifies if the cursor position
        should be reset to 0. This is required after the user has edited the
        article.
        """
        # ignore interwiki links and links to sections of the same page as well
        # as section links
        if not match.group('title') \
           or self.pageToUnlink.site().isInterwikiLink(match.group('title')) \
           or match.group('section'):
            return text, False
        linkedPage = pywikibot.Page(self.pageToUnlink.site(),
                                    match.group('title'))
        # Check whether the link found is to the current page itself.
        if linkedPage != self.pageToUnlink:
            # not a self-link
            return text, False
        else:
            # at the beginning of the link, start red color.
            # at the end of the link, reset the color to default
            if self.always:
                choice = 'a'
            else:
                # Show the link highlighted in red, surrounded by `context`
                # characters of page text on each side.
                pywikibot.output(
                    text[max(0, match.start() - context):match.start()]
                    + '\03{lightred}' + text[match.start():match.end()]
                    + '\03{default}' + text[match.end():match.end() + context])
                choice = pywikibot.inputChoice(
                    u'\nWhat shall be done with this link?\n',
                    ['unlink', 'skip', 'edit', 'more context',
                     'unlink all', 'quit'],
                    ['U', 's', 'e', 'm', 'a', 'q'], 'u')
                pywikibot.output(u'')
            if choice == 's':
                # skip this link
                return text, False
            elif choice == 'e':
                editor = editarticle.TextEditor()
                newText = editor.edit(text, jumpIndex=match.start())
                # if user didn't press Cancel
                if newText:
                    return newText, True
                else:
                    return text, True
            elif choice == 'm':
                # show more context by recursive self-call
                return self.handleNextLink(text, match,
                                           context=context + 100)
            elif choice == 'a':
                # 'unlink all': stop prompting, then fall through to unlink.
                self.always = True
            elif choice == 'q':
                self.done = True
                return text, False
            # Unlink: replace the whole link with its label (or with the
            # title when no label is given), keeping the link trail attached.
            new = match.group('label') or match.group('title')
            new += match.group('linktrail')
            return text[:match.start()] + new + text[match.end():], False

    def treat(self, page):
        """Remove all links to self.pageToUnlink from a single page."""
        # Show the title of the page we're working on.
        # Highlight the title in purple.
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        try:
            oldText = page.get()
            text = oldText
            curpos = 0
            while curpos < len(text):
                match = self.linkR.search(text, pos=curpos)
                if not match:
                    break
                # Make sure that next time around we will not find this same
                # hit.
                curpos = match.start() + 1
                text, jumpToBeginning = self.handleNextLink(text, match)
                if jumpToBeginning:
                    # The user edited the text; restart from the top.
                    curpos = 0
            if oldText == text:
                pywikibot.output(u'No changes necessary.')
            else:
                pywikibot.showDiff(oldText, text)
                page.put(text, self.comment)
        except pywikibot.NoPage:
            pywikibot.output(u"Page %s does not exist?!"
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                             % page.title(asLink=True))
        except pywikibot.LockedPage:
            pywikibot.output(u"Page %s is locked?!" % page.title(asLink=True))

    def run(self):
        """Treat every referring page until exhausted or the user quits."""
        for page in self.generator:
            if self.done:
                break
            self.treat(page)
def main():
    """Parse the command line and start the unlink bot.

    Every argument that is not an option is collected as part of the
    title of the page to unlink; without a title, the help is shown.
    """
    # Words making up the title of the single page to be unlinked.
    title_parts = []
    # Namespaces to process; an empty list means all namespaces.
    namespaces = []
    always = False
    for argument in pywikibot.handleArgs():
        if argument == '-always':
            always = True
        elif argument.startswith('-namespace:'):
            value = argument[len('-namespace:'):]
            try:
                value = int(value)
            except ValueError:
                # Not a number: keep the namespace's canonical name.
                pass
            namespaces.append(value)
        else:
            title_parts.append(argument)
    if not title_parts:
        pywikibot.showHelp('unlink')
        return
    target = pywikibot.Page(pywikibot.getSite(), ' '.join(title_parts))
    bot = UnlinkBot(target, namespaces, always)
    bot.run()
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always shut the framework down cleanly, even if main() raised
        # or the user interrupted the run.
        pywikibot.stopme()