/
fix_proxy_urls.py
83 lines (61 loc) · 2.05 KB
/
fix_proxy_urls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
"""Replaces proxy domains from Wikipedia Library.
See https://en.wikipedia.org/wiki/User:BsoykaBot/Task_2 for more info.
"""
import inspect
import logging

import pywikibot
from loguru import logger
from pywikibot import pagegenerators
# Version of this script; it is interpolated into the edit summary of every page save.
__version__ = "0.3.0"
class InterceptHandler(logging.Handler):
    """Intercept standard logging messages toward Loguru."""

    def emit(self, record: logging.LogRecord) -> None:
        """Send a standard logging record to Loguru.

        Args:
            record: The record produced by the standard ``logging`` module.
        """
        # Get corresponding Loguru level if it exists; otherwise fall back to
        # the record's numeric level.
        try:
            level = logger.level(record.levelname).name
        except ValueError:
            level = record.levelno

        # Find the frame that issued the logging call. The number of
        # ``logging``-internal frames between that call and this handler is
        # not constant (``logging.info()`` and ``Logger.exception()`` add one
        # more frame than ``Logger.info()``), so walk the stack instead of
        # assuming a fixed depth.
        frame, depth = inspect.currentframe(), 0
        while frame:
            filename = frame.f_code.co_filename
            is_logging = filename == logging.__file__
            is_frozen = "importlib" in filename and "_bootstrap" in filename
            # depth 0 is this ``emit`` frame itself and must always be skipped.
            if depth > 0 and not (is_logging or is_frozen):
                break
            frame = frame.f_back
            depth += 1

        # Forward the message, attributed to the frame found above.
        logger.opt(depth=depth, exception=record.exc_info).log(level, record.getMessage())
# Route every standard-logging record (NOTSET == 0, i.e. no level filtering)
# through InterceptHandler so it ends up in Loguru.
logging.basicConfig(level=logging.NOTSET, handlers=[InterceptHandler()])
# Hosts whose Wikipedia Library proxy aliases are rewritten back to the plain
# host name.
DOMAINS = {
    "www.newspapers.com",
    "doi.org",
    "www.jstor.org",
    "www.loebclassics.com",
    "www.cambridge.org",
    "onlinelibrary.wiley.com",
    "heinonline.org",
}

# Suffix shared by every proxied host name.
_PROXY_SUFFIX = ".wikipedialibrary.idm.oclc.org"

# Maps each proxied host name to the host that replaces it. Two aliases are
# registered per domain: the hyphenated form (dots replaced by "-") and the
# dotted form, each followed by the proxy suffix.
REPLACEMENTS = {}
for domain in DOMAINS:
    REPLACEMENTS.update(
        {
            domain.replace(".", "-") + _PROXY_SUFFIX: domain,
            domain + _PROXY_SUFFIX: domain,
        }
    )
def process_page(page: pywikibot.Page) -> None:
    """Rewrite proxied host names in a page and save it if anything changed.

    Args:
        page: The page whose text is scanned for the keys of ``REPLACEMENTS``.
            Its ``text`` is updated and ``save`` is called only when at least
            one replacement altered the text.
    """
    original_text = page.text
    updated_text = original_text
    for proxy_host, real_host in REPLACEMENTS.items():
        updated_text = updated_text.replace(proxy_host, real_host)

    # Nothing matched: leave the page alone rather than saving a no-op edit.
    if updated_text == original_text:
        return

    edit_summary = f"Replacing [[WP:TWL|TWL]] proxy links ([[User:BsoykaBot/Task 2|Task 2]], v{__version__})"
    page.text = updated_text
    page.save(summary=edit_summary, minor=True)
def main():
    """Find every namespace-0 page containing a proxied host and process it.

    One ``insource:`` search is run per key of ``REPLACEMENTS``; the results
    are gathered in a set so that a page matching several proxy strings is
    passed to ``process_page`` only once.
    """
    pending: set[pywikibot.Page] = set()
    for proxy_host in REPLACEMENTS:
        search_query = f'insource:"{proxy_host}"'
        matches = pagegenerators.SearchPageGenerator(search_query, namespaces={0})
        pending.update(matches)

    for candidate in pending:
        process_page(candidate)
# Run the bot only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()