Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
148 lines (125 sloc) 6.72 KB
import argparse
import datetime
import mwparserfromhell
from operator import itemgetter
import pywikibot
from pywikibot.data.api import Request
import re
import sys
from clint.textui import progress
# Activity cutoff as a "YYYY-MM-DD" string, computed once at import time from
# the local clock. is_user_active() compares it lexicographically against the
# date part of a user's contribution timestamp.
# NOTE(review): despite the name, the offset is 60 days, not ~90 -- confirm
# which threshold is intended before changing either the name or the value.
THREE_MONTHS_AGO = (datetime.datetime.today() - datetime.timedelta(60)).strftime("%Y-%m-%d")
# Disabled pattern for template-style user mentions (e.g. {{User|Name}});
# line_to_username() currently recognises wikilinks only.
#USER_TEMPLATE = re.compile(ur"{{.*?[Uu]ser.*?\|(.+?)(?:\|.*?)?}}", re.UNICODE)
# Matches a wikilink whose target starts with "User"/"user" followed by a
# colon (so both "[[User:...]]" and "[[User talk:...]]" match). Group 1 is the
# username: the text after the colon, excluding any "/subpage" suffix and any
# "|label" part.
USER_LINK = re.compile(ur"\[\[[Uu]ser.*?:(.+?)(?:/.*)?(?:\|.*?)?\]\]", re.UNICODE)
# Layout of the rewritten list: first slot is the active users' lines, second
# slot is the inactive users' lines, placed under a level-3 heading with a
# bot attribution line.
TEMPLATE = u"{}\n===Inactive participants===\n''Generated by a [[User:APersonBot|bot]]''\n{}"
# Edit summary passed to Page.save() in main().
SUMMARY = "[[Wikipedia:Bots/Requests for approval/APersonBot 9|Bot]] testing a bot"
# Names that identify a participants list; checked against page titles,
# level-2 section titles and transcluded template names.
PARTICIPANTS = ("Participants", "Members")
# Used with .match(), so it accepts a line that starts with "*" or "#"
# (bulleted or numbered list item) followed by at least one more character.
LIST_ITEM = re.compile(ur"(\*|#).+")
def is_user_active(site, username):
    """
    Report whether the given user has a recent enough contribution.

    Requests a single entry (timestamp only) from the "usercontribs" API list
    for `username` on `site`. Returns False when the API returns no entries;
    otherwise returns True only if the date part of that entry's timestamp
    (the text before "T") sorts after THREE_MONTHS_AGO.
    NOTE(review): this relies on the API returning the newest contribution
    first by default -- confirm.
    """
    query = Request(
        site=site,
        action="query",
        list="usercontribs",
        ucuser=username,
        uclimit=1,
        ucprop="timestamp",
    )
    contribs = query.submit()[u"query"][u"usercontribs"]
    if not contribs:
        return False
    # ISO dates compare correctly as plain strings, so no parsing is needed.
    edit_date, _, _ = contribs[0][u"timestamp"].partition("T")
    return edit_date > THREE_MONTHS_AGO
def line_to_username(line):
    """
    Extract a username from one line of wikitext.

    Returns the first capture group of the first USER_LINK match found in
    `line`, or None when the line contains no such link. Only wikilinks are
    recognised; template-style user mentions are not.
    """
    link = USER_LINK.search(line)
    return link.group(1) if link else None
# Selects the username half of a (username, line) pair.
first = itemgetter(0)


def wikitext_to_usernames(wikitext):
    """
    Pair every username found in `wikitext` with the line it came from.

    Returns a list of (username, line) tuples in document order; lines from
    which line_to_username() extracts nothing are left out.
    """
    pairs = []
    for line in wikitext.splitlines():
        pair = (line_to_username(line), line)
        if first(pair):
            pairs.append(pair)
    return pairs
def update_participants_list(site, wikitext):
    """
    Split a participants list into active and inactive users.

    Given a Pywikibot Site and some wikitext containing a participants list,
    rebuild the region spanning the first to the last line that holds a user
    link: active users' lines come first, followed by an
    "Inactive participants" section (see TEMPLATE) holding the inactive
    users' lines. Lines inside that region that carry no user link are
    dropped by the rewrite.

    Returns the updated wikitext. The input is returned unchanged when no
    line contains a user link, or when the first such line is not a bulleted
    or numbered list item.
    """
    entries = list(wikitext_to_usernames(wikitext))
    if not entries:
        # Nothing to classify; previously this crashed with StopIteration.
        print("Error! I couldn't find any usernames in that list.")
        return wikitext

    first_line = entries[0][1]
    last_line = entries[-1][1]

    # If the page doesn't use a numbered or bulleted list, we really
    # shouldn't parse it
    if not LIST_ITEM.match(first_line):
        print("Error! I can't recognize that list format.")
        return wikitext

    # Locate the region of wikitext that contains usernames. The end must be
    # searched from the right: if the last line's text also appears earlier
    # (e.g. a duplicate signup), a left-to-right search would truncate the
    # region, or even make it empty.
    start = wikitext.find(first_line)
    end = wikitext.rfind(last_line) + len(last_line)

    activity_lists = {True: [], False: []}
    for username, line in progress.bar(entries):
        activity_lists[is_user_active(site, username)].append(line)

    new_participants_list = TEMPLATE.format("\n".join(activity_lists[True]),
                                            "\n".join(activity_lists[False]))

    # Splice by offset so exactly the located region is replaced.
    return wikitext[:start] + new_participants_list + wikitext[end:]
def locate_participants_list(page):
    """
    Find the wikitext of a participants list for the given page.

    Three places are tried, in order:

    1. The page itself, if its title (without namespace) contains one of the
       PARTICIPANTS names.
    2. A single level-2 section of the page titled with one of the
       PARTICIPANTS names, provided any subsections it has mention "active"
       (which also covers "inactive") in their titles.
    3. The first template transcluded in the page whose name contains
       "/<name>" for one of the PARTICIPANTS names; that page's text is used.

    Returns a tuple (wikitext, page), where wikitext is the text of the list
    and page is the pywikibot.Page it came from, or None if no usable list
    was found (an explanation is printed for the error cases).
    """
    if any(x in page.title(withNamespace=False) for x in PARTICIPANTS):
        print("Entire page is a participants list. Parsing...")
        return (page.text, page)

    # Locate a "Participants" section and try to parse that.
    wikicode = mwparserfromhell.parse(page.text)
    sections = [s for s in wikicode.get_sections(include_lead=False, levels=(2,))
                if get_section_title(s) in PARTICIPANTS]

    if len(sections) > 1:
        print("Error! Multiple participants sections found.")
        return None

    if sections:
        print("Found a section with a participants list. Parsing...")
        section = sections[0]
        section_text = unicode(section)

        # Verify that the structure of this section is simple
        if "===" in section_text:
            subsections = section.get_sections(include_lead=False)
            # Forget the main title
            subsection_titles = [get_section_title(s) for s in subsections][1:]
            for subsection_title in subsection_titles:
                subsection_title = subsection_title.lower()
                # "inactive" contains "active", so one test covers both.
                if "active" not in subsection_title:
                    print(u"Error! Participants list structure is too complicated (found title: {})".format(subsection_title))
                    return None

        # Reached both when there are no subsections and when every
        # subsection is acceptable.
        return (section_text, page)

    # Is there a participants subpage transclusion?
    for template in wikicode.filter_templates():
        # unicode(), not str(): names may contain non-ASCII characters.
        name = unicode(template.name).strip()
        if not any("/" + x in name for x in PARTICIPANTS):
            continue
        print(u"Found a template titled \"{}\". Parsing...".format(name))
        if name.startswith("/"):
            # A leading slash means a subpage of the page being processed.
            name = page.title(withNamespace=True) + name
        # Use the page's own site; there is no module-level `site`.
        list_page = pywikibot.Page(page.site, name)
        return (list_page.text, list_page)

    return None
def get_section_title(section):
    """
    Return the title of a mwparserfromhell section.

    Takes the first heading reported by section.filter_headings() and returns
    its title with surrounding whitespace stripped.
    """
    headings = section.filter_headings()
    leading_heading = headings[0]
    return leading_heading.title.strip()
def main():
    """
    Command-line entry point.

    Takes one argument, the full title of an English Wikipedia page. Locates
    that page's participants list, rewrites it with inactive users moved to
    their own section, and saves the resulting page text (wrapped in
    <nowiki> tags, under an explanatory mbox) to a sandbox subpage of
    User:APersonBot. The processed page itself is never saved.

    Exits with status 1 if the page does not exist or no participants list
    can be located, and with status 0 if the rewrite changes nothing.
    """
    # Parse arguments first so --help and usage errors need no login.
    parser = argparse.ArgumentParser()
    parser.add_argument("page", help="The title (with namespace) of the page to process.")
    args = parser.parse_args()

    site = pywikibot.Site("en", "wikipedia")
    site.login()

    page = pywikibot.Page(site, args.page)
    if not page.exists():
        print("%s doesn't exist! Exiting." % args.page)
        sys.exit(1)

    located = locate_participants_list(page)
    if located is None:
        # The locator can fall through without a result; unpacking that
        # would raise TypeError.
        print("Error! Couldn't locate a participants list; exiting.")
        sys.exit(1)
    list_text, page = located

    original_page_text = page.text
    new_list_text = update_participants_list(site, list_text)
    new_page_text = original_page_text.replace(list_text, new_list_text)
    if original_page_text == new_page_text:
        print("Nothing changed; exiting.")
        sys.exit(0)

    full_title = page.title(withNamespace=True)
    sandbox_page = pywikibot.Page(site, "User:APersonBot/sandbox/Task 9/" + full_title)
    # Unicode format strings: a byte-string .format() with a non-ASCII title
    # raises UnicodeEncodeError on Python 2.
    sandbox_page.text = u"{{{{mbox|text=Results of [https://github.com/APerson241/APersonBot/blob/master/update-participants/update-participants.py update-participants] for [[{}]], run at {}. (Edit, remove the surrounding <nowiki>'s, and preview to see the list as it would normally appear.)}}}}\n".format(full_title, datetime.datetime.utcnow().isoformat())
    sandbox_page.text += u"<nowiki>{}</nowiki>".format(new_page_text)
    sandbox_page.save(summary=SUMMARY)
if __name__ == "__main__":
main()