import argparse
import datetime
import mwparserfromhell
from operator import itemgetter
import pywikibot
from import Request
import re
import sys
from clint.textui import progress
# Cutoff date as an ISO "YYYY-MM-DD" string; ISO dates sort lexicographically,
# so is_user_active() can compare timestamp date prefixes against it directly.
# UTC is used to match the UTC clock used elsewhere in this script.
# NOTE(review): the name says three months but the window is 60 days --
# confirm which one is intended before changing either.
THREE_MONTHS_AGO = (
    datetime.datetime.utcnow() - datetime.timedelta(60)
).strftime("%Y-%m-%d")

# Matching of {{user|...}} templates is currently disabled:
#USER_TEMPLATE = re.compile(ur"{{.*?[Uu]ser.*?\|(.+?)(?:\|.*?)?}}", re.UNICODE)

# Matches a [[User...:Name]] wikilink. Group 1 is the username, without any
# "/subpage" suffix or "|label" part.
USER_LINK = re.compile(r"\[\[[Uu]ser.*?:(.+?)(?:/.*)?(?:\|.*?)?\]\]", re.UNICODE)

# Layout of the rewritten list: active users, then a generated subsection
# holding the inactive ones.
TEMPLATE = u"{}\n===Inactive participants===\n''Generated by a [[User:APersonBot|bot]]''\n{}"

# Edit summary for the bot's edits.
SUMMARY = "[[Wikipedia:Bots/Requests for approval/APersonBot 9|Bot]] testing a bot"

# Words that mark a page title, section title or transcluded subpage as a
# participants list.
PARTICIPANTS = ("Participants", "Members")

# A bulleted (*) or numbered (#) wikitext list item.
LIST_ITEM = re.compile(r"(\*|#).+")
def is_user_active(site, username):
    """Report whether `username` has a contribution newer than the cutoff.

    Queries the `usercontribs` API list for a single contribution and compares
    the date part of its timestamp with THREE_MONTHS_AGO. A user with no
    contributions at all counts as inactive.

    NOTE(review): this relies on the API returning the newest contribution
    first -- confirm against the MediaWiki API defaults.
    """
    query = Request(
        site=site,
        action="query",
        list="usercontribs",
        ucuser=username,
        uclimit=1,
        ucprop="timestamp"
    )
    edits = query.submit()[u"query"][u"usercontribs"]
    if not edits:
        return False

    # Timestamps look like "<date>T<time>"; only the date part is compared.
    edit_date = edits[0][u"timestamp"].partition("T")[0]
    return edit_date > THREE_MONTHS_AGO
def line_to_username(line):
    """Given a line of wikitext, extract a username.

    Returns the first group captured by USER_LINK, or None when the line
    contains no user link.
    """
    # search(), not match(): list lines begin with "*" or "#", so the link
    # is never at the very start of the line.
    user_link_match = USER_LINK.search(line)
    if user_link_match:
        return user_link_match.group(1)

    return None
first = itemgetter(0)


def wikitext_to_usernames(wikitext):
    """Pair every username-bearing line of `wikitext` with its username.

    Returns a list of (username, line) pairs in line order; lines for which
    line_to_username() finds nothing are left out.
    """
    pairs = ((line_to_username(line), line) for line in wikitext.splitlines())
    return [pair for pair in pairs if first(pair)]
def update_participants_list(site, wikitext):
    """
    Given a Pywikibot Site and some wikitext containing a participants list,
    create an "Inactive participants" section and move users there as needed.

    Returns the updated wikitext. The input is returned unchanged (after
    printing an error) when no usernames are found or when the list is not
    a bulleted/numbered list.

    NOTE: only lines that contain a username are carried over into the
    rewritten list; other lines between the first and last user line are
    dropped.
    """
    # Obtain the region of wikitext that contains usernames
    user_lines = [x for x in wikitext.splitlines() if line_to_username(x)]
    if not user_lines:
        print("Error! I couldn't find any usernames in that list.")
        return wikitext
    first_line, last_line = user_lines[0], user_lines[-1]

    # If the page doesn't use a numbered or bulleted list, we really shouldn't parse it
    if not LIST_ITEM.match(first_line):
        print("Error! I can't recognize that list format.")
        return wikitext

    # rfind() for the end: if the last line's text also occurs earlier
    # (e.g. a duplicated entry), find() would cut the region short.
    start = wikitext.find(first_line)
    end = wikitext.rfind(last_line) + len(last_line)
    old_participants_list = wikitext[start:end]

    # Bucket each user line by whether its user is active.
    activity_lists = {True: [], False: []}
    for username, line in wikitext_to_usernames(old_participants_list):
        activity_lists[is_user_active(site, username)].append(line)

    # Joining with a unicode separator keeps the result unicode even when a
    # bucket is empty.
    active_users = u"\n".join(activity_lists[True])
    inactive_users = u"\n".join(activity_lists[False])
    new_participants_list = TEMPLATE.format(active_users, inactive_users)
    return wikitext.replace(old_participants_list, new_participants_list)
def locate_participants_list(page):
    """
    Gets the text of a participants list in the given page. Also searches in
    specially-named pages transcluded in the given page. Returns a tuple of
    the form (wikitext, page), where wikitext is the text of the list and page
    is the pywikibot.Page where it came from.

    Returns None (after printing an error where applicable) when no usable
    list is found: several participants sections, a section with
    unrecognized subsections, or no list at all.
    """
    page_title = page.title(withNamespace=False)
    if any(x in page_title for x in PARTICIPANTS):
        print("Entire page is a participants list. Parsing...")
        return (page.text, page)

    # Locate a "Participants" section and try to parse that.
    wikicode = mwparserfromhell.parse(page.text)
    sections = [s for s in wikicode.get_sections(include_lead=False, levels=(2,))
                if get_section_title(s) in PARTICIPANTS]
    if len(sections) > 1:
        print("Error! Multiple participants sections found.")
        return None

    if sections:
        print("Found a section with a participants list. Parsing...")
        section = sections[0]
        section_text = unicode(section)

        # Verify that the structure of this section is simple
        if "===" in section_text:
            subsections = section.get_sections(include_lead=False)
            subsection_titles = [get_section_title(s) for s in subsections]
            subsection_titles = subsection_titles[1:] # Forget the main title
            for subsection_title in subsection_titles:
                subsection_title = subsection_title.lower()
                # "inactive" contains "active", so one test covers both.
                if "active" not in subsection_title:
                    print("Error! Participants list structure is too complicated (found title: {})".format(subsection_title))
                    return None

        return (section_text, page)

    # Is there a participants subpage transclusion?
    templates = [t for t in wikicode.filter_templates()
                 if any("/" + x in str(t.name) for x in PARTICIPANTS)]
    if templates:
        template = templates[0]
        print("Found a template titled \"{}\". Parsing...".format(template.name))
        # Use the page's own site; no `site` name is defined in this scope.
        template = pywikibot.Page(page.site, str(template.name))
        return (template.text, template)

    return None
def get_section_title(section):
    """Return the whitespace-stripped title of a section's first heading.

    `section` is expected to be a mwparserfromhell section; indexing fails
    if it contains no headings.
    """
    headings = section.filter_headings()
    leading_heading = headings[0]
    return leading_heading.title.strip()
def main():
    """Command-line entry point.

    Takes one page title, locates its participants list, sorts the listed
    users into active and inactive, and writes the resulting page text
    (wrapped in <nowiki>) to a sandbox page under User:APersonBot. Exits
    with status 1 when the page does not exist or no list can be found.
    """
    site = pywikibot.Site("en", "wikipedia")

    parser = argparse.ArgumentParser()
    parser.add_argument("page", help="The title (with namespace) of the page to process.")
    args = parser.parse_args()

    page = pywikibot.Page(site, args.page)
    if not page.exists():
        print("%s doesn't exist! Exiting." % args.page)
        sys.exit(1)

    # The locator signals failure with None, so check before unpacking.
    located = locate_participants_list(page)
    if not located:
        print("Couldn't find a usable participants list; exiting.")
        sys.exit(1)
    list_text, page = located

    original_page_text = page.text
    new_list_text = update_participants_list(site, list_text)
    page.text = page.text.replace(list_text, new_list_text)
    if original_page_text == page.text:
        print("Nothing changed; exiting.")
        return

    # The target page itself is never saved; the result goes to a sandbox page.
    sandbox_page = pywikibot.Page(site, "User:APersonBot/sandbox/Task 9/" + page.title(withNamespace=True))
    # NOTE(review): "[ update-participants]" looks like an external link whose
    # URL is missing -- restore the intended target.
    sandbox_page.text = "{{{{mbox|text=Results of [ update-participants] for [[{}]], run at {}. (Edit, remove the surrounding <nowiki>'s, and preview to see the list as it would normally appear.)}}}}\n".format(page.title(withNamespace=True), datetime.datetime.utcnow().isoformat())
    sandbox_page.text += u"<nowiki>{}</nowiki>".format(page.text)
    # NOTE(review): the save is inferred -- without it the text built above is
    # discarded and SUMMARY is unused. Confirm this is the intended write.
    sandbox_page.save(summary=SUMMARY)
# Run the bot only when executed as a script, not when imported.
if __name__ == "__main__":
    main()