In [1]:
import requests
import mwparserfromhell


In [2]:

# MediaWiki Action API endpoint of the French-language Wikipedia.
API_URL = "https://fr.wikipedia.org/w/api.php"

# Value sent as the User-Agent header on every API request.
# NOTE(review): the contact address looks like a placeholder — replace it
# with a real one before running this against the live API.
USER_AGENT = "CitationStatsBot/1.0 (mailto:ton.email@example.com)"



In [3]:

# Template names (already normalized) counted as "citation needed" markers
# when the caller does not supply its own set.
_DEFAULT_CN_TEMPLATES = frozenset({
    "référence nécessaire",
    "source nécessaire",
    "refnec",               # short alias
})


def _normalize_template_name(name) -> str:
    """Normalize a template name for comparison.

    Underscores are treated as spaces, runs of whitespace are collapsed,
    surrounding whitespace is dropped and the result is lowercased.
    """
    return " ".join(str(name).replace("_", " ").split()).lower()


def citation_stats_with_mwpfh(page_title: str, *, timeout: float = 10.0,
                              cn_templates=None):
    """Count "citation needed" templates and <ref> tags in a wiki page.

    The latest revision's wikitext of ``page_title`` is fetched from
    ``API_URL`` and parsed with mwparserfromhell.

    Args:
        page_title: Title of the page to analyse.
        timeout: Seconds to wait for the HTTP request before giving up
            (passed to ``requests``).
        cn_templates: Optional iterable of template names to count as
            "citation needed" markers. Names are compared after
            normalization (case-insensitive, underscores equal spaces).
            Defaults to ``référence nécessaire``, ``source nécessaire``
            and ``refnec``.

    Returns:
        A dict with the keys ``"page"`` (the title as given),
        ``"citation_needed"`` (number of matching templates),
        ``"references"`` (number of ``<ref>`` tags) and ``"percent_cn"``
        (``citation_needed / references * 100``, or ``0.0`` when there are
        no ``<ref>`` tags). Note that ``percent_cn`` is a ratio of templates
        to ``<ref>`` tags, so it can exceed 100.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
        KeyError: If the response contains no revision for the page
            (e.g. missing page, invalid title, or an API error payload).
    """
    # 1) Fetch the wikitext of the latest revision.
    params = {
        "action":  "query",
        "format":  "json",
        "prop":    "revisions",
        "rvprop":  "content",
        "rvslots": "main",
        "titles":  page_title
    }
    # The context manager releases the session's connections, and the timeout
    # keeps the call from blocking forever on an unresponsive server.
    with requests.Session() as session:
        session.headers.update({"User-Agent": USER_AGENT})
        resp = session.get(API_URL, params=params, timeout=timeout)
        resp.raise_for_status()
        payload = resp.json()

    pages = payload.get("query", {}).get("pages", {})
    page = next(iter(pages.values()), None)
    revisions = page.get("revisions") if page else None
    if not revisions:
        # Fail with an explicit message instead of an opaque KeyError on
        # 'query' / 'revisions'; KeyError is kept so existing handlers work.
        error_info = payload.get("error", {}).get("info")
        detail = f" (API error: {error_info})" if error_info else ""
        raise KeyError(f"No wikitext available for page {page_title!r}{detail}")
    wikitext = revisions[0]["slots"]["main"]["*"]

    # 2) Parse with mwparserfromhell.
    wikicode = mwparserfromhell.parse(wikitext)

    # 3) Count the "citation needed" templates.
    if cn_templates is None:
        wanted = _DEFAULT_CN_TEMPLATES
    else:
        wanted = {_normalize_template_name(name) for name in cn_templates}
    cn_count = sum(
        1
        for tpl in wikicode.filter_templates()
        if _normalize_template_name(tpl.name) in wanted
    )

    # 4) Count the <ref> tags; the tag name is compared case-insensitively
    #    so that e.g. <REF> is counted as well.
    ref_count = len(wikicode.filter_tags(
        matches=lambda node: str(node.tag).strip().lower() == "ref"
    ))

    # 5) Ratio of templates to <ref> tags, guarded against division by zero.
    pct = (cn_count / ref_count * 100) if ref_count else 0.0

    return {
        "page":            page_title,
        "citation_needed": cn_count,
        "references":      ref_count,
        "percent_cn":      pct
    }


In [4]:

if __name__ == "__main__":
    # Demo: compute the statistics for one page and print a short report.
    page = "Sécession"
    stats = citation_stats_with_mwpfh(page)
    print(
        f"Page : {stats['page']}",
        f"• Passages non sourcés (templates) : {stats['citation_needed']}",
        f"• Balises <ref> (références)        : {stats['references']}",
        f"→ {stats['percent_cn']:.1f}% de passages sans source",
        sep="\n",
    )




Page : Sécession
• Passages non sourcés (templates) : 2
• Balises <ref> (références)        : 3
→ 66.7% de passages sans source
