In [81]:
example_text = '''
{{a|Ulster}} {{IPA|ga|/mˠaːsˠ/|/mˠaːʃ/|qual2=before {{m|ga|é}}, {{m|ga|ea}}, {{m|ga|í}}, {{m|ga|iad}} and their emphatic equivalents}}
{{a|Galway}} {{IPA|ga|/lʲoːbˠ/}} {{a|corresponding to the spelling {{m|ga|leob}}}}<ref>{{R:ga:Finck|I|196}}</ref><ref>{{R:ga:GCFD|308}}</ref>
{{IPA|ga|/n̪ˠõːsˠ/|ref={{R:ga:Quiggin|17}}}}
'''

In [82]:
REFERENCE_TEMPLATES = {
    "Quiggin": {
        "positional": ["page", "section"],
    },
    "Finck": {
        "positional": ["volume", "page"],
        "named": ["section"]
    },
    "SjPh": {
        "positional": ["page", "page_end"],
        "named": ["section"]
    },
    "Achill": {
        "named": ["page", "section"]
    },
    "GCFD": {
        "positional": ["section"],
        "named": ["page", "edition"]
    },
    "IGT i": {
        "positional": ["page", "page_end", "section", "section_end", "text"],
        "named": ["page", "pages", "section", "sections", "text"]
    },
    "IGT ii": {
        "positional": ["page", "page_end", "section", "section_end"],
        "named": ["page", "pages", "section", "sections", "part"]
    },
    "IGT iii": {
        "positional": ["page", "page_end", "section", "section_end"],
        "named": ["page", "pages", "section"]
    },
    "TBhU": {
        "positional": ["page"],
        "named": ["section", "passage"]
    },
    "Ó Searcaigh": {
        "positional": ["section"],
        "named": ["page"]
    },
    "Ó Dónaill": {
        "positional": ["term"]
    },
    "Muskerry": {
        "named": ["page", "section", "line"]
    },
    "Torr": {
        "named": ["page", "section"]
    },
    "Tourmakeady": {
        "named": ["page", "section"]
    },
    "Corpas": {
        "positional": ["search_type", "term"],
        "named": ["pos"]
    },
    "DIL": {
        "positional": ["entry_id"],
        "named": ["head"]
    },
    "TYI": {
        "positional": ["page"]
    },
    "Ros Goill": {
        "named": ["page", "section"]
    },
    "Ros Muc": {
        "positional": ["page"]
    },
    "SnaG": {
        "named": ["page", "pages", "chapter", "section", "text", "t"]
    },
    "IPD": {
        "positional": ["term"]
    },
    "NEID": {
        "positional": ["term"]
    },
    "Foras": {
        "positional": ["term"]
    },
    "GCD": {
        "positional": ["section"],
        "named": ["page"]
    },
    "FFG": {
        "positional": ["page"]
    },
    "MacBain": {
        "positional": ["page", "page_end"],
        "named": ["head"]
    },
    "Gaelic Personal Names": {
        "positional": ["page"]
    },
    "Greene 1976": {
        "positional": ["term"]
    },
    "EID": {
        "positional": ["term"]
    },
    "EDPC": {
        "named": ["page", "pages", "head"]
    },
    "KPV": {
        "named": ["page", "pages", "head"]
    },
    "Breatnach": {
        "positional": ["page"],
        "named": ["page", "pages", "section", "line"]
    },
    "Hughes": {
        "positional": ["page"],
        "named": ["chapter", "passage"]
    },
}


In [104]:
!pip install pywikibot

Collecting pywikibot
  Downloading pywikibot-10.0.0-py3-none-any.whl (718 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m718.5/718.5 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting mwparserfromhell>=0.5.2
  Downloading mwparserfromhell-0.6.6.tar.gz (138 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.9/138.9 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: mwparserfromhell
  Building wheel for mwparserfromhell (setup.py) ... [?25ldone
[?25h  Created wheel for mwparserfromhell: filename=mwparserfromhell-0.6.6-cp310-cp310-macosx_10_9_x86_64.whl size=96901 sha256=ff1d2aee3f83a229094ba2a6d7e32ab8cee4f11248d8320b1fba1eeb13e38f46
  Stored in directory: /Users/joregan/Library/Caches/pip/wheels/97/c4/2e/c3f4c2948811c40e6a165f16f5cf60e8b06b03e9bf47b33884
Successfully built mwparserfromhell
Installing collected packages: mwparserfromhel

In [106]:
import pywikibot
from pywikibot.page import ItemPage
import json
import re

def parse_wikitext(text):
    # Using pywikibot to parse the wikitext
    site = pywikibot.Site()
    parsed_data = pywikibot.textlib.extract_templates_and_params(text)

    result = []

    for template, params in parsed_data:
        template_data = {"name": template, "args": []}
        for param_name, param_value in params.items():
            # Handle numeric parameters (qualN, refN)
            num_match = re.match(r"(qual|ref)(\d+)", param_name)
            if num_match:
                key_type, index = num_match.groups()
                template_data["args"].append({"type": key_type, "index": int(index), "value": param_value.strip()})
            else:
                template_data["args"].append({param_name: param_value.strip()})

        # Handle 'q' templates
        if template == "q":
            content = ' '.join(param_value.strip() for param_value in params.values())
            result.append({"qualifier": content.strip()})
        else:
            result.append(template_data)

    return result

# Example Usage and Test Cases
examples = [
    "{{IPA|ga|/n̪ˠõːsˠ/|ref={{R:ga:Quiggin|17}}}}",
    "{{a|Munster|Aran}} {{IPA|ga|/kɑt̪ˠ/}}",
    "{{IPA|ga|/bʲɪɟ/}}&lt;ref&gt;{{R:ga:Quiggin|43}}&lt;/ref&gt;&lt;ref&gt;{{R:ga:SjPh|30}}&lt;/ref&gt;",
    "{{a|Ulster}} {{IPA|ga|/kɾˠʌpˠ/}}&lt;ref&gt;{{R:ga:Quiggin|26}}&lt;/ref&gt; {{q|This is a qualifier with nested template {{m|ga|example}}}}",
    "{{q|Another qualifier without special interpretation}}",
    "{{IPA|ga|/ɛɾʲ/|/əɾʲ/|qual1=stressed|qual2=unstressed}}",
    "{{qualifier|before ''a'', ''o'', ''u'', ''fha'', ''fho'', ''fhu''}} {{IPA|ga|[xan̪ˠ]}}",
    "{{a|Connemara|Mayo|Ulster}} {{IPA|ga|/bˠɾˠat̪ˠ/}}",
    "{{a|Ulster}} {{IPA|ga|/ˈãːlˠəʃ/|/ˈãːl̪ˠəʃ/}}&lt;ref&gt;{{R:ga:Quiggin|65}}&lt;/ref&gt;",
    "{{a|Galway}} {{IPA|ga|/lʲoːbˠ/}} {{q|as if spelled {{m|ga|leob}}}}&lt;ref&gt;{{R:ga:Finck|I|196}}&lt;/ref&gt;&lt;ref&gt;{{R:ga:GCFD|308}}&lt;/ref&gt;"
]

for example in examples:
    parsed_output = parse_wikitext(example)
    print("Input:", example)
    print("Parsed Output:", json.dumps(parsed_output, indent=2))
    print("-" * 80)


Input: {{IPA|ga|/n̪ˠõːsˠ/|ref={{R:ga:Quiggin|17}}}}
Parsed Output: [
  {
    "name": "IPA",
    "args": [
      {
        "1": "ga"
      },
      {
        "2": "/n\u032a\u02e0\u00f5\u02d0s\u02e0/"
      },
      {
        "ref": "{{R:ga:Quiggin|17}}"
      }
    ]
  },
  {
    "name": "R:ga:Quiggin",
    "args": [
      {
        "1": "17"
      }
    ]
  }
]
--------------------------------------------------------------------------------
Input: {{a|Munster|Aran}} {{IPA|ga|/kɑt̪ˠ/}}
Parsed Output: [
  {
    "name": "a",
    "args": [
      {
        "1": "Munster"
      },
      {
        "2": "Aran"
      }
    ]
  },
  {
    "name": "IPA",
    "args": [
      {
        "1": "ga"
      },
      {
        "2": "/k\u0251t\u032a\u02e0/"
      }
    ]
  }
]
--------------------------------------------------------------------------------
Input: {{IPA|ga|/bʲɪɟ/}}&lt;ref&gt;{{R:ga:Quiggin|43}}&lt;/ref&gt;&lt;ref&gt;{{R:ga:SjPh|30}}&lt;/ref&gt;
Parsed Output: [
  {
    "name": "IPA",
    "a