- Write a regex pattern that
  - looks at full rules
  - identifies the content between the `<suggestion>...</suggestion>` tags
  - finds every `\{number}` alias in that content and replaces it with `<match no="{number}"/>`


# Test on a single rule


In [1]:
# Sample rule used as the input of the single-rule check below.
# Its <suggestion> elements contain the backslash-number aliases \1 and \2,
# next to explicit <match no="1" .../> elements.
# Raw string (R prefix) so those backslashes stay literal.
rule_xml = R"""<rule id="BRIEFCATCH_186490410459524787354643479142450466081" name="BRIEFCATCH_PUNCHINESS_1017">
    <antipattern>
        <token inflected="yes">be<exception>art</exception>
            <!--<exception>'s</exception>-->
            <exception>been</exception>
            <exception>is</exception>
            <exception>are</exception>
        </token>
        <token min="0" regexp="yes">almost|also|barely|consistently|even|generally|impermissibly|improperly|inconsistently|increasingly|justifiably|largely|mainly|mostly|nearly|never|occasionally|often|partially|partly|perhaps|permissibly|possibly|practically|primarily|probably|rarely|seldom|sometimes|somewhat|sporadically|still|therefore|thus|typically|understandably|unreliably|usually</token>
        <token>involved</token>
        <token>in</token>
        <token postag="DT"/>
        <token regexp="yes">activites|case|causing|construction|creation|dangerous|decision|design|development|dispute|illegal|implementation|investigation|lawsuit|litigation|murder|ongoing|planning|polluting|process|production|program|research|unlawful|various</token>
    </antipattern>
    <antipattern>
        <token regexp="yes">company|cia|country|government|plaintiff|participants|employee|population|defendant|plaintiffs|brain|students|parties|police|world|parents|people|children</token>
        <token inflected="yes">be<exception>art</exception>
            <!--<exception>'s</exception>-->
            <exception>been</exception>
            <exception>is</exception>
            <exception>are</exception>
        </token>
        <token min="0" regexp="yes">almost|also|barely|consistently|even|generally|impermissibly|improperly|inconsistently|increasingly|justifiably|largely|mainly|mostly|nearly|never|occasionally|often|partially|partly|perhaps|permissibly|possibly|practically|primarily|probably|rarely|seldom|sometimes|somewhat|sporadically|still|therefore|thus|typically|understandably|unreliably|usually</token>
        <token min="0" regexp="yes">absolutely|actually|certainly|clearly|completely|considerably|decidedly|definitely|drastically|dramatically|entirely|extremely|flatly|fully|fundamentally|greatly|highly|obviously|perfectly|plainly|quite|really|strongly|surely|totally|truly|utterly|very|wholly|widely|justly</token>
        <token>involved</token>
        <token>in</token>
    </antipattern>
    <pattern>
        <token inflected="yes">be<exception>art</exception>
            <!--<exception>'s</exception>-->
            <exception>been</exception>
            <exception>is</exception>
            <exception>are</exception>
        </token>
        <token min="0" regexp="yes">almost|also|barely|consistently|even|generally|impermissibly|improperly|inconsistently|increasingly|justifiably|largely|mainly|mostly|nearly|never|occasionally|often|partially|partly|perhaps|permissibly|possibly|practically|primarily|probably|rarely|seldom|sometimes|somewhat|sporadically|still|therefore|thus|typically|understandably|unreliably|usually</token>
        <token>involved</token>
        <token>in</token>
    </pattern>
    <message>Would a stronger verb help engage the reader?|**Example** from Justice Kagan: "In 1988 Judulang **took part in** a fight in which another person shot and killed someone."|**Example** from Justice Alito: "[T]hose documents **are part of** the apparatus used to enforce federal and state income tax laws."|**Example** from Loretta Lynch: "[G]ender inequalities diminish women's ability to **participate in** the workforce."|**Example** from Steve Susman: "Universities should . . . evaluate applicants based on all the information available in the file including . . . an essay describing the ways in which the applicant will **contribute to** the life and diversity of the [school]."|**Example** from United Parcel Service's privacy notice: "[I]f you choose to withdraw your consent you may not be able to **participate in** or benefit from our programs[.]"</message>
    <suggestion>\2 <match no="1" postag="(V.*)" postag_regexp="yes" postag_replace="$1">participate</match> in</suggestion>
    <suggestion>\1 \2 part of</suggestion>
    <suggestion>\2 <match no="1" postag="(V.*)" postag_regexp="yes" postag_replace="$1">engage</match> in</suggestion>
    <suggestion>\2 <match no="1" postag="(V.*)" postag_regexp="yes" postag_replace="$1">take</match> part in</suggestion>
    <suggestion>\2 <match no="1" postag="(V.*)" postag_regexp="yes" postag_replace="$1">contribute</match> to</suggestion>
    <suggestion>\2 <match no="1" postag="(V.*)" postag_regexp="yes" postag_replace="$1">join</match></suggestion>
    <suggestion>\2 <match no="1" postag="(V.*)" postag_regexp="yes" postag_replace="$1">affect</match></suggestion>
    <short>{"ruleGroup":null,"ruleGroupIdx":0,"isConsistency":false,"isStyle":true,"correctionCount":7,"priority":"4.670","WORD":true,"OUTLOOK":true}</short>
    <example correction="participated in|was part of|engaged in|took part in|contributed to|joined|affected">She <marker>was involved in</marker> coaching.</example>
    <example>The CIA was certainly involved in shenanigans.</example>
    <example>He was almost involved in the case against Donald J. Trump.</example>
</rule>"""

In [2]:
import re


def resolve_alias_in_suggestion(rule_xml):
    """Expand ``\\N`` aliases inside ``<suggestion>`` elements into ``<match no="N"/>``.

    Only text between a bare ``<suggestion>`` opening tag and the next
    ``</suggestion>`` is considered; everything else in ``rule_xml`` is
    returned untouched. An alias is a backslash followed by a whole number
    from 1 to 100 (``\\0`` and ``\\101`` are left alone).

    Args:
        rule_xml: XML text of one rule or of a whole grammar file.

    Returns:
        A new string with every alias in suggestion text replaced by the
        equivalent ``<match no="N"/>`` element.
    """
    # DOTALL so a suggestion whose content spans several lines is still found.
    suggestion_re = re.compile(r"(<suggestion>)(.*?)(</suggestion>)", re.DOTALL)
    alias_re = re.compile(r"\\\b([1-9][0-9]?|100)\b")
    # Splitting on a capturing group keeps the tags: the result alternates
    # text, tag, text, tag, ... so tags sit at the odd indices.
    tag_re = re.compile(r"(<[^>]*>)")

    def _expand_aliases(text):
        # Replace each alias in plain text with its <match/> element.
        return alias_re.sub(lambda m: f"""<match no="{m.group(1)}"/>""", text)

    def _rewrite_suggestion(m):
        # Rewrite one suggestion, touching only the text between nested tags.
        # An alias-looking sequence inside a nested tag (e.g. a regex
        # attribute value) is kept as is: putting a <match/> element inside
        # an attribute would produce malformed XML.
        pieces = tag_re.split(m.group(2))
        body = "".join(
            piece if idx % 2 else _expand_aliases(piece)
            for idx, piece in enumerate(pieces)
        )
        return m.group(1) + body + m.group(3)

    # One pass over the document, replacing by match position rather than
    # calling str.replace on the whole text once per suggestion.
    return suggestion_re.sub(_rewrite_suggestion, rule_xml)


# Run the conversion on the sample rule; as the last expression of the cell,
# the returned XML string is shown as the cell output.
resolve_alias_in_suggestion(rule_xml)

'<rule id="BRIEFCATCH_186490410459524787354643479142450466081" name="BRIEFCATCH_PUNCHINESS_1017">\n    <antipattern>\n        <token inflected="yes">be<exception>art</exception>\n            <!--<exception>\'s</exception>-->\n            <exception>been</exception>\n            <exception>is</exception>\n            <exception>are</exception>\n        </token>\n        <token min="0" regexp="yes">almost|also|barely|consistently|even|generally|impermissibly|improperly|inconsistently|increasingly|justifiably|largely|mainly|mostly|nearly|never|occasionally|often|partially|partly|perhaps|permissibly|possibly|practically|primarily|probably|rarely|seldom|sometimes|somewhat|sporadically|still|therefore|thus|typically|understandably|unreliably|usually</token>\n        <token>involved</token>\n        <token>in</token>\n        <token postag="DT"/>\n        <token regexp="yes">activites|case|causing|construction|creation|dangerous|decision|design|development|dispute|illegal|implementation|inves

# Apply to the bulk grammar file


In [3]:
import os

# NOTE(review): absolute, machine-specific directory; adjust before running elsewhere.
rules_dir = "/Users/consult/wd/width/briefcatch-example-rules/"

# Input ("grammar copy.xml") and output ("grammar.xml") are different files,
# so the input is not overwritten by this cell.
# Encoding is given explicitly instead of relying on the platform default;
# assumes the grammar file is UTF-8 (the XML default) - TODO confirm.
with open(os.path.join(rules_dir, "grammar copy.xml"), "r", encoding="utf-8") as f:
    grammar_xml = f.read()

new_grammar_xml = resolve_alias_in_suggestion(grammar_xml)

# Output file name, resolved relative to rules_dir.
fpath = "grammar.xml"
with open(os.path.join(rules_dir, fpath), "w", encoding="utf-8") as f:
    f.write(new_grammar_xml)