In [28]:
from nltk.sem.logic import *

In [307]:
import random
import string
import copy

In [101]:
def replace_at_index(s, index, replacement):
    """Return a copy of ``s`` with the item at ``index`` swapped for ``replacement``.

    Args:
        s: Sequence to copy from (a string in this notebook).
        index: Position of the single item to replace. Negative values that
            are in range count from the end, like ordinary indexing.
        replacement: Sequence spliced in at ``index``; it may have any length.

    Returns:
        A new sequence; ``s`` itself is not modified.

    An index at or beyond ``len(s)`` appends ``replacement`` because the
    slices simply come back empty.
    """
    # Without this normalisation ``s[index + 1:]`` for index == -1 becomes
    # ``s[0:]`` and the whole input would be duplicated after the replacement.
    if -len(s) <= index < 0:
        index += len(s)
    return s[:index] + replacement + s[index + 1:]

In [495]:
class Grammar():
    """A small rule set of the form ``CAT/SEM -> RHS`` with two chunking operations.

    Every rule is stored in ``self.rules`` as
    ``{"lhs": {"cat": CAT, "sem": SEM}, "rhs": RHS}`` where all three parts
    are strings. ``SEM`` is parsed with ``Expression.fromstring`` by the
    chunking methods; the chunking methods only consider right-hand sides
    that are exactly three characters long.
    """

    # Maps a position in a three-symbol right-hand side to the index of the
    # corresponding entry in ``[predicate, first_argument, second_argument]``:
    # position 0 -> first argument, position 1 -> predicate, position 2 ->
    # second argument.
    _SEM_INDEX_FOR_POSITION = (1, 0, 2)

    def __init__(self):
        self.rules = []

    @staticmethod
    def _parse_rules(text):
        """Parse newline-separated ``CAT/SEM -> RHS`` lines into rule dicts.

        Blank or whitespace-only lines (including the empty piece left by a
        trailing newline) are skipped. Fields are separated by whitespace:
        the first field is ``CAT/SEM`` and the third field is the right-hand
        side. A non-blank line with fewer fields raises ``IndexError``.
        """
        parsed = []
        for line in text.split("\n"):
            if not line.strip():
                continue
            fields = line.split()
            lhs_parts = fields[0].split("/")
            parsed.append({
                "lhs": {"cat": lhs_parts[0], "sem": lhs_parts[1]},
                "rhs": fields[2],
            })
        return parsed

    def from_string(self, string: str):
        """Replace all rules with the ones parsed from ``string``.

        Args:
            string: One ``CAT/SEM -> RHS`` rule per line.
        """
        self.rules = self._parse_rules(string)

    def add_rule(self, string: str):
        """Append the rule(s) parsed from ``string`` to the existing rules.

        Args:
            string: One ``CAT/SEM -> RHS`` rule per line.
        """
        self.rules.extend(self._parse_rules(string))

    def to_string(self):
        """Print every rule as ``CAT/SEM -> RHS``, one per line. Returns ``None``."""
        rules_str = "".join(
            f"{rule['lhs']['cat']}/{rule['lhs']['sem']} -> {rule['rhs']}\n"
            for rule in self.rules
        )
        print(rules_str)

    @staticmethod
    def _single_diff(str1, str2):
        """Locate the only position at which two three-character strings differ.

        Returns ``(index, char1, char2)`` when both strings have length 3 and
        differ at exactly one position, otherwise ``None``.
        """
        if len(str1) != 3 or len(str2) != 3:
            return None
        diffs = [
            (index, char1, char2)
            for index, (char1, char2) in enumerate(zip(str1, str2))
            if char1 != char2
        ]
        return diffs[0] if len(diffs) == 1 else None

    def check_chunk01able(self, str1, str2):
        """Return True when ``find_diff_position_for_chunk01`` finds a position."""
        return self.find_diff_position_for_chunk01(str1, str2) is not None

    def find_diff_position_for_chunk01(self, str1, str2):
        """Find where two right-hand sides differ by one lowercase symbol.

        Returns:
            ``(diff_index, diff_index_sem)`` when both strings have length 3
            and differ at exactly one position whose characters are both
            lowercase; ``diff_index_sem`` is the matching index into
            ``[predicate, first_argument, second_argument]``. Otherwise
            ``None``.
        """
        diff = self._single_diff(str1, str2)
        if diff is None:
            return None
        diff_index, char1, char2 = diff
        if not (char1.islower() and char2.islower()):
            return None
        return diff_index, self._SEM_INDEX_FOR_POSITION[diff_index]

    def check_chunk02able(self, str1, str2):
        """Return True when ``find_diff_position_for_chunk02`` finds a position."""
        return self.find_diff_position_for_chunk02(str1, str2)[0] is not None

    def find_diff_position_for_chunk02(self, str1, str2):
        """Find where two right-hand sides differ by one lowercase/uppercase pair.

        Returns:
            ``(diff_index, diff_index_sem, upper_in_str, lower_in_str)`` when
            both strings have length 3 and differ at exactly one position
            where one character is lowercase and the other uppercase.
            ``upper_in_str`` / ``lower_in_str`` are 0 or 1 and say which of
            the two arguments holds the uppercase / lowercase character.
            Otherwise the two-element tuple ``(None, None)``.
        """
        diff = self._single_diff(str1, str2)
        if diff is None:
            return None, None
        diff_index, char1, char2 = diff
        if char1.islower() and char2.isupper():
            lower_in_str, upper_in_str = 0, 1
        elif char1.isupper() and char2.islower():
            lower_in_str, upper_in_str = 1, 0
        else:
            return None, None
        diff_index_sem = self._SEM_INDEX_FOR_POSITION[diff_index]
        return diff_index, diff_index_sem, upper_in_str, lower_in_str

    @staticmethod
    def _parse_sems(rules):
        """Parse each rule's semantics once; nothing is parsed when no pair exists."""
        if len(rules) < 2:
            return []
        return [Expression.fromstring(rule["lhs"]["sem"]) for rule in rules]

    @staticmethod
    def _fresh_category(taken):
        """Pick a random uppercase letter not in ``taken`` and record it there.

        If all 26 letters are taken, any uppercase letter is returned so the
        call never fails.
        """
        free = [letter for letter in string.ascii_uppercase if letter not in taken]
        category = random.choice(free or string.ascii_uppercase)
        taken.add(category)
        return category

    def chunk01(self):
        """Generalise pairs of rules whose right-hand sides differ in one lowercase symbol.

        For every pair of rules whose semantics both parse to an
        ``ApplicationExpression`` and whose right-hand sides satisfy
        ``find_diff_position_for_chunk01``, both rules are removed and three
        rules are added: an ``S`` rule in which the differing symbol is
        replaced by a new category letter (and the matching semantic element
        by ``X`` for the predicate or ``x`` for an argument), plus one rule
        per original symbol under that new category. ``self.rules`` is
        replaced by the surviving rules followed by the new ones.
        """
        rules = self.rules
        sems = self._parse_sems(rules)

        # Letters that must not be reused as a new category: the start symbol,
        # every existing category and every uppercase symbol on a right-hand
        # side. Letters handed out during this pass are added as we go so two
        # unrelated chunks never share a category.
        taken = {"S"}
        for rule in rules:
            taken.add(rule["lhs"]["cat"])
            taken.update(char for char in rule["rhs"] if char.isupper())

        chunked_rules = []
        new_rules = []

        for i in range(len(rules)):
            for j in range(i + 1, len(rules)):
                first_sem, second_sem = sems[i], sems[j]
                if not (isinstance(first_sem, ApplicationExpression)
                        and isinstance(second_sem, ApplicationExpression)):
                    continue
                first_sentence = rules[i]["rhs"]
                second_sentence = rules[j]["rhs"]
                position = self.find_diff_position_for_chunk01(first_sentence, second_sentence)
                if position is None:
                    continue
                diff_index, diff_index_sem = position

                print(f"Apply chunk01 for {first_sem}:{first_sentence} and {second_sem}:{second_sentence}")
                first_sem_elements = [first_sem.pred, first_sem.args[0], first_sem.args[1]]
                second_sem_elements = [second_sem.pred, second_sem.args[0], second_sem.args[1]]

                chunked_rules.append(rules[i])
                chunked_rules.append(rules[j])

                # TODO: variable selection
                abstracted = [str(element) for element in first_sem_elements]
                abstracted[diff_index_sem] = "X" if diff_index_sem == 0 else "x"
                new_sem_str = f"{abstracted[0]}({abstracted[1]},{abstracted[2]})"

                category = self._fresh_category(taken)
                new_sen_str = replace_at_index(first_sentence, diff_index, category)

                new_rules.append({"lhs": {"cat": "S", "sem": new_sem_str}, "rhs": new_sen_str})
                new_rules.append({
                    "lhs": {"cat": category, "sem": str(first_sem_elements[diff_index_sem])},
                    "rhs": first_sentence[diff_index],
                })
                new_rules.append({
                    "lhs": {"cat": category, "sem": str(second_sem_elements[diff_index_sem])},
                    "rhs": second_sentence[diff_index],
                })

        remaining = [rule for rule in rules if rule not in chunked_rules]
        self.rules = remaining + new_rules

    def chunk02(self):
        """Fold a fully lowercase rule into an existing rule that has a category in its place.

        For every pair of rules whose semantics both parse to an
        ``ApplicationExpression`` and whose right-hand sides satisfy
        ``find_diff_position_for_chunk02``, the rule holding the lowercase
        symbol is removed and a rule ``CATEGORY/SEM -> symbol`` is added,
        where ``CATEGORY`` is the uppercase symbol of the other rule and
        ``SEM`` is the removed rule's semantic element at the matching
        position. ``self.rules`` is replaced by the surviving rules followed
        by the new ones.
        """
        rules = self.rules
        sems = self._parse_sems(rules)

        chunked_rules = []
        new_rules = []

        for i in range(len(rules)):
            for j in range(i + 1, len(rules)):
                first_sem, second_sem = sems[i], sems[j]
                if not (isinstance(first_sem, ApplicationExpression)
                        and isinstance(second_sem, ApplicationExpression)):
                    continue
                first_sentence = rules[i]["rhs"]
                second_sentence = rules[j]["rhs"]
                position = self.find_diff_position_for_chunk02(first_sentence, second_sentence)
                if position[0] is None:
                    continue
                diff_index, diff_index_sem, upper_in_str, lower_in_str = position

                print(f"Apply chunk02 for {first_sem}:{first_sentence} and {second_sem}:{second_sentence}")

                # The "target" is the rule with the lowercase symbol; it is
                # the one that gets replaced by a category-level rule.
                target_position = (i, j)[lower_in_str]
                nontarget_position = (i, j)[upper_in_str]
                chunked_rules.append(rules[target_position])

                target_sem = sems[target_position]
                target_sem_elements = [target_sem.pred, target_sem.args[0], target_sem.args[1]]
                target_sentence = rules[target_position]["rhs"]
                nontarget_sentence = rules[nontarget_position]["rhs"]
                new_rules.append({
                    "lhs": {
                        "cat": nontarget_sentence[diff_index],
                        "sem": str(target_sem_elements[diff_index_sem]),
                    },
                    "rhs": target_sentence[diff_index],
                })

        remaining = [rule for rule in rules if rule not in chunked_rules]
        self.rules = remaining + new_rules

In [508]:
# Build a demo grammar from four S rules, one "CAT/SEM -> RHS" rule per line.
grammar = Grammar()
grammar.from_string("""S/_l(_j,_m) -> jlm
S/_l(_j,_s) -> jls
S/_h(_j,_s) -> jhs
S/_r(_a,_t) -> art""")

In [509]:
# Show the parsed rules: a list of {"lhs": {"cat", "sem"}, "rhs"} dicts.
grammar.rules

[{'lhs': {'cat': 'S', 'sem': '_l(_j,_m)'}, 'rhs': 'jlm'},
 {'lhs': {'cat': 'S', 'sem': '_l(_j,_s)'}, 'rhs': 'jls'},
 {'lhs': {'cat': 'S', 'sem': '_h(_j,_s)'}, 'rhs': 'jhs'},
 {'lhs': {'cat': 'S', 'sem': '_r(_a,_t)'}, 'rhs': 'art'}]

In [510]:
# Apply chunk01: rule pairs whose right-hand sides differ in exactly one
# lowercase symbol are replaced by a generalised S rule plus two rules under a
# randomly chosen category letter (so the letters vary between runs).
grammar.chunk01()

Apply chunk01 for _l(_j,_m):jlm and _l(_j,_s):jls
Apply chunk01 for _l(_j,_s):jls and _h(_j,_s):jhs


In [511]:
# Show the rule list after chunk01: untouched rules first, then the new ones.
grammar.rules

[{'lhs': {'cat': 'S', 'sem': '_r(_a,_t)'}, 'rhs': 'art'},
 {'lhs': {'cat': 'S', 'sem': '_l(_j,x)'}, 'rhs': 'jlO'},
 {'lhs': {'cat': 'O', 'sem': '_m'}, 'rhs': 'm'},
 {'lhs': {'cat': 'O', 'sem': '_s'}, 'rhs': 's'},
 {'lhs': {'cat': 'S', 'sem': 'X(_j,_s)'}, 'rhs': 'jNs'},
 {'lhs': {'cat': 'N', 'sem': '_l'}, 'rhs': 'l'},
 {'lhs': {'cat': 'N', 'sem': '_h'}, 'rhs': 'h'}]

In [512]:
# Print the same rules in "CAT/SEM -> RHS" text form.
grammar.to_string()

S/_r(_a,_t) -> art
S/_l(_j,x) -> jlO
O/_m -> m
O/_s -> s
S/X(_j,_s) -> jNs
N/_l -> l
N/_h -> h



In [513]:
# Start over with a grammar that already contains one rule with a category
# symbol (N) on its right-hand side, to demonstrate chunk02.
grammar = Grammar()
grammar.from_string("""S/_l(_j,_m) -> jlm
S/_l(x,_s) -> Nls
S/_l(_j,_s) -> jls
S/_h(_j,_s) -> jhs
S/_r(_a,_t) -> art""")

In [514]:
# Apply chunk02: a rule whose right-hand side differs from another rule only by
# a lowercase symbol where the other has an uppercase category is replaced by a
# rule assigning that symbol to the category.
grammar.chunk02()

Apply chunk02 for _l(x,_s):Nls and _l(_j,_s):jls


In [515]:
# Show the rule list after chunk02.
grammar.rules

[{'lhs': {'cat': 'S', 'sem': '_l(_j,_m)'}, 'rhs': 'jlm'},
 {'lhs': {'cat': 'S', 'sem': '_l(x,_s)'}, 'rhs': 'Nls'},
 {'lhs': {'cat': 'S', 'sem': '_h(_j,_s)'}, 'rhs': 'jhs'},
 {'lhs': {'cat': 'S', 'sem': '_r(_a,_t)'}, 'rhs': 'art'},
 {'lhs': {'cat': 'N', 'sem': '_j'}, 'rhs': 'j'}]

In [516]:
# Append one more rule whose right-hand side contains the category symbol K.
grammar.add_rule("S/X(_j,_s) -> jKs")

In [517]:
# Show the rule list with the appended rule at the end.
grammar.rules

[{'lhs': {'cat': 'S', 'sem': '_l(_j,_m)'}, 'rhs': 'jlm'},
 {'lhs': {'cat': 'S', 'sem': '_l(x,_s)'}, 'rhs': 'Nls'},
 {'lhs': {'cat': 'S', 'sem': '_h(_j,_s)'}, 'rhs': 'jhs'},
 {'lhs': {'cat': 'S', 'sem': '_r(_a,_t)'}, 'rhs': 'art'},
 {'lhs': {'cat': 'N', 'sem': '_j'}, 'rhs': 'j'},
 {'lhs': {'cat': 'S', 'sem': 'X(_j,_s)'}, 'rhs': 'jKs'}]

In [518]:
# Apply chunk02 again so the newly added rule can absorb a matching rule.
grammar.chunk02()

Apply chunk02 for _h(_j,_s):jhs and X(_j,_s):jKs


In [519]:
# Show the rule list after the second chunk02 pass.
grammar.rules

[{'lhs': {'cat': 'S', 'sem': '_l(_j,_m)'}, 'rhs': 'jlm'},
 {'lhs': {'cat': 'S', 'sem': '_l(x,_s)'}, 'rhs': 'Nls'},
 {'lhs': {'cat': 'S', 'sem': '_r(_a,_t)'}, 'rhs': 'art'},
 {'lhs': {'cat': 'N', 'sem': '_j'}, 'rhs': 'j'},
 {'lhs': {'cat': 'S', 'sem': 'X(_j,_s)'}, 'rhs': 'jKs'},
 {'lhs': {'cat': 'K', 'sem': '_h'}, 'rhs': 'h'}]

In [520]:
# Print the final rules in "CAT/SEM -> RHS" text form.
grammar.to_string()

S/_l(_j,_m) -> jlm
S/_l(x,_s) -> Nls
S/_r(_a,_t) -> art
N/_j -> j
S/X(_j,_s) -> jKs
K/_h -> h

