- 記号の使い分け
  - 統語範疇: `英大文字`
  - 動詞（形式）: `ギリシャ小文字`
  - 名詞（形式）: `英小文字`
  - 動詞（意味表示内の定数）: `_ギリシャ小文字`
  - 名詞（意味表示内の定数）: `_英小文字`
  - 動詞（意味表示内の変数）: `X`
  - 名詞（意味表示内の変数）: `x`

In [1]:
import random
import string
import ast
import time

In [2]:
from tqdm import tqdm

In [3]:
# 動詞にはギリシャ文字を使用する
def generate_noun(n_sorts: str, stopword: str = ""):
    assert n_sorts <= len(string.ascii_lowercase)
    chars_wo_stopword = [char for char in string.ascii_lowercase[:n_sorts] if char not in stopword]
    return random.choice(chars_wo_stopword)
def generate_verb(n_sorts: str, stopword: str = ""):
    greek_lowercase = [chr(i) for i in range(945, 970)]
    chars_wo_stopword = [char for char in greek_lowercase[:n_sorts] if char not in stopword]
    return random.choice(chars_wo_stopword)

In [4]:
NOUNS = [
    "john", "alice", "bob", "carol", "eve", "frank", "grace", "helen", 
    "ivan", "jack", "karen", "larry", "mike", "nina", "oscar", "paul", 
    "quincy", "rachel", "steve", "tracy", "ursula", "victor", "wendy", 
    "xander", "yasmine", "zach"
]

VERBS = [
    "know", "admire", "like", "kick", "meet", "call", "defend", "encourage"
    , "follow", "greet", "help", "invite", "judge", "notice", "obey", 
    "praise", "question", "respect", "support", "trust", "understand", 
    "value", "warn", "x-ray", "yell", "zap"
]

In [5]:
# 動詞にはギリシャ文字を使用する
def generate_noun_sem(n_sorts: str, stopword: str = ""):
    assert n_sorts <= len(NOUNS)
    chars_wo_stopword = [char for char in NOUNS[:n_sorts] if char not in stopword]
    return random.choice(chars_wo_stopword)
def generate_verb_sem(n_sorts: str, stopword: str = ""):
    chars_wo_stopword = [char for char in VERBS[:n_sorts] if char not in stopword]
    return random.choice(chars_wo_stopword)

In [6]:
# 全体論的なルールの生成
def generate_holistic_rules(n):
    result = ""
    for i in range(n):
        subj = generate_noun(5, ["x", "y"])
        obj = generate_noun(5, [subj, "x", "y"])
        verb = generate_verb(5)
        sentence = f"{subj}{verb}{obj}"
        subjsem = generate_noun_sem(5)
        objsem = generate_noun_sem(5, [subjsem])
        verbsem = generate_verb_sem(5)
        semrepr = f"_{verbsem}(_{subjsem},_{objsem})"
        result += f"S/{semrepr} -> {sentence}\n"
    return result[:-1]

In [7]:
# TODO: remove duplicates
def generate_queries(n):
    result = []
    for i in range(n):
        subjsem = generate_noun_sem(5)
        objsem = generate_noun_sem(5, [subjsem])
        verbsem = generate_verb_sem(5)
        semrepr = f"_{verbsem}(_{subjsem},_{objsem})"
        result.append(semrepr)
    return result

In [8]:
print(generate_holistic_rules(10))

S/_meet(_bob,_carol) -> cαa
S/_like(_eve,_carol) -> bγa
S/_kick(_carol,_john) -> cαb
S/_like(_eve,_bob) -> cεb
S/_meet(_eve,_bob) -> dβc
S/_meet(_eve,_carol) -> aδb
S/_kick(_carol,_eve) -> eγd
S/_kick(_eve,_bob) -> aβe
S/_like(_alice,_eve) -> aεc
S/_meet(_carol,_alice) -> aβc


In [9]:
from nltk.sem.logic import *

import random
import string
import copy

def replace_at_index(s, index, replacement):
    return s[:index] + replacement + s[index + 1:]

In [10]:
N_SORTS = 5
NONTERMINALS = [char for char in string.ascii_uppercase if char != "S"]
INDIVIDUAL_VARIABLES = ["x", "y"]
GREEK_LOWER = [chr(i) for i in range(945, 970)]
ENGLISH_LOWER = [char for char in string.ascii_lowercase[:N_SORTS] if char not in ["x", "y"]]

In [34]:
class Grammar():
    def __init__(self):
        self.rules = []

    def from_string(self, string: str):
        self.rules = []
        rules_str = string.split("\n")
        for rule_str in rules_str:
            if rule_str != "\n":
                rule_split = rule_str.split(" ")
                lhs_cat = rule_split[0].split("/")[0]
                lhs_sem = rule_split[0].split("/")[1]
                rhs = rule_split[2]
                rule = {"lhs":{"cat":lhs_cat, "sem":lhs_sem}, "rhs":rhs}
                self.rules.append(rule)
    
    def add_rule(self, string: str):
        rules_str = string.split("\n")
        for rule_str in rules_str:
            if rule_str != "\n":
                rule_split = rule_str.split(" ")
                lhs_cat = rule_split[0].split("/")[0]
                lhs_sem = rule_split[0].split("/")[1]
                rhs = rule_split[2]
                rule = {"lhs":{"cat":lhs_cat, "sem":lhs_sem}, "rhs":rhs}
                self.rules.append(rule)
    
    def to_string(self):
        rules_list = []
        rules_str = ""
        for rule in self.rules:
            rules_list.append(f"{rule['lhs']['cat']}/{rule['lhs']['sem']} -> {rule['rhs']}\n")
        sorted_list = sorted(rules_list, key=lambda x: -len(x))
        rules_str += ''.join(sorted_list)+"\n"
        return rules_str
    
    def str2dict(self, string):
        return ast.literal_eval(string)
    
    def sem_list(self):
        return [d['lhs']['sem'] for d in self.rules]
    
    def cat_list(self):
        return [d['lhs']['cat'] for d in self.rules]
    
    def sentence_list(self):
        return [d['rhs'] for d in self.rules]

    def can_chunk01(self, rule1, rule2):
        str1, str2 = rule1["rhs"], rule2["rhs"]
        sem1_logic = Expression.fromstring(rule1["lhs"]["sem"])
        sem2_logic = Expression.fromstring(rule2["lhs"]["sem"])
        sem1, sem2 = [sem1_logic.args[0], sem1_logic.pred, sem1_logic.args[1]], [sem2_logic.args[0], sem2_logic.pred, sem2_logic.args[1]]

        if len(str1) != 3 or len(str2) != 3:
            return False

        # print(str1, str2)
        diff_count_str = 0
        diff_positions_str = []
        for i, (char1, char2) in enumerate(zip(str1, str2)):
            if (char1 != char2):
                diff_count_str += 1
                diff_positions_str.append(i)
                if not (char1.islower() and char2.islower()):
                    return None, False
        diff_count_sem = 0
        diff_positions_sem = []
        for i, (elm1, elm2) in enumerate(zip(sem1, sem2)):
            if (elm1 != elm2):
                diff_count_sem += 1
                diff_positions_sem.append(i)
        if diff_count_str == diff_count_sem == 1:
            if diff_positions_str[0] == diff_positions_sem[0]:
                return diff_positions_str[0], True
        return None, False
    
    def find_diff_position_for_chunk01(self, str1, str2):
        if len(str1) != 3 or len(str2) != 3:
            return None

        diff_count = 0
        diff_index = None
        for index, (char1, char2) in enumerate(zip(str1, str2)):
            if char1 != char2:
                diff_count += 1
                diff_index = index
                if not (char1.islower() and char2.islower()):
                    return None

        if diff_count == 1:
            if diff_index == 0:
                diff_index_sem = 1
            if diff_index == 1:
                diff_index_sem = 0
            if diff_index == 2:
                diff_index_sem = 2
            return diff_index, diff_index_sem
        else:
            return None
    
    def can_chunk02(self, rule1, rule2):
        str1, str2 = rule1["rhs"], rule2["rhs"]
        sem1_logic = Expression.fromstring(rule1["lhs"]["sem"])
        sem2_logic = Expression.fromstring(rule2["lhs"]["sem"])
        sem1, sem2 = [sem1_logic.args[0], sem1_logic.pred, sem1_logic.args[1]], [sem2_logic.args[0], sem2_logic.pred, sem2_logic.args[1]]

        if len(str1) != 3 or len(str2) != 3:
            return None, False, None, None

        # print(str1, str2)
        diff_count_str = 0
        diff_positions_str = []
        for i, (char1, char2) in enumerate(zip(str1, str2)):
            if (char1 != char2):
                if (char1.islower() and char2.isupper()):
                    diff_count_str += 1
                    diff_positions_str.append(i)
                    lower_in_str = 0
                    upper_in_str = 1
                elif (char1.isupper() and char2.islower()):
                    diff_count_str += 1
                    diff_positions_str.append(i)
                    lower_in_str = 1
                    upper_in_str = 0
                else:
                    None, False, None, None
        diff_count_sem = 0
        diff_positions_sem = []
        for i, (elm1, elm2) in enumerate(zip(sem1, sem2)):
            if (elm1 != elm2):
                diff_count_sem += 1
                diff_positions_sem.append(i)
        if diff_count_str == diff_count_sem == 1:
            if diff_positions_str[0] == diff_positions_sem[0]:
                return diff_positions_str[0], True, upper_in_str, lower_in_str
        return None, False, None, None
    
    def find_diff_position_for_chunk02(self, str1, str2):
        if len(str1) != 3 or len(str2) != 3:
            return None, None

        diff_count = 0
        diff_index = None
        upper_in_str = None

        for index, (char1, char2) in enumerate(zip(str1, str2)):
            if char1 != char2:
                diff_count += 1
                diff_index = index
                if (char1.islower() and char2.isupper()):
                    lower_in_str = 0
                    upper_in_str = 1
                elif (char1.isupper() and char2.islower()):
                    lower_in_str = 1
                    upper_in_str = 0
                else:
                    return None, None

        if diff_count == 1:
            if diff_index == 0:
                diff_index_sem = 1
            if diff_index == 1:
                diff_index_sem = 0
            if diff_index == 2:
                diff_index_sem = 2
            return diff_index, diff_index_sem, upper_in_str, lower_in_str
        else:
            return None, None

    def highlight_for_replace(self, str1, str2):
        if (len(str1) == 3) & (len(str2) == 1):
            if str1.count(str2) == 1:
                non_word_level, word_level = 0, 1
                return non_word_level, word_level
            else:
                return None, None
        elif (len(str1) == 1) & (len(str2) == 3):
            if str2.count(str1) == 1:
                non_word_level, word_level = 1, 0
                return non_word_level, word_level
            else:
                return None, None
        else:
            return None, None

    def find_diff_position_for_replace(self, str1, str2):
        if len(str1) != 3 or len(str2) != 1:
            return None

        for i, char in enumerate(str1):
            if str2 == char:
                diff_index = i
            else:
                continue

        if diff_index == 0:
            diff_index_sem = 1
        if diff_index == 1:
            diff_index_sem = 0
        if diff_index == 2:
            diff_index_sem = 2
        return diff_index, diff_index_sem
    
    def existing_variables(self, elements: list):
        result = [str(element) for element in elements if not isinstance(element, ConstantExpression)]
        return result
    # TODO: sentence-meaningの位置は必ずしも対応しなくて良い
    def chunk01(self):
        rules = self.rules

        chuncked_rules = []
        new_rules = []

        for i in range(len(rules)):
            for j in range(i+1, len(rules)):
                first_sem = Expression.fromstring(rules[i]["lhs"]["sem"])
                second_sem = Expression.fromstring(rules[j]["lhs"]["sem"])
                first_sentence = rules[i]["rhs"]
                second_sentence = rules[j]["rhs"]
                if isinstance(first_sem, ApplicationExpression) & isinstance(second_sem, ApplicationExpression):
                    diff_index, can_chunk = self.can_chunk01(rules[i], rules[j])
                    if can_chunk:
                        first_sem_elements = [first_sem.pred, first_sem.args[0], first_sem.args[1]]
                        second_sem_elements = [second_sem.pred, second_sem.args[0], second_sem.args[1]]

                        if diff_index == 0:
                            diff_index_sem = 1
                        if diff_index == 1:
                            diff_index_sem = 0
                        if diff_index == 2:
                            diff_index_sem = 2

                        chuncked_rules.append(rules[i])
                        chuncked_rules.append(rules[j])

                        if diff_index_sem == 0:
                            var = Expression.fromstring("X")
                        else:
                            # FIX: efficiency
                            existing_variables = self.existing_variables(first_sem_elements)
                            var = [var for var in INDIVIDUAL_VARIABLES if var not in existing_variables][0]
                        first_sem_elements_abstracted = copy.deepcopy(first_sem_elements)
                        first_sem_elements_abstracted[diff_index_sem] = var
                        new_sem_str = f"{str(first_sem_elements_abstracted[0])}({str(first_sem_elements_abstracted[1])},{str(first_sem_elements_abstracted[2])})"
                        random_category = random.choice(NONTERMINALS)
                        new_sen_str = replace_at_index(first_sentence, diff_index, random_category)
                        new_rule_0 = {"lhs": {"cat":"S", "sem": new_sem_str}, "rhs": new_sen_str}

                        new_rule_1 = {"lhs": {"cat":random_category, "sem": str(first_sem_elements[diff_index_sem])}, "rhs": first_sentence[diff_index]}
                        new_rule_2 = {"lhs": {"cat":random_category, "sem": str(second_sem_elements[diff_index_sem])}, "rhs": second_sentence[diff_index]}
                        new_rules += [new_rule_0] + [new_rule_1] + [new_rule_2]
        rules = [rule for rule in rules if rule not in chuncked_rules]
        rules = rules + new_rules
        rules_unique = list(set([str(rule) for rule in rules]))
        self.rules = [self.str2dict(rule) for rule in rules_unique]

    def chunk02(self):
        rules = self.rules

        chuncked_rules = []
        new_rules = []

        for i in range(len(rules)):
            for j in range(i+1, len(rules)):
                first_sem = Expression.fromstring(rules[i]["lhs"]["sem"])
                second_sem = Expression.fromstring(rules[j]["lhs"]["sem"])
                first_sentence = rules[i]["rhs"]
                second_sentence = rules[j]["rhs"]
                if isinstance(first_sem, ApplicationExpression) & isinstance(second_sem, ApplicationExpression):
                    diff_index, can_chunk, upper_in_str, lower_in_str = self.can_chunk02(rules[i], rules[j])
                    if can_chunk:
                        first_sem_elements = [first_sem.pred, first_sem.args[0], first_sem.args[1]]
                        second_sem_elements = [second_sem.pred, second_sem.args[0], second_sem.args[1]]

                        if diff_index == 0:
                            diff_index_sem = 1
                        if diff_index == 1:
                            diff_index_sem = 0
                        if diff_index == 2:
                            diff_index_sem = 2

                        target_position = [i,j][lower_in_str]
                        nontarget_position = [i,j][upper_in_str]
                        chuncked_rules.append(rules[target_position])

                        target_sem = Expression.fromstring(rules[target_position]["lhs"]["sem"])
                        target_sem_elements = [target_sem.pred, target_sem.args[0], target_sem.args[1]]
                        target_sentence = rules[target_position]["rhs"]
                        nontarget_sentence = rules[nontarget_position]["rhs"]
                        new_rule = {"lhs": {"cat":nontarget_sentence[diff_index], "sem": str(target_sem_elements[diff_index_sem])}, "rhs": target_sentence[diff_index]}
                        new_rules.append(new_rule)
        rules = [rule for rule in rules if rule not in chuncked_rules]
        rules = rules + new_rules
        rules_unique = list(set([str(rule) for rule in rules]))
        self.rules = [self.str2dict(rule) for rule in rules_unique]
        
    def abstract(self, element, var, idx, diff_idx):
        if idx == diff_idx:
            return var
        else:
            return element

    def replace(self):
        rules = self.rules

        replaced_rules = []
        new_rules = []

        for i in range(len(rules)):
            for j in range(i+1, len(rules)):
                first_sem = Expression.fromstring(rules[i]["lhs"]["sem"])
                second_sem = Expression.fromstring(rules[j]["lhs"]["sem"])
                first_sentence = rules[i]["rhs"]
                second_sentence = rules[j]["rhs"]
                non_word_level, word_level = self.highlight_for_replace(first_sentence, second_sentence)
                if (non_word_level is not None) and (word_level is not None):
                    target_position = [i,j][non_word_level]
                    nontarget_position  = [i,j][word_level]

                    replaced_rules.append(rules[target_position])

                    target_sem = Expression.fromstring(rules[target_position]["lhs"]["sem"])
                    nontarget_sem = Expression.fromstring(rules[nontarget_position]["lhs"]["sem"])

                    target_sentence = rules[target_position]["rhs"]
                    nontarget_sentence = rules[nontarget_position]["rhs"]

                    target_sem_elements = [target_sem.pred, target_sem.args[0], target_sem.args[1]]
                    nontarget_cat = rules[nontarget_position]["lhs"]["cat"]

                    diff_index, diff_index_sem = self.find_diff_position_for_replace(target_sentence, nontarget_sentence)

                    if diff_index_sem == 0:
                        var = Expression.fromstring("X")
                    else:
                        # FIX: efficiency
                        existing_variables = self.existing_variables(target_sem_elements)
                        var = [var for var in INDIVIDUAL_VARIABLES if var not in existing_variables][0]
                    target_sem_elements_abstract = [self.abstract(element, var, i, diff_index_sem) for i, element in enumerate(target_sem_elements)]

                    new_sem_str = f"{str(target_sem_elements_abstract[0])}({str(target_sem_elements_abstract[1])},{str(target_sem_elements_abstract[2])})"
                    new_sen_str = replace_at_index(target_sentence, diff_index, nontarget_cat)
                    new_rule = {"lhs": {"cat":nontarget_cat, "sem": new_sem_str}, "rhs": new_sen_str}
                    new_rules.append(new_rule)
        rules = [rule for rule in rules if rule not in replaced_rules]
        rules = rules + new_rules
        rules_unique = list(set([str(rule) for rule in rules]))
        self.rules = [self.str2dict(rule) for rule in rules_unique]
    
    def repaint_rule(self, rule, replacee, replacer):
        # print(rule, replacee, replacer)
        if rule["lhs"]["cat"] == replacee["lhs"]["cat"]:
            assert len(rule["rhs"]) == 1
            # print(rule["lhs"]["cat"], "->",replacer["lhs"]["cat"])
            rule["lhs"]["cat"] = replacer["lhs"]["cat"]
        else:
            rhs = copy.deepcopy(rule["rhs"])
            new_rhs = rhs.replace(replacee["lhs"]["cat"], replacer["lhs"]["cat"])
            rule["rhs"] = new_rhs
            # print(rhs, "->", new_rhs)
        return rule

    def can_merge(self, rule1, rule2):
        rule1_cat = rule1["lhs"]["cat"]
        rule1_sem = rule1["lhs"]["sem"]
        rule1_rhs = rule1["rhs"]
        rule2_cat = rule2["lhs"]["cat"]
        rule2_sem = rule2["lhs"]["sem"]
        rule2_rhs = rule2["rhs"]
        if (len(rule1_rhs) == len(rule2_rhs) == 1) & (rule1_sem == rule2_sem) & (rule1_rhs == rule2_rhs) & (rule1_cat != rule2_cat):
            return True
        else:
            return False

    def merge(self):
        rules = self.rules
        invited = []
        replacer_cats = []
        for i in range(len(rules)):
            for j in range(i+1, len(rules)):
                if self.can_merge(rules[i], rules[j]):
                    # print(f"can merge {rules[i]} and {rules[j]}")
                    if (rules[i]["lhs"]["cat"] not in replacer_cats) and (rules[j]["lhs"]["cat"] not in replacer_cats):
                        indices = [i, j]
                        # print(indices)
                        random.shuffle(indices)
                        replacer_rule = copy.deepcopy(rules[indices[0]])
                        replacee_rule = copy.deepcopy(rules[indices[1]])
                        replacer_cats.append(replacer_rule["lhs"]["cat"])
                        # print("replacee_rule: ", replacee_rule)
                        # print("replacer_rule: ", replacer_rule)
                        rules = [self.repaint_rule(rule, replacee_rule, replacer_rule) for rule in rules]
                    elif (rules[i]["lhs"]["cat"] in replacer_cats) and (rules[j]["lhs"]["cat"] in replacer_cats):
                        continue
                    elif rules[i]["lhs"]["cat"] in replacer_cats:
                        replacer_rule = copy.deepcopy(rules[i])
                        replacee_rule = copy.deepcopy(rules[j])
                        # print("replacee_rule: ", replacee_rule)
                        # print("replacer_rule: ", replacer_rule)
                        rules = [self.repaint_rule(rule, replacee_rule, replacer_rule) for rule in rules]
                    else:
                        replacer_rule = copy.deepcopy(rules[j])
                        replacee_rule = copy.deepcopy(rules[i])
                        # print("replacee_rule: ", replacee_rule)
                        # print("replacer_rule: ", replacer_rule)
                        rules = [self.repaint_rule(rule, replacee_rule, replacer_rule) for rule in rules]
                else:
                    continue
        rules_unique = list(set([str(rule) for rule in rules]))
        self.rules = [self.str2dict(rule) for rule in rules_unique]

    def invent_wordrule(self, sem):
        random_category = random.choice(NONTERMINALS)
        if str(sem).replace("_","") in NOUNS:
            rhs = generate_noun(5, ["x", "y"])
        if str(sem).replace("_","") in VERBS:
            rhs = generate_verb(5)
        self.add_rule(f"{random_category}/{str(sem)} -> {rhs}")
        return rhs

    def invent_holisticrule(self, sem):
        subj = generate_noun(5, ["x", "y"])
        obj = generate_noun(5, [subj, "x", "y"])
        verb = generate_verb(5)
        sentence = f"{subj}{verb}{obj}"
        self.add_rule(f"S/{sem} -> {sentence}")
        return sentence
    # TODO: test
    def generate(self, query_str, debug=False):
        rules = self.rules
        if debug:
            print("query: ", query_str)
        query = Expression.fromstring(query_str)
        query_elements = [query.args[0], query.pred, query.args[1]]
        sem_list = self.sem_list()
        sentence_list = self.sentence_list()
        if query_str in sem_list:
            holistic_rule = rules[sem_list.index(query_str)]
            holistic_rule_rhs = holistic_rule['rhs']
            if debug:
                print("generated by a holictic rule: ", f"S/{str(query)} -> {holistic_rule_rhs}")
            return f"S/{str(query)} -> {holistic_rule_rhs}"
        else:
            for i, sem_str in enumerate(sem_list):
                sem = Expression.fromstring(sem_str)
                if isinstance(sem, ApplicationExpression):
                    sem_elements = [sem.args[0], sem.pred, sem.args[1]]
                    matches = [(i, (query_element,sem_element)) for i, (query_element,sem_element) in enumerate(zip(query_elements, sem_elements)) if (query_element==sem_element) & (isinstance(query_element, ConstantExpression) & isinstance(sem_element, ConstantExpression))]
                    slots = [(i, (query_element,sem_element)) for i, (query_element,sem_element) in enumerate(zip(query_elements, sem_elements)) if (query_element!=sem_element) & (not isinstance(sem_element, ConstantExpression))]
                    if (len(matches)==2) & (len(slots)==1):
                        sentence_with_slot = sentence_list[i]
                        slot_position = slots[0][0]
                        slot_sem = slots[0][1][0]
                        slot_category = sentence_with_slot[slot_position]
                        word_rules = [rule for rule in rules if (rule['lhs']['cat']==slot_category) & (rule['lhs']['sem']==str(slot_sem))]
                        if len(word_rules) > 0:
                            selected_rule = self.str2dict(str(random.sample(word_rules, 1)[0]))
                            if slot_position == 0:
                                generated_sentence = selected_rule['rhs'] + sentence_with_slot[1:]
                            if slot_position == 1:
                                generated_sentence = sentence_with_slot[0] + selected_rule['rhs'] + sentence_with_slot[2]
                            if slot_position == 2:
                                generated_sentence = sentence_with_slot[:2] + selected_rule['rhs']
                            return f"S/{str(query)} -> {generated_sentence}"
                        if len(word_rules) == 0:
                            invented_form = self.invent_wordrule(slot_sem)
                            if slot_position == 0:
                                generated_sentence = invented_form + sentence_with_slot[1:]
                            if slot_position == 1:
                                generated_sentence = sentence_with_slot[0] + invented_form + sentence_with_slot[2]
                            if slot_position == 2:
                                generated_sentence = sentence_with_slot[:2] + invented_form
                            return f"S/{str(query)} -> {generated_sentence}"
                    if (len(matches)==1) & (len(slots)==2):
                        sentence_with_slot = sentence_list[i]
                        slot_position = slots[0][0]
                        slot_sem = slots[0][1][0]
                        slot_category = sentence_with_slot[slot_position]
                        word_rules = [rule for rule in rules if (rule['lhs']['cat']==slot_category) & (rule['lhs']['sem']==str(slots[0][1][0]))]
                        if len(word_rules) > 0:
                            selected_rule = self.str2dict(str(random.sample(word_rules, 1)[0]))
                            if slot_position == 0:
                                new_sentence_with_slot = selected_rule['rhs'] + sentence_with_slot[1:]
                            if slot_position == 1:
                                new_sentence_with_slot = sentence_with_slot[0] + selected_rule['rhs'] + sentence_with_slot[2]
                            if slot_position == 2:
                                new_sentence_with_slot = sentence_with_slot[:2] + selected_rule['rhs']
                        if len(word_rules) == 0:
                            invented_form = self.invent_wordrule(slot_sem)
                            if slot_position == 0:
                                new_sentence_with_slot = invented_form + sentence_with_slot[1:]
                            if slot_position == 1:
                                new_sentence_with_slot = sentence_with_slot[0] + invented_form + sentence_with_slot[2]
                            if slot_position == 2:
                                new_sentence_with_slot = sentence_with_slot[:2] + invented_form
                        slot_position = slots[1][0]
                        slot_category = new_sentence_with_slot[slot_position]
                        word_rules = [rule for rule in rules if (rule['lhs']['cat']==slot_category) & (rule['lhs']['sem']==str(slots[1][1][0]))]
                        if len(word_rules) > 0:
                            selected_rule = self.str2dict(str(random.sample(word_rules, 1)[0]))
                            if slot_position == 0:
                                generated_sentence = selected_rule['rhs'] + sentence_with_slot[1:]
                            if slot_position == 1:
                                generated_sentence = sentence_with_slot[0] + selected_rule['rhs'] + sentence_with_slot[2]
                            if slot_position == 2:
                                generated_sentence = sentence_with_slot[:2] + selected_rule['rhs']
                            return f"S/{str(query)} -> {generated_sentence}"
                        if len(word_rules) == 0:
                            generated_sentence = self.invent_holisticrule(query)
                            return f"S/{str(query)} -> {generated_sentence}"
                    if (len(matches)==0) & (len(slots)==3):
                        sentence_with_slot = sentence_list[i]
                        slot_position = slots[0][0]
                        slot_sem = slots[0][1][0]
                        slot_category = sentence_with_slot[slot_position]
                        word_rules = [rule for rule in rules if (rule['lhs']['cat']==slot_category) & (rule['lhs']['sem']==str(slots[0][1][0]))]
                        if len(word_rules) > 0:
                            selected_rule = self.str2dict(str(random.sample(word_rules, 1)[0]))
                            if slot_position == 0:
                                new_sentence_with_slot = selected_rule['rhs'] + sentence_with_slot[1:]
                            if slot_position == 1:
                                new_sentence_with_slot = sentence_with_slot[0] + selected_rule['rhs'] + sentence_with_slot[2]
                            if slot_position == 2:
                                new_sentence_with_slot = sentence_with_slot[:2] + selected_rule['rhs']
                        if len(word_rules) == 0:
                            invented_form = self.invent_wordrule(slot_sem)
                            if slot_position == 0:
                                new_sentence_with_slot = invented_form + sentence_with_slot[1:]
                            if slot_position == 1:
                                new_sentence_with_slot = sentence_with_slot[0] + invented_form + sentence_with_slot[2]
                            if slot_position == 2:
                                new_sentence_with_slot = sentence_with_slot[:2] + invented_form
                        slot_position = slots[1][0]
                        slot_category = new_sentence_with_slot[slot_position]
                        word_rules = [rule for rule in rules if (rule['lhs']['cat']==slot_category) & (rule['lhs']['sem']==str(slots[1][1][0]))]
                        if len(word_rules) > 0:
                            selected_rule = self.str2dict(str(random.sample(word_rules, 1)[0]))
                            if slot_position == 0:
                                new_sentence_with_slot = selected_rule['rhs'] + sentence_with_slot[1:]
                            if slot_position == 1:
                                new_sentence_with_slot = sentence_with_slot[0] + selected_rule['rhs'] + sentence_with_slot[2]
                            if slot_position == 2:
                                new_sentence_with_slot = sentence_with_slot[:2] + selected_rule['rhs']
                        slot_position = slots[2][0]
                        slot_category = new_sentence_with_slot[slot_position]
                        word_rules = [rule for rule in rules if (rule['lhs']['cat']==slot_category) & (rule['lhs']['sem']==str(slots[2][1][0]))]
                        if len(word_rules) > 0:
                            selected_rule = self.str2dict(str(random.sample(word_rules, 1)[0]))
                            if slot_position == 0:
                                generated_sentence = selected_rule['rhs'] + sentence_with_slot[1:]
                            if slot_position == 1:
                                generated_sentence = sentence_with_slot[0] + selected_rule['rhs'] + sentence_with_slot[2]
                            if slot_position == 2:
                                generated_sentence = sentence_with_slot[:2] + selected_rule['rhs']
                            return f"S/{str(query)} -> {generated_sentence}"
                        if len(word_rules) == 0:
                            generated_sentence = self.invent_holisticrule(query)
                            return f"S/{str(query)} -> {generated_sentence}"
                    else:
                        generated_sentence = self.invent_holisticrule(query)
                        return f"S/{str(query)} -> {generated_sentence}"

In [94]:
grammar = Grammar()
grammar.from_string("S/_admire(_alice,_carol) -> afc\nS/_admire(_john,_carol) -> bfc\nS/_know(_carol,_john) -> cgb\nR/_know -> g\nA/_john -> b")

In [95]:
len(grammar.rules)

5

In [96]:
print(grammar.to_string())

S/_admire(_alice,_carol) -> afc
S/_admire(_john,_carol) -> bfc
S/_know(_carol,_john) -> cgb
R/_know -> g
A/_john -> b




In [97]:
grammar.chunk01()

In [98]:
print(grammar.to_string())

S/_know(_carol,_john) -> cgb
S/_admire(x,_carol) -> Yfc
Y/_alice -> a
Y/_john -> b
A/_john -> b
R/_know -> g




In [101]:
grammar.merge()

In [102]:
print(grammar.to_string())

S/_know(_carol,_john) -> cgb
S/_admire(x,_carol) -> Yfc
Y/_alice -> a
Y/_john -> b
R/_know -> g




In [103]:
grammar.chunk02()

In [104]:
print(grammar.to_string())

S/_know(_carol,_john) -> cgb
S/_admire(x,_carol) -> Yfc
Y/_alice -> a
Y/_john -> b
R/_know -> g




In [105]:
grammar.replace()

In [106]:
print(grammar.to_string())

S/_admire(x,_carol) -> Yfc
Y/_know(_carol,x) -> cgY
R/X(_carol,_john) -> cRb
Y/_alice -> a
Y/_john -> b
R/_know -> g




In [107]:
grammar.replace()

In [108]:
print(grammar.to_string())

S/_admire(x,_carol) -> Yfc
R/X(_carol,x) -> cRY
Y/X(_carol,x) -> cRY
Y/_alice -> a
Y/_john -> b
R/_know -> g




In [109]:
grammar.merge()

In [110]:
print(grammar.to_string())

S/_admire(x,_carol) -> Yfc
R/X(_carol,x) -> cRY
Y/X(_carol,x) -> cRY
Y/_alice -> a
Y/_john -> b
R/_know -> g




In [123]:
grammar.generate("_know(_carol,_john)") #cga 

'S/_know(_carol,_john) -> cRb'

In [124]:
grammar.generate("_admire(_alice,_carol)") # afc

'S/_admire(_alice,_carol) -> aεb'

In [125]:
grammar.generate("_like(_alice,_carol)") # ???

'S/_like(_alice,_carol) -> eαc'

In [247]:
grammar = Grammar()
grammar.from_string(generate_holistic_rules(100))
utterances = grammar.to_string().split("\n") [:-2]

In [248]:
utterances[:10]

['S/_admire(_alice,_carol) -> bαc',
 'S/_admire(_alice,_john) -> aαe',
 'S/_admire(_john,_carol) -> aαb',
 'S/_admire(_carol,_eve) -> bεd',
 'S/_admire(_alice,_eve) -> cβb',
 'S/_know(_carol,_alice) -> bγa',
 'S/_admire(_eve,_carol) -> cγb',
 'S/_admire(_eve,_alice) -> aβe',
 'S/_admire(_bob,_carol) -> bδe',
 'S/_kick(_alice,_carol) -> dδc']

In [249]:
OPERATIONS = ["chunk01", "chunk02", "merge", "replace"]

In [477]:
grammar = Grammar()
grammar.from_string(generate_holistic_rules(100))
utterances = grammar.to_string().split("\n") [:-2]

N_GENS = 200

# N_GENS世代継承
# TODO: 各世代のlog
# TODO: 全クエリ生成させて、表現度を計算・ボトルネックをサンプリング
for i in tqdm(range(N_GENS)):
    last_generation = Grammar()
    for utterance in utterances:
        last_generation.add_rule(utterance)
        operation = random.sample(OPERATIONS, 1)[0]
        # print(operation)
        # print("Before: ", last_generation.to_string())
        if len(last_generation.rules) > 1:
            if operation == "chunk01":
                last_generation.chunk01()
            if operation == "chunk02":
                last_generation.chunk02()
            if operation == "merge":
                last_generation.merge()
            if operation == "replace":
                last_generation.chunk01()
        # print("After: ", last_generation.to_string())

    utterances = []

    if i != (N_GENS - 1):
        queries = generate_queries(100)
        for query in queries:
            utterance = last_generation.generate(query)
            utterances.append(utterance)
        utterances = random.sample(utterances, 80)

print(last_generation.to_string())

  0%|          | 0/200 [00:00<?, ?it/s]100%|██████████| 200/200 [22:45<00:00,  6.83s/it]

S/_admire(_carol,_alice) -> bβd
S/_admire(_john,_alice) -> eδd
S/_admire(_carol,_john) -> aβb
S/_kick(_alice,_carol) -> aαe
S/_know(_carol,_alice) -> eεc
S/_admire(_bob,_alice) -> aαb
S/_admire(_alice,_bob) -> eεb
S/_admire(_alice,_eve) -> eδd
S/_know(_carol,_john) -> bγe
S/_like(_john,_carol) -> bβd
S/_admire(_bob,_john) -> eδb
S/_know(_john,_carol) -> cεe
S/_kick(_john,_carol) -> bδc
S/_like(_carol,_john) -> bαa
S/_kick(_john,_alice) -> cαa
S/_like(_eve,_alice) -> cαe
S/_meet(_alice,_bob) -> dβb
S/_know(_carol,_eve) -> eαa
S/_know(_alice,_eve) -> aαd
S/_kick(_alice,_eve) -> cεd
S/_meet(_eve,_carol) -> dεe
S/_know(_bob,_carol) -> eεc
S/_kick(_eve,_alice) -> dβa
S/_like(_alice,_bob) -> dγa
S/_admire(_bob,_eve) -> cεd
S/_know(_bob,_alice) -> eδb
S/_like(_carol,_eve) -> cαb
S/_kick(_eve,_carol) -> eαc
S/_kick(_alice,_bob) -> bγc
S/_meet(_bob,_alice) -> cεb
S/_like(_carol,_bob) -> cαb
S/_know(_eve,_alice) -> eγc
S/_meet(_eve,_alice) -> dβa
S/_kick(_john,_eve) -> dαe
S/_like(_john,_bob) ->




In [475]:
last_generation.chunk01()

In [476]:
print(last_generation.to_string())

S/_admire(_john,_carol) -> aβc
S/_admire(_eve,_carol) -> bαa
S/_meet(_carol,_alice) -> dγb
S/_like(_alice,_carol) -> bβd
S/_admire(_bob,_alice) -> bαa
S/_admire(_eve,_alice) -> bβd
S/_admire(_carol,_bob) -> aγc
S/_admire(_john,_eve) -> dβe
S/_like(_alice,_john) -> aδe
S/_know(_carol,_john) -> aεd
S/_know(_john,_carol) -> cαd
S/_admire(_eve,_john) -> dγb
S/_kick(_john,_alice) -> aδc
S/_kick(_carol,_john) -> bδa
S/_kick(_alice,_john) -> bδd
S/_admire(_john,_bob) -> eβa
S/_know(_bob,_carol) -> aεb
S/_kick(_bob,_carol) -> cβd
S/_meet(_bob,_carol) -> aδe
S/_meet(_eve,_carol) -> aεb
S/_know(_carol,_eve) -> aγb
S/_know(_bob,_alice) -> cδb
S/_kick(_bob,_alice) -> aγb
S/_like(_bob,_alice) -> dδc
S/_kick(_carol,_eve) -> eεb
S/_know(_eve,_alice) -> eεb
S/_admire(_eve,_bob) -> dβb
S/_like(_eve,_alice) -> eβd
S/_kick(_carol,_bob) -> bγd
S/_meet(_alice,_eve) -> eεa
S/_admire(_bob,_eve) -> aγc
S/_like(_john,_bob) -> eγc
S/_kick(_bob,_john) -> eαa
S/_meet(_eve,_john) -> cγe
S/_like(_eve,_john) -> cγa


In [323]:
grammar = Grammar()
grammar.from_string(generate_holistic_rules(30))

In [324]:
grammar.chunk01()

In [325]:
print(grammar.to_string())

S/_admire(x,_carol) -> Fβa
S/_admire(_alice,x) -> eγX
S/_admire(x,_alice) -> Oβd
S/_admire(x,_carol) -> Kγd
S/_admire(_alice,x) -> eεR
S/_admire(x,_carol) -> Nγb
S/_admire(x,_carol) -> Cβa
S/_admire(_alice,x) -> eγQ
S/_admire(_alice,x) -> eεM
S/X(_carol,_alice) -> eZa
S/X(_alice,_carol) -> cFd
S/_know(_alice,x) -> eγL
S/X(_carol,_john) -> eQa
S/X(_john,_carol) -> bIa
S/_like(_carol,x) -> dγK
S/X(_carol,_john) -> eDa
S/_admire(_bob,x) -> eγU
S/_know(x,_carol) -> Bεa
S/_admire(x,_eve) -> Bεa
S/_kick(x,_carol) -> Vεe
S/_kick(_alice,x) -> eβN
S/X(_carol,_john) -> eTa
S/X(_alice,_john) -> aHc
S/_know(_alice,x) -> eγQ
S/_kick(x,_carol) -> Nεe
S/X(_alice,_eve) -> eOa
S/X(_alice,_eve) -> eKa
S/_know(_john,x) -> bεA
S/X(_eve,_carol) -> cMe
S/X(_alice,_bob) -> eCb
S/X(_alice,_eve) -> eGa
S/_know(x,_john) -> Fαa
S/_know(_john,x) -> bεZ
S/X(_eve,_alice) -> bNa
S/X(_carol,_eve) -> dEc
S/X(_alice,_eve) -> eWa
S/X(_alice,_bob) -> eQb
S/X(_eve,_alice) -> bPa
S/X(_alice,_eve) -> eYa
S/_like(x,_eve) -> 

In [326]:
grammar.merge()

can merge {'lhs': {'cat': 'Q', 'sem': '_eve'}, 'rhs': 'a'} and {'lhs': {'cat': 'L', 'sem': '_eve'}, 'rhs': 'a'}
replacee_rule:  {'lhs': {'cat': 'L', 'sem': '_eve'}, 'rhs': 'a'}
replacer_rule:  {'lhs': {'cat': 'Q', 'sem': '_eve'}, 'rhs': 'a'}
can merge {'lhs': {'cat': 'Q', 'sem': '_eve'}, 'rhs': 'a'} and {'lhs': {'cat': 'U', 'sem': '_eve'}, 'rhs': 'a'}
replacee_rule:  {'lhs': {'cat': 'U', 'sem': '_eve'}, 'rhs': 'a'}
replacer_rule:  {'lhs': {'cat': 'Q', 'sem': '_eve'}, 'rhs': 'a'}
can merge {'lhs': {'cat': 'Q', 'sem': '_eve'}, 'rhs': 'a'} and {'lhs': {'cat': 'M', 'sem': '_eve'}, 'rhs': 'a'}
replacee_rule:  {'lhs': {'cat': 'M', 'sem': '_eve'}, 'rhs': 'a'}
replacer_rule:  {'lhs': {'cat': 'Q', 'sem': '_eve'}, 'rhs': 'a'}
can merge {'lhs': {'cat': 'Q', 'sem': '_eve'}, 'rhs': 'a'} and {'lhs': {'cat': 'C', 'sem': '_eve'}, 'rhs': 'a'}
replacee_rule:  {'lhs': {'cat': 'C', 'sem': '_eve'}, 'rhs': 'a'}
replacer_rule:  {'lhs': {'cat': 'Q', 'sem': '_eve'}, 'rhs': 'a'}
can merge {'lhs': {'cat': 'B', '

In [327]:
print(grammar.to_string())

S/_admire(x,_carol) -> Bγb
S/_admire(x,_carol) -> Qβa
S/_admire(_alice,x) -> eγQ
S/_admire(_alice,x) -> eεQ
S/_admire(_alice,x) -> eγB
S/_admire(x,_alice) -> Qβd
S/_admire(x,_carol) -> Qγd
S/X(_carol,_alice) -> eBa
S/X(_alice,_carol) -> cQd
S/X(_carol,_john) -> eQa
S/_kick(x,_carol) -> Bεe
S/X(_carol,_john) -> eBa
S/_know(x,_carol) -> Bεa
S/_admire(x,_eve) -> Bεa
S/_like(_carol,x) -> dγQ
S/_admire(_bob,x) -> eγQ
S/X(_alice,_john) -> aQc
S/_kick(_alice,x) -> eβB
S/_know(_alice,x) -> eγQ
S/X(_john,_carol) -> bBa
S/_know(x,_john) -> Qαa
S/X(_alice,_eve) -> eBa
S/X(_eve,_alice) -> bBa
S/_know(_john,x) -> bεB
S/X(_eve,_alice) -> bQa
S/X(_eve,_carol) -> cQe
S/X(_alice,_eve) -> eQa
S/X(_carol,_eve) -> dEc
S/X(_alice,_bob) -> eQb
S/_like(x,_eve) -> Bγc
S/_know(x,_eve) -> Bβa
S/_know(x,_bob) -> Bγb
S/_kick(_eve,x) -> bβQ
S/_like(x,_bob) -> Qαc
S/_kick(_eve,x) -> cεQ
S/_like(x,_bob) -> Bδe
Q/_admire -> ε
Q/_admire -> δ
Q/_admire -> γ
Q/_admire -> α
B/_admire -> δ
B/_admire -> ε
Q/_alice -> c
B/_

In [495]:
_5verbs_sem = ["_kick", "_know", "_meet", "_like", "_admire"]
_5nouns_sem = ["_john", "_alice", "_eve", "_carol", "_bob"]
_5nouns = ["d", "a", "e", "c", "b"]
_5verbs = ["α", "β", "γ", "δ", "ε"]

In [499]:
# generate fully compositional utterances
with open("semantic_space.txt", "w") as f:
    for verb, verb_sem in zip(_5verbs, _5verbs_sem):
        for subj, subj_sem in zip(_5nouns, _5nouns_sem):
            for obj, obj_sem in zip(_5nouns, _5nouns_sem):
                if subj != obj:
                    f.write(f"S/{verb_sem}({subj_sem},{obj_sem}) -> {subj}{verb}{obj}\n")
                else:
                    continue

In [501]:
# whole semantic space / fully compositional utterances
semantic_space = """S/_kick(_john,_alice) -> dαa
S/_kick(_john,_eve) -> dαe
S/_kick(_john,_carol) -> dαc
S/_kick(_john,_bob) -> dαb
S/_kick(_alice,_john) -> aαd
S/_kick(_alice,_eve) -> aαe
S/_kick(_alice,_carol) -> aαc
S/_kick(_alice,_bob) -> aαb
S/_kick(_eve,_john) -> eαd
S/_kick(_eve,_alice) -> eαa
S/_kick(_eve,_carol) -> eαc
S/_kick(_eve,_bob) -> eαb
S/_kick(_carol,_john) -> cαd
S/_kick(_carol,_alice) -> cαa
S/_kick(_carol,_eve) -> cαe
S/_kick(_carol,_bob) -> cαb
S/_kick(_bob,_john) -> bαd
S/_kick(_bob,_alice) -> bαa
S/_kick(_bob,_eve) -> bαe
S/_kick(_bob,_carol) -> bαc
S/_know(_john,_alice) -> dβa
S/_know(_john,_eve) -> dβe
S/_know(_john,_carol) -> dβc
S/_know(_john,_bob) -> dβb
S/_know(_alice,_john) -> aβd
S/_know(_alice,_eve) -> aβe
S/_know(_alice,_carol) -> aβc
S/_know(_alice,_bob) -> aβb
S/_know(_eve,_john) -> eβd
S/_know(_eve,_alice) -> eβa
S/_know(_eve,_carol) -> eβc
S/_know(_eve,_bob) -> eβb
S/_know(_carol,_john) -> cβd
S/_know(_carol,_alice) -> cβa
S/_know(_carol,_eve) -> cβe
S/_know(_carol,_bob) -> cβb
S/_know(_bob,_john) -> bβd
S/_know(_bob,_alice) -> bβa
S/_know(_bob,_eve) -> bβe
S/_know(_bob,_carol) -> bβc
S/_meet(_john,_alice) -> dγa
S/_meet(_john,_eve) -> dγe
S/_meet(_john,_carol) -> dγc
S/_meet(_john,_bob) -> dγb
S/_meet(_alice,_john) -> aγd
S/_meet(_alice,_eve) -> aγe
S/_meet(_alice,_carol) -> aγc
S/_meet(_alice,_bob) -> aγb
S/_meet(_eve,_john) -> eγd
S/_meet(_eve,_alice) -> eγa
S/_meet(_eve,_carol) -> eγc
S/_meet(_eve,_bob) -> eγb
S/_meet(_carol,_john) -> cγd
S/_meet(_carol,_alice) -> cγa
S/_meet(_carol,_eve) -> cγe
S/_meet(_carol,_bob) -> cγb
S/_meet(_bob,_john) -> bγd
S/_meet(_bob,_alice) -> bγa
S/_meet(_bob,_eve) -> bγe
S/_meet(_bob,_carol) -> bγc
S/_like(_john,_alice) -> dδa
S/_like(_john,_eve) -> dδe
S/_like(_john,_carol) -> dδc
S/_like(_john,_bob) -> dδb
S/_like(_alice,_john) -> aδd
S/_like(_alice,_eve) -> aδe
S/_like(_alice,_carol) -> aδc
S/_like(_alice,_bob) -> aδb
S/_like(_eve,_john) -> eδd
S/_like(_eve,_alice) -> eδa
S/_like(_eve,_carol) -> eδc
S/_like(_eve,_bob) -> eδb
S/_like(_carol,_john) -> cδd
S/_like(_carol,_alice) -> cδa
S/_like(_carol,_eve) -> cδe
S/_like(_carol,_bob) -> cδb
S/_like(_bob,_john) -> bδd
S/_like(_bob,_alice) -> bδa
S/_like(_bob,_eve) -> bδe
S/_like(_bob,_carol) -> bδc
S/_admire(_john,_alice) -> dεa
S/_admire(_john,_eve) -> dεe
S/_admire(_john,_carol) -> dεc
S/_admire(_john,_bob) -> dεb
S/_admire(_alice,_john) -> aεd
S/_admire(_alice,_eve) -> aεe
S/_admire(_alice,_carol) -> aεc
S/_admire(_alice,_bob) -> aεb
S/_admire(_eve,_john) -> eεd
S/_admire(_eve,_alice) -> eεa
S/_admire(_eve,_carol) -> eεc
S/_admire(_eve,_bob) -> eεb
S/_admire(_carol,_john) -> cεd
S/_admire(_carol,_alice) -> cεa
S/_admire(_carol,_eve) -> cεe
S/_admire(_carol,_bob) -> cεb
S/_admire(_bob,_john) -> bεd
S/_admire(_bob,_alice) -> bεa
S/_admire(_bob,_eve) -> bεe
S/_admire(_bob,_carol) -> bεc"""

In [502]:
grammar = Grammar()
grammar.from_string(semantic_space)

In [503]:
# test chunk01 on whole semantic space
grammar.chunk01()

In [504]:
print(grammar.to_string())

S/_admire(x,_carol) -> Rεc
S/_admire(x,_alice) -> Gεa
S/_admire(_alice,x) -> aεV
S/_admire(x,_carol) -> Hεc
S/_admire(_carol,x) -> cεL
S/_admire(_alice,x) -> aεY
S/_admire(_carol,x) -> cεN
S/_admire(_alice,x) -> aεE
S/_admire(x,_alice) -> Bεa
S/_admire(x,_carol) -> Jεc
S/_admire(_alice,x) -> aεX
S/_admire(_alice,x) -> aεB
S/_admire(x,_alice) -> Aεa
S/_admire(_carol,x) -> cεU
S/_admire(x,_alice) -> Cεa
S/_admire(_carol,x) -> cεE
S/_admire(x,_carol) -> Nεc
S/_admire(_carol,x) -> cεA
S/_admire(_alice,x) -> aεF
S/_admire(x,_carol) -> Yεc
S/_admire(x,_alice) -> Rεa
S/X(_carol,_alice) -> cUa
S/X(_alice,_carol) -> aWc
S/X(_carol,_alice) -> cTa
S/X(_alice,_carol) -> aKc
S/X(_carol,_alice) -> cOa
S/X(_alice,_carol) -> aVc
S/_admire(x,_john) -> Dεd
S/X(_alice,_carol) -> aBc
S/_admire(_john,x) -> dεD
S/_admire(_john,x) -> dεF
S/X(_carol,_alice) -> cXa
S/_admire(x,_john) -> Uεd
S/X(_carol,_alice) -> cKa
S/X(_carol,_alice) -> cBa
S/X(_alice,_carol) -> aGc
S/X(_carol,_alice) -> cQa
S/X(_alice,_carol

In [513]:
# generate all possible queries
query_space = []

for verb_sem in _5verbs_sem:
    for subj_sem in _5nouns_sem:
        for obj_sem in _5nouns_sem:
            if subj_sem != obj_sem:
                query_space.append(f"{verb_sem}({subj_sem},{obj_sem})")
            else:
                continue

In [515]:
len(query_space)

100

In [None]:
grammar = Grammar()
grammar.from_string(generate_holistic_rules(100))
utterances = grammar.to_string().split("\n") [:-2]

N_GENS = 200

# N_GENS世代継承
# TODO: 各世代のlog
# TODO: <done>全クエリ生成させて、</done>表現度を計算・ボトルネックをサンプリング
for i in tqdm(range(N_GENS)):
    last_generation = Grammar()
    for utterance in utterances:
        last_generation.add_rule(utterance)
        operation = random.sample(OPERATIONS, 1)[0]
        # print(operation)
        # print("Before: ", last_generation.to_string())
        if len(last_generation.rules) > 1:
            if operation == "chunk01":
                last_generation.chunk01()
            if operation == "chunk02":
                last_generation.chunk02()
            if operation == "merge":
                last_generation.merge()
            if operation == "replace":
                last_generation.chunk01()
        # print("After: ", last_generation.to_string())

    utterances = []

    if i != (N_GENS - 1):
        queries = query_space # all queries
        for query in queries:
            utterance = last_generation.generate(query) # generate all
            utterances.append(utterance)
        utterances = random.sample(utterances, 50) # sample 50 instances

print(last_generation.to_string())