- 記号の使い分け
  - 統語範疇: `英大文字`
  - 動詞（形式）: `ギリシャ小文字`
  - 名詞（形式）: `英小文字`
  - 動詞（意味表示内の定数）: `_ギリシャ小文字`
  - 名詞（意味表示内の定数）: `_英小文字`
  - 動詞（意味表示内の変数）: `X`
  - 名詞（意味表示内の変数）: `x`

In [3]:
import random
import string
import ast
import time

In [4]:
from tqdm import tqdm

In [5]:
# 動詞にはギリシャ文字を使用する
def generate_noun(n_sorts: str, stopword: str = ""):
    assert n_sorts <= len(string.ascii_lowercase)
    chars_wo_stopword = [char for char in string.ascii_lowercase[:n_sorts] if char not in stopword]
    return random.choice(chars_wo_stopword)
def generate_verb(n_sorts: str, stopword: str = ""):
    greek_lowercase = [chr(i) for i in range(945, 970)]
    chars_wo_stopword = [char for char in greek_lowercase[:n_sorts] if char not in stopword]
    return random.choice(chars_wo_stopword)

In [835]:
NOUNS = [
    "david", "alice", "bob", "carol", "eve", "frank", "grace", "helen", 
    "ivan", "jack", "karen", "larry", "mike", "nina", "oscar", "paul", 
    "quincy", "rachel", "steve", "tracy", "ursula", "victor", "wendy", 
    "xander", "yasmine", "zach"
]

VERBS = [
    "know", "admire", "like", "kick", "meet", "call", "defend", "encourage"
    , "follow", "greet", "help", "invite", "judge", "notice", "obey", 
    "praise", "question", "respect", "support", "trust", "understand", 
    "value", "warn", "x-ray", "yell", "zap"
]

VERBS_PSS = [
    "known", "admired", "liked", "kicked", "met"
]

In [836]:
# 動詞にはギリシャ文字を使用する
def generate_noun_sem(n_sorts: str, stopword: str = ""):
    assert n_sorts <= len(NOUNS)
    chars_wo_stopword = [char for char in NOUNS[:n_sorts] if char not in stopword]
    return random.choice(chars_wo_stopword)
def generate_verb_sem(n_sorts: str, stopword: str = ""):
    chars_wo_stopword = [char for char in (VERBS[:n_sorts]+VERBS_PSS[:n_sorts]) if char not in stopword]
    return random.choice(chars_wo_stopword)

In [837]:
CONCEPTS = [0, 1]

In [838]:
# 全体論的なルールの生成
def generate_holistic_rules(n):
    result = ""
    for i in range(n):
        con = random.sample(CONCEPTS, 1)[0]
        subj = generate_noun(5, ["x", "y"])
        obj = generate_noun(5, [subj, "x", "y"])
        verb = generate_verb(5)
        sentence = f"{subj}{verb}{obj}"
        subjsem = generate_noun_sem(5)
        objsem = generate_noun_sem(5, [subjsem])
        verbsem = generate_verb_sem(5)
        semrepr = f"_{verbsem}(_{subjsem},_{objsem})"
        result += f"S/{semrepr}/{con} -> {sentence}\n"
    return result[:-1]

In [839]:
# TODO: remove duplicates
def generate_queries(n):
    result = []
    for i in range(n):
        subjsem = generate_noun_sem(5)
        objsem = generate_noun_sem(5, [subjsem])
        verbsem = generate_verb_sem(5)
        semrepr = f"_{verbsem}(_{subjsem},_{objsem})"
        result.append(semrepr)
    return result

In [840]:
print(generate_holistic_rules(10))

S/_like(_alice,_bob)/1 -> eγb
S/_kicked(_carol,_bob)/1 -> cβb
S/_admired(_eve,_alice)/0 -> dδb
S/_meet(_david,_eve)/0 -> cβe
S/_known(_eve,_bob)/0 -> aγd
S/_kicked(_eve,_david)/1 -> bεa
S/_kick(_alice,_carol)/1 -> dγa
S/_liked(_carol,_david)/0 -> eαc
S/_admire(_david,_carol)/0 -> aεc
S/_liked(_carol,_david)/1 -> bεe


In [841]:
from nltk.sem.logic import *

import random
import string
import copy

def replace_at_index(s, index, replacement):
    return s[:index] + replacement + s[index + 1:]

In [842]:
N_SORTS = 5
NONTERMINALS = [char for char in string.ascii_uppercase if char != "S"]
INDIVIDUAL_VARIABLES = ["x", "y"]
FUNCTIONAL_VARIABLES = ["X"]
GREEK_LOWER = [chr(i) for i in range(945, 970)]
ENGLISH_LOWER = [char for char in string.ascii_lowercase[:N_SORTS] if char not in ["x", "y"]]

In [1041]:
class Grammar():
    def __init__(self):
        self.rules = []

    def from_string(self, string: str):
        self.rules = []
        rules_str = string.split("\n")
        for rule_str in rules_str:
            if rule_str != "\n":
                if len(rule_str.split("\t")) == 2:
                    rule_body, asignments = rule_str.split("\t")[0], rule_str.split("\t")[1]
                    asignments = asignments.replace("(", "").replace(")", "")
                    asignments_list = asignments.split(", ")
                    asignments_dict = {str(asig.split(":")[0]):int(asig.split(":")[1]) for asig in asignments_list}
                    rule = rule_body.split(" ")
                    lhs = rule[0].split("/")
                    rhs = rule[2]
                    lhs_cat = lhs[0]
                    lhs_sem = lhs[1]
                    lhs_con = int(lhs[2])
                    rule = {'lhs':{'cat':lhs_cat, 'sem':lhs_sem, 'con':lhs_con}, 'rhs':rhs, 'var':asignments_dict}
                    self.rules.append(rule)
                if len(rule_str.split("\t")) == 1:
                    rule = rule_str.split(" ")
                    lhs = rule[0].split("/")
                    rhs = rule[2]
                    if len(lhs) == 3:
                        lhs_cat = lhs[0]
                        lhs_sem = lhs[1]
                        lhs_con = int(lhs[2])
                        rule = {'lhs':{'cat':lhs_cat, 'sem':lhs_sem, 'con':lhs_con}, 'rhs':rhs}
                    if len(lhs) == 2:
                        lhs_cat = lhs[0]
                        lhs_sem = lhs[1]
                        rule = {'lhs':{'cat':lhs_cat, 'sem':lhs_sem}, 'rhs':rhs}
                    self.rules.append(rule)
    
    def add_rule(self, string: str):
        rules_str = string.split("\n")
        for rule_str in rules_str:
            if rule_str != "\n":
                if len(rule_str.split("\t")) == 2:
                    rule_body, asignments = rule_str.split("\t")[0], rule_str.split("\t")[1]
                    asignments = asignments.replace("(", "").replace(")", "")
                    asignments_list = asignments.split(", ")
                    asignments_dict = {str(asig.split(":")[0]):int(asig.split(":")[1]) for asig in asignments_list}
                    rule = rule_body.split(" ")
                    lhs = rule[0].split("/")
                    rhs = rule[2]
                    lhs_cat = lhs[0]
                    lhs_sem = lhs[1]
                    lhs_con = int(lhs[2])
                    rule = {'lhs':{'cat':lhs_cat, 'sem':lhs_sem, 'con':lhs_con}, 'rhs':rhs, 'var':asignments_dict}
                    self.rules.append(rule)
                if len(rule_str.split("\t")) == 1:
                    rule = rule_str.split(" ")
                    lhs = rule[0].split("/")
                    rhs = rule[2]
                    if len(lhs) == 3:
                        lhs_cat = lhs[0]
                        lhs_sem = lhs[1]
                        lhs_con = int(lhs[2])
                        rule = {'lhs':{'cat':lhs_cat, 'sem':lhs_sem, 'con':lhs_con}, 'rhs':rhs}
                    if len(lhs) == 2:
                        lhs_cat = lhs[0]
                        lhs_sem = lhs[1]
                        rule = {'lhs':{'cat':lhs_cat, 'sem':lhs_sem}, 'rhs':rhs}
                    self.rules.append(rule)
    
    def to_string(self):
        rules_list = []
        rules_str = ""
        for rule in self.rules:
            if 'var' in rule:
                asigs_str = "(" + ", ".join(f"{k}:{v}" for k, v in rule['var'].items()) + ")"
                rules_list.append(f"{rule['lhs']['cat']}/{rule['lhs']['sem']}/{rule['lhs']['con']} -> {rule['rhs']}\t{asigs_str}\n")
            elif 'con' in rule['lhs']:
                rules_list.append(f"{rule['lhs']['cat']}/{rule['lhs']['sem']}/{rule['lhs']['con']} -> {rule['rhs']}\n")
            else:
                rules_list.append(f"{rule['lhs']['cat']}/{rule['lhs']['sem']} -> {rule['rhs']}\n")
        sorted_list = sorted(rules_list, key=lambda x: -len(x))
        rules_str += ''.join(sorted_list)+"\n"
        return rules_str
    
    def str2dict(self, string):
        return ast.literal_eval(string)
    
    def sem_list(self):
        return [d['lhs']['sem'] for d in self.rules]
    
    def cat_list(self):
        return [d['lhs']['cat'] for d in self.rules]
    
    def sentence_list(self):
        return [d['rhs'] for d in self.rules]

    def can_chunk01(self, rule1, rule2):
        str1, str2 = rule1["rhs"], rule2["rhs"]
        sem1_logic = Expression.fromstring(rule1["lhs"]["sem"])
        sem2_logic = Expression.fromstring(rule2["lhs"]["sem"])
        con1 = rule1["lhs"]["con"]
        con2 = rule2["lhs"]["con"]

        if len(str1) != 3 or len(str2) != 3: # TODO: Better to judge by category (not sentence length)
            return None, False
        if con1 == con2 == 0:
            sem1, sem2 = [sem1_logic.args[0], sem1_logic.pred, sem1_logic.args[1]], [sem2_logic.args[0], sem2_logic.pred, sem2_logic.args[1]]
            # print(str1, str2)
            diff_count_str = 0
            diff_positions_str = []
            for i, (char1, char2) in enumerate(zip(str1, str2)):
                if (char1 != char2):
                    diff_count_str += 1
                    diff_positions_str.append(i)
            diff_count_sem = 0
            diff_positions_sem = []
            for i, (elm1, elm2) in enumerate(zip(sem1, sem2)):
                if (elm1 != elm2):
                    diff_count_sem += 1
                    diff_positions_sem.append(i)
            if diff_count_str == diff_count_sem == 1:
                if diff_positions_str[0] == diff_positions_sem[0]:
                    if str1[diff_positions_str[0]].islower() and str2[diff_positions_str[0]].islower():
                        return diff_positions_str[0], True
                    else:
                        return None, False
                else:
                    return None, False
            else:
                return None, False
        elif con1 == con2 == 1:
            sem1, sem2 = [sem1_logic.args[1], sem1_logic.pred, sem1_logic.args[0]], [sem2_logic.args[1], sem2_logic.pred, sem2_logic.args[0]]
            # print(str1, str2)
            diff_count_str = 0
            diff_positions_str = []
            for i, (char1, char2) in enumerate(zip(str1, str2)):
                if (char1 != char2):
                    diff_count_str += 1
                    diff_positions_str.append(i)
            diff_count_sem = 0
            diff_positions_sem = []
            for i, (elm1, elm2) in enumerate(zip(sem1, sem2)):
                if (elm1 != elm2):
                    diff_count_sem += 1
                    diff_positions_sem.append(i)
            if diff_count_str == diff_count_sem == 1:
                if diff_positions_str[0] == diff_positions_sem[0]:
                    if str1[diff_positions_str[0]].islower() and str2[diff_positions_str[0]].islower():
                        return diff_positions_str[0], True
                    else:
                        return None, False
                else:
                    return None, False
            else:
                return None, False
        else:
            return None, False

    def find_diff_position_for_chunk01(self, str1, str2):
        if len(str1) != 3 or len(str2) != 3:
            return None

        diff_count = 0
        diff_index = None
        for index, (char1, char2) in enumerate(zip(str1, str2)):
            if char1 != char2:
                diff_count += 1
                diff_index = index
                if not (char1.islower() and char2.islower()):
                    return None

        if diff_count == 1:
            if diff_index == 0:
                diff_index_sem = 1
            if diff_index == 1:
                diff_index_sem = 0
            if diff_index == 2:
                diff_index_sem = 2
            return diff_index, diff_index_sem
        else:
            return None
    
    def can_chunk02(self, rule1, rule2):
        str1, str2 = rule1["rhs"], rule2["rhs"]
        sem1_logic = Expression.fromstring(rule1["lhs"]["sem"])
        sem2_logic = Expression.fromstring(rule2["lhs"]["sem"])
        sem1, sem2 = [sem1_logic.args[0], sem1_logic.pred, sem1_logic.args[1]], [sem2_logic.args[0], sem2_logic.pred, sem2_logic.args[1]]
        con1 = rule1["lhs"]["con"]
        con2 = rule2["lhs"]["con"]

        if len(str1) != 3 or len(str2) != 3: # TODO: Better to judge by category (not sentence length)
            return None, False, None, None
        elif con1 == con2 == 0:
            sem1, sem2 = [sem1_logic.args[0], sem1_logic.pred, sem1_logic.args[1]], [sem2_logic.args[0], sem2_logic.pred, sem2_logic.args[1]]
            # print(str1, str2)
            diff_count_str = 0
            diff_positions_str = []
            for i, (char1, char2) in enumerate(zip(str1, str2)):
                if (char1 != char2):
                    # if (char1.islower() and char2.isupper()):
                    diff_count_str += 1
                    diff_positions_str.append(i)
                    # elif (char1.isupper() and char2.islower()):
                    # else:
                    #    None, False, None, None
            diff_count_sem = 0
            diff_positions_sem = []
            for i, (elm1, elm2) in enumerate(zip(sem1, sem2)):
                if (elm1 != elm2):
                    diff_count_sem += 1
                    diff_positions_sem.append(i)
            if diff_count_str == diff_count_sem == 1:
                if diff_positions_str[0] == diff_positions_sem[0]:
                    if (str1[diff_positions_str[0]].islower() and str2[diff_positions_str[0]].isupper()):
                        upper_in_str, lower_in_str = 1, 0
                        return diff_positions_str[0], True, upper_in_str, lower_in_str
                    elif (str1[diff_positions_str[0]].isupper() and str2[diff_positions_str[0]].islower()):
                        upper_in_str, lower_in_str = 0, 1
                        return diff_positions_str[0], True, upper_in_str, lower_in_str
                    else:
                        return None, False, None, None
                else:
                    return None, False, None, None
            else:
                return None, False, None, None
        elif con1 == con2 == 1:
            sem1, sem2 = [sem1_logic.args[1], sem1_logic.pred, sem1_logic.args[0]], [sem2_logic.args[1], sem2_logic.pred, sem2_logic.args[0]]
            # print(str1, str2)
            diff_count_str = 0
            diff_positions_str = []
            for i, (char1, char2) in enumerate(zip(str1, str2)):
                if (char1 != char2):
                    # if (char1.islower() and char2.isupper()):
                    diff_count_str += 1
                    diff_positions_str.append(i)
                    # elif (char1.isupper() and char2.islower()):
                    # else:
                    #    None, False, None, None
            diff_count_sem = 0
            diff_positions_sem = []
            for i, (elm1, elm2) in enumerate(zip(sem1, sem2)):
                if (elm1 != elm2):
                    diff_count_sem += 1
                    diff_positions_sem.append(i)
            if diff_count_str == diff_count_sem == 1:
                if diff_positions_str[0] == diff_positions_sem[0]:
                    if (str1[diff_positions_str[0]].islower() and str2[diff_positions_str[0]].isupper()):
                        upper_in_str, lower_in_str = 1, 0
                        return diff_positions_str[0], True, upper_in_str, lower_in_str
                    elif (str1[diff_positions_str[0]].isupper() and str2[diff_positions_str[0]].islower()):
                        upper_in_str, lower_in_str = 0, 1
                        return diff_positions_str[0], True, upper_in_str, lower_in_str
                    else:
                        return None, False, None, None
                else:
                    return None, False, None, None
            else:
                return None, False, None, None
        else:
            return None, False, None, None
    
    def find_diff_position_for_chunk02(self, str1, str2):
        if len(str1) != 3 or len(str2) != 3:
            return None, None

        diff_count = 0
        diff_index = None
        upper_in_str = None

        for index, (char1, char2) in enumerate(zip(str1, str2)):
            if char1 != char2:
                diff_count += 1
                diff_index = index
                if (char1.islower() and char2.isupper()):
                    lower_in_str = 0
                    upper_in_str = 1
                elif (char1.isupper() and char2.islower()):
                    lower_in_str = 1
                    upper_in_str = 0
                else:
                    return None, None

        if diff_count == 1:
            if diff_index == 0:
                diff_index_sem = 1
            if diff_index == 1:
                diff_index_sem = 0
            if diff_index == 2:
                diff_index_sem = 2
            return diff_index, diff_index_sem, upper_in_str, lower_in_str
        else:
            return None, None

    def highlight_for_replace(self, str1, str2):
        if (len(str1) == 3) & (len(str2) == 1):
            if str1.count(str2) == 1:
                non_word_level, word_level = 0, 1
                return non_word_level, word_level
            else:
                return None, None
        elif (len(str1) == 1) & (len(str2) == 3):
            if str2.count(str1) == 1:
                non_word_level, word_level = 1, 0
                return non_word_level, word_level
            else:
                return None, None
        else:
            return None, None

    def find_diff_position_for_replace(self, str1, str2):
        if len(str1) != 3 or len(str2) != 1:
            return None

        for i, char in enumerate(str1):
            if str2 == char:
                diff_index = i
            else:
                continue

        if diff_index == 0:
            diff_index_sem = 1
        if diff_index == 1:
            diff_index_sem = 0
        if diff_index == 2:
            diff_index_sem = 2
        return diff_index, diff_index_sem
    
    def existing_variables(self, elements: list):
        result = [str(element) for element in elements if not isinstance(element, ConstantExpression)]
        return result

    # TODO: Sentence-meaning positions do not necessarily have to correspond
    def chunk01(self):
        rules = self.rules

        chuncked_rules = []
        new_rules = []

        for i in range(len(rules)):
            for j in range(i+1, len(rules)):
                first_sem = Expression.fromstring(rules[i]["lhs"]["sem"])
                second_sem = Expression.fromstring(rules[j]["lhs"]["sem"])
                first_sentence = rules[i]["rhs"]
                second_sentence = rules[j]["rhs"]
                if isinstance(first_sem, ApplicationExpression) & isinstance(second_sem, ApplicationExpression):
                    diff_index, can_chunk = self.can_chunk01(rules[i], rules[j])
                    if can_chunk:
                        assert rules[i]["lhs"]["con"] == rules[j]["lhs"]["con"]
                        con = rules[i]["lhs"]["con"]
                        first_sem_elements = [first_sem.pred, first_sem.args[0], first_sem.args[1]]
                        second_sem_elements = [second_sem.pred, second_sem.args[0], second_sem.args[1]]
                        
                        if "var" in rules[i]:
                            asignments = rules[i]["var"]
                        else:
                            asignments = {}

                        if con == 0:
                            if diff_index == 0:
                                diff_index_sem = 1
                            if diff_index == 1:
                                diff_index_sem = 0
                            if diff_index == 2:
                                diff_index_sem = 2
                        elif con == 1:
                            if diff_index == 0:
                                diff_index_sem = 2
                            if diff_index == 1:
                                diff_index_sem = 0
                            if diff_index == 2:
                                diff_index_sem = 1

                        chuncked_rules.append(rules[i])
                        chuncked_rules.append(rules[j])

                        if diff_index_sem == 0:
                            var = Expression.fromstring("X")
                        else:
                            # FIX: efficiency
                            existing_variables = self.existing_variables(first_sem_elements)
                            var = [var for var in INDIVIDUAL_VARIABLES if var not in existing_variables][0]
                        first_sem_elements_abstracted = copy.deepcopy(first_sem_elements)
                        first_sem_elements_abstracted[diff_index_sem] = var
                        new_sem_str = f"{str(first_sem_elements_abstracted[0])}({str(first_sem_elements_abstracted[1])},{str(first_sem_elements_abstracted[2])})"
                        random_category = random.choice(NONTERMINALS)
                        new_sen_str = replace_at_index(first_sentence, diff_index, random_category)
                        asignments[str(var)] = diff_index
                        new_rule_0 = {"lhs": {"cat":"S", "sem": new_sem_str, "con":con}, "rhs": new_sen_str, "var":asignments}

                        new_rule_1 = {"lhs": {"cat":random_category, "sem": str(first_sem_elements[diff_index_sem])}, "rhs": first_sentence[diff_index]}
                        new_rule_2 = {"lhs": {"cat":random_category, "sem": str(second_sem_elements[diff_index_sem])}, "rhs": second_sentence[diff_index]}
                        new_rules += [new_rule_0] + [new_rule_1] + [new_rule_2]
        rules = [rule for rule in rules if rule not in chuncked_rules]
        rules = rules + new_rules
        rules_unique = list(set([str(rule) for rule in rules]))
        self.rules = [self.str2dict(rule) for rule in rules_unique]

    def chunk02(self):
        rules = self.rules

        chuncked_rules = []
        new_rules = []

        for i in range(len(rules)):
            for j in range(i+1, len(rules)):
                first_sem = Expression.fromstring(rules[i]["lhs"]["sem"])
                second_sem = Expression.fromstring(rules[j]["lhs"]["sem"])
                first_sentence = rules[i]["rhs"]
                second_sentence = rules[j]["rhs"]
                if isinstance(first_sem, ApplicationExpression) & isinstance(second_sem, ApplicationExpression):
                    diff_index, can_chunk, upper_in_str, lower_in_str = self.can_chunk02(rules[i], rules[j])
                    if can_chunk:
                        assert rules[i]["lhs"]["con"] == rules[j]["lhs"]["con"]
                        con = rules[i]["lhs"]["con"]
                        first_sem_elements = [first_sem.pred, first_sem.args[0], first_sem.args[1]]
                        second_sem_elements = [second_sem.pred, second_sem.args[0], second_sem.args[1]]

                        if con == 0:
                            if diff_index == 0:
                                diff_index_sem = 1
                            if diff_index == 1:
                                diff_index_sem = 0
                            if diff_index == 2:
                                diff_index_sem = 2
                        if con == 1:
                            if diff_index == 0:
                                diff_index_sem = 2
                            if diff_index == 1:
                                diff_index_sem = 0
                            if diff_index == 2:
                                diff_index_sem = 1

                        target_position = [i,j][lower_in_str]
                        nontarget_position = [i,j][upper_in_str]
                        chuncked_rules.append(rules[target_position])

                        target_sem = Expression.fromstring(rules[target_position]["lhs"]["sem"])
                        target_sem_elements = [target_sem.pred, target_sem.args[0], target_sem.args[1]]
                        target_sentence = rules[target_position]["rhs"]
                        nontarget_sentence = rules[nontarget_position]["rhs"]
                        new_rule = {"lhs": {"cat":nontarget_sentence[diff_index], "sem": str(target_sem_elements[diff_index_sem])}, "rhs": target_sentence[diff_index]}
                        new_rules.append(new_rule)
        rules = [rule for rule in rules if rule not in chuncked_rules]
        rules = rules + new_rules
        rules_unique = list(set([str(rule) for rule in rules]))
        self.rules = [self.str2dict(rule) for rule in rules_unique]
        
    def abstract(self, element, var, idx, diff_idx):
        if idx == diff_idx:
            return var
        else:
            return element

    def replace(self):
        rules = self.rules

        replaced_rules = []
        new_rules = []

        for i in range(len(rules)):
            for j in range(i+1, len(rules)):
                first_sem = Expression.fromstring(rules[i]["lhs"]["sem"])
                second_sem = Expression.fromstring(rules[j]["lhs"]["sem"])
                first_sentence = rules[i]["rhs"]
                second_sentence = rules[j]["rhs"]
                non_word_level, word_level = self.highlight_for_replace(first_sentence, second_sentence)
                if (non_word_level is not None) and (word_level is not None):
                    target_position = [i,j][non_word_level]
                    nontarget_position  = [i,j][word_level]

                    if "var" in rules[target_position]:
                        asignments = rules[target_position]["var"]
                    else:
                        asignments = {}

                    replaced_rules.append(rules[target_position])

                    target_sem = Expression.fromstring(rules[target_position]["lhs"]["sem"])
                    nontarget_sem = Expression.fromstring(rules[nontarget_position]["lhs"]["sem"])

                    target_sentence = rules[target_position]["rhs"]
                    nontarget_sentence = rules[nontarget_position]["rhs"]
                    
                    target_con = rules[target_position]["lhs"]["con"]

                    target_sem_elements = [target_sem.pred, target_sem.args[0], target_sem.args[1]]
                    nontarget_cat = rules[nontarget_position]["lhs"]["cat"]

                    diff_index, diff_index_sem = self.find_diff_position_for_replace(target_sentence, nontarget_sentence)

                    if target_con == 1:
                        if diff_index_sem == 1:
                            diff_index_sem = 2
                        elif diff_index_sem == 2:
                            diff_index_sem = 1

                    if diff_index_sem == 0:
                        var = Expression.fromstring("X")
                    else:
                        # FIX: efficiency
                        existing_variables = self.existing_variables(target_sem_elements)
                        var = [var for var in INDIVIDUAL_VARIABLES if var not in existing_variables][0]
                    target_sem_elements_abstract = [self.abstract(element, var, i, diff_index_sem) for i, element in enumerate(target_sem_elements)]

                    new_sem_str = f"{str(target_sem_elements_abstract[0])}({str(target_sem_elements_abstract[1])},{str(target_sem_elements_abstract[2])})"
                    new_sen_str = replace_at_index(target_sentence, diff_index, nontarget_cat)
                    asignments[str(var)] = diff_index
                    new_rule = {"lhs": {"cat":"S", "sem": new_sem_str, "con":target_con}, "rhs": new_sen_str, "var":asignments}
                    new_rules.append(new_rule)
        rules = [rule for rule in rules if rule not in replaced_rules]
        rules = rules + new_rules
        rules_unique = list(set([str(rule) for rule in rules]))
        self.rules = [self.str2dict(rule) for rule in rules_unique]
    
    def repaint_rule(self, rule, replacee, replacer):
        # print(rule, replacee, replacer)
        if rule["lhs"]["cat"] == replacee["lhs"]["cat"]:
            assert len(rule["rhs"]) == 1
            # print(rule["lhs"]["cat"], "->",replacer["lhs"]["cat"])
            rule["lhs"]["cat"] = replacer["lhs"]["cat"]
        else:
            rhs = copy.deepcopy(rule["rhs"])
            new_rhs = rhs.replace(replacee["lhs"]["cat"], replacer["lhs"]["cat"])
            rule["rhs"] = new_rhs
            # print(rhs, "->", new_rhs)
        return rule

    def can_merge(self, rule1, rule2):
        rule1_cat = rule1["lhs"]["cat"]
        rule1_sem = rule1["lhs"]["sem"]
        rule1_rhs = rule1["rhs"]
        rule2_cat = rule2["lhs"]["cat"]
        rule2_sem = rule2["lhs"]["sem"]
        rule2_rhs = rule2["rhs"]
        if (len(rule1_rhs) == len(rule2_rhs) == 1) & (rule1_sem == rule2_sem) & (rule1_rhs == rule2_rhs) & (rule1_cat != rule2_cat):
            return True
        else:
            return False

    def merge(self):
        rules = self.rules
        invited = []
        replacer_cats = []
        for i in range(len(rules)):
            for j in range(i+1, len(rules)):
                if self.can_merge(rules[i], rules[j]):
                    # print(f"can merge {rules[i]} and {rules[j]}")
                    if (rules[i]["lhs"]["cat"] not in replacer_cats) and (rules[j]["lhs"]["cat"] not in replacer_cats):
                        indices = [i, j]
                        # print(indices)
                        random.shuffle(indices)
                        replacer_rule = copy.deepcopy(rules[indices[0]])
                        replacee_rule = copy.deepcopy(rules[indices[1]])
                        replacer_cats.append(replacer_rule["lhs"]["cat"])
                        # print("replacee_rule: ", replacee_rule)
                        # print("replacer_rule: ", replacer_rule)
                        rules = [self.repaint_rule(rule, replacee_rule, replacer_rule) for rule in rules]
                    elif (rules[i]["lhs"]["cat"] in replacer_cats) and (rules[j]["lhs"]["cat"] in replacer_cats):
                        continue
                    elif rules[i]["lhs"]["cat"] in replacer_cats:
                        replacer_rule = copy.deepcopy(rules[i])
                        replacee_rule = copy.deepcopy(rules[j])
                        # print("replacee_rule: ", replacee_rule)
                        # print("replacer_rule: ", replacer_rule)
                        rules = [self.repaint_rule(rule, replacee_rule, replacer_rule) for rule in rules]
                    else:
                        replacer_rule = copy.deepcopy(rules[j])
                        replacee_rule = copy.deepcopy(rules[i])
                        # print("replacee_rule: ", replacee_rule)
                        # print("replacer_rule: ", replacer_rule)
                        rules = [self.repaint_rule(rule, replacee_rule, replacer_rule) for rule in rules]
                else:
                    continue
        rules_unique = list(set([str(rule) for rule in rules]))
        self.rules = [self.str2dict(rule) for rule in rules_unique]

    def invent_wordrule(self, sem):
        random_category = random.choice(NONTERMINALS)
        print(sem)
        if str(sem).replace("_","") in NOUNS:
            rhs = generate_noun(5, ["x", "y"])
        if str(sem).replace("_","") in (VERBS+VERBS_PSS):
            rhs = generate_verb(5)
        self.add_rule(f"{random_category}/{str(sem)} -> {rhs}")
        return rhs

    def invent_holisticrule(self, sem, query_con):
        subj = generate_noun(5, ["x", "y"])
        obj = generate_noun(5, [subj, "x", "y"])
        verb = generate_verb(5)
        sentence = f"{subj}{verb}{obj}"
        self.add_rule(f"S/{sem}/{query_con} -> {sentence}")
        return sentence
    # TODO: test
    # TODO: 概念化0/1に対応する（現在0のみ）
    def generate(self, query_str, query_con, debug=False):
        rules = self.rules

        if debug:
            print("query: ", query_str)

        query = Expression.fromstring(query_str)
        query_elements = [query.args[0], query.pred, query.args[1]]

        sem_list = self.sem_list()
        sentence_list = self.sentence_list()

        if (query_str in sem_list) and (rules[sem_list.index(query_str)]['lhs']['con']==query_con):
            holistic_rule = rules[sem_list.index(query_str)]
            holistic_rule_rhs = holistic_rule['rhs']
            if debug:
                print("generated by a holictic rule: ", f"S/{str(query)}/{query_con} -> {holistic_rule_rhs}")
            return f"S/{str(query)}/{query_con} -> {holistic_rule_rhs}", "by-holistic-rule"
        else:
            for i, sem_str in enumerate(sem_list):
                sem = Expression.fromstring(sem_str)
                if isinstance(sem, ApplicationExpression):
                    rule = rules[i]
                    con = rule["lhs"]["con"]
                    sem_elements = [sem.args[0], sem.pred, sem.args[1]]
                    matches = [(i, (query_element,sem_element)) for i, (query_element,sem_element) in enumerate(zip(query_elements, sem_elements)) if (query_element==sem_element) & (isinstance(query_element, ConstantExpression) & isinstance(sem_element, ConstantExpression))]
                    slots = [(i, (query_element,sem_element)) for i, (query_element,sem_element) in enumerate(zip(query_elements, sem_elements)) if (query_element!=sem_element) & (not isinstance(sem_element, ConstantExpression))]
                    if (len(matches)==2) & (len(slots)==1) & (query_con==con):
                        sentence_with_slot = sentence_list[i]
                        slot_position = slots[0][0]
                        slot_sem = slots[0][1][0]
                        slot_var_sem = slots[0][1][1]
                        substitute_position = rule["var"][str(slot_var_sem)]
                        slot_category = sentence_with_slot[substitute_position]
                        word_rules = [rule for rule in rules if (rule['lhs']['cat']==slot_category) & (rule['lhs']['sem']==str(slot_sem))]
                        if len(word_rules) > 0:
                            selected_rule = self.str2dict(str(random.sample(word_rules, 1)[0]))
                            if substitute_position == 0:
                                generated_sentence = selected_rule['rhs'] + sentence_with_slot[1:]
                            if substitute_position == 1:
                                generated_sentence = sentence_with_slot[0] + selected_rule['rhs'] + sentence_with_slot[2]
                            if substitute_position == 2:
                                generated_sentence = sentence_with_slot[:2] + selected_rule['rhs']
                            return f"S/{str(query)}/{query_con} -> {generated_sentence}", "by-composition"
                        if len(word_rules) == 0:
                            invented_form = self.invent_wordrule(slot_sem)
                            if substitute_position == 0:
                                generated_sentence = invented_form + sentence_with_slot[1:]
                            if substitute_position == 1:
                                generated_sentence = sentence_with_slot[0] + invented_form + sentence_with_slot[2]
                            if substitute_position == 2:
                                generated_sentence = sentence_with_slot[:2] + invented_form
                            return f"S/{str(query)}/{query_con} -> {generated_sentence}", "by-word-invention"
                    if (len(matches)==1) & (len(slots)==2) & (query_con==con):
                        sentence_with_slot = sentence_list[i]
                        slot_position = slots[0][0]
                        slot_sem = slots[0][1][0]
                        slot_var_sem = slots[0][1][1]
                        substitute_position = rule["var"][str(slot_var_sem)]
                        slot_category = sentence_with_slot[substitute_position]
                        word_rules = [rule for rule in rules if (rule['lhs']['cat']==slot_category) & (rule['lhs']['sem']==str(slot_sem))]
                        if len(word_rules) > 0:
                            selected_rule = self.str2dict(str(random.sample(word_rules, 1)[0]))
                            if substitute_position == 0:
                                new_sentence_with_slot = selected_rule['rhs'] + sentence_with_slot[1:]
                            if substitute_position == 1:
                                new_sentence_with_slot = sentence_with_slot[0] + selected_rule['rhs'] + sentence_with_slot[2]
                            if substitute_position == 2:
                                new_sentence_with_slot = sentence_with_slot[:2] + selected_rule['rhs']
                            strategy = "by-composition"
                            print(strategy)
                        if len(word_rules) == 0:
                            invented_form = self.invent_wordrule(slot_sem)
                            if substitute_position == 0:
                                new_sentence_with_slot = invented_form + sentence_with_slot[1:]
                            if substitute_position == 1:
                                new_sentence_with_slot = sentence_with_slot[0] + invented_form + sentence_with_slot[2]
                            if substitute_position == 2:
                                new_sentence_with_slot = sentence_with_slot[:2] + invented_form
                            strategy = "by-word-invention"
                            print(strategy)
                        slot_position = slots[1][0]
                        slot_sem = slots[1][1][0]
                        slot_var_sem = slots[1][1][1]
                        substitute_position = rule["var"][str(slot_var_sem)]
                        slot_category = sentence_with_slot[substitute_position]
                        word_rules = [rule for rule in rules if (rule['lhs']['cat']==slot_category) & (rule['lhs']['sem']==str(slot_sem))]
                        if len(word_rules) > 0:
                            selected_rule = self.str2dict(str(random.sample(word_rules, 1)[0]))
                            if substitute_position == 0:
                                generated_sentence = selected_rule['rhs'] + new_sentence_with_slot[1:]
                            if substitute_position == 1:
                                generated_sentence = new_sentence_with_slot[0] + selected_rule['rhs'] + new_sentence_with_slot[2]
                            if substitute_position == 2:
                                generated_sentence = new_sentence_with_slot[:2] + selected_rule['rhs']
                            if strategy == "by-composition":
                                print(strategy)
                                return f"S/{str(query)}/{query_con} -> {generated_sentence}", strategy
                            else:
                                print(strategy)
                                return f"S/{str(query)}/{query_con} -> {generated_sentence}", strategy
                        if len(word_rules) == 0:
                            if strategy == "by-composition":
                                invented_form = self.invent_wordrule(slot_sem)
                                if substitute_position == 0:
                                    generated_sentence = invented_form + new_sentence_with_slot[1:]
                                if substitute_position == 1:
                                    generated_sentence = new_sentence_with_slot[0] + invented_form + sentence_with_slot[2]
                                if substitute_position == 2:
                                    generated_sentence = new_sentence_with_slot[:2] + invented_form
                                strategy = "by-word-invention"
                                return f"S/{str(query)}/{query_con} -> {generated_sentence}", strategy
                            else:
                                self.rules = self.rules[:-1]
                                generated_sentence = self.invent_holisticrule(query, query_con)
                                return f"S/{str(query)}/{query_con} -> {generated_sentence}", "by-holistic-invention"
                    if (len(matches)==0) & (len(slots)==3) & (query_con==con):
                        sentence_with_slot = sentence_list[i]
                        slot_position = slots[0][0]
                        slot_sem = slots[0][1][0]
                        slot_var_sem = slots[0][1][1]
                        substitute_position = rule["var"][str(slot_var_sem)]
                        slot_category = sentence_with_slot[substitute_position]
                        word_rules = [rule for rule in rules if (rule['lhs']['cat']==slot_category) & (rule['lhs']['sem']==str(slot_sem))]
                        if len(word_rules) > 0:
                            selected_rule = self.str2dict(str(random.sample(word_rules, 1)[0]))
                            if substitute_position == 0:
                                new_sentence_with_slot = selected_rule['rhs'] + sentence_with_slot[1:]
                            if substitute_position == 1:
                                new_sentence_with_slot = sentence_with_slot[0] + selected_rule['rhs'] + sentence_with_slot[2]
                            if substitute_position == 2:
                                new_sentence_with_slot = sentence_with_slot[:2] + selected_rule['rhs']
                            strategy = "by-composition"
                        if len(word_rules) == 0:
                            invented_form = self.invent_wordrule(slot_sem)
                            if substitute_position == 0:
                                new_sentence_with_slot = invented_form + sentence_with_slot[1:]
                            if substitute_position == 1:
                                new_sentence_with_slot = sentence_with_slot[0] + invented_form + sentence_with_slot[2]
                            if substitute_position == 2:
                                new_sentence_with_slot = sentence_with_slot[:2] + invented_form
                            strategy = "by-word-invention"
                        slot_sem = slots[1][1][0]
                        slot_var_sem = slots[1][1][1]
                        substitute_position = rule["var"][str(slot_var_sem)]
                        slot_category = sentence_with_slot[substitute_position]
                        word_rules = [rule for rule in rules if (rule['lhs']['cat']==slot_category) & (rule['lhs']['sem']==str(slot_sem))]
                        if len(word_rules) > 0:
                            selected_rule = self.str2dict(str(random.sample(word_rules, 1)[0]))
                            if substitute_position == 0:
                                new_sentence_with_slot = selected_rule['rhs'] + new_sentence_with_slot[1:]
                            if substitute_position == 1:
                                new_sentence_with_slot = new_sentence_with_slot[0] + selected_rule['rhs'] + new_sentence_with_slot[2]
                            if substitute_position == 2:
                                new_sentence_with_slot = new_sentence_with_slot[:2] + selected_rule['rhs']
                        if len(word_rules) == 0:
                            if strategy == "by-composition":
                                invented_form = self.invent_wordrule(slot_sem)
                                if substitute_position == 0:
                                    new_sentence_with_slot = invented_form + new_sentence_with_slot[1:]
                                if substitute_position == 1:
                                    new_sentence_with_slot = new_sentence_with_slot[0] + invented_form + new_sentence_with_slot[2]
                                if substitute_position == 2:
                                    new_sentence_with_slot = new_sentence_with_slot[:2] + invented_form
                                strategy = "by-word-invention"
                            else:
                                self.rules = self.rules[:-1]
                                generated_sentence = self.invent_holisticrule(query, query_con)
                                return f"S/{str(query)}/{query_con} -> {generated_sentence}", "by-holistic-invention"
                        slot_position = slots[2][0]
                        slot_sem = slots[2][1][0]
                        slot_var_sem = slots[2][1][1]
                        substitute_position = rule["var"][str(slot_var_sem)]
                        slot_category = sentence_with_slot[substitute_position]
                        word_rules = [rule for rule in rules if (rule['lhs']['cat']==slot_category) & (rule['lhs']['sem']==str(slot_sem))]
                        if len(word_rules) > 0:
                            selected_rule = self.str2dict(str(random.sample(word_rules, 1)[0]))
                            if substitute_position == 0:
                                generated_sentence = selected_rule['rhs'] + new_sentence_with_slot[1:]
                            if substitute_position == 1:
                                generated_sentence = new_sentence_with_slot[0] + selected_rule['rhs'] + new_sentence_with_slot[2]
                            if substitute_position == 2:
                                generated_sentence = new_sentence_with_slot[:2] + selected_rule['rhs']
                            return f"S/{str(query)}/{query_con} -> {generated_sentence}", strategy
                        if len(word_rules) == 0:
                            if strategy == "by-composition":
                                invented_form = self.invent_wordrule(slot_sem)
                                if substitute_position == 0:
                                    generated_sentence = invented_form + new_sentence_with_slot[1:]
                                if substitute_position == 1:
                                    generated_sentence = new_sentence_with_slot[0] + invented_form + new_sentence_with_slot[2]
                                if substitute_position == 2:
                                    generated_sentence = new_sentence_with_slot[:2] + invented_form
                                strategy = "by-word-invention"
                                return f"S/{str(query)}/{query_con} -> {generated_sentence}", strategy
                            else:
                                self.rules = self.rules[:-1]
                                generated_sentence = self.invent_holisticrule(query, query_con)
                                return f"S/{str(query)}/{query_con} -> {generated_sentence}", "by-holistic-invention"
                    else:
                        continue
                else:
                    continue
            generated_sentence = self.invent_holisticrule(query, query_con)
            return f"S/{str(query)}/{query_con} -> {generated_sentence}", "by-holistic-invention"

In [1042]:
grammar = Grammar()
grammar.from_string("S/_admire(_eve,x)/0 -> eαJ\t(x:2)\nS/X(x,y)/1 -> KVJ\t(x:2, y:0, X:1)\nS/X(x,y)/0 -> KVJ\t(x:0, y:2, X:1)\nK/_carol -> c\nJ/_john -> d\nS/_kicked(x,y)/1 -> CαJ\t(x:2, y:0)\nV/_kick -> α\nV/_kicked -> ζ")

In [1043]:
len(grammar.rules)

8

In [1044]:
gen, strategy = grammar.generate("_kicked(_john,_carol)", 1)
print(gen)
print(strategy) # == by-composition

S/_kicked(_john,_carol)/1 -> cζd
by-composition


In [1045]:
gen, strategy = grammar.generate("_admired(_john,_carol)", 1)
print(gen)
print(strategy) # == by-word-invention

_admired
S/_admired(_john,_carol)/1 -> cβd
by-word-invention


In [1046]:
gen, strategy = grammar.generate("_admire(_eve,_john)", 0)
print(gen)
print(strategy) # == by-composition

S/_admire(_eve,_john)/0 -> eαd
by-composition


In [1047]:
gen, strategy = grammar.generate("_admired(_eve,_bob)", 1)
print(gen)
print(strategy) # == by-holistic-invention

_eve
S/_admired(_eve,_bob)/1 -> aεc
by-holistic-invention


In [1048]:
gen, strategy = grammar.generate("_admired(_eve,_bob)", 1)
print(gen)
print(strategy) # == by-holistic-rule

S/_admired(_eve,_bob)/1 -> aεc
by-holistic-rule


In [1049]:
grammar = Grammar()
grammar.from_string("S/_admire(_eve,x)/0 -> eαJ\t(x:2)\nS/_admire(_david,x)/0 -> dαJ\t(x:2)\nF/_carol -> c\nS/_kicked(_david,_eve)/1 -> eθd")

In [1050]:
grammar.chunk01()

In [1051]:
print(grammar.to_string())

S/_admire(y,x)/0 -> MαJ	(x:2, y:0)
S/_kicked(_david,_eve)/1 -> eθd
F/_carol -> c
M/_david -> d
M/_eve -> e




In [1052]:
grammar = Grammar()
grammar.from_string("S/_admired(_eve,x)/1 -> Jαe\t(x:0)\nS/_admired(_david,x)/1 -> Jαd\t(x:0)")
grammar.chunk01()
print(grammar.to_string())

S/_admired(y,x)/1 -> JαX	(x:0, y:2)
X/_david -> d
X/_eve -> e




In [1053]:
grammar = Grammar()
grammar.from_string("S/_admired(_eve,_david)/1 -> dζe\nS/_kicked(_eve,_david)/1 -> dθe")
grammar.chunk01()
print(grammar.to_string())

S/X(_eve,_david)/1 -> dFe	(X:1)
F/_admired -> ζ
F/_kicked -> θ




In [1054]:
grammar = Grammar()
grammar.from_string("S/_admired(_eve,_david)/1 -> dζe\nS/_admired(_eve,x)/1 -> Dζe\t(x:0)")
grammar.chunk02()
print(grammar.to_string())

S/_admired(_eve,x)/1 -> Dζe	(x:0)
D/_david -> d




In [1055]:
grammar = Grammar()
grammar.from_string("S/X(_eve,_david)/1 -> dAe\t(X:1)\nS/X(_eve,x)/1 -> DAe\t(x:0, X:1)")
grammar.chunk02()
print(grammar.to_string())

S/X(_eve,x)/1 -> DAe	(x:0, X:1)
D/_david -> d




In [1056]:
grammar = Grammar()
grammar.from_string("S/_admired(_eve,_david)/1 -> dζe\nE/_eve -> e")
grammar.replace()
print(grammar.to_string())

S/_admired(x,_david)/1 -> dζE	(x:2)
E/_eve -> e




In [1057]:
grammar = Grammar()
grammar.from_string("S/_admired(x,_david)/1 -> dζE\t(x:2)\nD/_david -> d\nE/_eve -> e")
grammar.replace()
print(grammar.to_string())

S/_admired(x,y)/1 -> DζE	(x:2, y:0)
D/_david -> d
E/_eve -> e




In [1058]:
grammar = Grammar()
grammar.from_string("S/_admired(x,y)/1 -> DζE\t(x:2, y:0)\nD/_david -> d\nE/_eve -> e\nA/_admired -> ζ")
grammar.replace()
print(grammar.to_string())

S/X(x,y)/1 -> DAE	(x:2, y:0, X:1)
A/_admired -> ζ
D/_david -> d
E/_eve -> e




In [1059]:
OPERATIONS = ["chunk01", "chunk02", "merge", "replace"]

In [1060]:
_5verbs_sem = ["_kick", "_know", "_meet", "_like", "_admire"]
_5verbs_pss_sem = ["_kicked", "_known", "_met", "_liked", "_admired"]
_5nouns_sem = ["_alice", "_bob", "_carol", "_david", "_eve"]
_5nouns = ["a", "b", "c", "d", "e"]
_5verbs = ["α", "β", "γ", "δ", "ε"]
_5verbs_pss = ["ζ", "η", "θ", "ι", "κ"]

In [1061]:
# generate fully compositional utterances
with open("semantic_space.txt", "w") as f:
    for con in CONCEPTS:
        if con == 0:
            for verb, verb_sem in zip(_5verbs, _5verbs_sem):
                for subj, subj_sem in zip(_5nouns, _5nouns_sem):
                    for obj, obj_sem in zip(_5nouns, _5nouns_sem):
                        if subj != obj:
                            f.write(f"S/{verb_sem}({subj_sem},{obj_sem})/{con} -> {subj}{verb}{obj}\n")
                    else:
                        continue
        if con == 1:
            for verb, verb_sem in zip(_5verbs_pss, _5verbs_pss_sem):
                for subj, subj_sem in zip(_5nouns, _5nouns_sem):
                    for obj, obj_sem in zip(_5nouns, _5nouns_sem):
                        if subj != obj:
                            f.write(f"S/{verb_sem}({subj_sem},{obj_sem})/{con} -> {obj}{verb}{subj}\n")
                    else:
                        continue

In [1062]:
# whole semantic space / fully compositional utterances
with open("semantic_space.txt", "r") as f:
    semantic_space = f.read()[:-1]

In [1063]:
grammar = Grammar()
grammar.from_string(semantic_space)

In [1064]:
# test chunk01 on whole semantic space
grammar.chunk01()

In [1065]:
print(grammar.to_string())

S/_admired(x,_david)/1 -> dκX	(x:2)
S/_admired(_david,x)/1 -> Jκd	(x:0)
S/_admired(x,_carol)/1 -> cκO	(x:2)
S/_admired(_alice,x)/1 -> Pκa	(x:0)
S/_admired(_carol,x)/1 -> Pκc	(x:0)
S/_admired(_alice,x)/1 -> Qκa	(x:0)
S/_admired(x,_alice)/1 -> aκG	(x:2)
S/_admired(_david,x)/1 -> Wκd	(x:0)
S/_admired(x,_david)/1 -> dκV	(x:2)
S/_admired(_alice,x)/1 -> Zκa	(x:0)
S/_admired(x,_alice)/1 -> aκJ	(x:2)
S/_admired(x,_david)/1 -> dκT	(x:2)
S/_admired(_david,x)/1 -> Xκd	(x:0)
S/_admired(_carol,x)/1 -> Yκc	(x:0)
S/_admired(x,_alice)/1 -> aκD	(x:2)
S/_admired(x,_carol)/1 -> cκC	(x:2)
S/_admired(_alice,x)/1 -> Tκa	(x:0)
S/_admired(x,_alice)/1 -> aκZ	(x:2)
S/_admired(x,_carol)/1 -> cκN	(x:2)
S/_admired(_carol,x)/1 -> Mκc	(x:0)
S/_admired(_alice,x)/1 -> Kκa	(x:0)
S/_admired(x,_alice)/1 -> aκT	(x:2)
S/_admired(_carol,x)/1 -> Vκc	(x:0)
S/_admired(x,_david)/1 -> dκM	(x:2)
S/_admired(x,_alice)/1 -> aκC	(x:2)
S/_admired(x,_carol)/1 -> cκB	(x:2)
S/_admired(x,_carol)/1 -> cκJ	(x:2)
S/_admired(_david,x)/1 -> Aκ

In [1066]:
# generate all possible queries
query_space = []

for con in CONCEPTS:
    if con == 0:
        for verb_sem in _5verbs_sem:
            for subj_sem in _5nouns_sem:
                for obj_sem in _5nouns_sem:
                    if subj_sem != obj_sem:
                        query_space.append((f"{verb_sem}({subj_sem},{obj_sem})", con))
                    else:
                        continue
    if con == 1:
        for verb_pss_sem in _5verbs_pss_sem:
            for subj_sem in _5nouns_sem:
                for obj_sem in _5nouns_sem:
                    if subj_sem != obj_sem:
                        query_space.append((f"{verb_pss_sem}({obj_sem},{subj_sem})", con))
                    else:
                        continue

In [1067]:
query_space

[('_kick(_alice,_bob)', 0),
 ('_kick(_alice,_carol)', 0),
 ('_kick(_alice,_david)', 0),
 ('_kick(_alice,_eve)', 0),
 ('_kick(_bob,_alice)', 0),
 ('_kick(_bob,_carol)', 0),
 ('_kick(_bob,_david)', 0),
 ('_kick(_bob,_eve)', 0),
 ('_kick(_carol,_alice)', 0),
 ('_kick(_carol,_bob)', 0),
 ('_kick(_carol,_david)', 0),
 ('_kick(_carol,_eve)', 0),
 ('_kick(_david,_alice)', 0),
 ('_kick(_david,_bob)', 0),
 ('_kick(_david,_carol)', 0),
 ('_kick(_david,_eve)', 0),
 ('_kick(_eve,_alice)', 0),
 ('_kick(_eve,_bob)', 0),
 ('_kick(_eve,_carol)', 0),
 ('_kick(_eve,_david)', 0),
 ('_know(_alice,_bob)', 0),
 ('_know(_alice,_carol)', 0),
 ('_know(_alice,_david)', 0),
 ('_know(_alice,_eve)', 0),
 ('_know(_bob,_alice)', 0),
 ('_know(_bob,_carol)', 0),
 ('_know(_bob,_david)', 0),
 ('_know(_bob,_eve)', 0),
 ('_know(_carol,_alice)', 0),
 ('_know(_carol,_bob)', 0),
 ('_know(_carol,_david)', 0),
 ('_know(_carol,_eve)', 0),
 ('_know(_david,_alice)', 0),
 ('_know(_david,_bob)', 0),
 ('_know(_david,_carol)', 0),
 (

In [1068]:
def infer_concept(gold, p_correct):
    if random.random() < p_correct:
        return gold
    else:
        return 1 - gold

In [1069]:
P = 0.8

In [1072]:
grammar = Grammar()
grammar.from_string(semantic_space)
utterances = grammar.to_string().split("\n") [:-2]

N_GENS = 10
NUM_SAMPLES = 100

use_rules_only_hist = []
grammars = []

# N_GENS世代継承
# TODO: 各世代のlog
# TODO: <done>全クエリ生成させて、</done>表現度を計算・ボトルネックをサンプリング
for i in tqdm(range(N_GENS)):
    last_generation = Grammar()
    for utterance in utterances:
        # print(utterance)
        utt_split = utterance.split(" ")
        lhs = utt_split[0].split("/")
        rhs = utt_split[2]
        lhs_cat = lhs[0]
        lhs_sem = lhs[1]
        lhs_con_gold = int(lhs[2])
        infered_con = infer_concept(lhs_con_gold, P)
        print(f"gold: {lhs_con_gold}", "infered: ", infered_con)
        infered_utt = f"S/{lhs_sem}/{infered_con} -> {rhs}"
        last_generation.add_rule(str(infered_utt))
        operation = random.sample(OPERATIONS, 1)[0]
        print(infered_utt)
        print(operation)
        # print("Before: ", last_generation.to_string())
        if len(last_generation.rules) > 1:
            if operation == "chunk01":
                last_generation.chunk01()
            if operation == "chunk02":
                last_generation.chunk02()
            if operation == "merge":
                last_generation.merge()
            if operation == "replace":
                last_generation.chunk01()
        # print("After: ", last_generation.to_string())
        grammars.append(last_generation.rules)

    if i != (N_GENS - 1):
        utterances = []
        use_rules_only = 0
        use_invention = 0
        for query, query_con in query_space:
            utterance, strategy = last_generation.generate(query, query_con) # generate all
            if (strategy == "by-composition") or (strategy == "by-holistic-rule"):
                use_rules_only += 1
            else:
                use_invention += 1
            print(strategy)
            utterances.append(utterance)
        use_rules_only_hist.append(use_rules_only)
        utterances = random.sample(utterances, NUM_SAMPLES) # sample 50 instances

print(last_generation.to_string())

  0%|          | 0/10 [00:00<?, ?it/s]

gold: 1 infered:  1
S/_admired(_alice,_carol)/1 -> cκa
replace
gold: 1 infered:  0
S/_admired(_alice,_david)/0 -> dκa
chunk02
gold: 1 infered:  1
S/_admired(_carol,_alice)/1 -> aκc
merge
gold: 1 infered:  1
S/_admired(_carol,_david)/1 -> dκc
merge
gold: 1 infered:  1
S/_admired(_david,_alice)/1 -> aκd
chunk02
gold: 1 infered:  0
S/_admired(_david,_carol)/0 -> cκd
chunk02
gold: 0 infered:  0
S/_admire(_alice,_carol)/0 -> aεc
merge
gold: 0 infered:  0
S/_admire(_alice,_david)/0 -> aεd
replace
gold: 0 infered:  1
S/_admire(_carol,_alice)/1 -> cεa
chunk01
gold: 0 infered:  0
S/_admire(_carol,_david)/0 -> cεd
chunk01
gold: 0 infered:  0
S/_admire(_david,_alice)/0 -> dεa
chunk02
gold: 0 infered:  0
S/_admire(_david,_carol)/0 -> dεc
chunk02
gold: 1 infered:  1
S/_kicked(_alice,_carol)/1 -> cζa
chunk01
gold: 1 infered:  1
S/_kicked(_alice,_david)/1 -> dζa
replace
gold: 1 infered:  1
S/_kicked(_carol,_alice)/1 -> aζc
merge
gold: 1 infered:  1
S/_kicked(_carol,_david)/1 -> dζc
merge
gold: 1 infe

 10%|█         | 1/10 [00:21<03:15, 21.69s/it]

gold: 0 infered:  0
S/_know(_eve,_bob)/0 -> eβb
merge
gold: 1 infered:  1
S/_admired(_carol,_eve)/1 -> eκc
merge
gold: 1 infered:  1
S/_known(_eve,_carol)/1 -> cηe
merge
gold: 1 infered:  0
S/_kicked(_alice,_david)/0 -> dζa
chunk01
gold: 1 infered:  1
S/_kicked(_david,_alice)/1 -> aζd
replace
gold: 0 infered:  0
S/_meet(_carol,_eve)/0 -> cγe
merge
gold: 1 infered:  1
S/_kicked(_bob,_david)/1 -> dζb
replace
gold: 0 infered:  0
S/_meet(_alice,_david)/0 -> aγd
chunk01
gold: 1 infered:  0
S/_liked(_bob,_alice)/0 -> aιb
merge
gold: 1 infered:  1
S/_admired(_david,_eve)/1 -> eκd
replace
gold: 0 infered:  0
S/_admire(_bob,_david)/0 -> bεd
replace
gold: 0 infered:  0
S/_like(_david,_alice)/0 -> dδa
replace
gold: 0 infered:  1
S/_know(_alice,_carol)/1 -> aβc
merge
gold: 1 infered:  1
S/_liked(_bob,_carol)/1 -> cιb
replace
gold: 0 infered:  0
S/_kick(_david,_bob)/0 -> dαb
chunk01
gold: 1 infered:  1
S/_known(_david,_carol)/1 -> cηd
merge
gold: 1 infered:  1
S/_liked(_bob,_eve)/1 -> bγe
merge
gol

 20%|██        | 2/10 [00:31<01:58, 14.85s/it]

gold: 0 infered:  0
S/_admire(_carol,_alice)/0 -> cεa
merge
gold: 1 infered:  1
S/_kicked(_david,_carol)/1 -> cζd
merge
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-holistic-rule
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-compo

 30%|███       | 3/10 [00:41<01:27, 12.49s/it]

by-composition
by-composition
by-composition
by-composition
by-holistic-rule
by-composition
by-composition
by-composition
by-composition
by-composition
by-holistic-rule
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-holistic-rule
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-holistic-rule
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by

 40%|████      | 4/10 [00:51<01:08, 11.36s/it]

by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-holistic-rule
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
gold: 0 infered:  1
S/_know(_alice,_david)/1 -> aβd
chunk02
gold: 0 infered:  0
S/_meet(_bob,_david)/0 -> bγd
chunk02
gold: 0 infered:  0
S/_admire(_carol,_david)/0 -> cεd
merge
gold: 1 infered:  1
S/_liked(_eve,_carol)/1 -> cιe
chunk01
gold: 1 infered:  1
S/_met(_carol,_eve)/1 -> eθc
chunk02
gold: 0 infered:  0
S/_know(_david,_carol)/0 -> dβc
replace
gold: 1 infered:  1
S/_met(_eve,_david)/1 -> dθe
merge
gold: 0 infered:  0
S/_kick(_carol,_alice)/0 -> cαa
replace
gold: 0 infered:  0
S/_like(_alice,_david)/0 -> aδd
chunk01
gold: 0 infered:  0
S/_kick(_david,_bob)/0 -> dαb
chunk02
gold: 0 infered:  0
S/_like(_bob,_carol)/0 -> bδc
merge
gold: 1 infered:  1
S/_met(_bob,_david)/1 -> dθb
chunk02
gold: 0 infer

 50%|█████     | 5/10 [01:01<00:55, 11.12s/it]

by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
_carol
by-word-invention
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
_eve
by-word-invention
_bob
by-word-invention
by-word-invention
by-word-invention
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-holistic-rule
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-holistic-rule
by-composition
by-holistic-rule
_bob
by-word-invention
by-word-invention
by-word-invention
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
gol

 60%|██████    | 6/10 [01:14<00:46, 11.60s/it]

by-composition
by-composition
_alice
by-word-invention
_bob
by-word-invention
by-composition
by-composition
by-composition
_bob
by-word-invention
_carol
by-word-invention
by-composition
_alice
by-word-invention
_bob
by-word-invention
_carol
by-word-invention
by-composition
by-composition
_carol
by-word-invention
by-composition
by-composition
_alice
by-word-invention
_carol
by-word-invention
by-composition
by-composition
_alice
by-word-invention
by-composition
by-composition
by-composition
by-composition
by-composition
_carol
by-word-invention
by-composition
_alice
by-word-invention
by-composition
_carol
by-word-invention
by-composition
by-composition
by-composition
by-composition
by-composition
_alice
by-word-invention
by-composition
by-composition
by-holistic-rule
_alice
by-word-invention
by-composition
by-holistic-rule
by-composition
by-composition
by-composition
by-composition
by-composition
by-holistic-rule
by-holistic-rule
by-composition
by-composition
by-composition
by-compositio

 70%|███████   | 7/10 [01:30<00:38, 12.95s/it]

_liked
by-word-invention
by-holistic-invention
by-composition
by-composition
by-composition
by-holistic-rule
by-holistic-rule
by-holistic-invention
by-composition
by-holistic-invention
by-holistic-invention
by-holistic-invention
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-holistic-invention
by-holistic-invention
by-holistic-invention
by-composition
by-composition
by-composition
by-composition
_admired
by-word-invention
by-holistic-invention
by-holistic-rule
by-holistic-rule
by-holistic-rule
by-holistic-invention
by-composition
gold: 1 infered:  1
S/_met(_bob,_david)/1 -> bβd
chunk01
gold: 0 infered:  0
S/_know(_david,_eve)/0 -> dβe
replace
gold: 1 infered:  1
S/_liked(_alice,_david)/1 -> aιd
replace
gold: 1 infered:  1
S/_liked(_bob,_alice)/1 -> cαe
chunk01
gold: 0 infered:  0
S/_kick(_david,_eve)/0 -> aα

 80%|████████  | 8/10 [01:46<00:28, 14.23s/it]

by-holistic-invention
by-holistic-rule
by-holistic-invention
by-composition
by-holistic-invention
by-holistic-rule
by-holistic-rule
by-composition
by-composition
by-composition
by-composition
by-holistic-rule
by-holistic-invention
by-composition
by-composition
by-composition
by-composition
by-holistic-rule
by-holistic-rule
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-holistic-invention
by-holistic-invention
by-holistic-invention
by-composition
gold: 1 infered:  1
S/_kicked(_david,_bob)/1 -> bβa
replace
gold: 0 infered:  0
S/_know(_carol,_eve)/0 -> aβe
chunk01
gold: 1 infered:  1
S/_admired(_bob,_carol)/1 -> bεd
merge
gold: 1 infered:  1
S/_liked(_david,_eve)/1 -> eιd
merge
gold: 0 infered:  1
S/_meet(_bob,_eve)/1 -> dγe
chunk02
gold: 0 infered:  0
S/_admire(_alice,_eve)/0 -> aδe
chunk01
gold: 0 infered:  0

 90%|█████████ | 9/10 [02:07<00:16, 16.08s/it]

by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-composition
by-holistic-rule
by-holistic-rule
by-holistic-invention
by-holistic-rule
gold: 0 infered:  0
S/_know(_carol,_alice)/0 -> aβd
merge
gold: 0 infered:  0
S/_meet(_eve,_alice)/0 -> eγd
replace
gold: 0 infered:  0
S/_kick(_eve,_alice)/0 -> eβa
merge
gold: 1 infered:  0
S/_admired(_david,_alice)/0 -> cδe
chunk02
gold: 0 infered:  0
S/_know(_david,_bob)/0 -> cαa
chunk01
gold: 0 infered:  0
S/_meet(_carol,_alice)/0 -> aγd
chunk02
gold: 1 infered:  1
S/_admired(_david,_eve)/1 -> eβd
chunk01
gold: 0 infered:  0
S/_meet(_alice,_carol)/0 -> aγa
merge
gold: 1 infered:  1
S/_met(_bob,_alice)/1 -> bδe
merge
gold: 0 infered:  0
S/_know(_alice,_eve)/0 -> aαe
chunk01
gold: 0 infered:  0
S/_kick(_david,_bob)/0 -> aαe
merge
gold: 1 infered:  1
S/_met(_eve,_david)/1 -> aβe
merge
gold: 1 infered:  1
S/_kicked(_eve,_carol)/1 -> aζe
chunk01
gold: 1 infered:  1
S

100%|██████████| 10/10 [02:24<00:00, 14.42s/it]

S/_admire(x,_carol)/0 -> Rδa	(x:0)
S/X(x,_david)/1 -> bQR	(x:2, X:1)
S/_kicked(_carol,_david)/0 -> aγe
S/X(_alice,_carol)/1 -> bQa	(X:1)
S/_liked(x,_alice)/1 -> cαR	(x:2)
S/X(x,_alice)/0 -> RQd	(X:1, x:0)
S/X(_alice,x)/0 -> aQR	(X:1, x:2)
S/X(_david,_alice)/0 -> cRe	(X:1)
S/_kicked(x,_eve)/1 -> eζR	(x:2)
S/_admired(_david,_eve)/1 -> eβd
S/_admired(_bob,_david)/1 -> bκa
S/_liked(_david,_alice)/0 -> aδc
S/_admired(_david,_bob)/0 -> eδb
S/_admired(_eve,_carol)/1 -> cκe
S/_known(_carol,_alice)/1 -> cγb
S/_liked(_carol,_david)/1 -> bιa
S/_admired(_carol,_eve)/1 -> dεb
S/X(_alice,_bob)/1 -> bDe	(X:1)
S/_admire(_bob,_alice)/0 -> bδd
S/_admire(_carol,_eve)/0 -> bδd
S/_kicked(_bob,_david)/0 -> bζa
S/_admire(_carol,_bob)/0 -> cεb
S/X(_bob,_carol)/0 -> cQa	(X:1)
S/_admire(_alice,_bob)/0 -> dβb
S/X(_eve,_david)/1 -> aQe	(X:1)
S/X(_bob,_alice)/1 -> dQa	(X:1)
S/_kick(_carol,_alice)/0 -> aαa
S/_know(_david,_alice)/1 -> bεd
S/_kicked(_bob,_alice)/1 -> cβe
S/_meet(_carol,_david)/0 -> dδa
S/_kick(_alice




In [1073]:
use_rules_only_hist

[189, 200, 200, 190, 188, 137, 171, 165, 107]

In [1074]:
grammars

[[{'lhs': {'cat': 'S', 'sem': '_admired(_alice,_carol)', 'con': 1},
   'rhs': 'cκa'},
  {'lhs': {'cat': 'S', 'sem': '_admired(_alice,_david)', 'con': 0},
   'rhs': 'dκa'}],
 [{'lhs': {'cat': 'S', 'sem': '_admired(_alice,_david)', 'con': 0},
   'rhs': 'dκa'},
  {'lhs': {'cat': 'S', 'sem': '_admired(_alice,_carol)', 'con': 1},
   'rhs': 'cκa'},
  {'lhs': {'cat': 'S', 'sem': '_admired(_carol,_alice)', 'con': 1},
   'rhs': 'aκc'}],
 [{'lhs': {'cat': 'S', 'sem': '_admired(_carol,_alice)', 'con': 1},
   'rhs': 'aκc'},
  {'lhs': {'cat': 'S', 'sem': '_admired(_alice,_david)', 'con': 0},
   'rhs': 'dκa'},
  {'lhs': {'cat': 'S', 'sem': '_admired(_alice,_carol)', 'con': 1},
   'rhs': 'cκa'},
  {'lhs': {'cat': 'S', 'sem': '_admired(_carol,_david)', 'con': 1},
   'rhs': 'dκc'}],
 [{'lhs': {'cat': 'S', 'sem': '_admired(_carol,_david)', 'con': 1},
   'rhs': 'dκc'},
  {'lhs': {'cat': 'S', 'sem': '_admired(_carol,_alice)', 'con': 1},
   'rhs': 'aκc'},
  {'lhs': {'cat': 'S', 'sem': '_admired(_alice,_dav