# Tableau Decompose

In [None]:
import pandas as pd
import os
from tqdm import tqdm
from utils import *

In [None]:
import json  # explicit import: previously only reachable through `from utils import *`

# Load the notebook configuration. JSON text is UTF-8, so the encoding is
# given explicitly instead of relying on the platform/locale default.
with open("./config.json", "r", encoding="utf-8") as fp:
    config = json.load(fp)

In [None]:
# Value of the "data_dir_path" key of config.json (presumably the directory holding the data files).
DATA_DIR_PATH = config["data_dir_path"]

In [None]:
# Parse a sample sentence and print the resulting tree as pretty-printed XML.
sentence = "Alice didn't run, Bob got an email and Smith replied it."
tree = sentence2tree(sentence)
pretty_xml = ET.tostring(tree, encoding="utf-8", pretty_print=True)
print(pretty_xml.decode())


In [None]:
import hashlib

class TabEntry:
    """A signed sentence stored in a tableau.

    Attributes:
        sign: truth sign of the entry (rendered as "T" when equal to True, else "F").
        tree: parsed tree of the sentence (built with sentence2tree unless given).
        origin: which original entry this one derives from ("P", "H", "PH" or None).
        exist_eq_entries: whether an equivalent entry exists in the tableau.
    """

    def __init__(self, sign, sentence=None, tree=None, origin=None, exist_eq_entries=False):
        """Create an entry from an existing tree or by parsing ``sentence``.

        Raises:
            ValueError: if neither ``sentence`` nor ``tree`` is supplied.
        """
        self.sign = sign
        if tree is not None:
            self.tree = tree
        elif sentence is not None:
            self.tree = sentence2tree(sentence)
        else:
            raise ValueError("sentence or tree must be exist.")
        self.origin = origin
        self.exist_eq_entries = exist_eq_entries

    def sentence(self, token="word", sep=None):
        """Return the tokens of the tree joined by ``sep`` (a single space when None)."""
        joiner = " " if sep is None else sep
        return joiner.join(tree2tokenlist(self.tree, token))

    def __repr__(self):
        return "[{}: {}]{}{}".format("T" if self.sign == True else "F",
                                     self.sentence(),
                                     self.origin if self.origin is not None else "",
                                     "*" if self.exist_eq_entries else "")

    def __eq__(self, other):
        """Entries are equal when both their sign and their sentence text match."""
        if not isinstance(other, TabEntry):
            return NotImplemented
        return self.sign == other.sign and self.sentence() == other.sentence()

    def __hash__(self):
        # Consistent with __eq__: derived only from the sentence text and the sign.
        return int(hashlib.md5(self.sentence().encode()).hexdigest(), 16) * 2 + int(self.sign)

    def pretty_print(self):
        """Return the tree serialized as pretty-printed XML text."""
        return ET.tostring(self.tree, encoding="utf-8", pretty_print=True).decode()

    def dump(self):
        """Return a plain-dict representation that ``load`` can read back."""
        return {
            "sign": self.sign,
            "tree": ET.tostring(self.tree, encoding="utf-8").decode(),
            "sentence": self.sentence(),
            "origin": self.origin,
            "exist_eq_entries": self.exist_eq_entries
        }

    @classmethod
    def load(cls, data):
        """Rebuild an entry from the dict produced by ``dump``.

        The stored "sentence" text is not used; the tree is re-parsed from the
        stored XML. Uses ``cls`` so that subclasses load as their own type.
        """
        return cls(data["sign"],
                   tree=ET.fromstring(data["tree"]),
                   origin=data["origin"],
                   exist_eq_entries=data["exist_eq_entries"])

# Round-trip an entry through dump()/load(): the clone compares equal and
# collapses with the original inside a set.
tab_entry = TabEntry(True, "She isn’t beautiful.")
print(tab_entry)
tab_entry_clone = TabEntry.load(tab_entry.dump())
print(tab_entry_clone)
print(tab_entry == tab_entry_clone)
entry_set = {tab_entry, tab_entry_clone}
print(entry_set)

# After flipping the sign the two entries differ, so the set keeps both.
tab_entry_clone.sign = False
print(tab_entry == tab_entry_clone)
entry_set = {tab_entry, tab_entry_clone}
print(entry_set)

In [None]:
class TabNode:
    """A node of the tableau tree: a list of entries plus child nodes (branches).

    Attributes:
        parent_node: the parent TabNode, or None for the root.
        child_nodes: list of child TabNode objects.
        entries: list of TabEntry objects held by this node.
    """

    def __init__(self, entries=None, parent_node=None):
        self.parent_node = parent_node
        self.child_nodes = []
        self.entries = []
        if entries is not None:
            self.append_entries(entries)

    def append_entry(self, entry):
        """Append one TabEntry to this node; raise TypeError for anything else."""
        if not isinstance(entry, TabEntry):
            raise TypeError("expected TabEntry, but got "+str(type(entry)))
        self.entries.append(entry)

    def append_entries(self, entries):
        """Append every entry of the list ``entries`` to this node."""
        if not isinstance(entries, list):
            raise TypeError("expected list, but got "+str(type(entries)))
        for entry in entries:
            self.append_entry(entry)

    def append_branches(self, branches, origin=None):
        """Add rule output (a list of branches, each a list of entries) below this node.

        Entries already present on the path from the root to this node are
        dropped, and branches that become empty are discarded. The remaining
        entries get ``origin`` assigned and ``exist_eq_entries`` reset.
        A single remaining branch is appended to this node itself; several
        branches become new child nodes, or are forwarded to every existing
        child when this node has already branched.

        Returns:
            The number of entries that survived the filtering at this node.
        """
        if not isinstance(branches, list):
            raise TypeError("expected list, but got "+str(type(branches)))

        known_entries = self.ancestor_entries(include_self=True)
        fresh_branches = []
        for branch in branches:
            fresh = [entry for entry in branch if entry not in known_entries]
            for entry in fresh:
                entry.origin = origin
                entry.exist_eq_entries = False
            if fresh:
                fresh_branches.append(fresh)

        if not fresh_branches:
            # Nothing new: skip the (no-op) recursion into the children.
            return 0
        entry_count = sum(len(branch) for branch in fresh_branches)

        # NOTE(review): when all but one branch is filtered out, the survivor is
        # appended here as a non-branching result — confirm this is intended.
        if len(fresh_branches) == 1:
            self.append_entries(fresh_branches[0])
        elif not self.child_nodes:
            for entries in fresh_branches:
                self.child_nodes.append(type(self)(entries=entries, parent_node=self))
        else:
            for child_node in self.child_nodes:
                child_node.append_branches(fresh_branches, origin)
        return entry_count

    def ancestor_entries(self, include_self=False):
        """Return the entries of all ancestors, root first, optionally followed by this node's own."""
        if self.parent_node is None:
            collected = []
        else:
            collected = self.parent_node.ancestor_entries(include_self=True)
        if include_self:
            collected.extend(self.entries)
        return collected

    def __repr__(self):
        # First line: this node's entries; then every child rendered recursively
        # with box-drawing prefixes.
        lines = [" ".join(str(entry) for entry in self.entries)]
        last = len(self.child_nodes) - 1
        for i, child_node in enumerate(self.child_nodes):
            head, body = ("└", "  ") if i == last else ("├", "│")
            for j, line in enumerate(str(child_node).split("\n")):
                lines.append((head if j == 0 else body) + line)
        return "\n".join(lines).rstrip()

    def dump(self):
        """Return a nested plain-dict representation that ``load`` can read back."""
        return {
            "entries": [entry.dump() for entry in self.entries],
            "child_nodes": [child_node.dump() for child_node in self.child_nodes]
        }

    @classmethod
    def load(cls, data, parent_node=None):
        """Rebuild a node (and, recursively, its children) from ``dump`` output.

        Uses ``cls`` so that subclasses load as their own type.
        """
        tab_node = cls(entries=[TabEntry.load(entry) for entry in data["entries"]], parent_node=parent_node)
        tab_node.child_nodes = [cls.load(child_node, parent_node=tab_node) for child_node in data["child_nodes"]]
        return tab_node

# Build a node from a premise/hypothesis pair, branch it twice, then print the
# tree and its dump()/load() round trip.
premise = TabEntry(True, "Alice or Bob got an email.", origin="P")
hypothesis = TabEntry(True, "Alice or Bob got a text.", origin="H")
tab_node = TabNode([premise, hypothesis])
for branch_origin, branch_sentences in (
    ("P", ["Alice got an email.", "Bob got an email."]),
    ("H", ["Alice got a text.", "Bob got a text."]),
):
    tab_node.append_branches(
        [[TabEntry(True, text)] for text in branch_sentences],
        origin=branch_origin,
    )
print(tab_node)
print(TabNode.load(tab_node.dump()))

In [None]:
import copy

class Tableau:
    """Tableau whose nodes are expanded by applying registered rule functions.

    Attributes:
        root: TabNode holding the initial entries.
        rule_table: rule_table[k] lists the names of the rules taking k + 1 entries.
        eq_rules: names of the rules registered with the "EQ" flag.
        initial_size: number of entries the tableau was created with.
    """

    def __init__(self, initial_entries):
        self.root = TabNode(entries=initial_entries)
        self.rule_table = []
        self.eq_rules = set()
        self.initial_size = len(initial_entries)

    def append_rule(self, rule):
        """Register a rule given as ``(arity, function_name, "EQ" | "NEQ")``.

        Raises:
            TypeError: if the tuple or one of its fields has the wrong type, or
                the third field is neither "EQ" nor "NEQ".
            ValueError: if the arity is smaller than 1.
        """
        if type(rule) is not tuple:
            raise TypeError("rule expected tupple, but got "+str(type(rule)))
        if type(rule[0]) is not int:
            raise TypeError("rule[0] expected int, but got "+str(type(rule[0])))
        if rule[0] < 1:
            # The check accepts 1, so the message must say "at least 1".
            raise ValueError("rule[0] must be at least 1, but got "+str(rule[0]))
        if type(rule[1]) is not str:
            raise TypeError("rule[1] expected str, but got "+str(type(rule[1])))
        if rule[2] not in ["EQ", "NEQ"]:
            raise TypeError("rule[2] expected EQ or NEQ, but got "+str(rule[2]))

        arity, rule_name, flag = rule[0], rule[1], rule[2]
        # Grow the table so that index arity - 1 exists.
        while len(self.rule_table) < arity:
            self.rule_table.append([])
        self.rule_table[arity - 1].append(rule_name)

        if flag == "EQ":
            self.eq_rules.add(rule_name)

    def append_rules(self, rules):
        """Register every rule tuple of the iterable ``rules``."""
        for rule in rules:
            self.append_rule(rule)

    def decompose(self):
        """Apply the registered rules to every node, breadth-first from the root.

        For each entry of a node, every rule of arity k is tried on all
        k-length sequences (see ``seq_with_rep``) built from the entries seen
        so far on the path and containing the current entry. Rules receive deep
        copies; a non-None result is attached through ``TabNode.append_branches``.
        Entries appended to the node while it is being processed are visited too.
        """
        target_nodes = [self.root]
        while target_nodes:
            target_node = target_nodes.pop(0)
            ancestor_entries = target_node.ancestor_entries()

            index = 0
            while index < len(target_node.entries):
                current = target_node.entries[index]
                preceding = ancestor_entries + target_node.entries[:index]
                for arity, rules in enumerate(self.rule_table, start=1):
                    if not rules:
                        continue
                    sequences = self.seq_with_rep(arity, preceding, current)
                    for rule_name in rules:
                        # SECURITY: eval() resolves the rule by name; rule names
                        # must only ever come from trusted code.
                        rule_func = eval(rule_name)
                        for sequence in sequences:
                            try:
                                result = rule_func([copy.deepcopy(entry) for entry in sequence])
                                if result is not None:
                                    if rule_name in self.eq_rules:
                                        for entry in sequence:
                                            entry.exist_eq_entries = True
                                    target_node.append_branches(result, origin=self.get_origin_from_sequence(sequence))
                            except Exception:
                                # Best effort: a rule that fails on a sequence is
                                # treated as "not applicable".
                                pass
                index += 1

            # Fix: queue the children so that entries created in branches are
            # decomposed as well (previously only the root was ever processed).
            target_nodes.extend(target_node.child_nodes)

    def __repr__(self):
        return str(self.root)

    def size(self):
        """Return the number of distinct entries over all nodes of the tableau."""
        seen = set()
        pending = [self.root]
        while pending:
            node = pending.pop()
            seen.update(node.entries)
            pending.extend(node.child_nodes)
        return len(seen)

    def dump(self):
        """Return a plain-dict summary: serialized root, size and initial size."""
        return {
            "root": self.root.dump(),
            "size": self.size(),
            "initial_size": self.initial_size
        }

    @classmethod
    def seq_with_rep(cls, n, entries, include_entry=None, including=False):
        """Return all length-``n`` sequences (with repetition) over the entries.

        When ``include_entry`` is given, candidates are ``entries`` plus
        ``include_entry`` and only sequences containing ``include_entry`` at
        least once are produced. ``including`` is internal recursion state: it
        records that ``include_entry`` already occurs in the prefix.
        """
        if n == 1:
            if include_entry is None:
                # Fix: never emit a bogus [None] sequence.
                return [[entry] for entry in entries]
            if including == False:
                return [[include_entry]]
            return [[entry] for entry in entries + [include_entry]]

        retval = []
        for entry in entries:
            for sequence in cls.seq_with_rep(n-1, entries, include_entry, including):
                retval.append([entry] + sequence)

        if include_entry is not None:
            for sequence in cls.seq_with_rep(n-1, entries, include_entry, True):
                retval.append([include_entry] + sequence)
        return retval

    @classmethod
    def get_origin_from_sequence(cls, sequence):
        """Combine the origins of a sequence: "PH" if mixed, else "P", "H" or None."""
        exist_P = False
        exist_H = False
        for entry in sequence:
            if entry.origin == "PH":
                return "PH"
            if entry.origin == "P":
                exist_P = True
            elif entry.origin == "H":
                exist_H = True
        if exist_P and exist_H:
            return "PH"
        if exist_P:
            return "P"
        if exist_H:
            return "H"
        return None

# Smoke test: a tableau with one premise and one hypothesis and no rules yet.
initial_entries = [
    TabEntry(True, "Alice or Bob got an email.", origin="P"),
    TabEntry(False, "Alice or Bob got a text.", origin="H"),
]
tableau = Tableau(initial_entries)
tableau.decompose()
tableau

# Define Utility Functions for Rule Definition

In [None]:
def test_rule(rule, antecedents):
    """Decompose a fresh tableau of ``antecedents`` with the single ``rule`` and print it."""
    scratch = Tableau(antecedents)
    scratch.append_rule(rule)
    scratch.decompose()
    print(scratch)

In [None]:
def search_in_bros(callback, dt):
    """Return the index of the first child of ``dt`` matching ``callback``, or -1.

    A child tagged "dt" matches when ``callback(child) == True``; a child
    tagged "pw" matches when ``callback(dt) == True`` (the predicate is tested
    on the parent itself). A callback that raises on a child counts as
    "no match" for that child.

    For a node without children the result is 0 when ``callback(dt) == True``
    or when the callback raises, and -1 otherwise.
    """
    if len(dt) == 0:
        try:
            return 0 if callback(dt) == True else -1
        except Exception:
            # NOTE(review): a failing callback is reported as a match here but as
            # "no match" in the loop below — confirm this asymmetry is intended.
            return 0

    for i, child in enumerate(dt):
        try:
            if (child.tag == "dt" and callback(child) == True) or \
                    (child.tag == "pw" and callback(dt) == True):
                return i
        except Exception:
            # e.g. a missing attribute: treat this child as not matching.
            continue
    return -1

def pw_index(dt):
    """Return the index of the first child of ``dt`` tagged "pw", or -1 if there is none.

    Previously a node with children but no "pw" child fell off the end of the
    function and returned None; it now returns -1 like the childless case.
    """
    for i, child in enumerate(dt):
        if child.tag == "pw":
            return i
    return -1

def include_negation(dt):
    """Return the search_in_bros index of a negation word under ``dt``, or -1.

    The negation lemmas looked for are not (n't), never, no and neither.
    """
    def is_negative(node):
        return node.attrib["lemma"] in ("not", "never", "no", "neither")

    return search_in_bros(is_negative, dt)

def include_sentence(dt):
    """Return the search_in_bros index of an embedded clause under ``dt``, or -1.

    A node counts as an embedded clause when its dependency is "conj", its
    UPOS is "VERB" and search_in_bros finds an "nsubj" dependency inside it.
    """
    def has_subject(node):
        return node.attrib["dependency"] == "nsubj"

    def is_clause(node):
        if node.attrib["dependency"] != "conj":
            return False
        if node.attrib["upos"] != "VERB":
            return False
        return search_in_bros(has_subject, node) != -1

    return search_in_bros(is_clause, dt)

def remove_all_children(dt):
    """Detach every direct child from ``dt`` and return ``dt`` itself."""
    # Iterate over a snapshot: removing children while iterating the element
    # itself can skip siblings (it does with xml.etree.ElementTree).
    for sub_dt in list(dt):
        dt.remove(sub_dt)
    return dt

# List of Rules

In [None]:
# Collects the rule tuples; each rule cell below registers itself with rule_set.add(rule).
rule_set = set()

## S1 and S2

In [None]:
# Rule tuple in the shape Tableau.append_rule validates: (arity, function name, "EQ"/"NEQ").
rule = (1, "TRUE_S_AND_S", "EQ")
def TRUE_S_AND_S(entries):
    """Split a True-signed coordinated sentence ("S1, S2 and S3") into its clauses.

    Works on ``entries[0]`` and rewrites its tree in place (Tableau.decompose
    passes deep copies). Returns ``[[entry, clause_entry, ...]]`` — a single
    branch holding the reduced main clause plus one entry per conjoined clause.
    Returns None when the sign is not True, the root's upos is not "VERB", the
    expected child pattern is missing, or no "and" was found. Indexing errors
    (e.g. no "pw" child at all) are not caught here; Tableau.decompose
    swallows them.
    """
    entry = entries[0]
    if entry.sign != True:
        return
    
    if entry.tree.attrib["upos"] == "VERB":
        s = entry.tree
        # Expected child pattern: DT* PW DT* (DT{conj})* DT*, where a conj child
        # may start with "and" / "," as its element [0].
        s_list = [] # clauses split off from s
        step = 0
        # DT*: skip the leading "dt" children
        while len(s) > step:
            if s[step].tag == "dt":
                step += 1
            else:
                break
        # PW: the next child must be the "pw" element
        if s[step].tag == "pw":
            step += 1
        else:
            return

        # DT*: skip following "dt" children that are not conj
        while len(s) > step:
            if s[step].tag == "dt" and s[step].attrib["dependency"] != "conj":
                step += 1
            else:
                break

        main_s = s[0:step] # children [0, step) make up the main clause

        and_flag = False
        # (DT{and ,})*: consecutive conj children in which search_in_bros finds an nsubj
        while len(s) > step:
            if s[step].tag == "dt" and s[step].attrib["dependency"] == "conj" and search_in_bros(lambda dt: dt.attrib["dependency"] == "nsubj", s[step]) != -1:
                # strip leading "," / "and" children, remembering whether "and" was seen
                while s[step][0].tag == "dt" and s[step][0].attrib["lemma"] in [",", "and"]:
                    if s[step][0].attrib["lemma"] == "and":
                        and_flag = True
                    s[step].remove(s[step][0])
                # strip trailing "," children
                while s[step][-1].tag == "dt" and s[step][-1].attrib["lemma"] == ",":
                    s[step].remove(s[step][-1])
                s[step].attrib["dependency"] = "root" # the clause becomes a sentence root
                s_list.append(s[step])
                step += 1
            else:
                break
        if and_flag is False:
            return

        # Everything after the conj run is shared: it stays with the main clause
        # and is copied into every split-off clause.
        modification = s[step:]
        main_s.extend(modification)
        for _s in s_list:
            _s.extend(copy.deepcopy(modification))

        # print(entry.pretty_print())

        # Rebuild s so that it only contains the main clause
        remove_all_children(s)
        s.extend(main_s)
        while s[-1].tag == "dt" and s[-1].attrib["lemma"] == ",":
            s.remove(s[-1])

        new_entries = [entry]

        # One new entry per split-off clause, using that clause as its tree
        for _s in s_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree = _s
            new_entries.append(new_entry)

        # Single branch: all clauses hold together
        return [new_entries]
rule_set.add(rule)

test_rule(rule, [TabEntry(True, "Alice didn't run, Bob got an email and Smith replied it.")])
test_rule(rule, [TabEntry(True, "Alice didn't love him and he love her.")])

In [None]:
# Rule tuple in the shape Tableau.append_rule validates: (arity, function name, "EQ"/"NEQ").
rule = (1, "FALSE_S_AND_S", "EQ")
def FALSE_S_AND_S(entries):
    """Split a False-signed coordinated sentence ("S1, S2 and S3") into branches.

    Works on ``entries[0]`` and rewrites its tree in place (Tableau.decompose
    passes deep copies). Returns ``[[entry], [clause_entry], ...]`` — one
    branch for the reduced main clause and one per conjoined clause.
    Returns None when the sign is not False, the root's upos is not "VERB",
    the expected child pattern is missing, or no "and" was found. Indexing
    errors (e.g. no "pw" child at all) are not caught here; Tableau.decompose
    swallows them.
    """
    entry = entries[0]
    if entry.sign != False:
        return
    
    if entry.tree.attrib["upos"] == "VERB":
        s = entry.tree
        # Expected child pattern: DT* PW DT* (DT{conj})* DT*, where a conj child
        # may start with "and" / "," as its element [0].
        s_list = [] # clauses split off from s
        step = 0
        # DT*: skip the leading "dt" children
        while len(s) > step:
            if s[step].tag == "dt":
                step += 1
            else:
                break
        # PW: the next child must be the "pw" element
        if s[step].tag == "pw":
            step += 1
        else:
            return

        # DT*: skip following "dt" children that are not conj
        while len(s) > step:
            if s[step].tag == "dt" and s[step].attrib["dependency"] != "conj":
                step += 1
            else:
                break

        main_s = s[0:step] # children [0, step) make up the main clause

        and_flag = False
        # (DT{and ,})*: consecutive conj children in which search_in_bros finds an nsubj
        while len(s) > step:
            if s[step].tag == "dt" and s[step].attrib["dependency"] == "conj" and search_in_bros(lambda dt: dt.attrib["dependency"] == "nsubj", s[step]) != -1:
                # strip leading "," / "and" children, remembering whether "and" was seen
                while s[step][0].tag == "dt" and s[step][0].attrib["lemma"] in [",", "and"]:
                    if s[step][0].attrib["lemma"] == "and":
                        and_flag = True
                    s[step].remove(s[step][0])
                # strip trailing "," children
                while s[step][-1].tag == "dt" and s[step][-1].attrib["lemma"] == ",":
                    s[step].remove(s[step][-1])
                s[step].attrib["dependency"] = "root" # the clause becomes a sentence root
                s_list.append(s[step])
                step += 1
            else:
                break
        if and_flag is False:
            return

        # Everything after the conj run is shared: it stays with the main clause
        # and is copied into every split-off clause.
        modification = s[step:]
        main_s.extend(modification)
        for _s in s_list:
            _s.extend(copy.deepcopy(modification))

        # print(entry.pretty_print())

        # Rebuild s so that it only contains the main clause
        remove_all_children(s)
        s.extend(main_s)
        while s[-1].tag == "dt" and s[-1].attrib["lemma"] == ",":
            s.remove(s[-1])

        # One branch per clause, starting with the main clause
        new_entries = [[entry]]

        for _s in s_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree = _s
            new_entries.append([new_entry])

        return new_entries
rule_set.add(rule)

test_rule(rule, [TabEntry(False, "Alice didn't run, Bob got an email and Smith replied it.")])
test_rule(rule, [TabEntry(False, "Alice didn't love him and he love her.")])

## NSUBJ1 and NSUBJ2

In [None]:
# Rule tuple in the shape Tableau.append_rule validates: (arity, function name, "EQ"/"NEQ").
rule = (1, "TRUE_NSUBJ_AND_NSUBJ", "EQ")
def TRUE_NSUBJ_AND_NSUBJ(entries):
    """Split the coordinated subject ("A, B and C ...") of a True-signed entry.

    Works on ``entries[0]`` and rewrites its tree in place. Returns
    ``[[entry, e1, e2, ...]]`` — a single branch in which ``entry`` keeps only
    the first conjunct as subject and every ``e_k`` is a copy whose nsubj child
    is replaced by one of the other conjuncts. Returns None when the sign is
    not True, the preconditions fail, no "and" is found, or any exception
    occurs during the rewrite.
    """
    entry = entries[0]
    if entry.sign != True:
        return
    # Index (per search_in_bros) of the root's child whose dependency is nsubj
    nsubj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "nsubj",
                           entry.tree)
    # An nsubj exists, there is no embedded sentence and no negation word
    if nsubj_i != -1 and include_sentence(entry.tree) == -1 and include_negation(entry.tree) == -1:
        try:
            nsubj = entry.tree[nsubj_i] # the nsubj element
            # Expected child pattern: DT* PW DT* (DT{conj})* DT*, where a conj
            # child may start with "and" / "," as its element [0].
            nsubj_list = [] # conjuncts split off from nsubj
            step = 0
            # DT*: skip the leading "dt" children
            while len(nsubj) > step:
                if nsubj[step].tag == "dt":
                    step += 1
                else:
                    break
            # PW: the next child must be the "pw" element
            if nsubj[step].tag == "pw":
                step += 1
            else:
                return
            
            # DT*: skip following "dt" children that are not conj
            while len(nsubj) > step:
                if nsubj[step].tag == "dt" and nsubj[step].attrib["dependency"] != "conj":
                    step += 1
                else:
                    break
            
            main_nsubj = nsubj[0:step] # children [0, step) make up the first subject

            and_flag = False
            # (DT{and ,})*: consecutive conj children
            while len(nsubj) > step:
                if nsubj[step].tag == "dt" and nsubj[step].attrib["dependency"] == "conj":
                    # strip leading "," / "and" children, remembering whether "and" was seen
                    while nsubj[step][0].tag == "dt" and nsubj[step][0].attrib["lemma"] in [",", "and"]:
                        if nsubj[step][0].attrib["lemma"] == "and":
                            and_flag = True
                        nsubj[step].remove(nsubj[step][0])
                    # strip trailing "," children
                    while nsubj[step][-1].tag == "dt" and nsubj[step][-1].attrib["lemma"] == ",":
                        nsubj[step].remove(nsubj[step][-1])
                    nsubj[step].attrib["dependency"] = "nsubj" # the conjunct becomes an nsubj
                    nsubj_list.append(nsubj[step]) # collect the subject
                    step += 1
                else:
                    break
            if and_flag is False:
                return
            
            # Everything after the conj run is shared: it stays with the first
            # subject and is copied into every split-off conjunct.
            modification = nsubj[step:]
            main_nsubj.extend(modification)
            for _nsubj in nsubj_list:
                _nsubj.extend(copy.deepcopy(modification))

            # print(entry.pretty_print())

            # Rebuild nsubj so that it only contains the first subject
            remove_all_children(nsubj)
            nsubj.extend(main_nsubj)
            while nsubj[-1].tag == "dt" and nsubj[-1].attrib["lemma"] == ",":
                nsubj.remove(nsubj[-1])

            new_entries = [entry]

            # One new entry per remaining subject
            for _nsubj in nsubj_list:
                new_entry = copy.deepcopy(entry)
                new_entry.tree.replace(new_entry.tree[nsubj_i], _nsubj)
                new_entries.append(new_entry)


            # Single branch: all conjuncts hold together
            return [new_entries]
        except:
            return
rule_set.add(rule)

test_rule(rule, [TabEntry(True, "Alice, Bob and Smith and Chris work.")])
test_rule(rule, [TabEntry(True, "The apple and the orange is delicious")])
test_rule(rule, [TabEntry(False, "The apple and the orange is delicious")])
test_rule(rule, [TabEntry(True, "Men and a women in suimsuits hangout on rocks above water.")])

In [None]:
# Rule tuple in the shape Tableau.append_rule validates: (arity, function name, "EQ"/"NEQ").
rule = (1, "FALSE_NSUBJ_AND_NSUBJ", "EQ")
def FALSE_NSUBJ_AND_NSUBJ(entries):
    """Branch on the coordinated subject ("A, B and C ...") of a False-signed entry.

    Works on ``entries[0]`` and rewrites its tree in place. Returns
    ``[[entry], [e1], [e2], ...]`` — one branch per conjunct: ``entry`` keeps
    only the first conjunct as subject and every ``e_k`` is a copy whose nsubj
    child is replaced by one of the other conjuncts. Returns None when the
    sign is not False, the preconditions fail, no "and" is found, or any
    exception occurs during the rewrite.
    """
    entry = entries[0]
    if entry.sign != False:
        return
    # Index (per search_in_bros) of the root's child whose dependency is nsubj
    nsubj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "nsubj",
                           entry.tree)
    # An nsubj exists, there is no embedded sentence and no negation word
    if nsubj_i != -1 and include_sentence(entry.tree) == -1 and include_negation(entry.tree) == -1:
        try:
            nsubj = entry.tree[nsubj_i] # the nsubj element
            # Expected child pattern: DT* PW DT* (DT{conj})* DT*, where a conj
            # child may start with "and" / "," as its element [0].
            nsubj_list = [] # conjuncts split off from nsubj
            step = 0

            # DT*: skip the leading "dt" children
            while len(nsubj) > step:
                if nsubj[step].tag == "dt":
                    step += 1
                else:
                    break
            # PW: the next child must be the "pw" element
            if nsubj[step].tag == "pw":
                step += 1
            else:
                return
            
            # DT*: skip following "dt" children that are not conj
            while len(nsubj) > step:
                if nsubj[step].tag == "dt" and nsubj[step].attrib["dependency"] != "conj":
                    step += 1
                else:
                    break
            
            main_nsubj = nsubj[0:step] # children [0, step) make up the first subject

            and_flag = False
            # (DT{and ,})*: consecutive conj children
            while len(nsubj) > step:
                if nsubj[step].tag == "dt" and nsubj[step].attrib["dependency"] == "conj":
                    # strip leading "," / "and" children, remembering whether "and" was seen
                    while nsubj[step][0].tag == "dt" and nsubj[step][0].attrib["lemma"] in [",", "and"]:
                        if nsubj[step][0].attrib["lemma"] == "and":
                            and_flag = True
                        nsubj[step].remove(nsubj[step][0])
                    # strip trailing "," children
                    while nsubj[step][-1].tag == "dt" and nsubj[step][-1].attrib["lemma"] == ",":
                        nsubj[step].remove(nsubj[step][-1])
                    nsubj[step].attrib["dependency"] = "nsubj" # the conjunct becomes an nsubj
                    nsubj_list.append(nsubj[step]) # collect the subject
                    step += 1
                else:
                    break
            if and_flag is False:
                return
            
            # Everything after the conj run is shared: it stays with the first
            # subject and is copied into every split-off conjunct.
            modification = nsubj[step:]
            main_nsubj.extend(modification)
            for _nsubj in nsubj_list:
                _nsubj.extend(copy.deepcopy(modification))

            # print(entry.pretty_print())

            # Rebuild nsubj so that it only contains the first subject
            remove_all_children(nsubj)
            nsubj.extend(main_nsubj)
            while nsubj[-1].tag == "dt" and nsubj[-1].attrib["lemma"] == ",":
                nsubj.remove(nsubj[-1])

            # One branch per subject, starting with the first one
            new_branches = [[entry]]

            # Remaining subjects
            for _nsubj in nsubj_list:
                new_entry = copy.deepcopy(entry)
                new_entry.tree.replace(new_entry.tree[nsubj_i], _nsubj)
                new_branches.append([new_entry])


            return new_branches
        except:
            return
rule_set.add(rule)

test_rule(rule, [TabEntry(False, "Alice, Bob, Smith and Chris work.")])
test_rule(rule, [TabEntry(False, "The apple and the orange is delicious")])
test_rule(rule, [TabEntry(False, "The apple and the orange is delicious")])

## CSUBJ1 and CSUBJ2

In [None]:
# Rule tuple in the shape Tableau.append_rule validates: (arity, function name, "EQ"/"NEQ").
rule = (1, "TRUE_CSUBJ_AND_CSUBJ", "EQ")
def TRUE_CSUBJ_AND_CSUBJ(entries):
    """Split the coordinated clausal subject (csubj) of a True-signed entry.

    Works on ``entries[0]`` and rewrites its tree in place. Returns
    ``[[entry, e1, e2, ...]]`` — a single branch in which ``entry`` keeps only
    the first conjunct as csubj and every ``e_k`` is a copy whose csubj child
    is replaced by one of the other conjuncts. Returns None when the sign is
    not True, the preconditions fail, no "and" is found, or any exception
    occurs during the rewrite.
    """
    entry = entries[0]
    if entry.sign != True:
        return
    # print(entry.pretty_print())
    
    # Index (per search_in_bros) of the root's child whose dependency is csubj
    csubj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "csubj",
                           entry.tree)
    # A csubj exists, there is no embedded sentence and no negation word
    if csubj_i != -1 and include_sentence(entry.tree) == -1 and include_negation(entry.tree) == -1:
        try:
            csubj = entry.tree[csubj_i] # the csubj element
            # Expected child pattern: DT* PW DT* (DT{conj})* DT*, where a conj
            # child may start with "and" / "," as its element [0].
            csubj_list = [] # conjuncts split off from csubj
            step = 0
            # DT*: skip the leading "dt" children
            while len(csubj) > step:
                if csubj[step].tag == "dt":
                    step += 1
                else:
                    break
            # PW: the next child must be the "pw" element
            if csubj[step].tag == "pw":
                step += 1
            else:
                return
            # DT*: skip following "dt" children that are not conj
            while len(csubj) > step:
                if csubj[step].tag == "dt" and csubj[step].attrib["dependency"] != "conj":
                    step += 1
                else:
                    break
            main_csubj = csubj[0:step] # children [0, step) make up the first csubj

            and_flag = False
            # (DT{and ,})*: consecutive conj children
            while len(csubj) > step:
                if csubj[step].tag == "dt" and csubj[step].attrib["dependency"] == "conj":
                    # strip leading "," / "and" children, remembering whether "and" was seen
                    while csubj[step][0].tag == "dt" and csubj[step][0].attrib["lemma"] in [",", "and"]:
                        if csubj[step][0].attrib["lemma"] == "and":
                            and_flag = True
                        csubj[step].remove(csubj[step][0])
                    # strip trailing "," children
                    while csubj[step][-1].tag == "dt" and csubj[step][-1].attrib["lemma"] == ",":
                        csubj[step].remove(csubj[step][-1])
                    csubj[step].attrib["dependency"] = "csubj" # the conjunct becomes a csubj
                    csubj_list.append(csubj[step])
                    step += 1
                else:
                    break
            if and_flag is False:
                return
            
            # Everything after the conj run is shared: it stays with the first
            # csubj and is copied into every split-off conjunct.
            modification = csubj[step:]
            main_csubj.extend(modification)
            for _csubj in csubj_list:
                _csubj.extend(copy.deepcopy(modification))

            # print(entry.pretty_print())

            # Rebuild csubj so that it only contains the first conjunct
            remove_all_children(csubj)
            csubj.extend(main_csubj)
            while csubj[-1].tag == "dt" and csubj[-1].attrib["lemma"] == ",":
                csubj.remove(csubj[-1])

            new_entries = [entry]

            # One new entry per remaining csubj
            for _csubj in csubj_list:
                new_entry = copy.deepcopy(entry)
                new_entry.tree.replace(new_entry.tree[csubj_i], _csubj)
                new_entries.append(new_entry)

            # Single branch: all conjuncts hold together
            return [new_entries]
        except:
            return

rule_set.add(rule)

test_rule(rule, [TabEntry(True, "To create works and to like it is what I want")])
test_rule(rule, [TabEntry(True, "Creating works and liking it is what I want")])

In [None]:
# Rule tuple in the shape Tableau.append_rule validates: (arity, function name, "EQ"/"NEQ").
rule = (1, "FALSE_CSUBJ_AND_CSUBJ", "EQ")
def FALSE_CSUBJ_AND_CSUBJ(entries):
    """Branch on the coordinated clausal subject (csubj) of a False-signed entry.

    Works on ``entries[0]`` and rewrites its tree in place. Returns
    ``[[entry], [e1], [e2], ...]`` — one branch per conjunct: ``entry`` keeps
    only the first conjunct as csubj and every ``e_k`` is a copy whose csubj
    child is replaced by one of the other conjuncts. Returns None when the
    sign is not False, the preconditions fail, no "and" is found, or any
    exception occurs during the rewrite.
    """
    entry = entries[0]
    if entry.sign != False:
        return
    #print(entry.pretty_print())
    
    # Index (per search_in_bros) of the root's child whose dependency is csubj
    csubj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "csubj",
                           entry.tree)
    # A csubj exists, there is no embedded sentence and no negation word
    if csubj_i != -1 and include_sentence(entry.tree) == -1 and include_negation(entry.tree) == -1:
        try:
            csubj = entry.tree[csubj_i] # the csubj element
            # Expected child pattern: DT* PW DT* (DT{conj})* DT*, where a conj
            # child may start with "and" / "," as its element [0].
            csubj_list = [] # conjuncts split off from csubj
            step = 0
            # DT*: skip the leading "dt" children
            while len(csubj) > step:
                if csubj[step].tag == "dt":
                    step += 1
                else:
                    break
            # PW: the next child must be the "pw" element
            if csubj[step].tag == "pw":
                step += 1
            else:
                return
            # DT*: skip following "dt" children that are not conj
            while len(csubj) > step:
                if csubj[step].tag == "dt" and csubj[step].attrib["dependency"] != "conj":
                    step += 1
                else:
                    break
            main_csubj = csubj[0:step] # children [0, step) make up the first csubj

            and_flag = False
            # (DT{and ,})*: consecutive conj children
            while len(csubj) > step:
                if csubj[step].tag == "dt" and csubj[step].attrib["dependency"] == "conj":
                    # strip leading "," / "and" children, remembering whether "and" was seen
                    while csubj[step][0].tag == "dt" and csubj[step][0].attrib["lemma"] in [",", "and"]:
                        if csubj[step][0].attrib["lemma"] == "and":
                            and_flag = True
                        csubj[step].remove(csubj[step][0])
                    # strip trailing "," children
                    while csubj[step][-1].tag == "dt" and csubj[step][-1].attrib["lemma"] == ",":
                        csubj[step].remove(csubj[step][-1])
                    csubj[step].attrib["dependency"] = "csubj" # the conjunct becomes a csubj
                    csubj_list.append(csubj[step])
                    step += 1
                else:
                    break
            if and_flag is False:
                return
            
            # Everything after the conj run is shared: it stays with the first
            # csubj and is copied into every split-off conjunct.
            modification = csubj[step:]
            main_csubj.extend(modification)
            for _csubj in csubj_list:
                _csubj.extend(copy.deepcopy(modification))

            # print(entry.pretty_print())

            # Rebuild csubj so that it only contains the first conjunct
            remove_all_children(csubj)
            csubj.extend(main_csubj)
            while csubj[-1].tag == "dt" and csubj[-1].attrib["lemma"] == ",":
                csubj.remove(csubj[-1])

            # One branch per csubj, starting with the first one
            new_branches = [[entry]]

            # Remaining csubj conjuncts
            for _csubj in csubj_list:
                new_entry = copy.deepcopy(entry)
                new_entry.tree.replace(new_entry.tree[csubj_i], _csubj)
                new_branches.append([new_entry])

            return new_branches
        except:
            return

rule_set.add(rule)

test_rule(rule, [TabEntry(False, "To create works and to like it is what I want")])
test_rule(rule, [TabEntry(False, "Creating works and liking it is what I want")])

## OBJ1 and OBJ2

In [None]:
# Rule tuple in the shape Tableau.append_rule validates: (arity, function name, "EQ"/"NEQ").
rule = (1, "TRUE_OBJ_AND_OBJ","EQ")
def TRUE_OBJ_AND_OBJ(entries):
    """Split the coordinated object ("... A, B and C") of a True-signed entry.

    Works on ``entries[0]`` and rewrites its tree in place. Returns
    ``[[entry, e1, e2, ...]]`` — a single branch in which ``entry`` keeps only
    the first conjunct as object and every ``e_k`` is a copy whose obj child is
    replaced by one of the other conjuncts. Returns None when the sign is not
    True, the preconditions fail, no "and" is found, or any exception occurs
    during the rewrite.
    """
    entry = entries[0]
    if entry.sign != True:
        return
    # Index (per search_in_bros) of the root's child whose dependency is obj
    obj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "obj",
                           entry.tree)
    # An obj exists, there is no embedded sentence and no negation word
    if obj_i != -1 and include_sentence(entry.tree) == -1 and include_negation(entry.tree) == -1:
        try:
            obj = entry.tree[obj_i] # the obj element
            # Expected child pattern: DT* PW DT* (DT{conj})* DT*, where a conj
            # child may start with "and" / "," as its element [0].
            obj_list = [] # conjuncts split off from obj
            step = 0

            # DT*: skip the leading "dt" children
            while len(obj) > step:
                if obj[step].tag == "dt":
                    step += 1
                else:
                    break
            # PW: the next child must be the "pw" element
            if obj[step].tag == "pw":
                step += 1
            else:
                return
            # DT*: skip following "dt" children that are not conj
            while len(obj) > step:
                if obj[step].tag == "dt" and obj[step].attrib["dependency"] != "conj":
                    step += 1
                else:
                    break
            main_obj = obj[0:step] # children [0, step) make up the first object

            and_flag = False
            # (DT{and ,})*: consecutive conj children
            while len(obj) > step:
                if obj[step].tag == "dt" and obj[step].attrib["dependency"] == "conj":
                    # strip leading "," / "and" children, remembering whether "and" was seen
                    while obj[step][0].tag == "dt" and obj[step][0].attrib["lemma"] in [",", "and"]:
                        if obj[step][0].attrib["lemma"] == "and":
                            and_flag = True
                        obj[step].remove(obj[step][0])
                    # strip trailing "," children
                    while obj[step][-1].tag == "dt" and obj[step][-1].attrib["lemma"] == ",":
                        obj[step].remove(obj[step][-1])
                    obj[step].attrib["dependency"] = "obj" # the conjunct becomes an obj
                    obj_list.append(obj[step])
                    step += 1
                else:
                    break
            if and_flag is False:
                return
            
            # Everything after the conj run is shared: it stays with the first
            # object and is copied into every split-off conjunct.
            modification = obj[step:]
            main_obj.extend(modification)
            for _obj in obj_list:
                _obj.extend(copy.deepcopy(modification))

            # print(entry.pretty_print())

            # Rebuild obj so that it only contains the first object
            remove_all_children(obj)
            obj.extend(main_obj)
            while obj[-1].tag == "dt" and obj[-1].attrib["lemma"] == ",":
                obj.remove(obj[-1])

            new_entries = [entry]

            # One new entry per remaining object
            for _obj in obj_list:
                new_entry = copy.deepcopy(entry)
                new_entry.tree.replace(new_entry.tree[obj_i], _obj)
                new_entries.append(new_entry)


            # Single branch: all conjuncts hold together
            return [new_entries]
        except:
            return
rule_set.add(rule)

test_rule(rule, [TabEntry(True, "Alice likes apple and orange.")])
test_rule(rule, [TabEntry(True, "He ate beaf and chicken.")])
test_rule(rule, [TabEntry(False, "He ate beaf and chicken.")])

In [None]:
rule = (1, "FALSE_OBJ_AND_OBJ", "EQ")
def FALSE_OBJ_AND_OBJ(entries):
    """Split an "and"-coordinated ``obj`` of a False-signed entry into branches.

    The child of ``entry.tree`` whose dependency is ``obj`` is expected to
    have children laid out as ``DT* PW DT* (DT[conj])* rest``.  When at least
    one ``conj`` child starts with the lemma "and", ``entry`` is rewritten in
    place so that its obj keeps only the first conjunct (plus ``rest``), and
    every further conjunct becomes the obj of a deep copy of ``entry``.

    The conj children are inspected in a read-only pass first, so an entry
    without a leading "and" is returned to the caller unmodified.

    Args:
        entries: Sequence whose first element is the entry to decompose; only
            its ``sign`` and ``tree`` attributes are read here.

    Returns:
        A list of branches, each a one-element list: ``[[entry], [copy1], ...]``.
        ``None`` when the sign is not False, the pattern does not match, or an
        error occurs while walking/rewriting the tree.
    """
    entry = entries[0]
    if entry.sign != False:
        return None
    # Index of the root's child that carries the "obj" dependency.
    obj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "obj",
                           entry.tree)
    # Requires an obj, no embedded sentence and no negation (per the helpers).
    if (obj_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return None
    try:
        obj = entry.tree[obj_i]
        step = 0
        # DT*: leading dependents.
        while step < len(obj) and obj[step].tag == "dt":
            step += 1
        # PW: the head word itself.
        if step >= len(obj) or obj[step].tag != "pw":
            return None
        step += 1
        # DT*: trailing dependents up to the first conj.
        while (step < len(obj) and obj[step].tag == "dt"
               and obj[step].attrib["dependency"] != "conj"):
            step += 1
        head_end = step  # children [0, head_end) form the first conjunct

        # Read-only pass over the conj children: decide whether the rule
        # applies before touching the tree.
        and_flag = False
        while (step < len(obj) and obj[step].tag == "dt"
               and obj[step].attrib["dependency"] == "conj"):
            for child in obj[step]:
                if child.tag != "dt" or child.attrib["lemma"] not in (",", "and"):
                    break
                if child.attrib["lemma"] == "and":
                    and_flag = True
            else:
                # The conjunct holds nothing but "and"/",": nothing to split.
                return None
            step += 1
        if not and_flag:
            return None

        # The rule applies: now rewrite the tree.
        main_obj = obj[0:head_end]
        obj_list = []  # the split-off conjuncts
        for conj in obj[head_end:step]:
            # Drop the leading "and"/"," and any trailing ",".
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in (",", "and"):
                conj.remove(conj[0])
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "obj"  # promote the conjunct to obj
            obj_list.append(conj)

        # Whatever follows the conj run is shared by every conjunct.
        modification = obj[step:]
        main_obj.extend(modification)
        for _obj in obj_list:
            _obj.extend(copy.deepcopy(modification))

        # Rebuild the original obj so that it holds only the first conjunct.
        remove_all_children(obj)
        obj.extend(main_obj)
        while obj[-1].tag == "dt" and obj[-1].attrib["lemma"] == ",":
            obj.remove(obj[-1])

        new_branches = [[entry]]
        for _obj in obj_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[obj_i], _obj)
            new_branches.append([new_entry])
        return new_branches
    except Exception:
        # Best effort: any structural surprise means "rule not applicable".
        # (Exception, not a bare except, so Ctrl-C still interrupts.)
        return None
# Add the FALSE_OBJ_AND_OBJ descriptor tuple to the shared rule_set.
rule_set.add(rule)

# Try the rule on sample entries via test_rule (defined outside this section).
# The last entry is True-signed, i.e. the sign FALSE_OBJ_AND_OBJ rejects.
test_rule(rule, [TabEntry(False, "Alice likes apple and orange.")])
test_rule(rule, [TabEntry(False, "He ate beaf and chicken.")])
test_rule(rule, [TabEntry(True, "He ate beaf and chicken.")])

## IOBJ1 and IOBJ2

In [None]:
rule = (1, "TRUE_IOBJ_AND_IOBJ","EQ")
def TRUE_IOBJ_AND_IOBJ(entries):
    """Split an "and"-coordinated ``iobj`` of a True-signed entry.

    The child of ``entry.tree`` whose dependency is ``iobj`` is expected to
    have children laid out as ``DT* PW DT* (DT[conj])* rest``.  When at least
    one ``conj`` child starts with the lemma "and", ``entry`` is rewritten in
    place so that its iobj keeps only the first conjunct (plus ``rest``), and
    every further conjunct becomes the iobj of a deep copy of ``entry``.

    The conj children are inspected in a read-only pass first, so an entry
    without a leading "and" is returned to the caller unmodified.

    Args:
        entries: Sequence whose first element is the entry to decompose; only
            its ``sign`` and ``tree`` attributes are read here.

    Returns:
        A list holding a single branch: ``[[entry, copy1, ...]]``.
        ``None`` when the sign is not True, the pattern does not match, or an
        error occurs while walking/rewriting the tree.
    """
    entry = entries[0]
    if entry.sign != True:
        return None
    # Index of the root's child that carries the "iobj" dependency.
    iobj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "iobj",
                           entry.tree)
    # Requires an iobj, no embedded sentence and no negation (per the helpers).
    if (iobj_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return None
    try:
        iobj = entry.tree[iobj_i]
        step = 0
        # DT*: leading dependents.
        while step < len(iobj) and iobj[step].tag == "dt":
            step += 1
        # PW: the head word itself.
        if step >= len(iobj) or iobj[step].tag != "pw":
            return None
        step += 1
        # DT*: trailing dependents up to the first conj.
        while (step < len(iobj) and iobj[step].tag == "dt"
               and iobj[step].attrib["dependency"] != "conj"):
            step += 1
        head_end = step  # children [0, head_end) form the first conjunct

        # Read-only pass over the conj children: decide whether the rule
        # applies before touching the tree.
        and_flag = False
        while (step < len(iobj) and iobj[step].tag == "dt"
               and iobj[step].attrib["dependency"] == "conj"):
            for child in iobj[step]:
                if child.tag != "dt" or child.attrib["lemma"] not in (",", "and"):
                    break
                if child.attrib["lemma"] == "and":
                    and_flag = True
            else:
                # The conjunct holds nothing but "and"/",": nothing to split.
                return None
            step += 1
        if not and_flag:
            return None

        # The rule applies: now rewrite the tree.
        main_iobj = iobj[0:head_end]
        iobj_list = []  # the split-off conjuncts
        for conj in iobj[head_end:step]:
            # Drop the leading "and"/"," and any trailing ",".
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in (",", "and"):
                conj.remove(conj[0])
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "iobj"  # promote the conjunct to iobj
            iobj_list.append(conj)

        # Whatever follows the conj run is shared by every conjunct.
        modification = iobj[step:]
        main_iobj.extend(modification)
        for _iobj in iobj_list:
            _iobj.extend(copy.deepcopy(modification))

        # Rebuild the original iobj so that it holds only the first conjunct.
        remove_all_children(iobj)
        iobj.extend(main_iobj)
        while iobj[-1].tag == "dt" and iobj[-1].attrib["lemma"] == ",":
            iobj.remove(iobj[-1])

        new_entries = [entry]
        for _iobj in iobj_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[iobj_i], _iobj)
            new_entries.append(new_entry)
        return [new_entries]
    except Exception:
        # Best effort: any structural surprise means "rule not applicable".
        # (Exception, not a bare except, so Ctrl-C still interrupts.)
        return None
# Add the TRUE_IOBJ_AND_IOBJ descriptor tuple to the shared rule_set.
rule_set.add(rule)

# Try the rule on a sample entry via test_rule (defined outside this section).
test_rule(rule, [TabEntry(True, "He gave her and her sister a book.")])

In [None]:
rule = (1, "FALSE_IOBJ_AND_IOBJ","EQ")
def FALSE_IOBJ_AND_IOBJ(entries):
    """Split an "and"-coordinated ``iobj`` of a False-signed entry into branches.

    The child of ``entry.tree`` whose dependency is ``iobj`` is expected to
    have children laid out as ``DT* PW DT* (DT[conj])* rest``.  When at least
    one ``conj`` child starts with the lemma "and", ``entry`` is rewritten in
    place so that its iobj keeps only the first conjunct (plus ``rest``), and
    every further conjunct becomes the iobj of a deep copy of ``entry``.

    The conj children are inspected in a read-only pass first, so an entry
    without a leading "and" is returned to the caller unmodified.

    Args:
        entries: Sequence whose first element is the entry to decompose; only
            its ``sign`` and ``tree`` attributes are read here.

    Returns:
        A list of branches, each a one-element list: ``[[entry], [copy1], ...]``.
        ``None`` when the sign is not False, the pattern does not match, or an
        error occurs while walking/rewriting the tree.
    """
    entry = entries[0]
    if entry.sign != False:
        return None
    # Index of the root's child that carries the "iobj" dependency.
    iobj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "iobj",
                           entry.tree)
    # Requires an iobj, no embedded sentence and no negation (per the helpers).
    if (iobj_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return None
    try:
        iobj = entry.tree[iobj_i]
        step = 0
        # DT*: leading dependents.
        while step < len(iobj) and iobj[step].tag == "dt":
            step += 1
        # PW: the head word itself.
        if step >= len(iobj) or iobj[step].tag != "pw":
            return None
        step += 1
        # DT*: trailing dependents up to the first conj.
        while (step < len(iobj) and iobj[step].tag == "dt"
               and iobj[step].attrib["dependency"] != "conj"):
            step += 1
        head_end = step  # children [0, head_end) form the first conjunct

        # Read-only pass over the conj children: decide whether the rule
        # applies before touching the tree.
        and_flag = False
        while (step < len(iobj) and iobj[step].tag == "dt"
               and iobj[step].attrib["dependency"] == "conj"):
            for child in iobj[step]:
                if child.tag != "dt" or child.attrib["lemma"] not in (",", "and"):
                    break
                if child.attrib["lemma"] == "and":
                    and_flag = True
            else:
                # The conjunct holds nothing but "and"/",": nothing to split.
                return None
            step += 1
        if not and_flag:
            return None

        # The rule applies: now rewrite the tree.
        main_iobj = iobj[0:head_end]
        iobj_list = []  # the split-off conjuncts
        for conj in iobj[head_end:step]:
            # Drop the leading "and"/"," and any trailing ",".
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in (",", "and"):
                conj.remove(conj[0])
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "iobj"  # promote the conjunct to iobj
            iobj_list.append(conj)

        # Whatever follows the conj run is shared by every conjunct.
        modification = iobj[step:]
        main_iobj.extend(modification)
        for _iobj in iobj_list:
            _iobj.extend(copy.deepcopy(modification))

        # Rebuild the original iobj so that it holds only the first conjunct.
        remove_all_children(iobj)
        iobj.extend(main_iobj)
        while iobj[-1].tag == "dt" and iobj[-1].attrib["lemma"] == ",":
            iobj.remove(iobj[-1])

        new_branches = [[entry]]
        for _iobj in iobj_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[iobj_i], _iobj)
            new_branches.append([new_entry])
        return new_branches
    except Exception:
        # Best effort: any structural surprise means "rule not applicable".
        # (Exception, not a bare except, so Ctrl-C still interrupts.)
        return None
# Add the FALSE_IOBJ_AND_IOBJ descriptor tuple to the shared rule_set.
rule_set.add(rule)

# Try the rule on a sample entry via test_rule (defined outside this section).
test_rule(rule, [TabEntry(False, "He gave her and her sister a book.")])

## XCOMP1 and XCOMP2

In [None]:
rule = (1, "TRUE_XCOMP_AND_XCOMP", "EQ")
def TRUE_XCOMP_AND_XCOMP(entries):
    """Split an "and"-coordinated ``xcomp`` of a True-signed entry.

    The child of ``entry.tree`` whose dependency is ``xcomp`` is expected to
    have children laid out as ``DT* PW DT* (DT[conj])* rest``.  When at least
    one ``conj`` child starts with the lemma "and", ``entry`` is rewritten in
    place so that its xcomp keeps only the first conjunct (plus ``rest``), and
    every further conjunct becomes the xcomp of a deep copy of ``entry``.

    The conj children are inspected in a read-only pass first, so an entry
    without a leading "and" is returned to the caller unmodified.

    Args:
        entries: Sequence whose first element is the entry to decompose; only
            its ``sign`` and ``tree`` attributes are read here.

    Returns:
        A list holding a single branch: ``[[entry, copy1, ...]]``.
        ``None`` when the sign is not True, the pattern does not match, or an
        error occurs while walking/rewriting the tree.
    """
    entry = entries[0]

    if entry.sign != True:
        return None
    # Index of the root's child that carries the "xcomp" dependency.
    xcomp_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "xcomp",
                           entry.tree)
    # Requires an xcomp, no embedded sentence and no negation (per the helpers).
    if (xcomp_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return None
    try:
        xcomp = entry.tree[xcomp_i]
        step = 0
        # DT*: leading dependents.
        while step < len(xcomp) and xcomp[step].tag == "dt":
            step += 1
        # PW: the head word itself.
        if step >= len(xcomp) or xcomp[step].tag != "pw":
            return None
        step += 1
        # DT*: trailing dependents up to the first conj.
        while (step < len(xcomp) and xcomp[step].tag == "dt"
               and xcomp[step].attrib["dependency"] != "conj"):
            step += 1
        head_end = step  # children [0, head_end) form the first conjunct

        # Read-only pass over the conj children: decide whether the rule
        # applies before touching the tree.
        and_flag = False
        while (step < len(xcomp) and xcomp[step].tag == "dt"
               and xcomp[step].attrib["dependency"] == "conj"):
            for child in xcomp[step]:
                if child.tag != "dt" or child.attrib["lemma"] not in (",", "and"):
                    break
                if child.attrib["lemma"] == "and":
                    and_flag = True
            else:
                # The conjunct holds nothing but "and"/",": nothing to split.
                return None
            step += 1
        if not and_flag:
            return None

        # The rule applies: now rewrite the tree.
        main_xcomp = xcomp[0:head_end]
        xcomp_list = []  # the split-off conjuncts
        for conj in xcomp[head_end:step]:
            # Drop the leading "and"/"," and any trailing ",".
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in (",", "and"):
                conj.remove(conj[0])
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "xcomp"  # promote the conjunct to xcomp
            xcomp_list.append(conj)

        # Whatever follows the conj run is shared by every conjunct.
        modification = xcomp[step:]
        main_xcomp.extend(modification)
        for _xcomp in xcomp_list:
            _xcomp.extend(copy.deepcopy(modification))

        # Rebuild the original xcomp so that it holds only the first conjunct.
        remove_all_children(xcomp)
        xcomp.extend(main_xcomp)
        while xcomp[-1].tag == "dt" and xcomp[-1].attrib["lemma"] == ",":
            xcomp.remove(xcomp[-1])

        new_entries = [entry]
        for _xcomp in xcomp_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[xcomp_i], _xcomp)
            new_entries.append(new_entry)
        return [new_entries]
    except Exception:
        # Best effort: any structural surprise means "rule not applicable".
        # (Exception, not a bare except, so Ctrl-C still interrupts.)
        return None
# Add the TRUE_XCOMP_AND_XCOMP descriptor tuple to the shared rule_set.
rule_set.add(rule)

# Try the rule on sample entries via test_rule (defined outside this section).
# The second sentence contains no "and".
test_rule(rule, [TabEntry(True, "He likes to swim and to run.")])
test_rule(rule, [TabEntry(True, "Sue asked George to respond to her offer.")])
test_rule(rule, [TabEntry(True, "Sue asked George to respond and to meet to her offer.")])

In [None]:
rule = (1, "FALSE_XCOMP_AND_XCOMP", "EQ")
def FALSE_XCOMP_AND_XCOMP(entries):
    """Split an "and"-coordinated ``xcomp`` of a False-signed entry into branches.

    The child of ``entry.tree`` whose dependency is ``xcomp`` is expected to
    have children laid out as ``DT* PW DT* (DT[conj])* rest``.  When at least
    one ``conj`` child starts with the lemma "and", ``entry`` is rewritten in
    place so that its xcomp keeps only the first conjunct (plus ``rest``), and
    every further conjunct becomes the xcomp of a deep copy of ``entry``.

    The conj children are inspected in a read-only pass first, so an entry
    without a leading "and" is returned to the caller unmodified.

    Args:
        entries: Sequence whose first element is the entry to decompose; only
            its ``sign`` and ``tree`` attributes are read here.

    Returns:
        A list of branches, each a one-element list: ``[[entry], [copy1], ...]``.
        ``None`` when the sign is not False, the pattern does not match, or an
        error occurs while walking/rewriting the tree.
    """
    entry = entries[0]

    if entry.sign != False:
        return None
    # Index of the root's child that carries the "xcomp" dependency.
    xcomp_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "xcomp",
                           entry.tree)
    # Requires an xcomp, no embedded sentence and no negation (per the helpers).
    if (xcomp_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return None
    try:
        xcomp = entry.tree[xcomp_i]
        step = 0
        # DT*: leading dependents.
        while step < len(xcomp) and xcomp[step].tag == "dt":
            step += 1
        # PW: the head word itself.
        if step >= len(xcomp) or xcomp[step].tag != "pw":
            return None
        step += 1
        # DT*: trailing dependents up to the first conj.
        while (step < len(xcomp) and xcomp[step].tag == "dt"
               and xcomp[step].attrib["dependency"] != "conj"):
            step += 1
        head_end = step  # children [0, head_end) form the first conjunct

        # Read-only pass over the conj children: decide whether the rule
        # applies before touching the tree.
        and_flag = False
        while (step < len(xcomp) and xcomp[step].tag == "dt"
               and xcomp[step].attrib["dependency"] == "conj"):
            for child in xcomp[step]:
                if child.tag != "dt" or child.attrib["lemma"] not in (",", "and"):
                    break
                if child.attrib["lemma"] == "and":
                    and_flag = True
            else:
                # The conjunct holds nothing but "and"/",": nothing to split.
                return None
            step += 1
        if not and_flag:
            return None

        # The rule applies: now rewrite the tree.
        main_xcomp = xcomp[0:head_end]
        xcomp_list = []  # the split-off conjuncts
        for conj in xcomp[head_end:step]:
            # Drop the leading "and"/"," and any trailing ",".
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in (",", "and"):
                conj.remove(conj[0])
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "xcomp"  # promote the conjunct to xcomp
            xcomp_list.append(conj)

        # Whatever follows the conj run is shared by every conjunct.
        modification = xcomp[step:]
        main_xcomp.extend(modification)
        for _xcomp in xcomp_list:
            _xcomp.extend(copy.deepcopy(modification))

        # Rebuild the original xcomp so that it holds only the first conjunct.
        remove_all_children(xcomp)
        xcomp.extend(main_xcomp)
        while xcomp[-1].tag == "dt" and xcomp[-1].attrib["lemma"] == ",":
            xcomp.remove(xcomp[-1])

        new_branches = [[entry]]
        for _xcomp in xcomp_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[xcomp_i], _xcomp)
            new_branches.append([new_entry])
        return new_branches
    except Exception:
        # Best effort: any structural surprise means "rule not applicable".
        # (Exception, not a bare except, so Ctrl-C still interrupts.)
        return None
# Add the FALSE_XCOMP_AND_XCOMP descriptor tuple to the shared rule_set.
rule_set.add(rule)

# Try the rule on sample entries via test_rule (defined outside this section).
# The second sentence contains no "and".
test_rule(rule, [TabEntry(False, "He likes to swim and to run.")])
test_rule(rule, [TabEntry(False, "Sue asked George to respond to her offer.")])
test_rule(rule, [TabEntry(False, "Sue asked George to respond and to meet to her offer.")])

## CCOMP1 and CCOMP2

In [None]:
rule = (1, "TRUE_CCOMP_AND_CCOMP", "EQ")
def TRUE_CCOMP_AND_CCOMP(entries):
    """Split an "and"-coordinated ``ccomp`` of a True-signed entry.

    The child of ``entry.tree`` whose dependency is ``ccomp`` is expected to
    have children laid out as ``DT* PW DT* (DT[conj])* rest``.  When at least
    one ``conj`` child starts with the lemma "and", ``entry`` is rewritten in
    place so that its ccomp keeps only the first conjunct (plus ``rest``), and
    every further conjunct becomes the ccomp of a deep copy of ``entry``.

    The conj children are inspected in a read-only pass first, so an entry
    without a leading "and" is returned to the caller unmodified.

    Args:
        entries: Sequence whose first element is the entry to decompose; only
            its ``sign`` and ``tree`` attributes are read here.

    Returns:
        A list holding a single branch: ``[[entry, copy1, ...]]``.
        ``None`` when the sign is not True, the pattern does not match, or an
        error occurs while walking/rewriting the tree.
    """
    entry = entries[0]

    if entry.sign != True:
        return None
    # Index of the root's child that carries the "ccomp" dependency.
    ccomp_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "ccomp",
                           entry.tree)
    # Requires a ccomp, no embedded sentence and no negation (per the helpers).
    if (ccomp_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return None
    try:
        ccomp = entry.tree[ccomp_i]
        step = 0
        # DT*: leading dependents.
        while step < len(ccomp) and ccomp[step].tag == "dt":
            step += 1
        # PW: the head word itself.
        if step >= len(ccomp) or ccomp[step].tag != "pw":
            return None
        step += 1
        # DT*: trailing dependents up to the first conj.
        while (step < len(ccomp) and ccomp[step].tag == "dt"
               and ccomp[step].attrib["dependency"] != "conj"):
            step += 1
        head_end = step  # children [0, head_end) form the first conjunct

        # Read-only pass over the conj children: decide whether the rule
        # applies before touching the tree.
        and_flag = False
        while (step < len(ccomp) and ccomp[step].tag == "dt"
               and ccomp[step].attrib["dependency"] == "conj"):
            for child in ccomp[step]:
                if child.tag != "dt" or child.attrib["lemma"] not in (",", "and"):
                    break
                if child.attrib["lemma"] == "and":
                    and_flag = True
            else:
                # The conjunct holds nothing but "and"/",": nothing to split.
                return None
            step += 1
        if not and_flag:
            return None

        # The rule applies: now rewrite the tree.
        main_ccomp = ccomp[0:head_end]
        ccomp_list = []  # the split-off conjuncts
        for conj in ccomp[head_end:step]:
            # Drop the leading "and"/"," and any trailing ",".
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in (",", "and"):
                conj.remove(conj[0])
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "ccomp"  # promote the conjunct to ccomp
            ccomp_list.append(conj)

        # Whatever follows the conj run is shared by every conjunct.
        modification = ccomp[step:]
        main_ccomp.extend(modification)
        for _ccomp in ccomp_list:
            _ccomp.extend(copy.deepcopy(modification))

        # Rebuild the original ccomp so that it holds only the first conjunct.
        remove_all_children(ccomp)
        ccomp.extend(main_ccomp)
        while ccomp[-1].tag == "dt" and ccomp[-1].attrib["lemma"] == ",":
            ccomp.remove(ccomp[-1])

        new_entries = [entry]
        for _ccomp in ccomp_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[ccomp_i], _ccomp)
            new_entries.append(new_entry)
        return [new_entries]
    except Exception:
        # Best effort: any structural surprise means "rule not applicable".
        # (Exception, not a bare except, so Ctrl-C still interrupts.)
        return None
# Add the TRUE_CCOMP_AND_CCOMP descriptor tuple to the shared rule_set.
rule_set.add(rule)

# Try the rule on a sample entry via test_rule (defined outside this section).
test_rule(rule, [TabEntry(True, "He said that you run and sleep")])

In [None]:
rule = (1, "FALSE_CCOMP_AND_CCOMP", "EQ")
def FALSE_CCOMP_AND_CCOMP(entries):
    """Split an "and"-coordinated ``ccomp`` of a False-signed entry into branches.

    The child of ``entry.tree`` whose dependency is ``ccomp`` is expected to
    have children laid out as ``DT* PW DT* (DT[conj])* rest``.  When at least
    one ``conj`` child starts with the lemma "and", ``entry`` is rewritten in
    place so that its ccomp keeps only the first conjunct (plus ``rest``), and
    every further conjunct becomes the ccomp of a deep copy of ``entry``.

    The conj children are inspected in a read-only pass first, so an entry
    without a leading "and" is returned to the caller unmodified.

    Args:
        entries: Sequence whose first element is the entry to decompose; only
            its ``sign`` and ``tree`` attributes are read here.

    Returns:
        A list of branches, each a one-element list: ``[[entry], [copy1], ...]``.
        ``None`` when the sign is not False, the pattern does not match, or an
        error occurs while walking/rewriting the tree.
    """
    entry = entries[0]

    if entry.sign != False:
        return None
    # Index of the root's child that carries the "ccomp" dependency.
    ccomp_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "ccomp",
                           entry.tree)
    # Requires a ccomp, no embedded sentence and no negation (per the helpers).
    if (ccomp_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return None
    try:
        ccomp = entry.tree[ccomp_i]
        step = 0
        # DT*: leading dependents.
        while step < len(ccomp) and ccomp[step].tag == "dt":
            step += 1
        # PW: the head word itself.
        if step >= len(ccomp) or ccomp[step].tag != "pw":
            return None
        step += 1
        # DT*: trailing dependents up to the first conj.
        while (step < len(ccomp) and ccomp[step].tag == "dt"
               and ccomp[step].attrib["dependency"] != "conj"):
            step += 1
        head_end = step  # children [0, head_end) form the first conjunct

        # Read-only pass over the conj children: decide whether the rule
        # applies before touching the tree.
        and_flag = False
        while (step < len(ccomp) and ccomp[step].tag == "dt"
               and ccomp[step].attrib["dependency"] == "conj"):
            for child in ccomp[step]:
                if child.tag != "dt" or child.attrib["lemma"] not in (",", "and"):
                    break
                if child.attrib["lemma"] == "and":
                    and_flag = True
            else:
                # The conjunct holds nothing but "and"/",": nothing to split.
                return None
            step += 1
        if not and_flag:
            return None

        # The rule applies: now rewrite the tree.
        main_ccomp = ccomp[0:head_end]
        ccomp_list = []  # the split-off conjuncts
        for conj in ccomp[head_end:step]:
            # Drop the leading "and"/"," and any trailing ",".
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in (",", "and"):
                conj.remove(conj[0])
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "ccomp"  # promote the conjunct to ccomp
            ccomp_list.append(conj)

        # Whatever follows the conj run is shared by every conjunct.
        modification = ccomp[step:]
        main_ccomp.extend(modification)
        for _ccomp in ccomp_list:
            _ccomp.extend(copy.deepcopy(modification))

        # Rebuild the original ccomp so that it holds only the first conjunct.
        remove_all_children(ccomp)
        ccomp.extend(main_ccomp)
        while ccomp[-1].tag == "dt" and ccomp[-1].attrib["lemma"] == ",":
            ccomp.remove(ccomp[-1])

        new_branches = [[entry]]
        for _ccomp in ccomp_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[ccomp_i], _ccomp)
            new_branches.append([new_entry])
        return new_branches
    except Exception:
        # Best effort: any structural surprise means "rule not applicable".
        # (Exception, not a bare except, so Ctrl-C still interrupts.)
        return None
# Add the FALSE_CCOMP_AND_CCOMP descriptor tuple to the shared rule_set.
rule_set.add(rule)

# Try the rule on a sample entry via test_rule (defined outside this section).
test_rule(rule, [TabEntry(False, "He said that you run and sleep")])

## S1 or S2

In [None]:
rule = (1, "TRUE_S_OR_S", "EQ")
def TRUE_S_OR_S(entries):
    """Split "or"-coordinated clauses of a True-signed entry into branches.

    Applies when the root of ``entry.tree`` has ``upos == "VERB"`` and its
    children are laid out as ``DT* PW DT* (DT[conj])* rest``, where each
    ``conj`` child contains an ``nsubj`` (according to ``search_in_bros``) and
    at least one of them starts with the lemma "or".  ``entry`` is rewritten
    in place to keep only the first clause (plus ``rest``); every further
    clause becomes the whole tree of a deep copy of ``entry``.

    The conj children are inspected in a read-only pass first, so an entry
    without a leading "or" is returned to the caller unmodified.

    Args:
        entries: Sequence whose first element is the entry to decompose; only
            its ``sign`` and ``tree`` attributes are read here.

    Returns:
        A list of branches, each a one-element list: ``[[entry], [copy1], ...]``.
        ``None`` when the sign is not True or the pattern does not match.
    """
    entry = entries[0]
    if entry.sign != True:
        return None

    if entry.tree.attrib["upos"] != "VERB":
        return None
    s = entry.tree
    step = 0
    # DT*: leading dependents.
    while step < len(s) and s[step].tag == "dt":
        step += 1
    # PW: the head word itself.  The bounds check matters: a root without any
    # pw child would otherwise raise IndexError here.
    if step >= len(s) or s[step].tag != "pw":
        return None
    step += 1
    # DT*: trailing dependents up to the first conj.
    while (step < len(s) and s[step].tag == "dt"
           and s[step].attrib["dependency"] != "conj"):
        step += 1
    head_end = step  # children [0, head_end) form the first clause

    # Read-only pass over the clause-level conj children: decide whether the
    # rule applies before touching the tree.
    or_flag = False
    while (step < len(s) and s[step].tag == "dt"
           and s[step].attrib["dependency"] == "conj"
           and search_in_bros(lambda dt: dt.attrib["dependency"] == "nsubj",
                              s[step]) != -1):
        for child in s[step]:
            if child.tag != "dt" or child.attrib["lemma"] not in (",", "or"):
                break
            if child.attrib["lemma"] == "or":
                or_flag = True
        else:
            # The conjunct holds nothing but "or"/",": nothing to split.
            return None
        step += 1
    if not or_flag:
        return None

    # The rule applies: now rewrite the tree.
    main_s = s[0:head_end]
    s_list = []  # the split-off clauses
    for conj in s[head_end:step]:
        # Drop the leading "or"/"," and any trailing ",".
        while conj[0].tag == "dt" and conj[0].attrib["lemma"] in (",", "or"):
            conj.remove(conj[0])
        while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
            conj.remove(conj[-1])
        conj.attrib["dependency"] = "root"  # the clause becomes a root
        s_list.append(conj)

    # Whatever follows the conj run is shared by every clause.
    modification = s[step:]
    main_s.extend(modification)
    for _s in s_list:
        _s.extend(copy.deepcopy(modification))

    # Rebuild the original root so that it holds only the first clause.
    remove_all_children(s)
    s.extend(main_s)
    while s[-1].tag == "dt" and s[-1].attrib["lemma"] == ",":
        s.remove(s[-1])

    new_entries = [[entry]]
    for _s in s_list:
        new_entry = copy.deepcopy(entry)
        new_entry.tree = _s
        new_entries.append([new_entry])
    return new_entries
# Add the TRUE_S_OR_S descriptor tuple to the shared rule_set.
rule_set.add(rule)

# Try the rule on sample entries via test_rule (defined outside this section).
# The first two sentences use "and" rather than "or".
test_rule(rule, [TabEntry(True, "Alice didn't run, Bob got an email and Smith replied it.")])
test_rule(rule, [TabEntry(True, "Alice didn't love him and he love her.")])
test_rule(rule, [TabEntry(True, "Alice didn't run, Bob got an email or Smith replied it.")])
test_rule(rule, [TabEntry(True, "Alice didn't love him or he love her.")])

In [None]:
rule = (1, "FALSE_S_OR_S", "EQ")
def FALSE_S_OR_S(entries):
    """Split "or"-coordinated clauses of a False-signed entry.

    Applies when the root of ``entry.tree`` has ``upos == "VERB"`` and its
    children are laid out as ``DT* PW DT* (DT[conj])* rest``, where each
    ``conj`` child contains an ``nsubj`` (according to ``search_in_bros``) and
    at least one of them starts with the lemma "or".  ``entry`` is rewritten
    in place to keep only the first clause (plus ``rest``); every further
    clause becomes the whole tree of a deep copy of ``entry``.

    The conj children are inspected in a read-only pass first, so an entry
    without a leading "or" is returned to the caller unmodified.

    Args:
        entries: Sequence whose first element is the entry to decompose; only
            its ``sign`` and ``tree`` attributes are read here.

    Returns:
        A list holding a single branch: ``[[entry, copy1, ...]]``.
        ``None`` when the sign is not False or the pattern does not match.
    """
    entry = entries[0]
    if entry.sign != False:
        return None

    if entry.tree.attrib["upos"] != "VERB":
        return None
    s = entry.tree
    step = 0
    # DT*: leading dependents.
    while step < len(s) and s[step].tag == "dt":
        step += 1
    # PW: the head word itself.  The bounds check matters: a root without any
    # pw child would otherwise raise IndexError here.
    if step >= len(s) or s[step].tag != "pw":
        return None
    step += 1
    # DT*: trailing dependents up to the first conj.
    while (step < len(s) and s[step].tag == "dt"
           and s[step].attrib["dependency"] != "conj"):
        step += 1
    head_end = step  # children [0, head_end) form the first clause

    # Read-only pass over the clause-level conj children: decide whether the
    # rule applies before touching the tree.
    or_flag = False
    while (step < len(s) and s[step].tag == "dt"
           and s[step].attrib["dependency"] == "conj"
           and search_in_bros(lambda dt: dt.attrib["dependency"] == "nsubj",
                              s[step]) != -1):
        for child in s[step]:
            if child.tag != "dt" or child.attrib["lemma"] not in (",", "or"):
                break
            if child.attrib["lemma"] == "or":
                or_flag = True
        else:
            # The conjunct holds nothing but "or"/",": nothing to split.
            return None
        step += 1
    if not or_flag:
        return None

    # The rule applies: now rewrite the tree.
    main_s = s[0:head_end]
    s_list = []  # the split-off clauses
    for conj in s[head_end:step]:
        # Drop the leading "or"/"," and any trailing ",".
        while conj[0].tag == "dt" and conj[0].attrib["lemma"] in (",", "or"):
            conj.remove(conj[0])
        while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
            conj.remove(conj[-1])
        conj.attrib["dependency"] = "root"  # the clause becomes a root
        s_list.append(conj)

    # Whatever follows the conj run is shared by every clause.
    modification = s[step:]
    main_s.extend(modification)
    for _s in s_list:
        _s.extend(copy.deepcopy(modification))

    # Rebuild the original root so that it holds only the first clause.
    remove_all_children(s)
    s.extend(main_s)
    while s[-1].tag == "dt" and s[-1].attrib["lemma"] == ",":
        s.remove(s[-1])

    new_entries = [entry]
    for _s in s_list:
        new_entry = copy.deepcopy(entry)
        new_entry.tree = _s
        new_entries.append(new_entry)
    return [new_entries]
# Add the FALSE_S_OR_S descriptor tuple to the shared rule_set.
rule_set.add(rule)

# Try the rule on sample entries via test_rule (defined outside this section).
# The first two sentences use "and" rather than "or".
test_rule(rule, [TabEntry(False, "Alice didn't run, Bob got an email and Smith replied it.")])
test_rule(rule, [TabEntry(False, "Alice didn't love him and he love her.")])
test_rule(rule, [TabEntry(False, "Alice didn't run, Bob got an email or Smith replied it.")])
test_rule(rule, [TabEntry(False, "Alice didn't love him or he love her.")])

## NSUBJ1 or NSUBJ2

In [None]:
rule = (1, "TRUE_NSUBJ_OR_NSUBJ", "EQ")
def TRUE_NSUBJ_OR_NSUBJ(entries):
    """Split an "or"-coordinated ``nsubj`` of a True-signed entry into branches.

    The child of ``entry.tree`` whose dependency is ``nsubj`` is expected to
    have children laid out as ``DT* PW DT* (DT[conj])* rest``.  When at least
    one ``conj`` child starts with the lemma "or", ``entry`` is rewritten in
    place so that its nsubj keeps only the first conjunct (plus ``rest``), and
    every further conjunct becomes the nsubj of a deep copy of ``entry``.

    The conj children are inspected in a read-only pass first, so an entry
    without a leading "or" is returned to the caller unmodified.

    Args:
        entries: Sequence whose first element is the entry to decompose; only
            its ``sign`` and ``tree`` attributes are read here.

    Returns:
        A list of branches, each a one-element list: ``[[entry], [copy1], ...]``.
        ``None`` when the sign is not True, the pattern does not match, or an
        error occurs while walking/rewriting the tree.
    """
    entry = entries[0]
    if entry.sign != True:
        return None
    # Index of the root's child that carries the "nsubj" dependency.
    nsubj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "nsubj",
                           entry.tree)
    # Requires an nsubj, no embedded sentence and no negation (per the helpers).
    if (nsubj_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return None
    try:
        nsubj = entry.tree[nsubj_i]
        step = 0
        # DT*: leading dependents.
        while step < len(nsubj) and nsubj[step].tag == "dt":
            step += 1
        # PW: the head word itself.
        if step >= len(nsubj) or nsubj[step].tag != "pw":
            return None
        step += 1
        # DT*: trailing dependents up to the first conj.
        while (step < len(nsubj) and nsubj[step].tag == "dt"
               and nsubj[step].attrib["dependency"] != "conj"):
            step += 1
        head_end = step  # children [0, head_end) form the first subject

        # Read-only pass over the conj children: decide whether the rule
        # applies before touching the tree.
        or_flag = False
        while (step < len(nsubj) and nsubj[step].tag == "dt"
               and nsubj[step].attrib["dependency"] == "conj"):
            for child in nsubj[step]:
                if child.tag != "dt" or child.attrib["lemma"] not in (",", "or"):
                    break
                if child.attrib["lemma"] == "or":
                    or_flag = True
            else:
                # The conjunct holds nothing but "or"/",": nothing to split.
                return None
            step += 1
        if not or_flag:
            return None

        # The rule applies: now rewrite the tree.
        main_nsubj = nsubj[0:head_end]
        nsubj_list = []  # the split-off subjects
        for conj in nsubj[head_end:step]:
            # Drop the leading "or"/"," and any trailing ",".
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in (",", "or"):
                conj.remove(conj[0])
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "nsubj"  # promote the conjunct to nsubj
            nsubj_list.append(conj)

        # Whatever follows the conj run is shared by every conjunct.
        modification = nsubj[step:]
        main_nsubj.extend(modification)
        for _nsubj in nsubj_list:
            _nsubj.extend(copy.deepcopy(modification))

        # Rebuild the original nsubj so that it holds only the first subject.
        remove_all_children(nsubj)
        nsubj.extend(main_nsubj)
        while nsubj[-1].tag == "dt" and nsubj[-1].attrib["lemma"] == ",":
            nsubj.remove(nsubj[-1])

        new_branches = [[entry]]
        for _nsubj in nsubj_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[nsubj_i], _nsubj)
            new_branches.append([new_entry])
        return new_branches
    except Exception:
        # Best effort: any structural surprise means "rule not applicable".
        # (Exception, not a bare except, so Ctrl-C still interrupts.)
        return None
# Add the TRUE_NSUBJ_OR_NSUBJ descriptor tuple to the shared rule_set.
rule_set.add(rule)

# Try the rule on sample entries via test_rule (defined outside this section).
# The last entry is False-signed, i.e. the sign TRUE_NSUBJ_OR_NSUBJ rejects.
test_rule(rule, [TabEntry(True, "Alice, Bob, Smith, or Chris work.")])
test_rule(rule, [TabEntry(True, "The apple or the orange is delicious")])
test_rule(rule, [TabEntry(False, "The apple or the orange is delicious")])

In [None]:
rule = (1, "FALSE_NSUBJ_OR_NSUBJ", "EQ")
def FALSE_NSUBJ_OR_NSUBJ(entries):
    """Split an "or"-coordinated ``nsubj`` of a False-signed entry.

    The child of ``entry.tree`` whose dependency is ``nsubj`` is expected to
    have children laid out as ``DT* PW DT* (DT[conj])* rest``.  When at least
    one ``conj`` child starts with the lemma "or", ``entry`` is rewritten in
    place so that its nsubj keeps only the first conjunct (plus ``rest``), and
    every further conjunct becomes the nsubj of a deep copy of ``entry``.

    The conj children are inspected in a read-only pass first, so an entry
    without a leading "or" is returned to the caller unmodified.

    Args:
        entries: Sequence whose first element is the entry to decompose; only
            its ``sign`` and ``tree`` attributes are read here.

    Returns:
        A list holding a single branch: ``[[entry, copy1, ...]]``.
        ``None`` when the sign is not False, the pattern does not match, or an
        error occurs while walking/rewriting the tree.
    """
    entry = entries[0]
    if entry.sign != False:
        return None
    # Index of the root's child that carries the "nsubj" dependency.
    nsubj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "nsubj",
                           entry.tree)
    # Requires an nsubj, no embedded sentence and no negation (per the helpers).
    if (nsubj_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return None
    try:
        nsubj = entry.tree[nsubj_i]
        step = 0
        # DT*: leading dependents.
        while step < len(nsubj) and nsubj[step].tag == "dt":
            step += 1
        # PW: the head word itself.
        if step >= len(nsubj) or nsubj[step].tag != "pw":
            return None
        step += 1
        # DT*: trailing dependents up to the first conj.
        while (step < len(nsubj) and nsubj[step].tag == "dt"
               and nsubj[step].attrib["dependency"] != "conj"):
            step += 1
        head_end = step  # children [0, head_end) form the first subject

        # Read-only pass over the conj children: decide whether the rule
        # applies before touching the tree.
        or_flag = False
        while (step < len(nsubj) and nsubj[step].tag == "dt"
               and nsubj[step].attrib["dependency"] == "conj"):
            for child in nsubj[step]:
                if child.tag != "dt" or child.attrib["lemma"] not in (",", "or"):
                    break
                if child.attrib["lemma"] == "or":
                    or_flag = True
            else:
                # The conjunct holds nothing but "or"/",": nothing to split.
                return None
            step += 1
        if not or_flag:
            return None

        # The rule applies: now rewrite the tree.
        main_nsubj = nsubj[0:head_end]
        nsubj_list = []  # the split-off subjects
        for conj in nsubj[head_end:step]:
            # Drop the leading "or"/"," and any trailing ",".
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in (",", "or"):
                conj.remove(conj[0])
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "nsubj"  # promote the conjunct to nsubj
            nsubj_list.append(conj)

        # Whatever follows the conj run is shared by every conjunct.
        modification = nsubj[step:]
        main_nsubj.extend(modification)
        for _nsubj in nsubj_list:
            _nsubj.extend(copy.deepcopy(modification))

        # Rebuild the original nsubj so that it holds only the first subject.
        remove_all_children(nsubj)
        nsubj.extend(main_nsubj)
        while nsubj[-1].tag == "dt" and nsubj[-1].attrib["lemma"] == ",":
            nsubj.remove(nsubj[-1])

        new_entries = [entry]
        for _nsubj in nsubj_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[nsubj_i], _nsubj)
            new_entries.append(new_entry)
        return [new_entries]
    except Exception:
        # Best effort: any structural surprise means "rule not applicable".
        # (Exception, not a bare except, so Ctrl-C still interrupts.)
        return None
# Add the FALSE_NSUBJ_OR_NSUBJ descriptor tuple to the shared rule_set.
rule_set.add(rule)

# Try the rule on sample entries via test_rule (defined outside this section).
test_rule(rule, [TabEntry(False, "Alice, Bob, Smith or Chris work.")])
test_rule(rule, [TabEntry(False, "The apple or the orange is delicious")])

## CSUBJ1 or CSUBJ2

In [None]:
# Descriptor tuple for TRUE_CSUBJ_OR_CSUBJ; the second field is the name of
# the function defined right below.
# NOTE(review): the meaning of the 1 and "EQ" fields is not visible here.
rule = (1, "TRUE_CSUBJ_OR_CSUBJ", "EQ")
def TRUE_CSUBJ_OR_CSUBJ(entries):
    entry = entries[0]
    if entry.sign != True:
        return
    # print(entry.pretty_print())
    
    csubj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "csubj",
                           entry.tree)
    # csubjが存在し，文を内部に持たず,否定語を持たない
    if csubj_i != -1 and include_sentence(entry.tree) == -1 and include_negation(entry.tree) == -1:
        try:
            csubj = entry.tree[csubj_i] # csubjオブジェクト
            # DT* PW DT* (DT{conj})* DT("and"が[0]要素)というパターンである
            csubj_list = [] # 分割したcsubjのリスト
            step = 0
            # DT*に相当
            while len(csubj) > step:
                if csubj[step].tag == "dt":
                    step += 1
                else:
                    break
            # PW
            if csubj[step].tag == "pw":
                step += 1
            else:
                return
            # DT*に相当
            while len(csubj) > step:
                if csubj[step].tag == "dt" and csubj[step].attrib["dependency"] != "conj":
                    step += 1
                else:
                    break
            # PW
            main_csubj = csubj[0:step] # 最初の主語の範囲は[0, step]

            or_flag = False
            # (DT{and ,})*に相当
            while len(csubj) > step:
                if csubj[step].tag == "dt" and csubj[step].attrib["dependency"] == "conj":
                    while csubj[step][0].tag == "dt" and csubj[step][0].attrib["lemma"] in [",", "or"]:
                        if csubj[step][0].attrib["lemma"] == "or":
                            or_flag = True
                        csubj[step].remove(csubj[step][0])
                    while csubj[step][-1].tag == "dt" and csubj[step][-1].attrib["lemma"] == ",":
                        csubj[step].remove(csubj[step][-1])
                    csubj[step].attrib["dependency"] = "csubj" # 依存関係をcsubjにする
                    csubj_list.append(csubj[step])
                    step += 1
                else:
                    break
            if or_flag is False:
                return
            
            modification = csubj[step:]
            main_csubj.extend(modification)
            for _csubj in csubj_list:
                _csubj.extend(copy.deepcopy(modification))

            # print(entry.pretty_print())

            # メインの主語を持つ木を生成
            remove_all_children(csubj)
            csubj.extend(main_csubj)
            while csubj[-1].tag == "dt" and csubj[-1].attrib["lemma"] == ",":
                csubj.remove(csubj[-1])

            new_branches = [[entry]]

            # 主語リスト
            for _csubj in csubj_list:
                new_entry = copy.deepcopy(entry)
                new_entry.tree.replace(new_entry.tree[csubj_i], _csubj)
                new_branches.append([new_entry])

            return new_branches
        except:
            return

# Register the rule, then run it on True-signed sample sentences whose
# subject is an "or" coordination of clauses.
rule_set.add(rule)

test_rule(rule, [TabEntry(True, "To create works or to like it is what I want")])
test_rule(rule, [TabEntry(True, "Creating works or liking it is what I want")])

In [None]:
rule = (1, "FALSE_CSUBJ_OR_CSUBJ", "EQ")
def FALSE_CSUBJ_OR_CSUBJ(entries):
    """Expand a False entry on an "or"-coordinated clausal subject (csubj).

    The csubj child of ``entry.tree`` must match
    ``dt* pw dt* (dt{conj})* rest``. Each conj child may start with
    "," / "or" tokens, and at least one "or" is required. Every conj child
    is relabelled as a csubj and swapped in for the original csubj in a
    deep copy of the entry; ``rest`` is appended to every disjunct.

    Args:
        entries: Sequence whose first item is the entry to decompose.

    Returns:
        A single branch holding one entry per disjunct
        (``[[entry, copy, ...]]``), or None if the sign is not False, the
        tree has no csubj, contains a sentence or a negation, or does not
        match the pattern.

    Note:
        ``entry.tree`` is edited in place. Stripping "," / "or" and
        relabelling the conj children happen before the "or" check, so
        those edits persist even when None is returned.
    """
    entry = entries[0]
    if entry.sign != False:
        return

    csubj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "csubj",
                             entry.tree)
    # Requires a csubj, no embedded sentence and no negation word.
    if (csubj_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return

    try:
        csubj = entry.tree[csubj_i]
        step = 0
        # Leading "dt" children.
        while step < len(csubj) and csubj[step].tag == "dt":
            step += 1
        # A "pw" child must follow (IndexError if there is none).
        if csubj[step].tag != "pw":
            return
        step += 1
        # "dt" children after the "pw", up to the first conj.
        while (step < len(csubj) and csubj[step].tag == "dt"
               and csubj[step].attrib["dependency"] != "conj"):
            step += 1
        main_csubj = csubj[0:step]  # children of the first disjunct

        # Consecutive conj children: each one is a further disjunct.
        csubj_list = []
        or_flag = False
        while step < len(csubj):
            conj = csubj[step]
            if conj.tag != "dt" or conj.attrib["dependency"] != "conj":
                break
            # Strip the leading "," / "or" connectives.
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in [",", "or"]:
                if conj[0].attrib["lemma"] == "or":
                    or_flag = True
                conj.remove(conj[0])
            # Strip trailing commas.
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "csubj"  # promote conj to csubj
            csubj_list.append(conj)
            step += 1
        if not or_flag:
            return

        # Whatever follows the conj run is shared by every disjunct.
        modification = csubj[step:]
        main_csubj.extend(modification)
        for _csubj in csubj_list:
            _csubj.extend(copy.deepcopy(modification))

        # Rebuild the original csubj so it holds only the first disjunct.
        remove_all_children(csubj)
        csubj.extend(main_csubj)
        while csubj[-1].tag == "dt" and csubj[-1].attrib["lemma"] == ",":
            csubj.remove(csubj[-1])

        new_entries = [entry]
        # All disjunct entries go into the same branch.
        for _csubj in csubj_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[csubj_i], _csubj)
            new_entries.append(new_entry)
        return [new_entries]
    except Exception:
        # Best effort: a tree that does not fit the pattern means the rule
        # does not apply. KeyboardInterrupt/SystemExit still propagate.
        return

# Register the rule, then run it on False-signed sample sentences whose
# subject is an "or" coordination of clauses.
rule_set.add(rule)

test_rule(rule, [TabEntry(False, "To create works or to like it is what I want")])
test_rule(rule, [TabEntry(False, "Creating works or liking it is what I want")])

## OBJ1 or OBJ2

In [None]:
rule = (1, "TRUE_OBJ_OR_OBJ", "EQ")
def TRUE_OBJ_OR_OBJ(entries):
    """Branch a True entry on an "or"-coordinated object (obj).

    The obj child of ``entry.tree`` must match
    ``dt* pw dt* (dt{conj})* rest``. Each conj child may start with
    "," / "or" tokens, and at least one "or" is required. Every conj child
    is relabelled as an obj and swapped in for the original obj in a deep
    copy of the entry; ``rest`` is appended to every disjunct.

    Args:
        entries: Sequence whose first item is the entry to decompose.

    Returns:
        One single-entry branch per disjunct (``[[entry], [copy], ...]``),
        or None if the sign is not True, the tree has no obj, contains a
        sentence or a negation, or does not match the pattern.

    Note:
        ``entry.tree`` is edited in place. Stripping "," / "or" and
        relabelling the conj children happen before the "or" check, so
        those edits persist even when None is returned.
    """
    entry = entries[0]
    if entry.sign != True:
        return

    obj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "obj",
                           entry.tree)
    # Requires an obj, no embedded sentence and no negation word.
    if (obj_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return

    try:
        obj = entry.tree[obj_i]
        step = 0
        # Leading "dt" children.
        while step < len(obj) and obj[step].tag == "dt":
            step += 1
        # A "pw" child must follow (IndexError if there is none).
        if obj[step].tag != "pw":
            return
        step += 1
        # "dt" children after the "pw", up to the first conj.
        while (step < len(obj) and obj[step].tag == "dt"
               and obj[step].attrib["dependency"] != "conj"):
            step += 1
        main_obj = obj[0:step]  # children of the first disjunct

        # Consecutive conj children: each one is a further disjunct.
        obj_list = []
        or_flag = False
        while step < len(obj):
            conj = obj[step]
            if conj.tag != "dt" or conj.attrib["dependency"] != "conj":
                break
            # Strip the leading "," / "or" connectives.
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in [",", "or"]:
                if conj[0].attrib["lemma"] == "or":
                    or_flag = True
                conj.remove(conj[0])
            # Strip trailing commas.
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "obj"  # promote conj to obj
            obj_list.append(conj)
            step += 1
        if not or_flag:
            return

        # Whatever follows the conj run is shared by every disjunct.
        modification = obj[step:]
        main_obj.extend(modification)
        for _obj in obj_list:
            _obj.extend(copy.deepcopy(modification))

        # Rebuild the original obj so it holds only the first disjunct.
        remove_all_children(obj)
        obj.extend(main_obj)
        while obj[-1].tag == "dt" and obj[-1].attrib["lemma"] == ",":
            obj.remove(obj[-1])

        new_branches = [[entry]]
        # One new branch per remaining disjunct.
        for _obj in obj_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[obj_i], _obj)
            new_branches.append([new_entry])
        return new_branches
    except Exception:
        # Best effort: a tree that does not fit the pattern means the rule
        # does not apply. KeyboardInterrupt/SystemExit still propagate.
        return
# Register the rule, then run it on sample sentences with an "or"
# coordinated object. The last case is signed False, which does not match
# this rule's sign (presumably a negative case).
rule_set.add(rule)

test_rule(rule, [TabEntry(True, "Alice likes apple or orange.")])
test_rule(rule, [TabEntry(True, "He ate beaf or chicken.")])
test_rule(rule, [TabEntry(False, "He ate beaf or chicken.")])

In [None]:
rule = (1, "FALSE_OBJ_OR_OBJ", "EQ")
def FALSE_OBJ_OR_OBJ(entries):
    """Expand a False entry on an "or"-coordinated object (obj).

    The obj child of ``entry.tree`` must match
    ``dt* pw dt* (dt{conj})* rest``. Each conj child may start with
    "," / "or" tokens, and at least one "or" is required. Every conj child
    is relabelled as an obj and swapped in for the original obj in a deep
    copy of the entry; ``rest`` is appended to every disjunct.

    Args:
        entries: Sequence whose first item is the entry to decompose.

    Returns:
        A single branch holding one entry per disjunct
        (``[[entry, copy, ...]]``), or None if the sign is not False, the
        tree has no obj, contains a sentence or a negation, or does not
        match the pattern.

    Note:
        ``entry.tree`` is edited in place. Stripping "," / "or" and
        relabelling the conj children happen before the "or" check, so
        those edits persist even when None is returned.
    """
    entry = entries[0]
    if entry.sign != False:
        return

    obj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "obj",
                           entry.tree)
    # Requires an obj, no embedded sentence and no negation word.
    if (obj_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return

    try:
        obj = entry.tree[obj_i]
        step = 0
        # Leading "dt" children.
        while step < len(obj) and obj[step].tag == "dt":
            step += 1
        # A "pw" child must follow (IndexError if there is none).
        if obj[step].tag != "pw":
            return
        step += 1
        # "dt" children after the "pw", up to the first conj.
        while (step < len(obj) and obj[step].tag == "dt"
               and obj[step].attrib["dependency"] != "conj"):
            step += 1
        main_obj = obj[0:step]  # children of the first disjunct

        # Consecutive conj children: each one is a further disjunct.
        obj_list = []
        or_flag = False
        while step < len(obj):
            conj = obj[step]
            if conj.tag != "dt" or conj.attrib["dependency"] != "conj":
                break
            # Strip the leading "," / "or" connectives.
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in [",", "or"]:
                if conj[0].attrib["lemma"] == "or":
                    or_flag = True
                conj.remove(conj[0])
            # Strip trailing commas.
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "obj"  # promote conj to obj
            obj_list.append(conj)
            step += 1
        if not or_flag:
            return

        # Whatever follows the conj run is shared by every disjunct.
        modification = obj[step:]
        main_obj.extend(modification)
        for _obj in obj_list:
            _obj.extend(copy.deepcopy(modification))

        # Rebuild the original obj so it holds only the first disjunct.
        remove_all_children(obj)
        obj.extend(main_obj)
        while obj[-1].tag == "dt" and obj[-1].attrib["lemma"] == ",":
            obj.remove(obj[-1])

        new_entries = [entry]
        # All disjunct entries go into the same branch.
        for _obj in obj_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[obj_i], _obj)
            new_entries.append(new_entry)
        return [new_entries]
    except Exception:
        # Best effort: a tree that does not fit the pattern means the rule
        # does not apply. KeyboardInterrupt/SystemExit still propagate.
        return
# Register the rule, then run it on sample sentences with an "or"
# coordinated object. The last case is signed True, which does not match
# this rule's sign (presumably a negative case).
rule_set.add(rule)

test_rule(rule, [TabEntry(False, "Alice likes apple or orange.")])
test_rule(rule, [TabEntry(False, "He ate beaf or chicken.")])
test_rule(rule, [TabEntry(True, "He ate beaf or chicken.")])

## IOBJ1 or IOBJ2

In [None]:
rule = (1, "TRUE_IOBJ_OR_IOBJ","EQ")
def TRUE_IOBJ_OR_IOBJ(entries):
    """Branch a True entry on an "or"-coordinated indirect object (iobj).

    The iobj child of ``entry.tree`` must match
    ``dt* pw dt* (dt{conj})* rest``. Each conj child may start with
    "," / "or" tokens, and at least one "or" is required. Every conj child
    is relabelled as an iobj and swapped in for the original iobj in a
    deep copy of the entry; ``rest`` is appended to every disjunct.

    Args:
        entries: Sequence whose first item is the entry to decompose.

    Returns:
        One single-entry branch per disjunct (``[[entry], [copy], ...]``),
        or None if the sign is not True, the tree has no iobj, contains a
        sentence or a negation, or does not match the pattern.

    Note:
        ``entry.tree`` is edited in place. Stripping "," / "or" and
        relabelling the conj children happen before the "or" check, so
        those edits persist even when None is returned.
    """
    entry = entries[0]
    if entry.sign != True:
        return

    iobj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "iobj",
                            entry.tree)
    # Requires an iobj, no embedded sentence and no negation word.
    if (iobj_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return

    try:
        iobj = entry.tree[iobj_i]
        step = 0
        # Leading "dt" children.
        while step < len(iobj) and iobj[step].tag == "dt":
            step += 1
        # A "pw" child must follow (IndexError if there is none).
        if iobj[step].tag != "pw":
            return
        step += 1
        # "dt" children after the "pw", up to the first conj.
        while (step < len(iobj) and iobj[step].tag == "dt"
               and iobj[step].attrib["dependency"] != "conj"):
            step += 1
        main_iobj = iobj[0:step]  # children of the first disjunct

        # Consecutive conj children: each one is a further disjunct.
        iobj_list = []
        or_flag = False
        while step < len(iobj):
            conj = iobj[step]
            if conj.tag != "dt" or conj.attrib["dependency"] != "conj":
                break
            # Strip the leading "," / "or" connectives.
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in [",", "or"]:
                if conj[0].attrib["lemma"] == "or":
                    or_flag = True
                conj.remove(conj[0])
            # Strip trailing commas, as the sibling *_OR_* rules do.
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "iobj"  # promote conj to iobj
            iobj_list.append(conj)
            step += 1
        if not or_flag:
            return

        # Whatever follows the conj run is shared by every disjunct.
        modification = iobj[step:]
        main_iobj.extend(modification)
        for _iobj in iobj_list:
            _iobj.extend(copy.deepcopy(modification))

        # Rebuild the original iobj so it holds only the first disjunct.
        remove_all_children(iobj)
        iobj.extend(main_iobj)
        while iobj[-1].tag == "dt" and iobj[-1].attrib["lemma"] == ",":
            iobj.remove(iobj[-1])

        new_branches = [[entry]]
        # One new branch per remaining disjunct.
        for _iobj in iobj_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[iobj_i], _iobj)
            new_branches.append([new_entry])
        return new_branches
    except Exception:
        # Best effort: a tree that does not fit the pattern means the rule
        # does not apply. KeyboardInterrupt/SystemExit still propagate.
        return
# Register the rule, then run it on a True-signed sample sentence with an
# "or" coordinated indirect object.
rule_set.add(rule)

test_rule(rule, [TabEntry(True, "He gave her or her sister a book.")])

In [None]:
rule = (1, "FALSE_IOBJ_OR_IOBJ","EQ")
def FALSE_IOBJ_OR_IOBJ(entries):
    """Expand a False entry on an "or"-coordinated indirect object (iobj).

    The iobj child of ``entry.tree`` must match
    ``dt* pw dt* (dt{conj})* rest``. Each conj child may start with
    "," / "or" tokens, and at least one "or" is required. Every conj child
    is relabelled as an iobj and swapped in for the original iobj in a
    deep copy of the entry; ``rest`` is appended to every disjunct.

    Args:
        entries: Sequence whose first item is the entry to decompose.

    Returns:
        A single branch holding one entry per disjunct
        (``[[entry, copy, ...]]``), or None if the sign is not False, the
        tree has no iobj, contains a sentence or a negation, or does not
        match the pattern.

    Note:
        ``entry.tree`` is edited in place. Stripping "," / "or" and
        relabelling the conj children happen before the "or" check, so
        those edits persist even when None is returned.
    """
    entry = entries[0]
    if entry.sign != False:
        return

    iobj_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "iobj",
                            entry.tree)
    # Requires an iobj, no embedded sentence and no negation word.
    if (iobj_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return

    try:
        iobj = entry.tree[iobj_i]
        step = 0
        # Leading "dt" children.
        while step < len(iobj) and iobj[step].tag == "dt":
            step += 1
        # A "pw" child must follow (IndexError if there is none).
        if iobj[step].tag != "pw":
            return
        step += 1
        # "dt" children after the "pw", up to the first conj.
        while (step < len(iobj) and iobj[step].tag == "dt"
               and iobj[step].attrib["dependency"] != "conj"):
            step += 1
        main_iobj = iobj[0:step]  # children of the first disjunct

        # Consecutive conj children: each one is a further disjunct.
        iobj_list = []
        or_flag = False
        while step < len(iobj):
            conj = iobj[step]
            if conj.tag != "dt" or conj.attrib["dependency"] != "conj":
                break
            # Strip the leading "," / "or" connectives.
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in [",", "or"]:
                if conj[0].attrib["lemma"] == "or":
                    or_flag = True
                conj.remove(conj[0])
            # Strip trailing commas, as the sibling *_OR_* rules do.
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "iobj"  # promote conj to iobj
            iobj_list.append(conj)
            step += 1
        if not or_flag:
            return

        # Whatever follows the conj run is shared by every disjunct.
        modification = iobj[step:]
        main_iobj.extend(modification)
        for _iobj in iobj_list:
            _iobj.extend(copy.deepcopy(modification))

        # Rebuild the original iobj so it holds only the first disjunct.
        remove_all_children(iobj)
        iobj.extend(main_iobj)
        while iobj[-1].tag == "dt" and iobj[-1].attrib["lemma"] == ",":
            iobj.remove(iobj[-1])

        new_entries = [entry]
        # All disjunct entries go into the same branch.
        for _iobj in iobj_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[iobj_i], _iobj)
            new_entries.append(new_entry)
        return [new_entries]
    except Exception:
        # Best effort: a tree that does not fit the pattern means the rule
        # does not apply. KeyboardInterrupt/SystemExit still propagate.
        return
# Register the rule, then run it on a False-signed sample sentence with an
# "or" coordinated indirect object.
rule_set.add(rule)

test_rule(rule, [TabEntry(False, "He gave her or her sister a book.")])

## XCOMP1 or XCOMP2

In [None]:
rule = (1, "TRUE_XCOMP_OR_XCOMP", "EQ")
def TRUE_XCOMP_OR_XCOMP(entries):
    """Branch a True entry on an "or"-coordinated open complement (xcomp).

    The xcomp child of ``entry.tree`` must match
    ``dt* pw dt* (dt{conj})* rest``. Each conj child may start with
    "," / "or" tokens, and at least one "or" is required. Every conj child
    is relabelled as an xcomp and swapped in for the original xcomp in a
    deep copy of the entry; ``rest`` is appended to every disjunct.

    Args:
        entries: Sequence whose first item is the entry to decompose.

    Returns:
        One single-entry branch per disjunct (``[[entry], [copy], ...]``),
        or None if the sign is not True, the tree has no xcomp, contains
        a sentence or a negation, or does not match the pattern.

    Note:
        ``entry.tree`` is edited in place. Stripping "," / "or" and
        relabelling the conj children happen before the "or" check, so
        those edits persist even when None is returned.
    """
    entry = entries[0]
    if entry.sign != True:
        return

    xcomp_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "xcomp",
                             entry.tree)
    # Requires an xcomp, no embedded sentence and no negation word.
    if (xcomp_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return

    try:
        xcomp = entry.tree[xcomp_i]
        step = 0
        # Leading "dt" children.
        while step < len(xcomp) and xcomp[step].tag == "dt":
            step += 1
        # A "pw" child must follow (IndexError if there is none).
        if xcomp[step].tag != "pw":
            return
        step += 1
        # "dt" children after the "pw", up to the first conj.
        while (step < len(xcomp) and xcomp[step].tag == "dt"
               and xcomp[step].attrib["dependency"] != "conj"):
            step += 1
        main_xcomp = xcomp[0:step]  # children of the first disjunct

        # Consecutive conj children: each one is a further disjunct.
        xcomp_list = []
        or_flag = False
        while step < len(xcomp):
            conj = xcomp[step]
            if conj.tag != "dt" or conj.attrib["dependency"] != "conj":
                break
            # Strip the leading "," / "or" connectives.
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in [",", "or"]:
                if conj[0].attrib["lemma"] == "or":
                    or_flag = True
                conj.remove(conj[0])
            # Strip trailing commas.
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "xcomp"  # promote conj to xcomp
            xcomp_list.append(conj)
            step += 1
        if not or_flag:
            return

        # Whatever follows the conj run is shared by every disjunct.
        modification = xcomp[step:]
        main_xcomp.extend(modification)
        for _xcomp in xcomp_list:
            _xcomp.extend(copy.deepcopy(modification))

        # Rebuild the original xcomp so it holds only the first disjunct.
        remove_all_children(xcomp)
        xcomp.extend(main_xcomp)
        while xcomp[-1].tag == "dt" and xcomp[-1].attrib["lemma"] == ",":
            xcomp.remove(xcomp[-1])

        new_branches = [[entry]]
        # One new branch per remaining disjunct.
        for _xcomp in xcomp_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[xcomp_i], _xcomp)
            new_branches.append([new_entry])
        return new_branches
    except Exception:
        # Best effort: a tree that does not fit the pattern means the rule
        # does not apply. KeyboardInterrupt/SystemExit still propagate.
        return
# Register the rule, then run it on True-signed sample sentences; the
# second sentence contains no "or" (presumably a negative case).
rule_set.add(rule)

test_rule(rule, [TabEntry(True, "He likes to swim or to run.")])
test_rule(rule, [TabEntry(True, "Sue asked George to respond to her offer.")])
test_rule(rule, [TabEntry(True, "Sue asked George to respond or to meet to her offer.")])

In [None]:
rule = (1, "FALSE_XCOMP_OR_XCOMP", "EQ")
def FALSE_XCOMP_OR_XCOMP(entries):
    """Expand a False entry on an "or"-coordinated open complement (xcomp).

    The xcomp child of ``entry.tree`` must match
    ``dt* pw dt* (dt{conj})* rest``. Each conj child may start with
    "," / "or" tokens, and at least one "or" is required. Every conj child
    is relabelled as an xcomp and swapped in for the original xcomp in a
    deep copy of the entry; ``rest`` is appended to every disjunct.

    Args:
        entries: Sequence whose first item is the entry to decompose.

    Returns:
        A single branch holding one entry per disjunct
        (``[[entry, copy, ...]]``), or None if the sign is not False, the
        tree has no xcomp, contains a sentence or a negation, or does not
        match the pattern.

    Note:
        ``entry.tree`` is edited in place. Stripping "," / "or" and
        relabelling the conj children happen before the "or" check, so
        those edits persist even when None is returned.
    """
    entry = entries[0]
    if entry.sign != False:
        return

    xcomp_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "xcomp",
                             entry.tree)
    # Requires an xcomp, no embedded sentence and no negation word.
    if (xcomp_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return

    try:
        xcomp = entry.tree[xcomp_i]
        step = 0
        # Leading "dt" children.
        while step < len(xcomp) and xcomp[step].tag == "dt":
            step += 1
        # A "pw" child must follow (IndexError if there is none).
        if xcomp[step].tag != "pw":
            return
        step += 1
        # "dt" children after the "pw", up to the first conj.
        while (step < len(xcomp) and xcomp[step].tag == "dt"
               and xcomp[step].attrib["dependency"] != "conj"):
            step += 1
        main_xcomp = xcomp[0:step]  # children of the first disjunct

        # Consecutive conj children: each one is a further disjunct.
        xcomp_list = []
        or_flag = False
        while step < len(xcomp):
            conj = xcomp[step]
            if conj.tag != "dt" or conj.attrib["dependency"] != "conj":
                break
            # Strip the leading "," / "or" connectives.
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in [",", "or"]:
                if conj[0].attrib["lemma"] == "or":
                    or_flag = True
                conj.remove(conj[0])
            # Strip trailing commas.
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "xcomp"  # promote conj to xcomp
            xcomp_list.append(conj)
            step += 1
        if not or_flag:
            return

        # Whatever follows the conj run is shared by every disjunct.
        modification = xcomp[step:]
        main_xcomp.extend(modification)
        for _xcomp in xcomp_list:
            _xcomp.extend(copy.deepcopy(modification))

        # Rebuild the original xcomp so it holds only the first disjunct.
        remove_all_children(xcomp)
        xcomp.extend(main_xcomp)
        while xcomp[-1].tag == "dt" and xcomp[-1].attrib["lemma"] == ",":
            xcomp.remove(xcomp[-1])

        new_entries = [entry]
        # All disjunct entries go into the same branch.
        for _xcomp in xcomp_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[xcomp_i], _xcomp)
            new_entries.append(new_entry)
        return [new_entries]
    except Exception:
        # Best effort: a tree that does not fit the pattern means the rule
        # does not apply. KeyboardInterrupt/SystemExit still propagate.
        return
# Register the rule, then run it on False-signed sample sentences; the
# second sentence contains no "or" (presumably a negative case).
rule_set.add(rule)

test_rule(rule, [TabEntry(False, "He likes to swim or to run.")])
test_rule(rule, [TabEntry(False, "Sue asked George to respond to her offer.")])
test_rule(rule, [TabEntry(False, "Sue asked George to respond or to meet to her offer.")])

## CCOMP1 or CCOMP2

In [None]:
rule = (1, "TRUE_CCOMP_OR_CCOMP", "EQ")
def TRUE_CCOMP_OR_CCOMP(entries):
    """Branch a True entry on an "or"-coordinated clausal complement (ccomp).

    The ccomp child of ``entry.tree`` must match
    ``dt* pw dt* (dt{conj})* rest``. Each conj child may start with
    "," / "or" tokens, and at least one "or" is required. Every conj child
    is relabelled as a ccomp and swapped in for the original ccomp in a
    deep copy of the entry; ``rest`` is appended to every disjunct.

    Args:
        entries: Sequence whose first item is the entry to decompose.

    Returns:
        One single-entry branch per disjunct (``[[entry], [copy], ...]``),
        or None if the sign is not True, the tree has no ccomp, contains
        a sentence or a negation, or does not match the pattern.

    Note:
        ``entry.tree`` is edited in place. Stripping "," / "or" and
        relabelling the conj children happen before the "or" check, so
        those edits persist even when None is returned.
    """
    entry = entries[0]
    if entry.sign != True:
        return

    ccomp_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "ccomp",
                             entry.tree)
    # Requires a ccomp, no embedded sentence and no negation word.
    if (ccomp_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return

    try:
        ccomp = entry.tree[ccomp_i]
        step = 0
        # Leading "dt" children.
        while step < len(ccomp) and ccomp[step].tag == "dt":
            step += 1
        # A "pw" child must follow (IndexError if there is none).
        if ccomp[step].tag != "pw":
            return
        step += 1
        # "dt" children after the "pw", up to the first conj.
        while (step < len(ccomp) and ccomp[step].tag == "dt"
               and ccomp[step].attrib["dependency"] != "conj"):
            step += 1
        main_ccomp = ccomp[0:step]  # children of the first disjunct

        # Consecutive conj children: each one is a further disjunct.
        ccomp_list = []
        or_flag = False
        while step < len(ccomp):
            conj = ccomp[step]
            if conj.tag != "dt" or conj.attrib["dependency"] != "conj":
                break
            # Strip the leading "," / "or" connectives.
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in [",", "or"]:
                if conj[0].attrib["lemma"] == "or":
                    or_flag = True
                conj.remove(conj[0])
            # Strip trailing commas.
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "ccomp"  # promote conj to ccomp
            ccomp_list.append(conj)
            step += 1
        if not or_flag:
            return

        # Whatever follows the conj run is shared by every disjunct.
        modification = ccomp[step:]
        main_ccomp.extend(modification)
        for _ccomp in ccomp_list:
            _ccomp.extend(copy.deepcopy(modification))

        # Rebuild the original ccomp so it holds only the first disjunct.
        remove_all_children(ccomp)
        ccomp.extend(main_ccomp)
        while ccomp[-1].tag == "dt" and ccomp[-1].attrib["lemma"] == ",":
            ccomp.remove(ccomp[-1])

        new_branches = [[entry]]
        # One new branch per remaining disjunct.
        for _ccomp in ccomp_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[ccomp_i], _ccomp)
            new_branches.append([new_entry])
        return new_branches
    except Exception:
        # Best effort: a tree that does not fit the pattern means the rule
        # does not apply. KeyboardInterrupt/SystemExit still propagate.
        return
# Register the rule, then run it on a True-signed sample sentence whose
# complement clause contains an "or" coordination.
rule_set.add(rule)

test_rule(rule, [TabEntry(True, "He said that you should run or sleep")])

In [None]:
rule = (1, "FALSE_CCOMP_OR_CCOMP", "EQ")
def FALSE_CCOMP_OR_CCOMP(entries):
    """Expand a False entry on an "or"-coordinated clausal complement (ccomp).

    The ccomp child of ``entry.tree`` must match
    ``dt* pw dt* (dt{conj})* rest``. Each conj child may start with
    "," / "or" tokens, and at least one "or" is required. Every conj child
    is relabelled as a ccomp and swapped in for the original ccomp in a
    deep copy of the entry; ``rest`` is appended to every disjunct.

    Args:
        entries: Sequence whose first item is the entry to decompose.

    Returns:
        A single branch holding one entry per disjunct
        (``[[entry, copy, ...]]``), or None if the sign is not False, the
        tree has no ccomp, contains a sentence or a negation, or does not
        match the pattern.

    Note:
        ``entry.tree`` is edited in place. Stripping "," / "or" and
        relabelling the conj children happen before the "or" check, so
        those edits persist even when None is returned.
    """
    entry = entries[0]
    if entry.sign != False:
        return

    ccomp_i = search_in_bros(lambda dt: dt.attrib["dependency"] == "ccomp",
                             entry.tree)
    # Requires a ccomp, no embedded sentence and no negation word.
    if (ccomp_i == -1 or include_sentence(entry.tree) != -1
            or include_negation(entry.tree) != -1):
        return

    try:
        ccomp = entry.tree[ccomp_i]
        step = 0
        # Leading "dt" children.
        while step < len(ccomp) and ccomp[step].tag == "dt":
            step += 1
        # A "pw" child must follow (IndexError if there is none).
        if ccomp[step].tag != "pw":
            return
        step += 1
        # "dt" children after the "pw", up to the first conj.
        while (step < len(ccomp) and ccomp[step].tag == "dt"
               and ccomp[step].attrib["dependency"] != "conj"):
            step += 1
        main_ccomp = ccomp[0:step]  # children of the first disjunct

        # Consecutive conj children: each one is a further disjunct.
        ccomp_list = []
        or_flag = False
        while step < len(ccomp):
            conj = ccomp[step]
            if conj.tag != "dt" or conj.attrib["dependency"] != "conj":
                break
            # Strip the leading "," / "or" connectives.
            while conj[0].tag == "dt" and conj[0].attrib["lemma"] in [",", "or"]:
                if conj[0].attrib["lemma"] == "or":
                    or_flag = True
                conj.remove(conj[0])
            # Strip trailing commas.
            while conj[-1].tag == "dt" and conj[-1].attrib["lemma"] == ",":
                conj.remove(conj[-1])
            conj.attrib["dependency"] = "ccomp"  # promote conj to ccomp
            ccomp_list.append(conj)
            step += 1
        if not or_flag:
            return

        # Whatever follows the conj run is shared by every disjunct.
        modification = ccomp[step:]
        main_ccomp.extend(modification)
        for _ccomp in ccomp_list:
            _ccomp.extend(copy.deepcopy(modification))

        # Rebuild the original ccomp so it holds only the first disjunct.
        remove_all_children(ccomp)
        ccomp.extend(main_ccomp)
        while ccomp[-1].tag == "dt" and ccomp[-1].attrib["lemma"] == ",":
            ccomp.remove(ccomp[-1])

        new_entries = [entry]
        # All disjunct entries go into the same branch.
        for _ccomp in ccomp_list:
            new_entry = copy.deepcopy(entry)
            new_entry.tree.replace(new_entry.tree[ccomp_i], _ccomp)
            new_entries.append(new_entry)
        return [new_entries]
    except Exception:
        # Best effort: a tree that does not fit the pattern means the rule
        # does not apply. KeyboardInterrupt/SystemExit still propagate.
        return
# Register the rule, then run it on a False-signed sample sentence whose
# complement clause contains an "or" coordination.
rule_set.add(rule)

test_rule(rule, [TabEntry(False, "He said that you run or sleep")])

## not

In [None]:
rule = (1, "REVERSE_NEGATION", "EQ")
def REVERSE_NEGATION(entries):
    """Remove the negation from an entry and flip its sign.

    Applies when ``include_negation`` reports a negation child and
    ``include_sentence`` reports no embedded sentence. The entry is
    modified in place.

    Args:
        entries: Sequence whose first item is the entry to rewrite.

    Returns:
        ``[[entry]]`` (a single branch with the rewritten entry), or None
        when the rule does not apply.
    """
    entry = entries[0]
    N_i = include_negation(entry.tree)
    if N_i == -1 or include_sentence(entry.tree) != -1:
        return

    entry.tree.remove(entry.tree[N_i])
    # Restore the surface form of a "can"/"will" element just before the
    # removed negation (presumably "ca"/"wo" left over from "can't"/"won't").
    # Skipped when the negation was the first child: index -1 would wrap
    # around to the last child, or raise on a now-empty tree.
    if N_i > 0:
        previous = entry.tree[N_i - 1]
        if previous.attrib["lemma"] in ("can", "will"):
            previous.attrib["word"] = previous.attrib["lemma"]
    entry.sign = not entry.sign
    return [[entry]]
    
# Register the rule, then run it on negated sample sentences of both signs
# (the first two cases are identical).
rule_set.add(rule)

test_rule(rule, [TabEntry(True, "Alice doesn't like it.")])
test_rule(rule, [TabEntry(True, "Alice doesn't like it.")])
test_rule(rule, [TabEntry(False, "He didn't sign the contract.")])
test_rule(rule, [TabEntry(True, "He has never been to America.")])
test_rule(rule, [TabEntry(True, "He can't run.")])
test_rule(rule, [TabEntry(True, "He won't run.")])
test_rule(rule, [TabEntry(True, "He shouldn't run.")])
test_rule(rule, [TabEntry(True, "To create world isn't funny")])
# if

In [None]:
rule = (1, "TRUE_IF_ADVCL", "EQ")
def TRUE_IF_ADVCL(entries):
    """Split a true-signed "if" sentence into antecedent and consequent.

    The if-clause is detached from the entry's tree (together with an
    adjacent comma and/or "then"), its leading "if" token is dropped, and it
    becomes a new false-signed entry. What remains of the original tree is
    the consequent, which keeps its sign.

    Args:
        entries: Sequence whose first element is the entry to decompose; its
            tree is modified in place when the rule applies.

    Returns:
        ``[[antecedent_entry], [entry]]`` - two single-entry lists
        (presumably alternative branches) - or ``None`` when the entry is
        not true-signed or has no matching if-clause.
    """
    target = entries[0]
    if target.sign != True:
        return

    root = target.tree

    def is_if_clause(node):
        # A sibling attached as "advcl" whose first child has the lemma "if".
        return node.attrib["dependency"] == "advcl" and node[0].attrib["lemma"] == "if"

    clause_pos = search_in_bros(is_if_clause, root)
    if clause_pos == -1:
        return

    if_clause = root[clause_pos]

    # Drop a comma and then a "then" directly after the if-clause, if present.
    following = clause_pos + 1
    for lemma in (",", "then"):
        if following < len(root) and root[following].attrib["lemma"] == lemma:
            root.remove(root[following])

    # Drop a comma directly before the if-clause, if present.
    preceding = clause_pos - 1
    if preceding >= 0 and root[preceding].attrib["lemma"] == ",":
        root.remove(root[preceding])

    root.remove(if_clause)

    # Turn the detached clause into a standalone tree without its "if".
    if_clause.remove(if_clause[0])
    if_clause.set("dependency", "root")

    return [[TabEntry(False, tree=if_clause)], [target]]
    

# Register the TRUE_IF_ADVCL descriptor in rule_set.
rule_set.add(rule)

# Try the rule on true-signed conditionals: if-clause first or last, with and
# without a comma or "then", and with a positive or negated consequent.
test_rule(rule, [TabEntry(True, "If he's cool I like him.")])
test_rule(rule, [TabEntry(True, "If he's cool, I like him.")])
test_rule(rule, [TabEntry(True, "If he's cool then I like him.")])
test_rule(rule, [TabEntry(True, "I like him if he's cool.")])
test_rule(rule, [TabEntry(True, "If he's cool I don't like him.")])
test_rule(rule, [TabEntry(True, "If he's cool, I don't like him.")])
test_rule(rule, [TabEntry(True, "If he's cool then I don't like him.")])
test_rule(rule, [TabEntry(True, "I don't like him if he's cool.")])
test_rule(rule, [TabEntry(True, "I don't like him, if he's cool.")])

In [None]:
# Descriptor for the FALSE_IF_ADVCL rule defined below.
# NOTE(review): the tuple looks like (number of input entries, function name, rule kind) - confirm against Tableau.
rule = (1, "FALSE_IF_ADVCL", "EQ")
def FALSE_IF_ADVCL(entries):
    """Split a false-signed "if" sentence into antecedent and consequent.

    The if-clause is detached from the entry's tree (together with an
    adjacent comma and/or "then"), its leading "if" token is dropped, and it
    becomes a new true-signed entry. What remains of the original tree is
    the consequent, which keeps its (false) sign.

    Args:
        entries: Sequence whose first element is the entry to decompose; its
            tree is modified in place when the rule applies.

    Returns:
        ``[[antecedent_entry, entry]]`` - one list holding both entries
        (presumably a single branch) - or ``None`` when the entry is not
        false-signed or has no matching if-clause.
    """
    target = entries[0]
    if target.sign != False:
        return

    root = target.tree

    def is_if_clause(node):
        # A sibling attached as "advcl" whose first child has the lemma "if".
        return node.attrib["dependency"] == "advcl" and node[0].attrib["lemma"] == "if"

    clause_pos = search_in_bros(is_if_clause, root)
    if clause_pos == -1:
        return

    if_clause = root[clause_pos]

    # Drop a comma and then a "then" directly after the if-clause, if present.
    following = clause_pos + 1
    for lemma in (",", "then"):
        if following < len(root) and root[following].attrib["lemma"] == lemma:
            root.remove(root[following])

    # Drop a comma directly before the if-clause, if present.
    preceding = clause_pos - 1
    if preceding >= 0 and root[preceding].attrib["lemma"] == ",":
        root.remove(root[preceding])

    root.remove(if_clause)

    # Turn the detached clause into a standalone tree without its "if".
    if_clause.remove(if_clause[0])
    if_clause.set("dependency", "root")

    return [[TabEntry(True, tree=if_clause), target]]
    

# Register the FALSE_IF_ADVCL descriptor in rule_set.
rule_set.add(rule)

# Try the rule on false-signed conditionals: if-clause first or last, with and
# without a comma or "then", and with a positive or negated consequent.
test_rule(rule, [TabEntry(False, "If he's cool I like him.")])
test_rule(rule, [TabEntry(False, "If he's cool, I like him.")])
test_rule(rule, [TabEntry(False, "If he's cool then I like him.")])
test_rule(rule, [TabEntry(False, "I like him if he's cool.")])
test_rule(rule, [TabEntry(False, "If he's cool I don't like him.")])
test_rule(rule, [TabEntry(False, "If he's cool, I don't like him.")])
test_rule(rule, [TabEntry(False, "If he's cool then I don't like him.")])
test_rule(rule, [TabEntry(False, "I don't like him if he's cool.")])
test_rule(rule, [TabEntry(False, "I don't like him, if he's cool.")])

# Test Rules

In [None]:
# Display every rule descriptor registered so far.
rule_set

In [None]:
# Display how many rule descriptors are registered.
len(rule_set)

In [None]:
# Smoke test: build a tableau from one true-signed premise (origin "P"),
# give it every registered rule, decompose it, then print both the tableau
# itself and the structure returned by dump().
tableau = Tableau([
    TabEntry(True, "Three men, one holding pipes, another holding a large object above his head, and one resting against the pipe bed on the truck, are looking at the camera.", origin="P"),
])
tableau.append_rules(list(rule_set))
tableau.decompose()
print(tableau)
print(tableau.dump())

# Load Data and Decompose

In [None]:
# Load the SNLI dev and test splits from tab-separated files under
# DATA_DIR_PATH, using the first column as the index.
df_snli_dev, df_snli_test = (
    pd.read_csv(os.path.join(DATA_DIR_PATH, file_name), delimiter='\t', index_col=0)
    for file_name in ("snli_dev.tsv", "snli_test.tsv")
)

In [None]:
from tqdm import tqdm

def concat_tableau_to_df(df):
    """Decompose every premise/hypothesis pair of ``df`` and attach the results.

    For each row two tableaux are built from the XML strings in the
    ``udtree1`` (premise, origin "P") and ``udtree2`` (hypothesis, origin "H")
    columns:

    * entailment: premise signed True, hypothesis signed False
    * contradiction: premise signed True, hypothesis signed True

    Each tableau receives every rule in ``rule_set`` and is decomposed.

    Args:
        df: DataFrame with ``udtree1`` and ``udtree2`` columns. It is modified
            in place: the columns ``entailment_tableau``,
            ``entailment_tableau_size``, ``contradiction_tableau`` and
            ``contradiction_tableau_size`` are added (or overwritten). The
            ``*_tableau`` columns hold the JSON-serialised ``dump()`` of the
            tableau, the ``*_size`` columns hold its ``size()``.

    Returns:
        None.
    """

    def decompose_pair(sample, hypothesis_sign):
        # Parse fresh trees for every tableau. Rules edit entry trees in
        # place, so sharing one parsed tree between the entailment and the
        # contradiction tableau could let the first decomposition leak into
        # the second.
        premise = TabEntry(True, tree=ET.fromstring(sample.udtree1), origin="P")
        hypothesis = TabEntry(hypothesis_sign, tree=ET.fromstring(sample.udtree2), origin="H")
        tableau = Tableau([premise, hypothesis])
        tableau.append_rules(list(rule_set))
        tableau.decompose()
        return json.dumps(tableau.dump()), tableau.size()

    entailment_results = []
    contradiction_results = []

    # itertuples() has no length, so pass the total for a real progress bar.
    for sample in tqdm(df.itertuples(), total=len(df)):
        entailment_results.append(decompose_pair(sample, False))
        contradiction_results.append(decompose_pair(sample, True))

    df["entailment_tableau"] = [dumped for dumped, _ in entailment_results]
    df["entailment_tableau_size"] = [size for _, size in entailment_results]
    df["contradiction_tableau"] = [dumped for dumped, _ in contradiction_results]
    df["contradiction_tableau_size"] = [size for _, size in contradiction_results]

In [None]:
# Add the tableau columns to the dev split (the DataFrame is modified in
# place), write it to snli_dev.tsv under DATA_DIR_PATH, and preview it.
# NOTE(review): this is the same file name the dev split is loaded from, so the input file is overwritten.
concat_tableau_to_df(df_snli_dev)
df_snli_dev.to_csv(os.path.join(DATA_DIR_PATH, "snli_dev.tsv"), sep="\t")
df_snli_dev.head()

In [None]:
# Keep only the dev rows whose entailment AND contradiction tableaux both have
# a size above 2 (presumably: at least one rule fired beyond the two initial
# entries - confirm against Tableau.size), then display how many remain.
decomposed_df_snli_dev = df_snli_dev[
    df_snli_dev["entailment_tableau_size"].gt(2)
    & df_snli_dev["contradiction_tableau_size"].gt(2)
]
len(decomposed_df_snli_dev)

In [None]:
# Add the tableau columns to the test split (the DataFrame is modified in
# place), write it to snli_test.tsv under DATA_DIR_PATH, and preview it.
# NOTE(review): this is the same file name the test split is loaded from, so the input file is overwritten.
concat_tableau_to_df(df_snli_test)
df_snli_test.to_csv(os.path.join(DATA_DIR_PATH, "snli_test.tsv"), sep="\t")
df_snli_test.head()

In [None]:
# Keep only the test rows whose entailment AND contradiction tableaux both
# have a size above 2 (presumably: at least one rule fired beyond the two
# initial entries - confirm against Tableau.size), then display how many remain.
decomposed_df_snli_test = df_snli_test[
    df_snli_test["entailment_tableau_size"].gt(2)
    & df_snli_test["contradiction_tableau_size"].gt(2)
]
len(decomposed_df_snli_test)