***Vorlesung 'Syntax natürlicher Sprachen'***

--- 
# Vorlesung 10: Unifikation, Subsumption und getypte hierarchische Features

In [1]:
import nltk

from nltk.featstruct import Feature, UnificationFailure, FeatStructReader, FeatStruct
import itertools
from collections import defaultdict


def check_sanity_constraints(th):
    """Ensure that the type hierarchy ``th`` is antisymmetric.

    Args:
        th: Mapping from each type to the collection of types it subsumes.

    Raises:
        ValueError: If two distinct types each appear in the other's
            collection, i.e. they subsume one another.
    """
    # Mutual subsumption is a symmetric condition, so it is enough to look
    # at every unordered pair of distinct types once.
    for first, second in itertools.combinations(th, 2):
        mutually_subsumed = first in th[second] and second in th[first]
        if not mutually_subsumed:
            continue
        details = "{} subsumes {} and vice versa!".format(first, second)
        raise ValueError("The type hierarchy is not antisymmetric! " + details)


def refl_trans_closure(type_hierarchy):
    """Return the reflexive-transitive closure of a type hierarchy.

    Args:
        type_hierarchy: Mapping from a type to an iterable of the types it
            directly subsumes.

    Returns:
        A ``defaultdict(set)`` that maps every type occurring in
        ``type_hierarchy`` (as a key or inside one of the value iterables)
        to the set of all types it subsumes, itself included.
    """
    closure = defaultdict(set)

    # Reflexive closure. Types that are only mentioned as a subtype (and
    # never declared as a key) get their own entry here as well; otherwise
    # the ``closure[y]`` lookup below would insert new keys while
    # ``closure`` is being iterated and raise a RuntimeError.
    for t in type_hierarchy:
        closure[t].add(t)
        for sub in type_hierarchy[t]:
            closure[t].add(sub)
            closure[sub].add(sub)

    # Transitive closure: keep adding everything reachable in two steps
    # until a full pass over all types no longer changes anything.
    still_changes = True
    while still_changes:
        still_changes = False
        for x in closure:
            reachable = set()
            for y in closure[x]:
                reachable |= closure[y]
            if not reachable <= closure[x]:
                closure[x] |= reachable
                still_changes = True

    return closure


class HierarchicalFeature(Feature):
    """NLTK feature whose atomic values are ordered by a type hierarchy.

    Two values unify to their most general common subtype instead of
    requiring equality.

    Attributes are set in ``__init__``:
        hierarchy: reflexive-transitive closure of the given type
            hierarchy; maps each type to the set of types it subsumes.
    """

    def __init__(self, name, type_hierarchy, **kwargs):
        """Create the feature.

        Args:
            name: Feature name, passed on to ``Feature``.
            type_hierarchy: Mapping from a type to the types it directly
                subsumes.
            **kwargs: Further keyword arguments passed on to ``Feature``.

        Raises:
            ValueError: If the closed hierarchy is not antisymmetric.
        """
        super().__init__(name, **kwargs)

        self.hierarchy = refl_trans_closure(type_hierarchy)
        check_sanity_constraints(self.hierarchy)

    def unify_base_values(self, fval1, fval2, bindings):
        """Unify two atomic values of this feature.

        Args:
            fval1: First value (a type of the hierarchy).
            fval2: Second value (a type of the hierarchy).
            bindings: Variable bindings supplied by the caller; not used.

        Returns:
            The unique most general type subsumed by both values, or
            ``UnificationFailure`` if the values have no common subtype,
            if a value is not part of the hierarchy, or if there are
            several incomparable most general common subtypes (picking one
            of them would make the result depend on set iteration order).
        """
        no_types = frozenset()
        # ``.get`` keeps unknown values from being inserted into the
        # hierarchy as a side effect of the lookup.
        subtypes1 = self.hierarchy.get(fval1, no_types)
        subtypes2 = self.hierarchy.get(fval2, no_types)
        candidates = subtypes1 & subtypes2

        # A candidate is "most general" if no *other* candidate subsumes it.
        most_general = [
            t for t in candidates
            if not any(
                other != t and t in self.hierarchy.get(other, no_types)
                for other in candidates
            )
        ]

        if len(most_general) != 1:
            return UnificationFailure
        return most_general[0]

---
## 1. Subsumption und Unifikation: Beispiel komplexes AGR-Feature

---
### Beispiel 1.1: unifizierende AGR-Merkmale

In [2]:
f0 = FeatStruct("[AGR=[NUM=sg]]") # more general
f1 = FeatStruct("[AGR=[NUM=sg, PERS=3]]") # more specific, carries more information

### *$\to$ Subsumption (f0 ⊑ f1): jede Information in f0 ist auch in f1 enthalten*

***information ordering relation of feature structures* (ähnlich Teilmengenbeziehung)**

- https://cs.union.edu/~striegnk/courses/nlp-with-prolog/html/node82.html#l11.sec.subsumption

In [3]:
#Subsumption (f0 ⊑ f1)
f0.subsumes(f1)

True

In [4]:
#Subsumption (f1 ⊑ f0)
f1.subsumes(f0)

False

### *$\to$  Unifikation (f0 ⊔ f1): Kombination der Informationen aus f0 und f1 (aber keine widersprüchlichen Informationen!)*

***information combination operation* (ähnlich Vereinigung von Mengen)**

- https://cs.union.edu/~striegnk/courses/nlp-with-prolog/html/node83.html#l11.fsu


#### bei Feature Structures in Subsumptionsbeziehung entspricht das Ergebnis der Unifikation der spezielleren (subsumierten): 

In [5]:
# Unification (f0 ⊔ f1)
print(f0.unify(f1))  # f0 ⊔ f1 = f1 (since f0 ⊑ f1)

[ AGR = [ NUM  = 'sg' ] ]
[       [ PERS = 3    ] ]


--- 
### Beispiel 1.2: auch Feature Structures, die nicht in Subsumption-Beziehung stehen, können unifizieren

##### im Beispiel sind in f0 auch Informationen enthalten (CASE=nom), die nicht in f1 enthalten sind:

In [6]:
f0 = FeatStruct("[AGR=[NUM=sg, CASE=nom]]") 
f1 = FeatStruct("[AGR=[NUM=sg, PERS=3]]") 

In [7]:
#Subsumption (f0 ⊑ f1)
f0.subsumes(f1)

False

##### aber: Unifikation funktioniert, da Informationen nicht widersprüchlich:

In [8]:
#Unification (f0 ⊔ f1)
print(f0.unify(f1))

[       [ CASE = 'nom' ] ]
[ AGR = [ NUM  = 'sg'  ] ]
[       [ PERS = 3     ] ]


### Unifikation als kleinste obere Schranke in der Subsumptionsbeziehung:

> The unification of two feature structures F and G (if it exists) is the smallest feature structure that is subsumed by both F and G. (https://cs.union.edu/~striegnk/courses/nlp-with-prolog/html/node83.html#l11.fsu)

In [9]:
# f0 ⊑ (f0 ⊔ f1)
f0.subsumes(f0.unify(f1))

True

In [10]:
# f1 ⊑ (f0 ⊔ f1)
f1.subsumes(f0.unify(f1))

True

---
### Beispiel 1.3: nicht-unifizierende AGR-Merkmale

#### Unifikation ist nur eine partielle Operation: bei widersprüchlichen/unvereinbaren Werten ist sie nicht definiert ($\to$ `None`):

In [11]:
f0 = FeatStruct("[AGR=[NUM=pl]]") 
f1 = FeatStruct("[AGR=[NUM=sg, PERS=3]]")

In [12]:
#Subsumption (f0 ⊑ f1)
f0.subsumes(f1)

False

In [13]:
#Subsumption (f1 ⊑ f0)
f1.subsumes(f0)

False

In [14]:
#Unification (f0 ⊔ f1)
print(f0.unify(f1))

None


---
## 2. Agreement-Typhierarchie (`*AGR*`)

#### Feature-Structure mit hierarchisch durch Subsumptionsbeziehung gegliederten Features ([1] ⊑ [1sg] usw.)

-  `*AGR*` als Beispiel für ein hierarchisches Feature (atomare Werte mit Angabe der Subsumptionsbeziehungen) 
- hier: alternativ zu komplexem AGR-Feature-Wert mit Person-Numerus-Feature-Structure



In [15]:
# Agreement type hierarchy: every key is a type, and its list holds the
# more specific types it directly subsumes (e.g. "1" subsumes "1sg" and
# "1pl"). The fully specified person/number types subsume nothing further.
type_hierarchy = {
    "1": ["1sg","1pl"],
    "2": ["2sg", "2pl"],
    "3": ["3sg", "3pl"],
    "sg": ["1sg", "2sg", "3sg"],
    "pl": ["1pl", "2pl", "3pl"],
    "1sg": [],
    "1pl": [],
    "2sg": [],
    "2pl": [],
    "3sg": [],
    "3pl": [],
}
# Hierarchical feature built from the hierarchy above.
AGR = HierarchicalFeature("AGR", type_hierarchy)
# Reader that is given the AGR feature; the cells below write it as
# `*AGR*` inside feature-structure strings.
reader = FeatStructReader(features=(AGR,))

---
### Beispiel 2.1

In [16]:
f0 = reader.fromstring("[*AGR*='1']") # values containing digits must be quoted so that they are read as strings
f1 = reader.fromstring("[*AGR*='1sg']") 

In [17]:
#Subsumption (f0 ⊑ f1)
f0.subsumes(f1)

True

In [18]:
#Subsumption (f1 ⊑ f0)
f1.subsumes(f0)

False

In [19]:
#Unification (f0 ⊔ f1)
print(f0.unify(f1))

[ *AGR* = '1sg' ]


---
### Beispiel 2.2: Unifikation von Typen, die sich nicht subsumieren, aber auch nicht unvereinbar sind

In [20]:
f0 = reader.fromstring("[*AGR*='1']")
f1 = reader.fromstring("[*AGR*=sg]")

In [21]:
#Subsumption (f0 ⊑ f1)
f0.subsumes(f1)

False

In [22]:
#Subsumption (f1 ⊑ f0)
f1.subsumes(f0)

False

#### Unifikation als kleinste obere Schranke in der Subsumptionsbeziehung:

In [23]:
#Unification (f0 ⊔ f1)
print(f0.unify(f1))

[ *AGR* = '1sg' ]


In [None]:
# The unification is an upper bound of both inputs:
#   f0 ⊑ (f0 ⊔ f1), here: 1 ⊑ 1sg
#   f1 ⊑ (f0 ⊔ f1), here: sg ⊑ 1sg
# Check the claim instead of only stating it.
unified = f0.unify(f1)
f0.subsumes(unified), f1.subsumes(unified)

---
## 3. Numerus-Typhierarchie

$$\bot \sqsubseteq \text{Singular}$$
$$\bot \sqsubseteq \text{nonSingular}$$
$$\text{nonSingular} \sqsubseteq \text{Plural}$$
$$\text{nonSingular} \sqsubseteq \text{Dual}$$


In [24]:
# Number type hierarchy: "nonSingular" directly subsumes "Plural" and
# "Dual"; "Singular" subsumes nothing further. The bottom type ⊥ shown in
# the diagram above has no entry of its own in this dictionary.
type_hierarchy = {
    "nonSingular": ["Plural", "Dual"], 
    "Singular": [],
    "Plural": [],
    "Dual": [],
}
# Hierarchical feature built from the hierarchy above.
NUMBER = HierarchicalFeature("NUMBER", type_hierarchy)
# NOTE: this rebinds `type_hierarchy` and `reader` from the AGR section;
# from here on `reader` is given the NUMBER feature only.
reader = FeatStructReader(features=(NUMBER,))

---
### Beispiel 3.1

In [25]:
f0 = reader.fromstring("[*NUMBER*=nonSingular]") # more general (Plural | Dual, not Singular)
f1 = reader.fromstring("[*NUMBER*=Dual]") # more specific, more information (Dual, not Plural, not Singular)

In [26]:
#Subsumption (f0 ⊑ f1)
f0.subsumes(f1)

True

In [27]:
#Subsumption (f1 ⊑ f0)
f1.subsumes(f0)

False

In [28]:
#Unification
print(f0.unify(f1))

[ *NUMBER* = 'Dual' ]


---
### Beispiel 3.2

In [29]:
f0 = reader.fromstring("[*NUMBER*=Singular]")
f1 = reader.fromstring("[*NUMBER*=nonSingular]")

In [30]:
#Subsumption (f0 ⊑ f1)
f0.subsumes(f1)

False

In [31]:
#Subsumption (f1 ⊑ f0)
f1.subsumes(f0)

False

In [32]:
# Unification
print(f0.unify(f1))  # incompatible features

None


---
### Beispiel 3.3

In [33]:
f0 = reader.fromstring("[*NUMBER*=Singular]")
f1 = reader.fromstring("[*NUMBER*=Singular]")

In [34]:
#Subsumption (f0 ⊑ f1)
f0.subsumes(f1)

True

In [35]:
#Subsumption (f1 ⊑ f0)
f1.subsumes(f0)

True

In [36]:
#Unification
print(f0.unify(f1))

[ *NUMBER* = 'Singular' ]
