<a href="https://colab.research.google.com/github/gksthdals/NLTK/blob/main/10.%20Analyzing_the_Meaning_of_Sentences.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
""" Main Topic

1. How can we represent natural language meaning so that a computer can process these representations?
2. How can we associate meaning representations with an unlimited set of sentences?
3. How can we use programs that connect the meaning representations of sentences to stores of knowledge?

"""

## 1. Natural Language Understanding

### Querying a Database

In [None]:
# SQL : Structured Query Language

# SELECT Country FROM city_table WHERE City = 'athens'

In [None]:
import nltk
nltk.download('book_grammars')

[nltk_data] Downloading package book_grammars to /root/nltk_data...
[nltk_data]   Unzipping grammars/book_grammars.zip.


True

In [None]:
nltk.data.show_cfg('grammars/book_grammars/sql0.fcfg')

% start S
S[SEM=(?np + WHERE + ?vp)] -> NP[SEM=?np] VP[SEM=?vp]
VP[SEM=(?v + ?pp)] -> IV[SEM=?v] PP[SEM=?pp]
VP[SEM=(?v + ?ap)] -> IV[SEM=?v] AP[SEM=?ap]
NP[SEM=(?det + ?n)] -> Det[SEM=?det] N[SEM=?n]
PP[SEM=(?p + ?np)] -> P[SEM=?p] NP[SEM=?np]
AP[SEM=?pp] -> A[SEM=?a] PP[SEM=?pp]
NP[SEM='Country="greece"'] -> 'Greece'
NP[SEM='Country="china"'] -> 'China'
Det[SEM='SELECT'] -> 'Which' | 'What'
N[SEM='City FROM city_table'] -> 'cities'
IV[SEM=''] -> 'are'
A[SEM=''] -> 'located'
P[SEM=''] -> 'in'


In [None]:
from nltk import load_parser
cp = load_parser('grammars/book_grammars/sql0.fcfg')
query = 'What cities are located in China'

trees = list(cp.parse(query.split()))
for tree in trees:
  print(tree)
print()

answer = trees[0].label()['SEM']
print(answer)

# [SEM='']인 경우를 제외하고 문장을 만들기 위해서
answer = [s for s in answer if s]
print(answer)

q = ' '.join(answer)
print(q)

(S[SEM=(SELECT, City FROM city_table, WHERE, , , Country="china")]
  (NP[SEM=(SELECT, City FROM city_table)]
    (Det[SEM='SELECT'] What)
    (N[SEM='City FROM city_table'] cities))
  (VP[SEM=(, , Country="china")]
    (IV[SEM=''] are)
    (AP[SEM=(, Country="china")]
      (A[SEM=''] located)
      (PP[SEM=(, Country="china")]
        (P[SEM=''] in)
        (NP[SEM='Country="china"'] China)))))

(SELECT, City FROM city_table, WHERE, , , Country="china")
['SELECT', 'City FROM city_table', 'WHERE', 'Country="china"']
SELECT City FROM city_table WHERE Country="china"


In [None]:
nltk.download('city_database')

[nltk_data] Downloading package city_database to /root/nltk_data...
[nltk_data]   Unzipping corpora/city_database.zip.


True

In [None]:
from nltk.sem import chat80
rows = chat80.sql_query('corpora/city_database/city.db', q)
for r in rows: print(r[0], end=' ')

canton chungking dairen harbin kowloon mukden peking shanghai sian tientsin 

In [None]:
# sql0.fcfg : translation from English to SQL

### Natural Language, Semantics and Logic

In [None]:
# Declarative sentences are true or false in certain situations.
# Definite noun phrases and proper nouns refer to things in the world.

In [None]:
"""
consistent : 여러 개의 문장이 동시에 성립 가능
inconsistent : 여러 개의 문장이 모순 관계

consistency and inconsistency
"""

## 2. Propositional Logic

In [None]:
nltk.boolean_ops()

negation       	-
conjunction    	&
disjunction    	|
implication    	->
equivalence    	<->


In [None]:
read_expr = nltk.sem.Expression.fromstring

In [None]:
read_expr('-(P & Q)')

<NegatedExpression -(P & Q)>

In [None]:
read_expr('P & Q')

<AndExpression (P & Q)>

In [None]:
read_expr('P | (R -> Q)')

<OrExpression (P | (R -> Q))>

In [None]:
read_expr('P <-> -- P')

<IffExpression (P <-> --P)>

In [None]:
lp = nltk.sem.Expression.fromstring
SnF = read_expr('SnF')
NotFnS = read_expr('-FnS')
R = read_expr('SnF -> -FnS')
prover = nltk.Prover9()
prover.prove(NotFnS, [SnF, R])

LookupError: ignored

In [None]:
val = nltk.Valuation([('P', True), ('Q', True), ('R', False)])

In [None]:
val['P']

True

In [None]:
dom = set()
g = nltk.Assignment(dom)

In [None]:
m = nltk.Model(dom, val)

In [None]:
m.evaluate('(P & Q)', g)

True

In [None]:
m.evaluate('-(P & Q)', g)

False

In [None]:
m.evaluate('(P & R)', g)

False

In [None]:
m.evaluate('(P | R)', g)

True

## 3. First-Order Logic

### Syntax

In [None]:
"""
Unary/Binary Predicate

walk(angus) : unary predicate
love(margrietje, brunoke) : binary predicate

###

non-logical constants : 참/거짓을 판단할 수 없는 -> see, angus and bertie
logical constants : 참/거짓을 판단할 수 있는 -> boolean opr

###

e : the type of entities
t : the type of formulas (expression which have truth values)

"""

In [1]:
import nltk

In [2]:
read_expr = nltk.sem.Expression.fromstring
expr = read_expr('walk(angus)', type_check=True)
expr.argument

<ConstantExpression angus>

In [3]:
expr.argument.type

e

In [4]:
expr.function

<ConstantExpression walk>

In [5]:
expr.function.type

<e,?>

In [6]:
sig = {'walk': '<e, t>'}

In [7]:
expr = read_expr('walk(angus)', signature=sig)

In [8]:
expr.function.type

e

In [None]:
# exists x.(dog(x) & disappear(x))
# all x.(dog(x) -> disappear(x))

# ((exists x. dog(x)) -> bark(x))
# all x.((exists x. dog(x)) -> bark(x))

In [9]:
read_expr = nltk.sem.Expression.fromstring
read_expr('dog(cyril)').free()

set()

In [10]:
read_expr('dog(x)').free()

{Variable('x')}

In [11]:
read_expr('own(angus, cyril)').free()

set()

In [12]:
read_expr('exists x.dog(x)').free()

set()

In [13]:
read_expr('all x.dog(x)').free()

set()

In [14]:
read_expr('((some x. walk(x)) -> sing(x))').free()

{Variable('x')}

In [15]:
read_expr('exists x.own(y, x)').free()

{Variable('y')}

### First Order Theorem Proving

In [16]:
# all x. all y.(north_of(x, y) -> -north_of(y, x))

NotFnS = read_expr('-north_of(f, s)')
SnF = read_expr('north_of(s, f)')
R = read_expr('all x. all y. (north_of(x, y) -> -north_of(y, x))')
prover = nltk.Prover9()
prover.prove(NotFnS, [SnF, R])

LookupError: ignored

### Summarizing the Language of First Order Logic

### Truth in Model

In [17]:
dom = {'b', 'o', 'c'}

v = """
bertie => b
olive => o
cyril => c
boy => {b}
girl => {o}
dog => {c}
walk => {o, c}
see => {(b, o), (c, b), (o, c)}
"""

In [18]:
val = nltk.Valuation.fromstring(v)
print(val)

{'bertie': 'b',
 'boy': {('b',)},
 'cyril': 'c',
 'dog': {('c',)},
 'girl': {('o',)},
 'olive': 'o',
 'see': {('o', 'c'), ('b', 'o'), ('c', 'b')},
 'walk': {('o',), ('c',)}}


In [19]:
('o', 'c') in val['see']

True

In [20]:
('b', ) in val['boy']

True

### Individual Variables and Assignments

In [22]:
g = nltk.Assignment(dom, [('x', 'o'), ('y', 'c')])
g

{'x': 'o', 'y': 'c'}

In [23]:
print(g)

g[c/y][o/x]


In [24]:
m = nltk.Model(dom, val)
m.evaluate('see(olive, y)', g)

True

In [25]:
g['y']

'c'

In [26]:
# y -> c, x -> o : see(c, o)가 존재하는가? -> No
# see(o, c)는 존재함
m.evaluate('see(y, x)', g)

False

In [27]:
m.evaluate('see(x, y)', g)

True

In [28]:
g.purge()

In [29]:
g

{}

In [30]:
m.evaluate('see(olive, y)', g)

'Undefined'

In [31]:
m.evaluate('see(bertie, olive) & boy(bertie) & -walk(bertie)', g)

True

### Quantification

In [33]:
m.evaluate('exists x.(girl(x) & walk(x))', g)

True

In [34]:
m.evaluate('girl(x) & walk(x)', g.add('x', 'o'))

True

In [35]:
fmla1 = read_expr('girl(x) | boy(x)')
m.satisfiers(fmla1, 'x', g)

{'b', 'o'}

In [36]:
fmla2 = read_expr('girl(x) -> walk(x)')
m.satisfiers(fmla2, 'x', g)

{'b', 'c', 'o'}

In [37]:
fmla3 = read_expr('walk(x) -> girl(x)')
m.satisfiers(fmla3, 'x', g)

{'b', 'o'}

In [39]:
m.evaluate('all x.(girl(x) -> walk(x))', g)

True

### Quantifier Scope Ambiguity

In [44]:
v2 = """
bruce => b
elspeth => e
julia => j
matthew => m
person => {b, e, j, m}
admire => {(j, b), (b, b), (m, e), (e, m)}
"""
val2 = nltk.Valuation.fromstring(v2)

In [45]:
dom2 = val2.domain
m2 = nltk.Model(dom2, val2)
g2 = nltk.Assignment(dom2)
fmla4 = read_expr('(person(x) -> exists y.(person(y) & admire(x, y)))')
m2.satisfiers(fmla4, 'x', g2)

{'b', 'e', 'j', 'm'}

In [46]:
fmla5 = read_expr('(person(y) & all x.(person(x) -> admire(x, y)))')
m2.satisfiers(fmla5, 'y', g2)

set()

In [47]:
fmla6 = read_expr('(person(y) & all x.((x = bruce | x = julia) -> admire(x, y)))')
m2.satisfiers(fmla6, 'y', g2)

{'b'}

### Model Building

In [48]:
a3 = read_expr('exists x.(man(x) & walk(x))')
c1 = read_expr('mortal(socrates)')
c2 = read_expr('-mortal(socrates)')
mb = nltk.Mace(5)

In [52]:
"""http://www.cs.unm.edu/~mccune/prover9/"""
# mb.build_model(None, [a3, c1]) -> True
# mb.build_model(None, [a3, c2]) -> True
# mb.build_model(None, [c1, c2]) -> False

'http://www.cs.unm.edu/~mccune/prover9/'

In [None]:
a4 = read_expr('exists y. (woman(y) & all x. (man(x) -> love(x, y)))')
a5 = read_expr('man(adam)')
a6 = read_expr('woman(eve)')
g = read_expr('love(adam, eve)')
mc = nltk.MaceCommand(g, assumptions=[a4, a5, a6])
mc.build_model()

In [None]:
"""
mc.valuation

{'C1': 'b',
 'adam': 'a',
 'eve': 'a',
 'love': {('a', 'b')},
 'man': {('a',)},
 'woman': {('a',), ('b',)}}

"""

In [None]:
# man과 woman은 서로 disjoint set이라는 것을 명시해주어야 adam과 eve가 같은 'a'로 mapping되지 않음
a7 = read_expr('all x. (man(x) -> -woman(x))')

g = read_expr('love(adam, eve)')
mc = nltk.MaceCommand(g, assumptions=[a4, a5, a6, a7])
mc.build_model()

In [None]:
"""
mc.valuation

{'C1': 'c',
 'adam': 'a',
 'eve': 'b',
 'love': {('a', 'c')},
 'man': {('a',)},
 'woman': {('c',), ('b',)}}

"""

## 4. The Semantics of English Sentences

### Compositional Semantics  in Feature-Based Grammar

In [None]:
# S[SEM=<?vp(?np)>] -> NP[SEM=?np] VP[SEM=?vp]

# VP[SEM=?v] -> IV[SEM=?v]
# NP[SEM=<cyril>] -> 'Cyril'
# IV[SEM=<\x.bark(x)>] -> 'barks'

### The λ-Calculus

In [54]:
read_expr = nltk.sem.Expression.fromstring
expr = read_expr(r'\x.(walk(x) & chew_gum(x))')
expr

<LambdaExpression \x.(walk(x) & chew_gum(x))>

In [55]:
expr.free()

set()

In [57]:
expr = read_expr(r'\x.(walk(x) & chew_gum(x))(gerald)')
print(expr)

\x.(walk(x) & chew_gum(x))(gerald)


In [58]:
print(expr.simplify())

(walk(gerald) & chew_gum(gerald))


In [60]:
print(read_expr(r'\x.\y.(dog(x) & own(y, x))(cyril)').simplify())

\y.(dog(cyril) & own(y,cyril))


In [61]:
print(read_expr(r'\x y.(dog(x) & own(y, x))(cyril, angus)').simplify())

(dog(cyril) & own(angus,cyril))


In [62]:
expr1 = read_expr('exists x.P(x)')
print(expr1)

exists x.P(x)


In [63]:
expr2 = expr1.alpha_convert(nltk.sem.Variable('z'))
print(expr2)

exists z.P(z)


In [64]:
expr1 == expr2

True

In [65]:
expr3 = read_expr('\P.(exists x.P(x))(\y.see(y, x))')
print(expr3)

(\P.exists x.P(x))(\y.see(y,x))


In [66]:
print(expr3.simplify())

exists z1.see(z1,x)


### Quantified NPs

In [None]:
"""
exists x.(dog(x) & bark(x))

\P.exists x.(dog(x) & P(x))

\P.all x.(dog(x) -> P(x))

\Q P.exists x.(Q(x) & P(x))
"""

### Transitive Verbs

In [None]:
"""
Angus chases a dog

\y.exists x.(dog(x) & chase(y, x))

\P.exists x.(dog(x) & P(x))(\z.chase(y, z))
-> exists x.(dog(x) & chase(y, x))

X(\z.cahse(y, z))
\X y.X(\x.chase(y, x))
"""

In [68]:
read_expr = nltk.sem.Expression.fromstring
tvp = read_expr(r'\X x.X(\y.chase(x,y))')
np = read_expr(r'(\P.exists x.(dog(x) & P(x)))')
vp = nltk.sem.ApplicationExpression(tvp, np)
print(vp)

(\X x.X(\y.chase(x,y)))(\P.exists x.(dog(x) & P(x)))


In [69]:
print(vp.simplify())

\x.exists z2.(dog(z2) & chase(x,z2))


In [71]:
nltk.download('book_grammars')

[nltk_data] Downloading package book_grammars to /root/nltk_data...
[nltk_data]   Unzipping grammars/book_grammars.zip.


True

In [72]:
from nltk import load_parser
parser = load_parser('grammars/book_grammars/simple-sem.fcfg', trace=0)
sentence = 'Angus gives a bone to every dog'
tokens = sentence.split()
for tree in parser.parse(tokens):
  print(tree.label()['SEM'])

all z4.(dog(z4) -> exists z3.(bone(z3) & give(angus,z3,z4)))


In [73]:
sents = ['Irene walks', 'Cyril bites an ankle']
grammar_file = 'grammars/book_grammars/simple-sem.fcfg'
for results in nltk.interpret_sents(sents, grammar_file):
  for (synrep, semrep) in results:
    print(synrep)

(S[SEM=<walk(irene)>]
  (NP[-LOC, NUM='sg', SEM=<\P.P(irene)>]
    (PropN[-LOC, NUM='sg', SEM=<\P.P(irene)>] Irene))
  (VP[NUM='sg', SEM=<\x.walk(x)>]
    (IV[NUM='sg', SEM=<\x.walk(x)>, TNS='pres'] walks)))
(S[SEM=<exists z5.(ankle(z5) & bite(cyril,z5))>]
  (NP[-LOC, NUM='sg', SEM=<\P.P(cyril)>]
    (PropN[-LOC, NUM='sg', SEM=<\P.P(cyril)>] Cyril))
  (VP[NUM='sg', SEM=<\x.exists z5.(ankle(z5) & bite(x,z5))>]
    (TV[NUM='sg', SEM=<\X x.X(\y.bite(x,y))>, TNS='pres'] bites)
    (NP[NUM='sg', SEM=<\Q.exists x.(ankle(x) & Q(x))>]
      (Det[NUM='sg', SEM=<\P Q.exists x.(P(x) & Q(x))>] an)
      (Nom[NUM='sg', SEM=<\x.ankle(x)>]
        (N[NUM='sg', SEM=<\x.ankle(x)>] ankle)))))


In [74]:
v = """
bertie => b
olive => o
cyril => c
boy => {b}
girl => {o}
dog => {c}
walk => {o, c}
see => {(b, o), (c, b), (o, c)}
"""

In [75]:
val = nltk.Valuation.fromstring(v)
g = nltk.Assignment(val.domain)
m = nltk.Model(val.domain, val)
sent = 'Cyril sees every boy'
grammar_file = 'grammars/book_grammars/simple-sem.fcfg'
results = nltk.evaluate_sents([sent], grammar_file, m, g)[0]
for (syntree, semrep, value) in results:
  print(semrep)
  print(value)

all z6.(boy(z6) -> see(cyril,z6))
True


### Quantifier Ambiguity Revisited

In [76]:
from nltk.sem import cooper_storage as cs
sentence = 'every girl chases a dog'
trees = cs.parse_with_bindops(sentence, grammar='grammars/book_grammars/storage.fcfg')
semrep = trees[0].label()['SEM']
cs_semrep = cs.CooperStore(semrep)
print(cs_semrep.core)

chase(z2,z3)


In [77]:
for bo in cs_semrep.store:
  print(bo)

bo(\P.all x.(girl(x) -> P(x)),z2)
bo(\P.exists x.(dog(x) & P(x)),z3)


In [78]:
cs_semrep.s_retrieve(trace=True)

Permutation 1
   (\P.all x.(girl(x) -> P(x)))(\z2.chase(z2,z3))
   (\P.exists x.(dog(x) & P(x)))(\z3.all x.(girl(x) -> chase(x,z3)))
Permutation 2
   (\P.exists x.(dog(x) & P(x)))(\z3.chase(z2,z3))
   (\P.all x.(girl(x) -> P(x)))(\z2.exists x.(dog(x) & chase(z2,x)))


In [80]:
for reading in cs_semrep.readings:
  print(reading)

exists x.(dog(x) & all z9.(girl(z9) -> chase(z9,x)))
all x.(girl(x) -> exists z10.(dog(z10) & chase(x,z10)))


## 5. Discoure Semantics

### Discourse Representation Theory

In [81]:
read_dexpr = nltk.sem.DrtExpression.fromstring
drs1 = read_dexpr('([x, y], [angus(x), dog(y), own(x, y)])')
print(drs1)

([x,y],[angus(x), dog(y), own(x,y)])


In [82]:
drs1.draw()

TclError: ignored

In [83]:
print(drs1.fol())

exists x y.(angus(x) & dog(y) & own(x,y))


In [84]:
drs2 = read_dexpr('([x], [walk(x)]) + ([y], [run(y)])')
print(drs2)

(([x],[walk(x)]) + ([y],[run(y)]))


In [85]:
print(drs2.simplify())

([x,y],[walk(x), run(y)])


In [86]:
drs3 = read_dexpr('([], [(([x], [dog(x)]) -> ([y],[ankle(y), bite(x, y)]))])')
print(drs3.fol())

all x.(dog(x) -> exists y.(ankle(y) & bite(x,y)))


In [87]:
drs4 = read_dexpr('([x, y], [angus(x), dog(y), own(x, y)])')
drs5 = read_dexpr('([u, z], [PRO(u), irene(z), bite(u, z)])')
drs6 = drs4 + drs5
print(drs6.simplify())

([u,x,y,z],[angus(x), dog(y), own(x,y), PRO(u), irene(z), bite(u,z)])


In [88]:
print(drs6.simplify().resolve_anaphora())

([u,x,y,z],[angus(x), dog(y), own(x,y), (u = [x,y,z]), irene(z), bite(u,z)])


In [89]:
from nltk import load_parser
parser = load_parser('grammars/book_grammars/drt.fcfg', logic_parser=nltk.sem.drt.DrtParser())
trees = list(parser.parse('Angus owns a dog'.split()))
print(trees[0].label()['SEM'].simplify())

([x,z12],[Angus(x), dog(z12), own(x,z12)])


### Discourse Processing

In [None]:
dt = nltk.DiscourseTester(['A student dances', 'Every student is a person'])
dt.readings()

In [None]:
# s0 readings:

# s0-r0: exists x.(student(x) & dance(x))

# s1 readings:

# s1-r0: all x.(student(x) -> person(x))

In [None]:
dt.add_sentence('No person dances', consistchk=True)

# Inconsistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0']:
#     s0-r0: exists x.(student(x) & dance(x))
#     s1-r0: all x.(student(x) -> person(x))
#     s2-r0: -exists x.(person(x) & dance(x))

In [None]:
dt.retract_sentence('No person dances', verbose=True)

# Current sentences are
# s0: A student dances
# s1: Every student is a person

In [None]:
dt.add_sentence('A person dances', informchk=True)

# Sentence 'A person dances' under reading 'exists x.(person(x) & dance(x))':
# Not informative relative to thread 'd0'

In [None]:
from nltk.tag import RegexpTagger
tagger = RegexpTagger(
[('^(chases|runs)$', 'VB'),
('^(a)$', 'ex_quant'),
('^(every)$', 'univ_quant'),
('^(dog|boy)$', 'NN'),
('^(He)$', 'PRP')])

rc = nltk.DrtGlueReadingCommand(depparser=nltk.MaltParser(tagger=tagger))
dt = nltk.DiscourseTester(['Every dog chases a boy', 'He runs'], rc)
dt.readings()

# s0 readings:

# s0-r0: ([],[(([x],[dog(x)]) -> ([z3],[boy(z3), chases(x,z3)]))])
# s0-r1: ([z4],[boy(z4), (([x],[dog(x)]) -> ([],[chases(x,z4)]))])

# s1 readings:

# s1-r0: ([x],[PRO(x), runs(x)])

In [None]:
dt.readings(show_thread_readings=True)
# d0: ['s0-r0', 's1-r0'] : INVALID: AnaphoraResolutionException
# d1: ['s0-r1', 's1-r0'] : ([z6,z10],[boy(z6), (([x],[dog(x)]) ->
# ([],[chases(x,z6)])), (z10 = z6), runs(z10)])

In [None]:
dt.readings(show_thread_readings=True, filter=True)
# d1: ['s0-r1', 's1-r0'] : ([z12,z15],[boy(z12), (([x],[dog(x)]) ->
# ([],[chases(x,z12)])), (z17 = z12), runs(z15)])