In [6]:
from cfg import CFG
from cky import CKY

In [7]:
# Non-ambiguous CFG for {a^i · b^j · c^k | j = i + k}:
# A yields a^i b^i, B yields b^k c^k, and S concatenates both halves,
# so the b-count is always i + k. The empty string '' is the ε-production.
rules = dict(
    S={'AB'},
    A={'aAb', ''},
    B={'bBc', ''},
)

cfg = CFG(rules=rules, start_symbol='S')

Initial CFG:
 CFG(
	S --> AB
	A --> aAb | ε
	B --> bBc | ε
)

* Start Symbol: S
* Terminal Symbols: {a, b, c, ε}
* Non-Terminal Symbols: {S, A, B}

Removed start symbol from RHS: CFG(
	C --> S
	S --> AB
	A --> aAb | ε
	B --> bBc | ε
)

* Start Symbol: C
* Terminal Symbols: {a, b, c, ε}
* Non-Terminal Symbols: {C, S, A, B}

Removed rules with nonsolitary terminals: CFG(
	C --> S
	S --> AB
	A --> DAE | ε
	B --> EBF | ε
	D --> a
	E --> b
	F --> c
)

* Start Symbol: C
* Terminal Symbols: {a, b, c, ε}
* Non-Terminal Symbols: {C, S, A, B, D, E, F}

Removed rules with long nonterminals: CFG(
	C --> S
	S --> AB
	A --> GE | ε
	B --> HF | ε
	D --> a
	E --> b
	F --> c
	G --> DA
	H --> EB
)

* Start Symbol: C
* Terminal Symbols: {a, b, c, ε}
* Non-Terminal Symbols: {C, S, A, B, D, E, F, G, H}

Removed epsilon rules: CFG(
	C --> S | ε
	S --> A | AB | B | ε
	A --> GE
	B --> HF
	D --> a
	E --> b
	F --> c
	G --> D | DA
	H --> E | EB
)

* Start Symbol: C
* Terminal Symbols: {a, b, c, ε}
* Non-Terminal 

In [8]:
# Display, in sorted order, the words returned by cfg.generate_words(10).
# NOTE(review): the argument looks like a maximum word length (no listed word
# is longer than 10 symbols) — confirm against the CFG implementation.
sorted(cfg.generate_words(10))

Number of words generated: 21


['aaaaabbbbb',
 'aaaabbbb',
 'aaaabbbbbc',
 'aaabbb',
 'aaabbbbbcc',
 'aaabbbbc',
 'aabb',
 'aabbbbbccc',
 'aabbbbcc',
 'aabbbc',
 'ab',
 'abbbbbcccc',
 'abbbbccc',
 'abbbcc',
 'abbc',
 'bbbbbccccc',
 'bbbbcccc',
 'bbbccc',
 'bbcc',
 'bc',
 'ε']

In [9]:
# Wrap the grammar in a CKY instance; the following cell calls cky.parse(word) on it.
cky = CKY(cfg)

In [10]:
# Sanity check: run every generated word through the CKY parser and tally
# how many were accepted. sorted() already returns a list, so no extra
# list() wrapper is needed.
words = sorted(cfg.generate_words(10))

results = []
for word in words:
    result = cky.parse(word)
    print(f'{word}: {result}')
    results.append(result)

# Summing the results counts the True verdicts.
print(f"True values: {sum(results)}\nFalse values: {len(results) - sum(results)}")
	

Number of words generated: 21
aaaaabbbbb: True
aaaabbbb: True
aaaabbbbbc: True
aaabbb: True
aaabbbbbcc: True
aaabbbbc: True
aabb: True
aabbbbbccc: True
aabbbbcc: True
aabbbc: True
ab: True
abbbbbcccc: True
abbbbccc: True
abbbcc: True
abbc: True
bbbbbccccc: True
bbbbcccc: True
bbbccc: True
bbcc: True
bc: True
ε: True
True values: 21
False values: 0


## Probabilistic CFG (PCFG) parsing with CKY

In [11]:
# Usage example: each alternative appears to be a (right-hand side, probability)
# pair; the weights listed for every non-terminal sum to 1.
# NOTE(review): PCFG is not imported by any visible cell (only CFG and CKY are),
# so this cell raises NameError as written — import PCFG from the module that
# defines it before running.
rules = {
    'S': [('AB', 0.9), ('BC', 0.1)],
    'A': [('BA', 0.5), ('a', 0.5)],
    'B': [('CC', 0.7), ('b', 0.3)],
    'C': [('AB', 0.6), ('a', 0.4)]
}

# NOTE(review): `rules` (above) and `cky` (below) rebind names used by the
# earlier CFG cells; re-running those cells after this one would pick up the
# probabilistic objects instead.
grammar = PCFG(rules, 'S')
cky = CKY(grammar)

word = "ab"
# Presumably parse() yields a probability for a PCFG rather than the bool seen
# in the CFG cells — confirm in the CKY implementation.
probability = cky.parse(word)
print(f"The probability of the word '{word}' being in the language generated by the grammar is {probability}")

NameError: name 'PCFG' is not defined