In [1]:
from src.cfg import CFG
from src.cky import CKY

In [2]:
# Grammar tables: each maps a left-hand-side symbol to the set of its
# right-hand sides. Upper-case letters are non-terminals, lower-case letters
# are terminals.

# Plain (non-probabilistic) grammar: S -> AB, A -> aAb | '', B -> bBc | ''.
# NOTE(review): the empty string is presumably the epsilon production —
# confirm against CFG.
rules = dict(
    S={'AB'},
    A={'aAb', ''},
    B={'bBc', ''},
)

# Plain grammar in which every right-hand side is either a pair of
# non-terminals or a single terminal.
rules2 = dict(
    S={'AB', 'BC'},
    A={'BA', 'a'},
    B={'CC', 'b'},
    C={'AB', 'a'},
)

# Small probabilistic grammar over the terminals a, b, c. Each production is
# a (right_hand_side, probability) pair; the probabilities of every
# left-hand side sum to 1.
prules2 = dict(
    S={('AB', 0.8), ('b', 0.1), ('c', 0.1)},
    A={('a', 0.5), ('c', 0.5)},
    B={('b', 0.7), ('c', 0.3)},
)

# Probabilistic counterpart of rules2: same productions, each paired with a
# probability (summing to 1 per left-hand side).
prules = dict(
    S={('AB', 0.9), ('BC', 0.1)},
    A={('BA', 0.5), ('a', 0.5)},
    B={('CC', 0.7), ('b', 0.3)},
    C={('AB', 0.6), ('a', 0.4)},
)
			
# Build the grammar object from the probabilistic rule table `prules`, with
# 'S' as the start symbol. Constructing it prints a summary of the rules and
# of the start/terminal/non-terminal symbols (see the cell output).
cfg = CFG(rules=prules, start_symbol='S', probabilistic=True)


Initial rules:
PCFG(
	S --> AB [0.9] | BC [0.1]
	A --> BA [0.5] | a [0.5]
	B --> CC [0.7] | b [0.3]
	C --> AB [0.6] | a [0.4]
)

* Start Symbol: S
* Terminal Symbols: {a, b}
* Non-Terminal Symbols: {S, A, B, C}



In [3]:
# Enumerate words of the grammar; for this probabilistic grammar the result
# is a list of (word, probability) pairs, shown in descending probability.
# NOTE(review): the meaning of the argument 10 (length bound? derivation
# depth?) is not visible in this notebook — confirm against CFG.generate_words.
generated_words = cfg.generate_words(10)
# Bare last expression so the notebook displays the returned list.
generated_words

Number of words generated: 545




[('ab', 0.135),
 ('aaa', 0.05488),
 ('bab', 0.02295),
 ('aaab', 0.019908),
 ('aaaaa', 0.012418559999999999),
 ('aaba', 0.012348000000000001),
 ('ba', 0.012),
 ('baaa', 0.008568000000000001),
 ('aabab', 0.007465499999999998),
 ('aaaaab', 0.0051791039999999995),
 ('aaaaaaa', 0.0037063756799999995),
 ('aaaaba', 0.0034856640000000003),
 ('bbab', 0.0034425),
 ('baaab', 0.0032129999999999997),
 ('aabaaa', 0.0028717919999999997),
 ('aaaabab', 0.0021930048),
 ('ababa', 0.002079),
 ('baaaaa', 0.0019756799999999996),
 ('baaba', 0.0019278000000000003),
 ('abaab', 0.0019278),
 ('aaaaaaab', 0.001649100096),
 ('abaaaa', 0.001495872),
 ('ababab', 0.00139482),
 ('bbaaa', 0.0012852000000000002),
 ('aaaaaaaaa', 0.001264371978239999),
 ('aabaaab', 0.0012071051999999998),
 ('baabab', 0.001204875),
 ('aaabaa', 0.0011783520000000001),
 ('aaaaaaba', 0.001145301696),
 ('aabbab', 0.0011198249999999999),
 ('abaa', 0.001008),
 ('aaababa', 0.0009435636000000001),
 ('baaaaab', 0.0009356255999999997),
 ('aaaabaaa',

In [4]:
# Build a CKY parser for the grammar `cfg`; the cells below query it through
# cky.parse(word).
cky = CKY(cfg)

In [5]:
# Print the complete CKY result for every generated word. Each entry of
# generated_words is indexed at position 0 to obtain the word itself; for this
# grammar the printed result is (accepted, probability, list of parses).
for word in generated_words:
    text = word[0]
    print(text, cky.parse(text))

ab (True, 0.135, [(('S', ('A', 'a', 0.5), ('B', 'b', 0.3)), 0.135)])
aaa (True, 0.05487999999999999, [(('S', ('A', 'a', 0.5), (('B', ('C', 'a', 0.4), ('C', 'a', 0.4)), 0.11200000000000002)), 0.05039999999999999), (('S', (('B', ('C', 'a', 0.4), ('C', 'a', 0.4)), 0.11200000000000002), ('C', 'a', 0.4)), 0.0044800000000000005)])
bab (True, 0.02295, [(('S', ('B', 'b', 0.3), (('C', ('A', 'a', 0.5), ('B', 'b', 0.3)), 0.09)), 0.0027), (('S', (('A', ('B', 'b', 0.3), ('A', 'a', 0.5)), 0.075), ('B', 'b', 0.3)), 0.02025)])
aaab (True, 0.019908, [(('S', ('A', 'a', 0.5), (('B', ('C', 'a', 0.4), (('C', ('A', 'a', 0.5), ('B', 'b', 0.3)), 0.09)), 0.025199999999999997)), 0.01134), (('S', (('B', ('C', 'a', 0.4), ('C', 'a', 0.4)), 0.11200000000000002), (('C', ('A', 'a', 0.5), ('B', 'b', 0.3)), 0.09)), 0.001008), (('S', (('A', (('B', ('C', 'a', 0.4), ('C', 'a', 0.4)), 0.11200000000000002), ('A', 'a', 0.5)), 0.027999999999999997), ('B', 'b', 0.3)), 0.00756)])
aaaaa (True, 0.021638399999999995, [(('S', ('A',

In [6]:
# Parse every generated word again, keep the raw parser results, print a
# one-line verdict per word, and finish with a tally of accepted vs. rejected
# words.
words = generated_words

results = []
for word in words:
    text = word[0]
    result = cky.parse(text)
    results.append(result)
    # Probabilistic grammar: show the acceptance flag and the probability.
    # Otherwise: show the parser result unchanged.
    summary = (
        f'{text}: {result[0]} [{result[1]}]'
        if cfg.is_probabilistic()
        else f'{text}: {result}'
    )
    print(summary)

# The first element of each result is the acceptance flag; summing the flags
# counts the accepted words.
num_trues = sum(result[0] for result in results)
print(f"\nTrue values: {num_trues}\nFalse values: {len(results) - num_trues}")
	

ab: True [0.135]
aaa: True [0.05487999999999999]
bab: True [0.02295]
aaab: True [0.019908]
aaaaa: True [0.021638399999999995]
aaba: True [0.012348000000000001]
ba: True [0.012]
baaa: True [0.008568]
aabab: True [0.014553]
aaaaab: True [0.013956767999999994]
aaaaaaa: True [0.020391651839999983]
aaaaba: True [0.009017568000000002]
bbab: True [0.0034425]
baaab: True [0.0035909999999999996]
aabaaa: True [0.005517792]
aaaabab: True [0.013671352799999998]
ababa: True [0.002457]
baaaaa: True [0.0036973439999999995]
baaba: True [0.0019278000000000001]
abaab: True [0.0019278000000000001]
aaaaaaab: True [0.016632064512]
abaaaa: True [0.0016369919999999999]
ababab: True [0.0032659200000000003]
bbaaa: True [0.0012851999999999998]
aaaaaaaaa: True [0.027917528002559977]
aabaaab: True [0.0038875031999999984]
baabab: True [0.00269325]
aaabaa: True [0.001629936]
aaaaaaba: True [0.010851224832]
aabbab: True [0.00218295]
abaa: True [0.001008]
aaababa: True [0.004480736400000002]
baaaaab: True [0.00332972