In [1]:
from src.cfg import CFG
from src.cky import CKY

In [2]:
# Grammar tables. Each key is a left-hand-side symbol and each value is the set
# of its alternatives. Right-hand sides are strings; judging by the symbol sets
# printed for the grammar, every character is one symbol (upper-case =
# non-terminal, lower-case = terminal).

# Unweighted grammar; the empty string '' presumably denotes an epsilon
# production -- confirm against CFG.
rules = dict(
    S={'AB'},
    A={'aAb', ''},
    B={'bBc', ''},
)

# Unweighted grammar whose right-hand sides are all either two upper-case
# symbols or a single lower-case symbol.
rules2 = dict(
    S={'AB', 'BC'},
    A={'BA', 'a'},
    B={'CC', 'b'},
    C={'AB', 'a'},
)

# Weighted grammar: alternatives are (right-hand side, weight) pairs and the
# weights of each left-hand side add up to 1.
prules2 = dict(
    S={('AB', 0.8), ('b', 0.1), ('c', 0.1)},
    A={('a', 0.5), ('c', 0.5)},
    B={('b', 0.7), ('c', 0.3)},
)

# Weighted counterpart of `rules2` (same right-hand sides, each with a weight);
# this is the table handed to CFG further down in this cell.
prules = dict(
    S={('AB', 0.9), ('BC', 0.1)},
    A={('BA', 0.5), ('a', 0.5)},
    B={('CC', 0.7), ('b', 0.3)},
    C={('AB', 0.6), ('a', 0.4)},
)
			
# Build the grammar object from the weighted table `prules`, with 'S' as the
# start symbol and the probabilistic flag switched on.
cfg = CFG(
    rules=prules,
    start_symbol='S',
    probabilistic=True,
)


Initial rules:
PCFG(
	S --> AB [0.9] | BC [0.1]
	A --> BA [0.5] | a [0.5]
	B --> CC [0.7] | b [0.3]
	C --> AB [0.6] | a [0.4]
)

* Start Symbol: S
* Terminal Symbols: {a, b}
* Non-Terminal Symbols: {S, A, B, C}



In [3]:
# Ask the grammar for a collection of words. What the argument 10 bounds is not
# visible in this notebook.
# NOTE(review): presumably a length or derivation-depth limit -- confirm
# against CFG.generate_words.
generated_words = cfg.generate_words(10)
# Bare last expression so the notebook displays the result; the captured
# output shows a list of (word, number) pairs.
generated_words

Number of words generated: 545




[('ab', 0.135),
 ('aaa', 0.05488),
 ('bab', 0.02295),
 ('aaab', 0.019908),
 ('aaaaa', 0.012418559999999999),
 ('aaba', 0.012348000000000001),
 ('ba', 0.012),
 ('baaa', 0.008568000000000001),
 ('aabab', 0.007465499999999998),
 ('aaaaab', 0.0051791039999999995),
 ('aaaaaaa', 0.00370637568),
 ('aaaaba', 0.0034856640000000003),
 ('bbab', 0.0034425),
 ('baaab', 0.0032129999999999997),
 ('aabaaa', 0.002871792),
 ('aaaabab', 0.002193004799999999),
 ('ababa', 0.002079),
 ('baaaaa', 0.00197568),
 ('baaba', 0.0019278000000000003),
 ('abaab', 0.0019278),
 ('aaaaaaab', 0.0016491000960000006),
 ('abaaaa', 0.001495872),
 ('ababab', 0.0013948200000000002),
 ('bbaaa', 0.0012852000000000002),
 ('aaaaaaaaa', 0.0012643719782399994),
 ('aabaaab', 0.0012071052),
 ('baabab', 0.001204875),
 ('aaabaa', 0.0011783520000000001),
 ('aaaaaaba', 0.0011453016960000007),
 ('aabbab', 0.0011198249999999999),
 ('abaa', 0.001008),
 ('aaababa', 0.0009435636000000001),
 ('baaaaab', 0.0009356255999999998),
 ('aaaabaaa', 0.0

In [4]:
# Wrap the grammar in a CKY object; the following cells call cky.parse(word)
# on each generated word.
cky = CKY(cfg)

In [5]:
# Feed every generated word to the parser and print exactly what it returns.
for entry in generated_words:
    candidate = entry[0]  # first item of each entry is the word string
    print(cky.parse(candidate))

(True, 0.135, [('S', 0.02025)])
(True, 0.05487999999999999, [('S', 0.000451584), ('S', 3.2112640000000004e-05)])
(True, 0.02295, [('S', 1.0934999999999998e-05), ('S', 6.834374999999999e-05)])
(True, 0.019908, [('S', 7.715735999999999e-07), ('S', 2.4385536e-07), ('S', 5.689958399999999e-07)])
(True, 0.012418559999999999, [('S', 4.796389124997119e-09), ('S', 4.7963891249971205e-09), ('S', 2.0302176190464e-09), ('S', 1.2688860119039996e-08), ('S', 3.410765599997952e-10), ('S', 3.410765599997953e-10)])
(True, 0.012348000000000001, [('S', 7.715736e-07), ('S', 5.4867456e-08)])
(True, 0.012, [('S', 0.00144)])
(True, 0.008568, [('S', 9.10393344e-08), ('S', 1.524096e-06)])
(True, 0.0074655, [('S', 1.46478425625e-10), ('S', 2.197176384375e-09), ('S', 1.23451776e-10), ('S', 4.166497439999999e-10), ('S', 7.2013536e-10), ('S', 3.645685259999999e-10)])
(True, 0.005179103999999999, [('S', 9.219465541730296e-13), ('S', 6.798881584683412e-13), ('S', 1.0926773975384058e-11), ('S', 1.3008064256409597e-12

In [6]:
# Parse every generated word, keep the raw parser results, print one line per
# word, then report how many words were accepted and how many were rejected.
words = generated_words

# The grammar does not change inside the loop, so query its kind once.
is_probabilistic = cfg.is_probabilistic()

results = []
for word in words:
    result = cky.parse(word[0])
    results.append(result)
    if is_probabilistic:
        # Show the first two items of the result (displayed in the output as
        # a True/False flag followed by a number).
        print(f'{word[0]}: {result[0]} [{result[1]}]')
    else:
        print(f'{word[0]}: {result}')

# result[0] is summed directly; booleans count as 1/0.
# NOTE(review): this indexes result[0] for every grammar kind, while the
# non-probabilistic branch above prints `result` whole -- confirm that
# cky.parse returns an indexable value in that case too.
num_trues = sum(result[0] for result in results)
print(f"\nTrue values: {num_trues}\nFalse values: {len(results) - num_trues}")

ab: True [0.135]
aaa: True [0.05487999999999999]
bab: True [0.02295]
aaab: True [0.019908]
aaaaa: True [0.012418559999999999]
aaba: True [0.012348000000000001]
ba: True [0.012]
baaa: True [0.008568]
aabab: True [0.0074655]
aaaaab: True [0.005179103999999999]
aaaaaaa: True [0.0037063756799999995]
aaaaba: True [0.0034856639999999994]
bbab: True [0.0034425]
baaab: True [0.0032129999999999997]
aabaaa: True [0.0028717919999999997]
aaaabab: True [0.0021930047999999995]
ababa: True [0.002079]
baaaaa: True [0.00197568]
baaba: True [0.0019278]
abaab: True [0.0019278000000000001]
aaaaaaab: True [0.0016491000959999997]
abaaaa: True [0.0014958719999999999]
ababab: True [0.00139482]
bbaaa: True [0.0012852]
aaaaaaaaa: True [0.0012643719782399997]
aabaaab: True [0.0012071051999999998]
baabab: True [0.0012048750000000002]
aaabaa: True [0.001178352]
aaaaaaba: True [0.0011453016959999996]
aabbab: True [0.0011198249999999999]
abaa: True [0.001008]
aaababa: True [0.0009435636]
baaaaab: True [0.00093562559