/
lettergen2
executable file
·50 lines (39 loc) · 1.27 KB
/
lettergen2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python3
from collections import defaultdict
import sys
import re
# Greedy tile-set generator.
#
# usage: lettergen2 NTILES [WORDFILE]
#
# Reads one word per line from WORDFILE (or stdin when no file is given),
# keeps the words that can be written as a sequence of distinct two-letter
# pairs, then repeatedly throws away the pair that appears in the fewest
# surviving words until at most NTILES pairs remain.  Prints the surviving
# pairs on one comma-separated line, followed by every word that can still
# be formed from them, one per line.

USAGE = 'usage: lettergen2 NTILES [WORDFILE]'


def _index_words(lines, ntiles):
    """Map each letter pair to the set of usable words containing it.

    lines  -- iterable of strings, one candidate word per item; a trailing
              newline is stripped from each.
    ntiles -- maximum number of letter pairs a word may consist of.

    A word is usable when it is made only of lowercase a-z, has an even
    length of at most 2 * ntiles, and contains no letter pair twice.
    Returns a defaultdict(set) keyed by pair, in first-seen order.
    """
    # Same filter as before: discards odd lengths, capitals, apostrophes.
    valid_word = re.compile(r'^([a-z][a-z]){1,%s}$' % ntiles)
    index = defaultdict(set)
    for line in lines:
        word = line.rstrip('\n')
        if not valid_word.match(word):
            continue
        # Split the word into consecutive two-letter pairs.
        pairs = [word[i:i + 2] for i in range(0, len(word), 2)]
        # Discard words with repeated pairs.
        if len(set(pairs)) != len(pairs):
            continue
        # Register the word under each of its pairs.
        for pair in pairs:
            index[pair].add(word)
    return index


def _prune_to(index, ntiles):
    """Drop pairs from index in place until at most ntiles remain.

    Each step removes the pair with the fewest remaining words (the first
    such pair in insertion order on ties) and deletes those words from
    every other pair's word set, since they can no longer be formed.
    """
    while len(index) > ntiles:
        least_damaging = min(index, key=lambda pair: len(index[pair]))
        lost_words = index.pop(least_damaging)
        if not lost_words:
            # Nothing depended on this pair; other sets are unaffected.
            continue
        for wordset in index.values():
            wordset.difference_update(lost_words)


def _report(index):
    """Print the surviving pairs, then every word still formable."""
    print(','.join(sorted(index)))
    for word in sorted(set().union(*index.values())):
        print(word)


def main(argv=None):
    """Run the generator.

    argv -- argument list without the program name; defaults to
            sys.argv[1:].  argv[0] is NTILES, optional argv[1] is the
            word file path (stdin is read when it is absent).

    Exits with a usage message when NTILES is missing, is not an integer,
    or is smaller than 1.
    """
    args = list(sys.argv[1:] if argv is None else argv)
    if not args:
        sys.exit(USAGE)
    try:
        ntiles = int(args.pop(0))
    except ValueError:
        sys.exit(USAGE)
    if ntiles < 1:
        # The word pattern's {1,NTILES} quantifier needs NTILES >= 1.
        sys.exit(USAGE)

    if args:
        # Close the word file as soon as it has been consumed.
        with open(args.pop(0)) as wordfile:
            index = _index_words(wordfile, ntiles)
    else:
        index = _index_words(sys.stdin, ntiles)

    _prune_to(index, ntiles)
    _report(index)


if __name__ == '__main__':
    main()