/
bc-word-transform.pl
executable file
·241 lines (176 loc) · 5.14 KB
/
bc-word-transform.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
#!/bin/perl
# This program uses the scrabble dictionary at scrab.db.94y.info
# NOTE: older versions of this program used /usr/dict/words and ispell
# Changes 'source' to 'target' using any/all of the following transforms:
require "/usr/local/lib/bclib.pl";
# TODO: hardcoding source/target during testing only
($source, $target) = ("BARRY", "CARTER");
($source, $target) = ("WORD", "GAMES");
($source, $target) = ("SALT", "PEPPER");
($source, $target) = ("DAY", "NIGHT");
($source, $target) = ("GESTATION", "BIRTH");
($source, $target) = ($globopts{source}, $globopts{target});
# load entire db into memory (faster?)
@res = sqlite3hashlist("SELECT * FROM words", "/home/barrycarter/BCINFO/sites/DB/scrab.db");
for $i (@res) {
# map word to definition
$worddef{$i->{word}} = $i->{definition};
# anagrams
push(@{$ana{$i->{sig1}}}, $i->{word});
# currently unused
push(@{$lba{$i->{sig2}}}, $i->{word});
}
warn "FORCE DEBUGGING";
$globopts{debug}=1;
@source = ($source);
@target = ($target);
for (;;) {
# first, go forward from source word and reset @source
%swords = word_transforms(@source);
@source = ();
for $i (keys %swords) {
# if weve already reached this word, ignore it
if ($source{$i}) {next;}
# TODO: this seems kludgey somehow
# figure out original word
$oword = $swords{$i};
$oword=~s/:.*$//isg;
# note that source has hit this words + path
$source{$i} = "$source{$oword} $swords{$i}:$i";
# do any match things target has hit?
if ($target{$i}) {
$final = $i; $alldone = 1; last;
}
# add word to source
push(@source, $i);
}
# if not, go backwards from target word and reset target
%twords = word_transforms(@target);
@target = ();
for $i (keys %twords) {
# if weve already reached this word, ignore it
if ($target{$i}) {next;}
# TODO: this seems kludgey AND redundant
$oword = $twords{$i};
$oword=~s/:.*$//isg;
# note that target has hit this word + path
$target{$i} = "$target{$oword} $twords{$i}:$i";
# do any match things target has hit?
if ($source{$i}) {
$final = $i; $alldone = 1; last;
}
# add word to target
push(@target, $i);
}
# TODO: kludgey because Im nested fairly deep
if ($alldone) {last;}
debug("SOURCE",@source);
debug("TARGET",@target);
}
debug("FINAL: $final","SOURCE: $source{$final}", "TARGET: $target{$final}");
=item word_transforms(@words)
Given a list of words, return all one-level transforms of those words
in a hash mapping new word to "old_word:transform_type"
=cut
sub word_transforms {
my(@words) = @_;
my(%words);
my(%ret);
for $word (@words) {
# "superhash" of words and definitions
%{$words{drop}} = word_drop_letter($word);
%{$words{add}} = word_add_letter($word);
%{$words{change}} = word_change_letter($word);
%{$words{anagram}} = word_anagram($word);
# for each type of transform
for $i (keys %words) {
# for each word in type $i transform
for $j (keys %{$words{$i}}) {
# if this word already defined, weve already got path too
# if ($definition{$j}) {next;}
# $definition{$j} = $words{$i}->{$j};
$ret{$j} = "$word:$i";
# $path{$j} = "$path{$word} $word:$i:$j";
# push(@words, $j);
}
}
}
return %ret;
}
=item word_drop_letter($word)
Returns valid words/definitions formed by dropping letter from $word
=cut
sub word_drop_letter {
my($word) = @_;
my(@words);
my(%rethash);
# potential words, quoted
for $i (1..length($word)) {
push(@words, uc(substr($word,0,$i-1).substr($word,$i)));
}
return word_get(@words);
}
=item word_add_letter($word)
Returns valid words/definitions formed by dropping letter to $word
=cut
sub word_add_letter {
my($word) = @_;
my(@words);
# potential words, quoted
for $i (0..length($word)) {
for $j ("a".."z") {
push(@words, uc(substr($word,0,$i).$j.substr($word,$i)));
}
}
# debug("SIZE: $#words");
return word_get(@words);
}
=item word_change_letter($word)
Returns valid words/definitions formed by changing single letter of $word
=cut
sub word_change_letter {
my($word) = @_;
my(@words);
# potential words, quoted
for $i (1..length($word)) {
for $j ("A".."Z") {
# cant change letter for itself, pointless
if (substr($word,$i-1,1) eq $j) {next;}
push(@words, uc(substr($word,0,$i-1).$j.substr($word,$i)));
}
}
# debug("SIZE: $#words");
return word_get(@words);
}
=item word_anagram($word)
Return anagrams of $word, with definitions
=cut
sub word_anagram {
# <h>Oh, my word!</h>
my($word) = @_;
$word = uc($word);
my(%rethash);
# determine sig1 of word (as caps)
my($sig1) = uc(join("",sort(split(//,$word))));
# look through anagram list for this sig
for $i (@{$ana{$sig1}}) {
if ($i eq $word) {next;}
$rethash{$i} = $worddef{$i};
}
return %rethash;
}
=item word_get(@list)
Given @list, a list of potential words (quoted), return those that are actually
words and their definitions.
=cut
sub word_get {
my(@list) = @_;
my(%rethash);
# look in worddef hash (global)
for $i (@list) {
if ($worddef{$i}) {
$rethash{$i} = $worddef{$i};
}
}
return %rethash;
}