Skip to content
Newer
Older
100755 287 lines (246 sloc) 8.19 KB
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
1 #!/usr/bin/perl
e3657b1 @alexfink sundry naming and describing improvements
authored Feb 20, 2011
2 # Generate random phonologies, featurally and via ordered rules with persistence,
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
3 # with allophony and the potential for good morphophonology and the works. (Getting there!)
4 # Alex Fink, January 2010 -- present.
5 # Thanks to Marcus Smith <http://smithma.bol.ucla.edu/> for unwitting inspiration,
6 # and Marcus and UPSID for being proximal sources for various numbers.
8f3ef15 @alexfink better structuring rule objects
authored Nov 4, 2011
7 # (A much greater proportion of the numbers are wholly fabricated, though!)
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
8
c01b382 @alexfink lumping of too-similar syllable positions
authored Oct 22, 2011
9 # What next? Should we privilege
e3657b1 @alexfink sundry naming and describing improvements
authored Feb 20, 2011
10 # (a) advanced inventory tracking, with the bigram transition matrix stuff; or
11 # (b) new phonology? (long-distance rules; syllable tracking > moraic stuff)
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
12
13 use strict;
14 use YAML::Any;
15 use CGI;
16
eac5632 @alexfink packages to separate files
authored Oct 16, 2011
17 use FeatureSystem;
25a5ed3 @alexfink PhoneSet to own file
authored Nov 14, 2011
18 use PhoneSet;
eac5632 @alexfink packages to separate files
authored Oct 17, 2011
19 use PhonologicalRule;
20 use Phonology;
a43dc42 @alexfink separate tracking of the current inventory from describing
authored Dec 18, 2011
21 use PhonologySynchronicState;
eac5632 @alexfink packages to separate files
authored Oct 17, 2011
22 use Transcription;
23 use PhonologyDescriber;
24
47f13f7 @alexfink fewer false positives in rule conflict detection
authored Nov 21, 2011
25 my $version = '0.3.1b';
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
26 my $credits = 'Gleb, a phonology generator, by Alex Fink' .
27 (' ' x (29 - length($version))) . # for a total length of 78
28 "version $version";
29
30 my $verbose;
0fc5796 @alexfink slightly friendlier usage message
authored Jan 17, 2012
31 my $show_seed;
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
32 my $use_html;
33 my $CGI;
6af9159 @alexfink several rule description fixes
authored May 26, 2011
34 my $seed = time ^ $$ ^ $$<<15;
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
35
36
37
38 my $outfile;
0fb52d8 @alexfink deglobalise feature system
authored Oct 16, 2011
39 my $annotate_output;
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
40 my $infile;
41 my $show_inventory;
42 my $show_all;
43 my $num_words = 0;
44 my $phone_to_interpret;
45 my $canonicalise;
46
47 sub die_with_usage {
48 print STDERR <<USAGE;
49 $credits
50
51 Usage: $0 [options]
52
53 -I Produce a segmental inventory, with frequencies of appearance
54 in each syllable position.
55 -d Produce English descriptions of the phonology's rules, etc.
56 -w N Generate N random words.
57 -c When generating random words, also compute canonical phonemic
58 representations, which don't require unnecessary rules.
59 -h Use HTML.
0fc5796 @alexfink slightly friendlier usage message
authored Jan 17, 2012
60
61 -o <filename> Phonology output file. Defaults to no output. The output is a
62 YAML-formatted, not human-friendly, collection of the data
63 needed to run the phonology generator.
64 -O <filename> As above, with a little extra annotation for
65 like translations of the internal phone notation.
66 (Still not human-friendly.)
67 -i <filename> Input the phonology from the named file, rather than generating
68 a new one.
69
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
70 -r N Use N as the random seed.
71 -v Verbose. Show progress and a few other things.
8ba3e96 @alexfink more rule description and tabulation fixes
authored May 30, 2011
72 -D Show some debugging output.
0fc5796 @alexfink slightly friendlier usage message
authored Jan 17, 2012
73 -p <string> Do some conversions between phone formats. Do nothing else.
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
74
75 USAGE
76 exit 1;
77 }
78
79 sub parse_args {
80 my $arg;
81 while ($arg = shift) {
82 if ($arg eq '-D') {
fcf0cb6 @alexfink putting Phonology debug variables where they belong
authored Oct 19, 2011
83 $Phonology::debug++;
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
84 }
97f98cd @alexfink fixes to conflicts
authored Nov 12, 2011
85 elsif ($arg eq '--noprune') {
86 $Phonology::noprune++;
87 }
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
88 elsif ($arg eq '-r') {
89 $seed = shift;
90 die "-r expects an integer argument\n" if !defined $seed or ($seed !~ /^\-?[0-9]+$/);
91 }
92 elsif ($arg =~ /^-[oO]$/) {
93 $outfile = shift;
0fb52d8 @alexfink deglobalise feature system
authored Oct 17, 2011
94 $annotate_output = 1 if $arg eq '-O';
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
95 die "$arg expects a filename argument\n" if !defined $outfile;
96 }
97 elsif ($arg eq '-i') {
98 $infile = shift;
99 die "-i expects a filename argument\n" if !defined $infile;
100 }
101 elsif ($arg eq '-I') {
102 $show_inventory = 1;
103 }
104 elsif ($arg eq '-d') {
105 $show_inventory = $show_all = 1;
106 }
107 elsif ($arg eq '-v') {
fcf0cb6 @alexfink putting Phonology debug variables where they belong
authored Oct 19, 2011
108 $verbose = $Phonology::verbose = 1;
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
109 }
0fc5796 @alexfink slightly friendlier usage message
authored Jan 17, 2012
110 elsif ($arg eq '--showseed') {
111 $show_seed = 1;
112 }
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
113 elsif ($arg eq '-w') {
114 $num_words = shift;
115 die "-w expects an integer argument\n" if !defined $num_words or ($num_words !~ /^\-?[0-9]+$/);
116 }
117 elsif ($arg eq '-p') {
118 $phone_to_interpret = shift;
119 die "-p expects an argument\n" if !defined $phone_to_interpret;
120 }
121 elsif ($arg eq '-c') {
122 $canonicalise = 1;
123 }
124 elsif ($arg eq '-h') {
125 $use_html = 1;
126 }
127 else {
128 die_with_usage;
129 }
130 }
131 }
132
133 my $yamlimpl = YAML::Any->implementation;
fd2e35f @alexfink I've also observed YAML::Perl to work
authored Jun 6, 2011
134 unless (grep $_ eq $yamlimpl, qw(YAML::XS YAML::Syck YAML::Perl)) {
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
135 print STDERR <<END;
7d21824 @alexfink more compliant indentation in the YAML data files
authored Jun 3, 2011
136 Warning: your YAML implementation might not like the data files.
fd2e35f @alexfink I've also observed YAML::Perl to work
authored Jun 7, 2011
137 YAML::Syck and YAML::XS work. So does YAML::Perl, though it's mighty slow.
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
138 END
139 }
140
141 if ($0 =~ /\.cgi$/) {
142 $CGI = CGI->new;
143
144 $use_html = 1;
145 # lazily not taking genuine query arguments for now
146 $show_inventory = $show_all = 1;
147 $num_words = 30;
148
149 print $CGI->header;
150 }
151
152 parse_args @ARGV;
153
154 if ($use_html) {
155 my $style = <<END; # put everything in an IPAish font for now
156 * {
157 font-family: Gentium, 'Doulos SIL', 'Lucida Grande', serif;
158 }
159 END
160 print CGI::start_html(-title => 'Random phonology',
7fff1da @alexfink package PhonologyDescriber
authored Oct 15, 2011
161 -style => {-code => $style});
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
162 }
163 else {
164 $" = $, = ", ";
165 }
166
bc98c70 @alexfink deglobalising describer data
authored Oct 16, 2011
167 die 'feature system not found' unless -f 'features.yml';
0fb52d8 @alexfink deglobalise feature system
authored Oct 17, 2011
168 my $FS = FeatureSystem::load_file('features.yml');
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
169
bc98c70 @alexfink deglobalising describer data
authored Oct 16, 2011
170 my $phonetic_alphabet;
f989fc3 @alexfink package Transcription
authored Oct 15, 2011
171 if ($use_html && -f 'IPA_HTML.yml') {
0fb52d8 @alexfink deglobalise feature system
authored Oct 17, 2011
172 $phonetic_alphabet = Transcription::load_file('IPA_HTML.yml', $FS);
f989fc3 @alexfink package Transcription
authored Oct 16, 2011
173 } elsif (-f 'CXS.yml') {
0fb52d8 @alexfink deglobalise feature system
authored Oct 17, 2011
174 $phonetic_alphabet = Transcription::load_file('CXS.yml', $FS);
f989fc3 @alexfink package Transcription
authored Oct 16, 2011
175 } else {
176 die 'no suitable phonetic alphabet found';
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
177 }
fcf0cb6 @alexfink putting Phonology debug variables where they belong
authored Oct 19, 2011
178 $Phonology::debug_alphabet = Transcription::load_file('CXS.yml', $FS) if -f 'CXS.yml';
bc98c70 @alexfink deglobalising describer data
authored Oct 16, 2011
179
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
180 if (defined $phone_to_interpret) {
db6046b @alexfink initial extraction of package FeatureSystem.
authored Sep 17, 2011
181 $phone_to_interpret = $FS->parse($phone_to_interpret, undefined => 1) unless $phone_to_interpret =~ /^[.01u]*$/;
f989fc3 @alexfink package Transcription
authored Oct 16, 2011
182 print '[' . $phonetic_alphabet->name_phone($phone_to_interpret) . '] ' . $FS->feature_string($phone_to_interpret);
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
183 $phone_to_interpret =~ /[01]/g;
184 print ' ' . (pos($phone_to_interpret) - 1) if defined pos($phone_to_interpret);
185 print "\n";
186 exit 0;
187 }
188
0fc5796 @alexfink slightly friendlier usage message
authored Jan 17, 2012
189 print STDERR "seed $seed\n" if $verbose or $show_seed;
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
190 srand $seed;
191
4d09eac @alexfink partial extraction of package Phonology
authored Sep 16, 2011
192 my $pd;
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
193
194 if (defined $infile) {
0fb52d8 @alexfink deglobalise feature system
authored Oct 17, 2011
195 $pd = Phonology::load_file($infile, $FS);
4d09eac @alexfink partial extraction of package Phonology
authored Sep 17, 2011
196 } else {
0fb52d8 @alexfink deglobalise feature system
authored Oct 17, 2011
197 $pd = Phonology::generate($FS);
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
198 }
199
200 if (defined $outfile) {
0fb52d8 @alexfink deglobalise feature system
authored Oct 17, 2011
201 $pd->dump_file($outfile, $annotate_output);
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
202 }
203
eac5632 @alexfink packages to separate files
authored Oct 17, 2011
204 my $pdes = PhonologyDescriber::new($phonetic_alphabet, YAML::Any::LoadFile('phon_descr.yml'), $use_html);
04b33ff @alexfink making package PhonologicalRule
authored Oct 5, 2011
205
206 if ($show_inventory) {
bc98c70 @alexfink deglobalising describer data
authored Oct 16, 2011
207 print $pdes->describe_inventory($pd, html => $use_html);
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
208 }
209
210 if ($show_all) {
bc98c70 @alexfink deglobalising describer data
authored Oct 16, 2011
211 $pdes->tabulate($pd, annotate_only => 1); # should this be given a name of its own?
212 my ($template, $elaborations) = $pdes->describe_syllable_structure($pd, html => $use_html);
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
213 if ($use_html) {
214 print CGI::h2('Syllable structure'),
215 CGI::p(join '', @$template),
216 CGI::p(join '<br />', @$elaborations);
217 } else {
218 print "\nSyllable structure: " . join('', @$template) . "\n"; # not well formatted at present, eh
219 print join "\n", @$elaborations;
220 print "\n\n";
221 }
222
8ba3e96 @alexfink more rule description and tabulation fixes
authored May 30, 2011
223 print STDERR "describing rules...\n" if $verbose;
bc98c70 @alexfink deglobalising describer data
authored Oct 16, 2011
224 my $rules = $pdes->describe_rules($pd);
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
225 if ($use_html) {
226 print CGI::h2('Allophony'),
227 CGI::ul(CGI::li($rules));
228 } else {
229 print "Allophony:\n";
230 print join "\n", @$rules;
231 print "\n\n";
232 }
233 }
234
235 if ($use_html and $num_words > 0) {
236 print CGI::h2('Some words'),
237 CGI::start_table();
238 }
a43dc42 @alexfink separate tracking of the current inventory from describing
authored Dec 19, 2011
239 print STDERR "generating sample words...\n" if $verbose;
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
240 for (1..$num_words) {
ae6b19c @alexfink generate_form into Phonology
authored Oct 4, 2011
241 my $word = $pd->generate_form(12); # magic entropy value
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
242 my $surface_word;
243 my $generated_word = [@$word];
244 if (defined $canonicalise) {
ae6b19c @alexfink generate_form into Phonology
authored Oct 5, 2011
245 ($surface_word, $word) = $pd->canonicalise_phonemic_form($generated_word);
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
246 } else {
247 $surface_word = [@$word];
b968149 @alexfink run is a Phonology method
authored Sep 22, 2011
248 $pd->run($surface_word, start => $pd->{start_sequences});
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
249 }
250
251 if ($use_html) {
252 print '<tr>';
253 print "<td>//",
f989fc3 @alexfink package Transcription
authored Oct 16, 2011
254 $phonetic_alphabet->spell($generated_word),
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
255 "//</td>"
256 if defined $canonicalise;
257 print "<td>/",
f989fc3 @alexfink package Transcription
authored Oct 16, 2011
258 $phonetic_alphabet->spell($word),
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
259 "/</td><td>[",
f989fc3 @alexfink package Transcription
authored Oct 16, 2011
260 $phonetic_alphabet->spell($surface_word),
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
261 "]</td></tr>\n";
262 } else {
f989fc3 @alexfink package Transcription
authored Oct 16, 2011
263 print "//" . $phonetic_alphabet->spell($generated_word). "//\t" if defined $canonicalise;
264 print "/" . $phonetic_alphabet->spell($word) . "/\t[" . $phonetic_alphabet->spell($surface_word) . "]\n";
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
265 for my $phone (@$surface_word) {
f989fc3 @alexfink package Transcription
authored Oct 16, 2011
266 $_ = $phonetic_alphabet->name_phone($phone);
04e3820 @alexfink fix add_requirements. runnable now
authored Sep 18, 2011
267 print $FS->feature_string($phone), "\n" if /\#\#/;
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
268 }
269 }
270 }
271
272 if ($use_html and $num_words > 0) {
273 print CGI::end_table();
274 }
275
276 if ($use_html) {
277 print CGI::p({-style => 'font-size: small;'},
1038530 @alexfink update link produced in the html phonologies
authored Jun 20, 2011
278 "Generated by <a href=\"https://github.com/alexfink/random_language/tree/master/phonology\">Gleb</a>",
e636dce @alexfink initial commit of gleb
authored Dec 28, 2010
279 "version $version / $FS->{version} ",
280 $infile ? "from the file $infile." : "with seed $seed.");
281 print CGI::end_html;
282 }
283
284
285
286
Something went wrong with that request. Please try again.