Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
96 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# encoding: UTF-8 | ||
import sys | ||
import random | ||
|
||
# Help text for the command line: how to invoke the script, what encoding the
# dictionary file must use, and what the script emits.
usage = """
Use it like this:
$ python build_levels.py dictionary.txt
dictionary is expected to be a UTF-8 encoded file
out will be a line-by-line list of JSON formatted levels as expected by the game
"""
|
||
def permutations(str):
    """Lazily yield every ordering of the items in a sliceable sequence.

    The sequence only needs to support ``len``, slicing and ``+`` (strings,
    lists and tuples all qualify).  A sequence with zero or one item is
    yielded unchanged.  Repeated items are treated as distinct, so inputs
    with duplicates produce duplicate orderings.
    """
    # Base case: nothing to rearrange.
    if len(str) <= 1:
        yield str
        return

    # Take the first item off and weave it into every gap of each ordering
    # of the remaining items.
    head = str[0:1]
    tail = str[1:]
    for partial in permutations(tail):
        for slot in range(len(partial) + 1):
            yield partial[:slot] + head + partial[slot:]
|
||
def find_all(keyword, words):
    """Return the dictionary words that can be spelled from ``keyword``.

    Every arrangement of three up to ``len(keyword)`` characters taken from
    ``keyword`` (each character position used at most once) is a candidate;
    the result is the subset of ``words`` matching any candidate.

    keyword -- string whose characters may be used.
    words   -- set of valid words (must provide ``intersection``).

    Returns a set.  A keyword shorter than three characters yields an empty
    set, because no arrangement of at least three characters exists.
    """
    # Imported locally under an alias so it cannot be confused with the
    # module-level ``permutations`` helper defined in this file.
    from itertools import permutations as arrangements

    candidates = set()
    # Cover every length from 3 up to the full keyword rather than a fixed
    # 3/4/5/full list, so keywords of any length are handled correctly.
    for size in range(3, len(keyword) + 1):
        candidates.update(
            ''.join(chars) for chars in arrangements(keyword, size)
        )
    return words.intersection(candidates)
|
||
|
||
def build_levels(filename):
    """Print one JSON-formatted level per line for the given dictionary.

    The dictionary is read as UTF-8 text with one word per line.  Every
    six-letter word is a potential level key; the level's word list is
    everything ``find_all`` returns for that key.  Only levels with between
    10 and 50 words (inclusive) are emitted.  Six-letter anagrams of an
    already processed key are skipped, since they would give the same level.

    filename -- path of the UTF-8 encoded dictionary file.

    Raises IOError/OSError if the file cannot be opened and
    UnicodeDecodeError if it is not valid UTF-8.
    """
    # Local import keeps this block self-contained; io.open decodes the same
    # way on Python 2 and Python 3.
    import io

    # load the dictionary (the context manager guarantees the file is closed)
    with io.open(filename, encoding='UTF-8') as handle:
        words = set(handle.read().strip().split(u'\n'))

    # get all 6 letter words, which will be used as "keys"; sorting makes the
    # output reproducible instead of depending on set iteration order
    keywords = sorted(w for w in words if len(w) == 6)

    json_template = u'{ "key":"%s", "words":["%s"] }'

    # Write UTF-8 bytes regardless of interpreter: Python 3 exposes the byte
    # stream as sys.stdout.buffer, Python 2's sys.stdout accepts bytes itself.
    out = getattr(sys.stdout, 'buffer', sys.stdout)

    skip = set()
    for key in keywords:
        if key in skip:
            continue
        candidates = find_all(key, words)
        # compare lengths by value ("==") rather than by identity ("is")
        skip.update(w for w in candidates if len(w) == 6)
        found = sorted(candidates)
        # only use levels where number of words is >=10 and <=50
        if 10 <= len(found) <= 50:
            level = json_template % (key, u'","'.join(found))
            out.write(level.encode('UTF-8') + b'\n')
|
||
|
||
if __name__ == '__main__':
    # sys.argv[0] is always the script name, so the dictionary path is only
    # present when there are at least two entries.
    if len(sys.argv) < 2:
        sys.stdout.write(usage + '\n')
        sys.exit(2)
    build_levels(sys.argv[1])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# encoding: UTF-8 | ||
import sys | ||
|
||
""" | ||
filter_bin.py | ||
This simple script runs through the SHsnid.csv BÍN dump provided by http://bin.arnastofnun.is/gogn/ | ||
and outputs a list of all general words between 3-6 characters long. | ||
Usage should be something along the lines of: | ||
$ python filter_bin.py > dictionary.txt | ||
This produces a file usable by the levelbuilder (build_levels.py). | ||
""" | ||
|
||
|
||
# Upper-case accented/special letters mapped to their lower-case counterparts.
is_char = {
    u'Á': u'á',
    u'É': u'é',
    u'Í': u'í',
    u'Ó': u'ó',
    u'Ú': u'ú',
    u'Ý': u'ý',
    u'Þ': u'þ',
    u'Æ': u'æ',
    u'Ð': u'ð',
    u'Ö': u'ö',
}


def lowercase(s):
    """Return ``s`` lower-cased one character at a time.

    Characters listed in ``is_char`` are replaced by their mapped value;
    every other character goes through its own ``lower()`` method.
    """
    return ''.join(is_char.get(ch, ch.lower()) for ch in s)
|
||
|
||
def filter_bin(filename):
    """Print the usable word forms found in a semicolon-separated dump.

    Each non-blank line must hold exactly six ``;``-separated fields, of
    which only the fourth (category) and fifth (word form) are used.  A word
    form is printed, UTF-8 encoded and one per line, when

    * it is between 3 and 6 characters long,
    * its category is not one of the excluded categories,
    * it is already entirely lower case, and
    * it has not been printed before.

    filename -- path of the UTF-8 encoded dump file.

    Raises ValueError if a non-blank line does not have exactly six fields,
    IOError/OSError if the file cannot be opened and UnicodeDecodeError if
    it is not valid UTF-8.
    """
    # Local import keeps this block self-contained; io.open yields decoded
    # text lines on both Python 2 and Python 3.
    import io

    excluded = frozenset([
        u'ism', u'örn', u'göt', u'fyr', u'föð', u'móð', u'bibl', u'lönd',
    ])

    # Write UTF-8 bytes regardless of interpreter: Python 3 exposes the byte
    # stream as sys.stdout.buffer, Python 2's sys.stdout accepts bytes itself.
    out = getattr(sys.stdout, 'buffer', sys.stdout)

    seen = set()
    with io.open(filename, 'r', encoding='UTF-8') as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # tolerate blank lines (e.g. at the end of the file)
                continue
            _lemma, _entry_id, _group, category, word, _tag = line.split(u';')
            if not 3 <= len(word) <= 6 or category in excluded:
                continue
            lc = lowercase(word)
            # Forms containing upper-case characters differ from their
            # lower-cased version and are dropped.
            if lc == word and lc not in seen:
                seen.add(lc)
                out.write(word.encode('UTF-8') + b'\n')
|
||
|
||
if __name__ == '__main__':
    # An explicit path on the command line takes precedence; otherwise use
    # the default name of the BÍN dump.
    if len(sys.argv) > 1:
        source_path = sys.argv[1]
    else:
        source_path = 'SHsnid.csv'
    filter_bin(source_path)