Adding levelbuilding tools.

borgar · Mar 22, 2010 · 6dcd4b0 · 6dcd4b0
1 parent d9c4232
commit 6dcd4b0
Show file tree

Hide file tree

Showing 2 changed files with 96 additions and 0 deletions.
diff --git a/levelbuilder/build_levels.py b/levelbuilder/build_levels.py
@@ -0,0 +1,58 @@
+# encoding: UTF-8
+import sys
+import random
+
+# Help text printed by the command-line entry point at the bottom of this
+# script when its argument check fails.
+usage = """
+Use it like this:
+
+$ python build_levels.py dictionary.txt
+
+dictionary is expected to be a UTF-8 encoded file
+out will be a line-by-line list of JSON formatted levels as expected by the game
+"""
+
+def permutations ( str ):
+    """Lazily yield every ordering of the items in a sliceable sequence.
+
+    The first item is inserted into each possible slot of every
+    permutation of the remaining items. Sequences with repeated items
+    therefore yield repeated orderings. An empty or one-item sequence
+    yields itself once.
+    """
+    if len( str ) <= 1:
+        yield str
+        return
+    head, tail = str[:1], str[1:]
+    for partial in permutations( tail ):
+        # slide the head through every gap of the shorter permutation
+        for slot in range( len( partial ) + 1 ):
+            yield partial[:slot] + head + partial[slot:]
+
+def find_all ( keyword, words, min_length=3 ):
+    """Return the dictionary words spellable from the letters of a keyword.
+
+    Every ordered arrangement of ``min_length`` up to ``len(keyword)``
+    letters of ``keyword`` is a candidate; each letter is used at most as
+    often as it occurs in the keyword.
+
+    keyword    -- string supplying the available letters (any length; the
+                  candidate lengths are no longer tied to 6-letter keys)
+    words      -- set of valid words to match against
+    min_length -- shortest candidate considered (defaults to three letters)
+
+    Returns the set of members of ``words`` that are candidates. A keyword
+    shorter than ``min_length`` yields an empty set.
+    """
+    # Imported locally so this function does not rely on the module header.
+    import itertools
+
+    candidates = set()
+    for size in range( min_length, len( keyword ) + 1 ):
+        # itertools yields tuples of characters; glue them back into words
+        for arrangement in itertools.permutations( keyword, size ):
+            candidates.add( ''.join( arrangement ) )
+    return words.intersection( candidates )
+
+
+def build_levels( filename ):
+    """Print one JSON-formatted level per line for the given dictionary.
+
+    filename -- path to a UTF-8 encoded word list, one word per line.
+
+    Each 6-letter word acts as a level "key"; the level's words are all
+    dictionary words that find_all() can build from the key's letters.
+    Only levels with between 10 and 50 words (inclusive) are printed, as
+    UTF-8 encoded text on standard output.
+    """
+    # load the dictionary; the with-block closes the file once it is read
+    with open( filename ) as handle:
+        raw = handle.read()
+    words = set( raw.decode( 'UTF-8' ).strip().split( '\n' ) )
+
+    # get all 6 letter words, which will be used as "keys"; sorting makes
+    # the output order (and the key chosen among anagrams) reproducible
+    keywords = sorted( w for w in words if len( w ) == 6 )
+
+    json_template = u'{ "key":"%s", "words":["%s"] }'
+    # 6-letter words already reachable from an earlier key; they would
+    # produce a level with exactly the same word list
+    skip = set()
+    for key in keywords:
+        if key in skip:
+            continue
+        candidates = find_all( key, words )
+        skip.update( k for k in candidates if len( k ) == 6 )
+        found = sorted( candidates )
+        # only use levels where number of words is >=10 and <=50
+        if 10 <= len( found ) <= 50:
+            level = json_template % ( key, '","'.join( found ) )
+            print level.encode( 'UTF-8' )
+
+
+if __name__ == '__main__':
+    # sys.argv[0] is always the script name, so the dictionary path is
+    # only present when the list holds at least two entries.
+    if len( sys.argv ) < 2:
+        print usage
+        sys.exit( 2 )
+    build_levels( sys.argv[1] )
diff --git a/levelbuilder/filter_bin.py b/levelbuilder/filter_bin.py
@@ -0,0 +1,38 @@
+# encoding: UTF-8
+import sys
+
+"""
+filter_bin.py
+
+This simple script runs through the SHsnid.csv BÍN dump provided by http://bin.arnastofnun.is/gogn/
+and outputs a list of all general words between 3-6 characters long.
+
+Usage should be something along the lines of:
+
+$ python filter_bin.py > dictionary.txt
+
+This produces a file usable by the levelbuilder (build_levels.py).
+"""
+
+
+# Explicit upper-case -> lower-case mapping for accented and special letters
+# (presumably the Icelandic ones, given the BÍN source data -- confirm).
+is_char = {
+    u'Á':u'á', u'É':u'é', u'Í':u'í', u'Ó':u'ó', u'Ú':u'ú',
+    u'Ý':u'ý', u'Þ':u'þ', u'Æ':u'æ', u'Ð':u'ð', u'Ö':u'ö',
+}
+def lowercase ( s ):
+    """Return s lowercased character by character.
+
+    Characters listed in is_char use that table; every other character
+    falls back to its own lower() method.
+    """
+    folded = []
+    for c in s:
+        folded.append( is_char.get( c, c.lower() ) )
+    return ''.join( folded )
+
+
+def filter_bin ( filename ):
+    """Print every distinct, all-lowercase word of 3-6 characters in a dump.
+
+    filename -- path to a file whose non-blank lines each hold six
+                semicolon-separated fields; the fourth is a category code
+                and the fifth is the UTF-8 encoded word form.
+
+    Words whose category is in the excluded list, and words containing
+    any upper-case character, are dropped. Each remaining word is printed
+    once, UTF-8 encoded, in order of first appearance. A non-blank line
+    without exactly six fields raises ValueError.
+    """
+    # Category codes left out of the dictionary.
+    # NOTE(review): presumably name-like categories -- confirm against the
+    # BÍN documentation.
+    excluded = ['ism','örn','göt','fyr','föð','móð','bibl','lönd']
+    seen = set()
+    # the with-block guarantees the dump is closed, even on a parse error
+    with open( filename, 'r' ) as source:
+        for line in source:
+            line = line.strip()
+            if not line:
+                # tolerate blank lines (e.g. a trailing one at end of file)
+                continue
+            lemma, entry_id, group, category, word, tag = line.split(';')
+            word = unicode( word, encoding='UTF-8' )
+            if not 3 <= len( word ) <= 6 or category in excluded:
+                continue
+            # keep only words that are already entirely lower case
+            if word in seen or lowercase( word ) != word:
+                continue
+            seen.add( word )
+            print word.encode('UTF-8')
+
+
+if __name__ == '__main__':
+    # Default to the BÍN dump, but let the first command-line argument
+    # name a different input file.
+    source_path = 'SHsnid.csv'
+    if len( sys.argv ) > 1:
+        source_path = sys.argv[1]
+    filter_bin( source_path )