Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
39 lines (27 sloc) 1.3 KB
# encoding: UTF-8
import sys
This simple script runs through the SHsnid.csv BÍN dump provided by
and outputs a list of all general words between 3-6 characters long.
Usage should be something along the lines of:
$ python > dictionary.txt
This produces a file usable by the levelbuilder (
# Lookup table used by lowercase(): each uppercase Icelandic letter listed
# here is mapped to its lowercase counterpart.
is_char = {
    u'Á': u'á',
    u'É': u'é',
    u'Í': u'í',
    u'Ó': u'ó',
    u'Ú': u'ú',
    u'Ý': u'ý',
    u'Þ': u'þ',
    u'Æ': u'æ',
    u'Ð': u'ð',
    u'Ö': u'ö',
}
def lowercase(s):
    """Return *s* converted to lowercase, one character at a time.

    A character that is a key of the module-level ``is_char`` table is
    replaced by the value stored for it there; any other character is
    replaced by the result of its own ``lower()`` method.
    """
    converted = []
    for ch in s:
        if ch in is_char:
            converted.append(is_char[ch])
        else:
            converted.append(ch.lower())
    return ''.join(converted)
def filter_bin(filename):
    """Print every suitable word form found in a BÍN CSV dump.

    Each non-blank line of the file must consist of exactly six
    ``;``-separated fields: lemma, id, group, category, word form and tag.
    A word form is written to standard output as UTF-8, followed by a
    newline, the first time it is encountered, provided that it:

    * is between 3 and 6 characters long (inclusive),
    * does not belong to one of the excluded categories,
    * contains no hyphen, and
    * is unchanged by ``lowercase()``, i.e. is already all lowercase.

    Args:
        filename: Path of the UTF-8 encoded CSV file to read.

    Raises:
        ValueError: If a non-blank line does not split into six fields.
        UnicodeDecodeError: If a line is not valid UTF-8.
        Whatever ``open()`` raises when the file cannot be opened.
    """
    # Built once rather than on every line; a set gives O(1) membership.
    excluded_categories = frozenset([
        u'ism', u'örn', u'göt', u'fyr', u'föð', u'móð', u'bibl', u'lönd',
    ])
    seen = set()

    # Emit raw UTF-8 bytes regardless of the locale: Python 3 exposes the
    # byte stream as sys.stdout.buffer, Python 2's sys.stdout takes bytes.
    out = getattr(sys.stdout, 'buffer', sys.stdout)

    # Read bytes and decode explicitly so lengths are counted in characters
    # (not bytes) on both Python 2 and 3; ``with`` guarantees the close.
    with open(filename, 'rb') as source:
        for raw_line in source:
            line = raw_line.strip().decode('UTF-8')
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue

            # The unpacking doubles as a check that there are six fields.
            _lemma, _entry_id, _group, category, word, _tag = line.split(u';')

            if not 3 <= len(word) <= 6 or category in excluded_categories:
                continue
            if u'-' in word or word in seen:
                continue
            # Only forms that are already lowercase are kept.
            if lowercase(word) != word:
                continue

            seen.add(word)
            out.write(word.encode('UTF-8') + b'\n')
if __name__ == '__main__':
    # The CSV path may be supplied as the first command-line argument;
    # without one, the stock BÍN dump file name is assumed.
    cli_args = sys.argv[1:]
    if cli_args:
        source_path = cli_args[0]
    else:
        source_path = 'SHsnid.csv'
    filter_bin(source_path)