In [1]:
import re
import json
import time

In [194]:
def replace_all(text, dic, ptrn=""):
    """Apply every regex substitution in ``dic`` to ``text`` when ``ptrn`` occurs in it.

    Args:
        text: String to rewrite.
        dic: Mapping of regex pattern -> ``re.sub`` replacement template.
            Entries are applied in the mapping's iteration order, each one
            operating on the output of the previous one.
        ptrn: Gate pattern; the substitutions run only if it matches
            somewhere in ``text``.  The default empty pattern matches every
            string, so the substitutions always run.

    Returns:
        The rewritten string, or ``text`` unchanged when ``ptrn`` does not
        occur in it.
    """
    # re.search stops at the first hit; re.findall would build the complete
    # list of matches only to test it for truthiness.
    if re.search(ptrn, text) is None:
        return text
    for pattern, replacement in dic.items():
        text = re.sub(pattern, replacement, text)
    return text

In [3]:
# Regex alternations of Icelandic words, one list per grammatical case
# (presumably prepositions governing accusative / dative / genitive -- confirm).
# Each entry comes in a capitalised-or-lowercase form and an ALL-CAPS form.
acc_words = "|".join((
    "[Uu]m(fram|hverfis)", "UM(FRAM|HVERFIS)",
    "[Uu]m", "UM",
    "[Gg]egnum", "GEGNUM",
    "[Kk]ringum", "KRINGUM",
    "[Vv]ið", "VIÐ",
    "[Íí]",
    "[Áá]",
))
dat_words = "|".join((
    "[Ff]rá", "FRÁ",
    "[Aa][ðf]", "A[ÐF]",
    "[Áá]samt", "ÁSAMT",
    "[Gg]agnvart", "GAGNVART",
    "[Gg]egnt?", "GEGNT?",
    "[Hh]anda", "HANDA",
    "[Hh]já", "HJÁ",
    "[Mm]eð( )?fram", "MEÐ( )?FRAM",
    "[Mm]óti?", "MÓTI?",
    "[Uu]ndan", "UNDAN",
    "[Nn]álægt", "NÁLÆGT",
))
gen_words = "|".join((
    "[Tt]il", "TIL",
    "[Aa]uk", "AUK",
    "[Áá]n", "ÁN",
    "[Hh]andan", "HANDAN",
    "[Ii]nnan", "INNAN",
    "[Mm]eðal", "MEÐAL",
    "[Mm]egin", "MEGIN",
    "[Mm]ill(i|um)", "MILL(I|UM)",
    "[Oo]fan", "OFAN",
    "[Ss]akir", "SAKIR",
    "[Ss]ökum", "SÖKUM",
    "[Uu]tan", "UTAN",
    "[Vv]egna", "VEGNA",
))
accdat_words = "|".join((
    "[Ee]ftir", "EFTIR",
    "[Ff]yrir", "FYRIR",
    "[Mm]eð", "MEÐ",
    "[Uu]ndir", "UNDIR",
    "[Vv]ið", "VIÐ",
    "[Yy]fir", "YFIR",
))
# Combined alternations.  Their capture-group counts (2 per single-case list)
# matter to any replacement template that refers to groups by number.
accgen_words = "|".join((acc_words, gen_words))
accdat_words_comb = "|".join((acc_words, dat_words, accdat_words))
accdatgen_words_comb = "|".join((accdat_words_comb, gen_words))

# Number words (hundred / thousand / million) in mixed-case and ALL-CAPS
# spellings.  Contains three capture groups.
amounts = "([Hh]undr[au]ð|HUNDR[AU]Ð|[Þþ]úsund|ÞÚSUND|[Mm]illjón(ir)?|MILLJÓN(IR)?)"
# Whole-string date: optional "<day>." + month name or abbreviation + 2-4 digit
# year.  The day alternation accepts 1-31 with an optional leading zero; the
# earlier form `[012]?[1-9]|3[01]` could not match the 10th or the 20th.
date_ptrn = r"^(((0?[1-9]|[12]\d|3[01])\. ?)?(jan(úar)?|feb(rúar)?|mars?|apr(íl)?|maí|jú[nl]í?|ág(úst)?|sep(t(ember)?)?|okt(óber)?|nóv(ember)?|des(ember)?) )\d{2,4}$"

In [328]:
# Matches a stand-alone weight abbreviation, optionally followed by a period.
weight_ptrn = (
    r"\b("
    r"t"                  # t
    r"|[knmµpazy]?gr?"    # g / gr, optionally with a one-letter prefix
    r"|lbs"               # lbs
    r")\.?\b"
)

def make_weight_dict():
    """Build the substitution table that spells out weight abbreviations.

    Every key is a regular expression and every value a ``re.sub``
    replacement template.  Rules that begin with a word from ``dat_words`` or
    ``gen_words`` choose the corresponding form of the unit word; the rules
    without such a word are fallbacks, so the entries are meant to be tried
    in insertion order.

    Group numbers used by the templates (``dat_words``, ``gen_words`` and
    ``amounts`` contain capture groups of their own):

    * ``<word> <number> <unit>``: the trailing delimiter is group 10.
    * ``<word> [<number>] <amount> <unit>``: the amount word is group 11 and
      the trailing delimiter group 14.
    * ``<digit or amount> <unit>``: the trailing delimiter is group 5.

    Returns:
        dict mapping pattern strings to replacement templates.
    """
    weight_dict = {
        # tonn
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) t\.?(\W|$)": r"\g<1> tonni\g<10>",
        r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) t\.?(\W|$)": r"\g<1> tonns\g<10>",
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) t\.?(\W|$)": r"\g<1> tonnum\g<10>",
        # Group 13 is the "(IR)" group inside `amounts`; the delimiter is 14.
        r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" t\.?(\W|$)": r"\g<1> \g<11> tonnum\g<14>",
        r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) t\.?(\W|$)": r"\g<1> tonna\g<10>",
        r"((\W|^)(" + gen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" t\.?(\W|$)": r"\g<1> \g<11> tonna\g<14>",
        # Groups 2-4 belong to `amounts`; the delimiter is group 5.
        r"(\d|" + amounts + r") t\.?(\W|$)": r"\g<1> tonn\g<5>",

        # gramm
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) gr?\.?(\W|$)": r"\g<1> grammi\g<10>",
        r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) gr?\.?(\W|$)": r"\g<1> gramms\g<10>",
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) gr?\.?(\W|$)": r"\g<1> grömmum\g<10>",
        r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" gr?\.?(\W|$)": r"\g<1> \g<11> grömmum\g<14>",
        r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) gr?\.?(\W|$)": r"\g<1> gramma\g<10>",
        r"((\W|^)(" + gen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" gr?\.?(\W|$)": r"\g<1> \g<11> gramma\g<14>",
        r"(1 )gr?\.?(\W|$)": r"\g<1>gramm\g<2>",
        r"([02-9]|" + amounts + r") gr?\.?(\W|$)": r"\g<1> grömm\g<5>",
    }

    prefix_weight = [("nanó", "n"),
                     ("milli", "m"),
                     ("míkró", "µ"),
                     ("píkó", "p"),
                     ("attó", "a"),
                     ("zeptó", "z"),
                     ("yoktó", "y")]

    base_weight = [("kíló", "kg"),
                   ("pund", "lbs")]

    # Prefixed gram units (ng, mg, ...).
    for prefix, letter in prefix_weight:
        weight_dict[r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) " + letter + r"g\.?(\W|$)"] = r"\g<1> " + prefix + r"grammi\g<10>"
        weight_dict[r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) " + letter + r"g\.?(\W|$)"] = r"\g<1> " + prefix + r"gramms\g<10>"
        weight_dict[r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) " + letter + r"g\.?(\W|$)"] = r"\g<1> " + prefix + r"grömmum\g<10>"
        weight_dict[r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" " + letter + r"g\.?(\W|$)"] = r"\g<1> \g<11> " + prefix + r"grömmum\g<14>"
        weight_dict[r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) " + letter + r"g\.?(\W|$)"] = r"\g<1> " + prefix + r"gramma\g<10>"
        weight_dict[r"((\W|^)(" + gen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" " + letter + r"g\.?(\W|$)"] = r"\g<1> \g<11> " + prefix + r"gramma\g<14>"

        weight_dict[r"(1 )" + letter + r"g\.?(\W|$)"] = r"\g<1>" + prefix + r"gramm\g<2>"
        weight_dict[r"([02-9]|" + amounts + r") " + letter + r"g\.?(\W|$)"] = r"\g<1> " + prefix + r"grömm\g<5>"

    # Bare ng / mg / µg without a preceding number.
    for prefix, letter in prefix_weight[:3]:
        weight_dict[r"(\W|^)" + letter + r"g\.?(\W|$)"] = r"\g<1>" + prefix + r"grömm\g<2>"

    # kg and lbs.
    for word, letters in base_weight:
        weight_dict[r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) " + letters + r"\.?(\W|$)"] = r"\g<1> " + word + r"i\g<10>"
        weight_dict[r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) " + letters + r"\.?(\W|$)"] = r"\g<1> " + word + r"s\g<10>"

        weight_dict[r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) " + letters + r"\.?(\W|$)"] = r"\g<1> " + word + r"um\g<10>"
        weight_dict[r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" " + letters + r"\.?(\W|$)"] = r"\g<1> \g<11> " + word + r"um\g<14>"
        weight_dict[r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) " + letters + r"\.?(\W|$)"] = r"\g<1> " + word + r"a\g<10>"
        weight_dict[r"((\W|^)(" + gen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" " + letters + r"\.?(\W|$)"] = r"\g<1> \g<11> " + word + r"a\g<14>"

        weight_dict[r"(\W|^)" + letters + r"(\W|$)"] = r"\g<1>" + word + r"\g<2>"
    return weight_dict

# Matches a prime / double-prime sign, or a stand-alone metre or "ft"
# abbreviation with an optional trailing period.
distance_ptrn = (
    r"[\′\″]"
    r"|\b([pnµmcsdkN]?m|ft)\.?\b"
)

def make_distance_dict():
    """Build the substitution table that spells out length abbreviations.

    Covers feet (``′`` / ``ft``), inches (``″``), bare ``m`` and the prefixed
    metre units.  Every key is a regular expression and every value a
    ``re.sub`` replacement template; rules that begin with a word from one of
    the word lists choose the form of the unit word, and the remaining rules
    are fallbacks, so the entries are meant to be tried in insertion order.

    The templates refer to groups by number, and the word lists and
    ``amounts`` contain capture groups themselves: with a two-group word list
    the trailing delimiter of a ``<word> <number> <unit>`` rule is group 10,
    with ``accgen_words`` it is 12 and with ``accdatgen_words_comb`` 14.  A
    unit written with a group of its own (``(\\′|ft\\.?)``) shifts these by one.

    Returns:
        dict mapping pattern strings to replacement templates.
    """
    # A bare "m" is only rewritten when it is followed by a space and a token
    # that is not one of the listed single letters plus a period.  This tail
    # adds two groups: the whole " token<delim>" part and the delimiter.
    bare_m = r"m\.?( (?![kmgyabefstvö]\.)[A-ZÁÐÉÍÓÚÝÞÆÖa-záðéíóúýþæö\d]*(\W|$))"

    distance_dict = {
        # fet
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) (\′|ft\.?)(\W|$)": r"\g<1> feti\g<11>",
        r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) (\′|ft\.?)(\W|$)": r"\g<1> fets\g<11>",
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) (\′|ft\.?)(\W|$)": r"\g<1> fetum\g<11>",
        r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" (\′|ft\.?)(\W|$)": r"\g<1> \g<11> fetum\g<15>",
        r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) (\′|ft\.?)(\W|$)": r"\g<1> feta\g<11>",
        r"((\W|^)(" + gen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" (\′|ft\.?)(\W|$)": r"\g<1> \g<11> feta\g<15>",
        # The space before the unit is optional so that "5 ft" and "5′" both
        # match; groups 2-4 are `amounts`, 5 the unit, 6 the delimiter.
        r"(\d|" + amounts + r") ?(\′|ft\.?)(\W|$)": r"\g<1> fet\g<6>",

        # tomma
        r"((\W|^)(" + accdatgen_words_comb + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) \″(\W|$)": r"\g<1> tommu\g<14>",
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) \″(\W|$)": r"\g<1> tommum\g<10>",
        r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" \″(\W|$)": r"\g<1> \g<11> tommum\g<14>",
        r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) \″(\W|$)": r"\g<1> tomma\g<10>",
        r"((\W|^)(" + gen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" \″(\W|$)": r"\g<1> \g<11> tomma\g<14>",
        r"(1 )\″\.?(\W|$)": r"\g<1>tomma\g<2>",
        r"([02-9]|" + amounts + r") \″\.?(\W|$)": r"\g<1> tommur\g<5>",

        # Bare metres.  Group 1 already ends with a space (and, in the amount
        # rules, already contains the amount word), so the templates append
        # the unit word directly.
        r"((\W|^)(" + accdatgen_words_comb + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1)) )" + bare_m: r"\g<1>metra\g<14>",
        r"((\W|^)(" + accgen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9])) )" + bare_m: r"\g<1>metra\g<12>",
        r"((\W|^)(" + accgen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r" )" + bare_m: r"\g<1>metra\g<16>",
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9])) )" + bare_m: r"\g<1>metrum\g<10>",
        r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r" )" + bare_m: r"\g<1>metrum\g<14>",
        r"(1 )" + bare_m: r"\g<1>metri\g<2>",
        r"([02-9] )" + bare_m: r"\g<1>metrar\g<2>",
    }

    prefix_meter = [("p", "píkó"),
                    ("n", "nanó"),
                    ("µ", "míkró"),
                    ("m", "milli"),
                    ("[cs]", "senti"),
                    ("d", "desi"),
                    ("k", "kíló"),
                    ("N", "njúton")]

    for letter, prefix in prefix_meter:
        distance_dict[r"((\W|^)(" + accdatgen_words_comb + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) " + letter + r"m\.?(\W|$)"] = r"\g<1> " + prefix + r"metra\g<14>"
        distance_dict[r"((\W|^)(" + accgen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) " + letter + r"m\.?(\W|$)"] = r"\g<1> " + prefix + r"metra\g<12>"
        distance_dict[r"((\W|^)(" + accgen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" " + letter + r"m\.?(\W|$)"] = r"\g<1> \g<13> " + prefix + r"metra\g<16>"
        distance_dict[r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) " + letter + r"m\.?(\W|$)"] = r"\g<1> " + prefix + r"metrum\g<10>"
        distance_dict[r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" " + letter + r"m\.?(\W|$)"] = r"\g<1> \g<11> " + prefix + r"metrum\g<14>"
        distance_dict[r"(1 )" + letter + r"m\.?(\W|$)"] = r"\g<1>" + prefix + r"metri\g<2>"
        # Groups 2-4 are `amounts`; the delimiter is group 5.
        distance_dict[r"([02-9]|" + amounts + r") " + letter + r"m\.?(\W|$)"] = r"\g<1> " + prefix + r"metrar\g<5>"

    return distance_dict

# Matches "ha", a metre unit written with an f/fer/rúm prefix, or a metre
# unit followed by a 2/3 exponent (superscript or plain digit).
area_ptrn = (
    r"\b(ha|(f(er)?|rúm)[pnµmcsdk]?m\b\.?)"
    r"|[pnµmcsdk]?m[²2³3]"
)

def make_area_dict():
    """Build the substitution table that spells out area and volume-by-metre units.

    Covers ``ha`` and metre units marked as squared or cubed, either by a
    trailing exponent (``m²``, ``km3``) or by a leading ``f`` / ``fer`` /
    ``rúm``.  Every key is a regular expression and every value a ``re.sub``
    replacement template; rules that begin with a word from one of the word
    lists choose the form of the unit word, and the remaining rules are
    fallbacks, so the entries are meant to be tried in insertion order.

    Group numbers in the templates account for the capture groups inside the
    word lists and ``amounts``: the trailing delimiter is group 14 after
    ``accdatgen_words_comb``, 12 after ``accgen_words``, 10 after
    ``dat_words``, 16 / 14 in the amount rules, and 5 in the
    ``<digit or amount> <unit>`` fallbacks.

    Returns:
        dict mapping pattern strings to replacement templates.
    """
    area_dict = {
        r"((\W|^)(" + accdatgen_words_comb + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) ha\.?(\W|$)": r"\g<1> hektara\g<14>",
        r"((\W|^)(" + accgen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) ha\.?(\W|$)": r"\g<1> hektara\g<12>",
        r"((\W|^)(" + accgen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" ha\.?(\W|$)": r"\g<1> \g<13> hektara\g<16>",
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) ha\.?(\W|$)": r"\g<1> hekturum\g<10>",
        r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" ha\.?(\W|$)": r"\g<1> \g<11> hekturum\g<14>",
        r"(1) ha\.?(\W|$)": r"\g<1> hektari\g<2>",
        # Groups 2-4 are `amounts`; the delimiter is group 5.
        r"([02-9]|" + amounts + r") ha\.?(\W|$)": r"\g<1> hektarar\g<5>",
    }

    dimension_after = [("²", "fer"),
                       ("2", "fer"),
                       ("³", "rúm"),
                       ("3", "rúm")]

    dimension_before = [("f", "fer"),
                        ("fer", "fer"),
                        ("rúm", "rúm")]

    prefix_meter_dimension = [("", ""),
                              ("p", "píkó"),
                              ("n", "nanó"),
                              ("µ", "míkró"),
                              ("m", "milli"),
                              ("[cs]", "senti"),
                              ("d", "desi"),
                              ("k", "kíló")]

    # Exponent written after the unit: m², km3, ...
    for letter, prefix in prefix_meter_dimension:
        for superscript, dimension in dimension_after:
            unit = letter + "m" + superscript + r"(\W|$)"
            word = dimension + prefix
            area_dict[r"((\W|^)(" + accdatgen_words_comb + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) " + unit] = r"\g<1> " + word + r"metra\g<14>"
            area_dict[r"((\W|^)(" + accgen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) " + unit] = r"\g<1> " + word + r"metra\g<12>"
            area_dict[r"((\W|^)(" + accgen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" " + unit] = r"\g<1> \g<13> " + word + r"metra\g<16>"
            area_dict[r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) " + unit] = r"\g<1> " + word + r"metrum\g<10>"
            area_dict[r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" " + unit] = r"\g<1> \g<11> " + word + r"metrum\g<14>"
            area_dict[r"(1 )" + unit] = r"\g<1>" + word + r"metri\g<2>"
            area_dict[r"([02-9]|" + amounts + r") " + unit] = r"\g<1> " + word + r"metrar\g<5>"

    # Dimension written before the unit: fm, ferkm, rúmm, ...
    for letter, prefix in prefix_meter_dimension:
        for preprefix, dimension in dimension_before:
            unit = preprefix + letter + r"m\.?(\W|$)"
            word = dimension + prefix
            area_dict[r"((\W|^)(" + accdatgen_words_comb + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) " + unit] = r"\g<1> " + word + r"metra\g<14>"
            area_dict[r"((\W|^)(" + accgen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) " + unit] = r"\g<1> " + word + r"metra\g<12>"
            area_dict[r"((\W|^)(" + accgen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" " + unit] = r"\g<1> \g<13> " + word + r"metra\g<16>"
            area_dict[r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) " + unit] = r"\g<1> " + word + r"metrum\g<10>"
            area_dict[r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" " + unit] = r"\g<1> \g<11> " + word + r"metrum\g<14>"
            area_dict[r"(1 )" + unit] = r"\g<1>" + word + r"metri\g<2>"
            area_dict[r"([02-9]|" + amounts + r") " + unit] = r"\g<1> " + word + r"metrar\g<5>"

    return area_dict

# Matches a stand-alone litre abbreviation (l / L with an optional one-letter
# prefix), optionally followed by a period.
volume_ptrn = (
    r"\b"
    r"[dcmµ]?[Ll]"
    r"\.?\b"
)

def make_volume_dict():
    """Build the substitution table that spells out litre abbreviations.

    Covers ``l`` / ``L`` and the prefixed forms ``dl``, ``cl``, ``ml`` and
    ``µl``.  Every key is a regular expression and every value a ``re.sub``
    replacement template; rules that begin with a word from one of the word
    lists choose the form of the unit word, and the remaining rules are
    fallbacks, so the entries are meant to be tried in insertion order.

    Group numbers in the templates account for the capture groups inside the
    word lists and ``amounts``: the trailing delimiter is group 14 after
    ``accdatgen_words_comb``, 12 after ``accgen_words``, 10 after
    ``dat_words``, 16 / 14 in the amount rules, and 5 in the
    ``<digit or amount> <unit>`` fallback.

    Returns:
        dict mapping pattern strings to replacement templates.
    """
    volume_dict = {}

    prefix_liter = [("", ""),
                    ("d", "desi"),
                    ("c", "senti"),
                    ("m", "milli"),
                    ("µ", "míkró")]

    for letter, prefix in prefix_liter:
        unit = letter + r"[Ll]\.?(\W|$)"
        volume_dict[r"((\W|^)(" + accdatgen_words_comb + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) " + unit] = r"\g<1> " + prefix + r"lítra\g<14>"
        volume_dict[r"((\W|^)(" + accgen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) " + unit] = r"\g<1> " + prefix + r"lítra\g<12>"
        volume_dict[r"((\W|^)(" + accgen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" " + unit] = r"\g<1> \g<13> " + prefix + r"lítra\g<16>"
        volume_dict[r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) " + unit] = r"\g<1> " + prefix + r"lítrum\g<10>"
        volume_dict[r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" " + unit] = r"\g<1> \g<11> " + prefix + r"lítrum\g<14>"
        volume_dict[r"(1 )" + unit] = r"\g<1>" + prefix + r"lítri\g<2>"
        # Groups 2-4 are `amounts`; the delimiter is group 5.
        volume_dict[r"([02-9]|" + amounts + r") " + unit] = r"\g<1> " + prefix + r"lítrar\g<5>"

    # Bare dl / cl / ml / µl without a preceding number.  The delimiter is
    # re-emitted as captured, so no extra space is added.
    for letter, prefix in prefix_liter[1:]:
        volume_dict[r"(\W|^)" + letter + r"l\.?(\W|$)"] = r"\g<1>" + prefix + r"lítrar\g<2>"

    return volume_dict

# Matches a stand-alone time abbreviation: klst, mín, s / sek, ms / msek.
time_ptrn = (
    r"\b("
    r"klst"
    r"|mín"
    r"|m?s(ek)?"
    r")\b"
)

def make_time_dict():
    """Build the substitution table that spells out time abbreviations.

    Covers ``klst``, ``mín``, ``s`` / ``sek`` and ``ms`` / ``msek``.  Every
    key is a regular expression and every value a ``re.sub`` replacement
    template; rules that begin with a word from one of the word lists choose
    the form of the unit word, and the remaining rules are fallbacks, so the
    entries are meant to be tried in insertion order.

    Group numbers in the templates account for the capture groups inside the
    word lists and ``amounts``.  Each abbreviation in ``prefix_time`` carries
    exactly one capture group of its own (``mín()?`` has an empty one for
    that reason), which shifts the trailing delimiter by one compared with
    the ``klst`` rules.

    Returns:
        dict mapping pattern strings to replacement templates.
    """
    time_dict = {
        r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) klst\.?(\W|$)": r"\g<1> klukkustundar\g<10>",
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) klst\.?(\W|$)": r"\g<1> klukkustundum\g<10>",
        r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" klst\.?(\W|$)": r"\g<1> \g<11> klukkustundum\g<14>",
        r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) klst\.?(\W|$)": r"\g<1> klukkustunda\g<10>",
        r"((\W|^)(" + gen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" klst\.?(\W|$)": r"\g<1> \g<11> klukkustunda\g<14>",

        # Group 1 already ends with the space.
        r"(1 )klst\.?(\W|$)": r"\g<1>klukkustund\g<2>",
        r"(\W|^)klst\.?(\W|$)": r"\g<1>klukkustundir\g<2>",
    }

    prefix_time = [("mín()?", "mínút"),
                   ("s(ek)?", "sekúnd"),
                   ("ms(ek)?", "millisekúnd")]

    for letters, prefix in prefix_time:
        unit = letters + r"\.?(\W|$)"
        time_dict[r"((\W|^)(" + accdatgen_words_comb + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) " + unit] = r"\g<1> " + prefix + r"u\g<15>"
        time_dict[r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) " + unit] = r"\g<1> " + prefix + r"um\g<11>"
        time_dict[r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" " + unit] = r"\g<1> \g<11> " + prefix + r"um\g<15>"
        time_dict[r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) " + unit] = r"\g<1> " + prefix + r"na\g<11>"
        time_dict[r"((\W|^)(" + gen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" " + unit] = r"\g<1> \g<11> " + prefix + r"na\g<15>"
        # Group 2 is the abbreviation's own group; the delimiter is group 3.
        time_dict[r"(1 )" + unit] = r"\g<1>" + prefix + r"a\g<3>"
        # Groups 2-4 are `amounts`, 5 the abbreviation's group, 6 the delimiter.
        time_dict[r"([02-9]|" + amounts + r") " + unit] = r"\g<1> " + prefix + r"ur\g<6>"

    return time_dict

# Matches text that mentions a currency: kr with an optional m. / ma. / þ.
# prefix, the listed ISO codes, mljó / mlja, or a currency symbol.
# Compared with the earlier pattern this also accepts "INR", a "þ" prefix
# written without a period ("þkr"), and the symbols € ₹ ₤, all of which
# make_currency_dict has rules for.  The number of capture groups is unchanged.
currency_ptrn = r"(\W|^)((ma?\.?|þ\.?)?[Kk]r\.?\-?|C(HF|AD|ZK)|(DK|SE|NO)K|EUR|GBP|I[NS]K|INR|JPY|PTE|(AU|US)D|mlj[óa]\.?)(\W|$)|[$£¥€₹₤]"

def make_currency_dict():
    """Build the substitution table that spells out currency notations.

    Covers ``kr`` (also ``þ.kr``, ``m.kr``, ``ma.kr``, ``mljó``, ``mlja``),
    the symbols ``$ £ ¥ € ₹ ₤`` written before or after the number, and a
    list of three-letter currency codes.  Every key is a regular expression
    and every value a ``re.sub`` replacement template; rules that begin with
    a word from one of the word lists choose the form of the currency word,
    and the remaining rules are fallbacks, so the entries are meant to be
    tried in insertion order.

    The templates refer to groups by number and the word lists and
    ``amounts`` contain capture groups of their own.  In the
    ``<word> <symbol><number>`` rules the whole number is the first group
    after the word list (6 after a two-group list, 8 after ``accgen_words``,
    10 after ``accdatgen_words_comb``); in ``<digit or amount> <symbol>``
    rules the trailing delimiter is group 5.

    Returns:
        dict mapping pattern strings to replacement templates.
    """
    currency_dict = {
        # --- krónur -------------------------------------------------------
        r"((\W|^)(" + dat_words + r")) kr\.?\-? ?((((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r")(\W|$)": r"\g<1> \g<6> krónum\g<15>",
        r"((\W|^)(" + gen_words + r")) kr\.?\-? ?((((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r")(\W|$)": r"\g<1> \g<6> króna\g<15>",
        r"(\W|^)[Kk]r\.? ?((((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r")(\W|$)": r"\g<1>\g<2> krónur\g<11>",

        r"((\W|^)(" + accdatgen_words_comb + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) ?kr\.?\-?(\W|$)": r"\g<1> krónu\g<14>",
        r"((\W|^)(" + accdatgen_words_comb + r")) kr\.?\-? ?((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))(\W|$)": r"\g<1> \g<10> krónu\g<14>",
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) kr\.?\-?(\W|$)": r"\g<1> krónum\g<10>",
        # Group 1 already contains the number and the amount word.
        r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r") kr\.?\-?(\W|$)": r"\g<1> krónum\g<14>",
        # Group 6 is the whole number; group 9 is only its last digit chunk.
        r"((\W|^)(" + dat_words + r")) kr\.?\-? ?((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))(\W|$)": r"\g<1> \g<6> krónum\g<10>",
        r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) kr\.?\-?(\W|$)": r"\g<1> króna\g<10>",
        r"((\W|^)(" + gen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r") kr\.?\-?(\W|$)": r"\g<1> króna\g<14>",
        r"((\W|^)(" + gen_words + r")) kr\.?\-? ?((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))(\W|$)": r"\g<1> \g<6> króna\g<10>",
        # The optional space sits outside the group so the output always has
        # exactly one space between number and word.
        r"(1) ?kr\.?\-?(\W|$)": r"\g<1> króna\g<2>",
        r"([02-9]|" + amounts + r") ?kr\.?\-?(\W|$)": r"\g<1> krónur\g<5>",
        # Only the first digit is captured, so it is emitted once and the
        # rest of the number follows from the unmatched text.
        r"(\W|^)[Kk]r\.? ?(\d)": r"\g<1>krónur \g<2>",

        # --- dollarar -----------------------------------------------------
        r"((\W|^)(" + accgen_words + r")) \$((((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r")(\W|$)": r"\g<1> \g<8> dollara\g<17>",
        r"((\W|^)(" + dat_words + r")) \$((((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r")(\W|$)": r"\g<1> \g<6> dollurum\g<15>",
        r"(\W|^)\$((((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r")(\W|$)": r"\g<1>\g<2> dollarar\g<11>",
        r"((\W|^)(" + accdatgen_words_comb + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) ?\$(\W|$)": r"\g<1> dollara\g<14>",
        r"((\W|^)(" + accdatgen_words_comb + r")) \$((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))(\W|$)": r"\g<1> \g<10> dollara\g<14>",
        r"((\W|^)(" + accgen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) ?\$(\W|$)": r"\g<1> dollara\g<12>",
        r"((\W|^)(" + accgen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" ?\$(\W|$)": r"\g<1> \g<13> dollara\g<16>",
        r"((\W|^)(" + accgen_words + r")) \$((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))(\W|$)": r"\g<1> \g<8> dollara\g<12>",
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) ?\$(\W|$)": r"\g<1> dollurum\g<10>",
        r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + r" ?\$(\W|$)": r"\g<1> \g<11> dollurum\g<14>",
        r"((\W|^)(" + dat_words + r")) \$((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))(\W|$)": r"\g<1> \g<6> dollurum\g<10>",
        r"(1) ?\$(\W|$)": r"\g<1> dollari\g<2>",
        r"([02-9]|" + amounts + r") ?\$(\W|$)": r"\g<1> dollarar\g<5>",
        r"(\W|^) ?\$((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))(\W|$)": r"\g<1>\g<2> dollari\g<6>",
        r"(\W|^) ?\$((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))(\W|$)": r"\g<1>\g<2> dollarar\g<6>",
        r"(\W|^)\$(\W|$)": r"\g<1>dollari\g<2>",

        # --- pund ---------------------------------------------------------
        r"((\W|^)(" + dat_words + r")) £((((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r")(\W|$)": r"\g<1> \g<6> pundum\g<15>",
        r"((\W|^)(" + gen_words + r")) £((((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r")(\W|$)": r"\g<1> \g<6> punda\g<15>",
        r"(\W|^)£((((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r"?)(\W|$)": r"\g<1>\g<2> pund\g<11>",
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) ?£(\W|$)": r"\g<1> pundi\g<10>",
        r"((\W|^)(" + dat_words + r")) £((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))(\W|$)": r"\g<1> \g<6> pundi\g<10>",
        r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) ?£(\W|$)": r"\g<1> punds\g<10>",
        r"((\W|^)(" + gen_words + r")) £((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))(\W|$)": r"\g<1> \g<6> punds\g<10>",
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) ?£(\W|$)": r"\g<1> pundum\g<10>",
        r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r") ?£(\W|$)": r"\g<1> pundum\g<14>",
        r"((\W|^)(" + dat_words + r")) £((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))(\W|$)": r"\g<1> \g<6> pundum\g<10>",
        r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) ?£(\W|$)": r"\g<1> punda\g<10>",
        r"((\W|^)(" + gen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r") ?£(\W|$)": r"\g<1> punda\g<14>",
        r"((\W|^)(" + gen_words + r")) £((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))(\W|$)": r"\g<1> \g<6> punda\g<10>",
        r"(\W|^) ?£(((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)(\W|$)": r"\g<1>\g<2> pund\g<7>",
        r"(\W|^)£(\W|$)": r"\g<1>pund\g<2>",

        # --- jen ----------------------------------------------------------
        r"((\W|^)(" + dat_words + r")) ¥((((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r")(\W|$)": r"\g<1> \g<6> japönskum jenum\g<15>",
        r"((\W|^)(" + gen_words + r")) ¥((((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r")(\W|$)": r"\g<1> \g<6> japanskra jena\g<15>",
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) ?¥(\W|$)": r"\g<1> japönsku jeni\g<10>",
        r"((\W|^)(" + dat_words + r")) ¥((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))(\W|$)": r"\g<1> \g<6> japönsku jeni\g<10>",
        r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) ?¥(\W|$)": r"\g<1> japansks jens\g<10>",
        r"((\W|^)(" + gen_words + r")) ¥((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))(\W|$)": r"\g<1> \g<6> japansks jens\g<10>",
        r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) ?¥(\W|$)": r"\g<1> japönskum jenum\g<10>",
        r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r") ?¥(\W|$)": r"\g<1> japönskum jenum\g<14>",
        r"((\W|^)(" + dat_words + r")) ¥((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))(\W|$)": r"\g<1> \g<6> japönskum jenum\g<10>",
        r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) ?¥(\W|$)": r"\g<1> japanskra jena\g<10>",
        r"((\W|^)(" + gen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r") ?¥(\W|$)": r"\g<1> japanskra jena\g<14>",
        r"((\W|^)(" + gen_words + r")) ¥((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))(\W|$)": r"\g<1> \g<6> japanskra jena\g<10>",
        r"(1) ?¥(\W|$)": r"\g<1> japanskt jen\g<2>",
        r"([02-9]|" + amounts + r") ?¥(\W|$)": r"\g<1> japönsk jen\g<5>",
        r"(\W|^) ?¥((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))(\W|$)": r"\g<1>\g<2> japanskt jen\g<6>",
        r"(\W|^) ?¥((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))(\W|$)": r"\g<1>\g<2> japönsk jen\g<6>",
        r"(\W|^)¥(\W|$)": r"\g<1>japönsk jen\g<2>",

        # --- þúsund krónur --------------------------------------------------
        r"((\W|^)(" + dat_words + r") (\d{1,2}\.)?(\d{3}\.?)*\d+(,\d+)?) þ\.?kr\.?\-?(\W|$)": r"\g<1> þúsund krónum\g<9>",
        r"((\W|^)(" + gen_words + r") (\d{1,2}\.)?(\d{3}\.?)*\d+(,\d+)?) þ\.?kr\.?\-?(\W|$)": r"\g<1> þúsund króna\g<9>",
        r"(\W|^)þ\.?kr\.?\-?(\W|$)": r"\g<1>þúsund krónur\g<2>",
    }

    currency_list = [("evr", "€"), ("rúpí", "₹"), ("lír", "₤")]

    for word, symbol in currency_list:
        currency_dict[r"((\W|^)(" + dat_words + r")) " + symbol + r"((((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r")(\W|$)"] = r"\g<1> \g<6> " + word + r"um\g<15>"
        currency_dict[r"((\W|^)(" + gen_words + r")) " + symbol + r"((((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r")(\W|$)"] = r"\g<1> \g<6> " + word + r"a\g<15>"
        currency_dict[r"(\W|^)" + symbol + r"((((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r")(\W|$)"] = r"\g<1>\g<2> " + word + r"ur\g<11>"

        currency_dict[r"((\W|^)(" + accdatgen_words_comb + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) ?" + symbol + r"(\W|$)"] = r"\g<1> " + word + r"u\g<14>"
        currency_dict[r"((\W|^)(" + accdatgen_words_comb + r")) " + symbol + r"((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))(\W|$)"] = r"\g<1> \g<10> " + word + r"u\g<14>"

        currency_dict[r"((\W|^)(" + dat_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) ?" + symbol + r"(\W|$)"] = r"\g<1> " + word + r"um\g<10>"
        currency_dict[r"((\W|^)(" + dat_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r") ?" + symbol + r"(\W|$)"] = r"\g<1> " + word + r"um\g<14>"
        currency_dict[r"((\W|^)(" + dat_words + r")) " + symbol + r"((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))(\W|$)"] = r"\g<1> \g<6> " + word + r"um\g<10>"
        currency_dict[r"((\W|^)(" + gen_words + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) ?" + symbol + r"(\W|$)"] = r"\g<1> " + word + r"a\g<10>"
        currency_dict[r"((\W|^)(" + gen_words + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)? " + amounts + r") ?" + symbol + r"(\W|$)"] = r"\g<1> " + word + r"a\g<14>"
        currency_dict[r"((\W|^)(" + gen_words + r")) " + symbol + r"((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))(\W|$)"] = r"\g<1> \g<6> " + word + r"a\g<10>"

        currency_dict[r"(1) ?" + symbol + r"(\W|$)"] = r"\g<1> " + word + r"a\g<2>"
        currency_dict[r"([02-9]|" + amounts + r") ?" + symbol + r"(\W|$)"] = r"\g<1> " + word + r"ur\g<5>"

        currency_dict[r"(\W|^) ?" + symbol + r"((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))(\W|$)"] = r"\g<1>\g<2> " + word + r"a\g<6>"
        currency_dict[r"(\W|^) ?" + symbol + r"((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))(\W|$)"] = r"\g<1>\g<2> " + word + r"ur\g<6>"

        currency_dict[r"(\W|^)" + symbol + r"(\W|$)"] = r"\g<1>" + word + r"ur\g<2>"

    currency_letters = [("ISK", "íslenskar krónur"), ("GBP", "sterlingspund"), ("EUR", "evrur"),
                        ("USD", "bandaríkjadalir"), ("DKK", "danskar krónur"), ("AUD", "ástralskir dalir"),
                        ("JPY", "japönsk jen"), ("CHF", "svissneskir frankar"), ("CAD", "kanadískir dalir"),
                        ("CZK", "tékkneskar krónur"), ("INR", "indverskar rúpíur"), ("SEK", "sænskar krónur"),
                        ("NOK", "norskar krónur"), ("PTE", "portúgalskir skútar")]

    for letters, word in currency_letters:
        currency_dict[r"(\W|^)" + letters + r"(\W|$)"] = r"\g<1>" + word + r"\g<2>"

    million_list = [(r"m\.?kr\.?\-?", " króna"),
                    (r"mljó\.?", "")]

    # In the million / billion rules group 1 ends with the space that
    # follows the number, so the word is appended directly.
    for letters, suffix in million_list:
        currency_dict[r"((\W|^)(" + gen_words + r") (\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1) )" + letters + r"(\W|$)"] = r"\g<1>milljónar" + suffix + r"\g<9>"
        currency_dict[r"((\W|^)(" + dat_words + r") (\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d+(,\d*[02-9])) )" + letters + r"(\W|$)"] = r"\g<1>milljónum" + suffix + r"\g<10>"
        currency_dict[r"((\W|^)(" + gen_words + r") (\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d+(,\d*[02-9])) )" + letters + r"(\W|$)"] = r"\g<1>milljóna" + suffix + r"\g<10>"
        currency_dict[r"(1 )" + letters + r"(\W|$)"] = r"\g<1>milljón" + suffix + r"\g<2>"
        currency_dict[r"([02-9] )" + letters + r"(\W|$)"] = r"\g<1>milljónir" + suffix + r"\g<2>"

    billion_list = [(r"ma\.?kr\.?\-?", " króna"),
                    (r"mlja\.?", "")]

    for letters, suffix in billion_list:
        currency_dict[r"((\W|^)(" + acc_words + r") (\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1) )" + letters + r"(\W|$)"] = r"\g<1>milljarð" + suffix + r"\g<9>"
        currency_dict[r"((\W|^)(" + dat_words + r") (\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1) )" + letters + r"(\W|$)"] = r"\g<1>milljarði" + suffix + r"\g<9>"
        currency_dict[r"((\W|^)(" + gen_words + r") (\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1) )" + letters + r"(\W|$)"] = r"\g<1>milljarðs" + suffix + r"\g<9>"
        currency_dict[r"((\W|^)(" + accgen_words + r") (\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d+(,\d*[02-9])) )" + letters + r"(\W|$)"] = r"\g<1>milljarða" + suffix + r"\g<12>"
        currency_dict[r"((\W|^)(" + dat_words + r") (\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d+(,\d*[02-9])) )" + letters + r"(\W|$)"] = r"\g<1>milljörðum" + suffix + r"\g<10>"
        currency_dict[r"(1 )" + letters + r"(\W|$)"] = r"\g<1>milljarður" + suffix + r"\g<2>"
        currency_dict[r"([02-9] )" + letters + r"(\W|$)"] = r"\g<1>milljarðar" + suffix + r"\g<2>"

    return currency_dict

# Matches a stand-alone V / Hz / B / W / W.st / W.h abbreviation with an
# optional k, M, G or T prefix and an optional trailing period.
electronic_ptrn = (
    r"\b("
    r"[kMGT]?"
    r"(V|Hz|B|W|W\.?(st|h))"
    r")\.?\b"
)

def make_electronic_dict():
    """Build the ordered regex -> replacement table for electrical and data units.

    Covers watt-hours (Wh / Wst), watts (W), volts (V), hertz (Hz) and bytes (B),
    combined with the SI prefixes in ``watt_prefix`` (bytes only with a prefix).
    For every unit the rules that depend on a preceding dative or genitive
    preposition are inserted before the preposition-free fallback rules, so a
    caller that applies the rules in insertion order tries the case-specific
    forms first.

    Reads the module-level regex fragments ``dat_words``, ``gen_words`` and
    ``amounts``.

    Returns:
        dict: regex pattern (str) -> ``re.sub`` replacement template (str).
    """
    electronic_dict = {}

    watt_prefix = [("", ""), ("k", "kíló"), ("M", "Mega"), ("G", "Gíga"), ("T", "Tera")]
    # NOTE(review): the spelling 'herz' and the capitalised 'Mega'/'Gíga'/'Tera'
    # are kept exactly as they were; confirm they are the intended output forms.
    measurement = [('V', 'volt'), ('Hz', 'herz')]

    # Shared pattern fragments. Group numbers used in the replacements assume
    # that dat_words / gen_words contain two capture groups each and that
    # `amounts` contains three (its own wrapper plus two inner ones):
    #   preposition + number + unit      -> trailing delimiter is group 10
    #   preposition + amount word + unit -> amount is group 11, delimiter 14
    #   bare number/amount + unit        -> delimiter is group 5
    # The watt-hour unit adds one group of its own, shifting the delimiter by 1.
    lead = r"((\W|^)("
    num_one = r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1))) "
    num_many = r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9]))) "
    num_amount = r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + " "
    tail = r"(\W|$)"

    def regular_unit_rules(unit, spoken):
        """Return the rules for a unit whose case forms are `spoken` + ending."""
        unit += tail
        return {
            lead + dat_words + num_one + unit: r"\g<1> " + spoken + r"i\g<10>",
            lead + gen_words + num_one + unit: r"\g<1> " + spoken + r"s\g<10>",
            lead + dat_words + num_many + unit: r"\g<1> " + spoken + r"um\g<10>",
            lead + dat_words + num_amount + unit: r"\g<1> \g<11> " + spoken + r"um\g<14>",
            lead + gen_words + num_many + unit: r"\g<1> " + spoken + r"a\g<10>",
            lead + gen_words + num_amount + unit: r"\g<1> \g<11> " + spoken + r"a\g<14>",
            # Fallback without a preposition. The delimiter is group 5; group 3
            # is the optional "ir" inside `amounts`, and referencing it dropped
            # the punctuation that followed the unit.
            r"(\d|" + amounts + ") " + unit: r"\g<1> " + spoken + r"\g<5>",
        }

    for letter, prefix in watt_prefix:
        # Watt-hours: "Wh", "W.h", "Wst", ... (the unit itself holds one group).
        watt_hour = letter + r"[Ww]\.?(st|h)\.?" + tail
        electronic_dict.update({
            lead + gen_words + num_one + watt_hour: r"\g<1> " + prefix + r"vattstundar\g<11>",
            lead + dat_words + num_many + watt_hour: r"\g<1> " + prefix + r"vattstundum\g<11>",
            lead + dat_words + num_amount + watt_hour: r"\g<1> \g<11> " + prefix + r"vattstundum\g<15>",
            lead + gen_words + num_many + watt_hour: r"\g<1> " + prefix + r"vattstunda\g<11>",
            lead + gen_words + num_amount + watt_hour: r"\g<1> \g<11> " + prefix + r"vattstunda\g<15>",
            # Group 1 already ends with the separating space, so none is added.
            r"(1 )" + watt_hour: r"\g<1>" + prefix + r"vattstund\g<3>",
            # Delimiter is group 6 here: wrapper, 3 x amounts, (st|h), delimiter.
            r"([02-9]|" + amounts + ") " + watt_hour: r"\g<1> " + prefix + r"vattstundir\g<6>",
        })

        # Watts: singular stem "vatt", plural stem "vött".
        watt = letter + "W" + tail
        electronic_dict.update({
            lead + dat_words + num_one + watt: r"\g<1> " + prefix + r"vatti\g<10>",
            lead + gen_words + num_one + watt: r"\g<1> " + prefix + r"vatts\g<10>",
            lead + dat_words + num_many + watt: r"\g<1> " + prefix + r"vöttum\g<10>",
            lead + dat_words + num_amount + watt: r"\g<1> \g<11> " + prefix + r"vöttum\g<14>",
            lead + gen_words + num_many + watt: r"\g<1> " + prefix + r"vatta\g<10>",
            lead + gen_words + num_amount + watt: r"\g<1> \g<11> " + prefix + r"vatta\g<14>",
            r"([02-9]|" + amounts + ") " + watt: r"\g<1> " + prefix + r"vött\g<5>",
            r"(1 )" + watt: r"\g<1>" + prefix + r"vatt\g<2>",
        })

        for symbol, word in measurement:
            electronic_dict.update(regular_unit_rules(letter + symbol, prefix + word))

    # Bytes are only expanded with a prefix (kB, MB, ...), never as a bare "B".
    for letter, prefix in watt_prefix[1:]:
        electronic_dict.update(regular_unit_rules(letter + "B", prefix + "bæt"))

    return electronic_dict

# Gate pattern for the percent / piece / kilocalorie table: replace_all() only
# walks rest_dict when this matches. "[Ss]tk" mirrors the table's own fallback
# rule, which accepts a capitalised "Stk"; with a lower-case-only gate that
# rule could not fire on text whose only unit was "Stk.".
rest_ptrn = r"\%|\b([Ss]tk|[Kk][Cc]al)\.?\b"

def make_rest_measure_dict():
    """Build the ordered regex -> replacement table for %, "stk" and "kcal".

    Each unit gets rules keyed on a preceding preposition (taken from the
    module-level ``dat_words``, ``gen_words`` and ``accdatgen_words_comb``)
    followed by either a number or an amount word from ``amounts``; the
    preposition-free fallback for the unit is inserted after them.

    Returns:
        dict: regex pattern (str) -> ``re.sub`` replacement template (str),
        in the order the rules are meant to be tried.
    """
    tail = r"(\W|$)"

    def after_one(preps, unit):
        # preposition + number ending in 1 + unit
        return r"((\W|^)(" + preps + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*1|\d,\d*1)))" + unit + tail

    def after_many(preps, unit):
        # preposition + number not ending in 1 + unit
        return r"((\W|^)(" + preps + r") ((\d{1,2}\.)?(\d{3}\.?)*(\d*[02-9]|\d,\d*[02-9])))" + unit + tail

    def after_amount(preps, unit):
        # preposition + optional number + amount word + unit
        return r"((\W|^)(" + preps + r") (((\d{1,2}\.)?(\d{3}\.?)*|\d+)(,\d+)?)?) " + amounts + unit + tail

    percent = r" ?\%"
    pieces = r" stk\.?"
    kcal = r" [Kk][Cc]al"

    rules = [
        # Percent
        (after_one(dat_words, percent), r"\g<1> prósenti\g<10>"),
        (after_one(gen_words, percent), r"\g<1> prósents\g<10>"),
        (after_many(dat_words, percent), r"\g<1> prósentum\g<10>"),
        (after_amount(dat_words, r" \%"), r"\g<1> \g<11> prósentum\g<14>"),
        (after_many(gen_words, percent), r"\g<1> prósenta\g<10>"),
        (after_amount(gen_words, r" \%"), r"\g<1> \g<11> prósenta\g<14> "),
        (r"\%", " prósent"),

        # Pieces
        (after_one(gen_words, pieces), r"\g<1> stykkis\g<10>"),
        (after_many(dat_words, pieces), r"\g<1> stykkjum\g<10>"),
        (after_amount(dat_words, pieces), r"\g<1> \g<11> stykkjum\g<14>"),
        (after_many(gen_words, pieces), r"\g<1> stykkja\g<10>"),
        (after_amount(gen_words, pieces), r"\g<1> \g<11> stykkja\g<14> "),
        (r"(\W|^)[Ss]tk\.?(\W|$)", r"\g<1>stykki\g<2>"),

        # Kilocalories; the combined preposition list holds six capture groups,
        # which is why this first rule's delimiter is group 14 rather than 10.
        (after_one(accdatgen_words_comb, kcal), r"\g<1> kílókaloríu\g<14>"),
        (after_many(dat_words, kcal), r"\g<1> kílókaloríum\g<10>"),
        (after_amount(dat_words, kcal), r"\g<1> \g<11> kílókaloríum\g<14>"),
        (after_many(gen_words, kcal), r"\g<1> kílókaloría\g<10>"),
        (after_amount(gen_words, kcal), r"\g<1> \g<11> kílókaloría\g<14> "),
        (r"(1 )[Kk][Cc]al(\W|$)", r"\g<1>kílókaloría\g<2>"),
        (r"(\W|^)[Kk][Cc]al\.?(\W|$)", r"\g<1>kílókaloríur\g<2>"),
    ]

    return dict(rules)


# Gate pattern for the weekday / month / Roman-numeral table: replace_all()
# only walks period_dict when this matches. The "gard" and "nud" suffixes are
# optional, like those of the other weekdays, because the table itself has
# rules for the short forms "lau." and "sun."; with the suffix required, the
# short forms never opened the gate.
period_ptrn = r"\b(mán(ud)?|þri(ðjud)?|miðvikud|fim(mtud)?|fös(tud)?|lau(gard)?|sun(nud)?|jan|feb|mar|apr|jú[nl]|ágú?|sept?|okt|nóv|des)\.?|\b(I(II?|V|X)|(V|X|XV)I{1,3}|XI?[VX])\b"
                         
def make_period_dict():
    """Build the ordered regex -> replacement table for weekday and month
    abbreviations and for the Roman numerals II-XIX (V and X excluded).

    Every pattern is anchored by a leading ``(\\W|^)`` and a trailing
    ``(\\W|$)`` group; the replacement re-emits both around the expansion.

    Returns:
        dict: regex pattern (str) -> ``re.sub`` replacement template (str).
    """
    # (abbreviation regex, expansion, number of the trailing delimiter group).
    # The delimiter is group 3 wherever the abbreviation has a group of its own.
    weekdays = [
        (r"mán(ud)?\.?", "mánudag", 3),
        (r"þri(ðjud)?\.?", "þriðjudag", 3),
        (r"miðvikud\.?", "miðvikudag", 2),
        (r"fim(mtud\.?|\.)", "fimmtudag", 3),
        (r"fös(tud)?\.?", "föstudag", 3),
        (r"lau(gard)?\.?", "laugardag", 3),
        (r"sun(nud)?\.?", "sunnudag", 3),
    ]

    # (abbreviation regex, expansion); an optional period follows each one.
    months = [
        ("jan", "janúar"),
        ("feb", "febrúar"),
        ("mar", "mars"),
        ("apr", "apríl"),
        ("jún", "júní"),
        ("júl", "júlí"),
        ("ágú?", "ágúst"),
        ("sept?", "september"),
        ("okt", "október"),
        ("nóv", "nóvember"),
        ("des", "desember"),
    ]

    ordinals = [
        ("II", "annar"),
        ("III", "þriðji"),
        ("IV", "fjórði"),
        ("VI", "sjötti"),
        ("VII", "sjöundi"),
        ("VIII", "áttundi"),
        ("IX", "níundi"),
        ("XI", "ellefti"),
        ("XII", "tólfti"),
        ("XIII", "þrettándi"),
        ("XIV", "fjórtándi"),
        ("XV", "fimmtándi"),
        ("XVI", "sextándi"),
        ("XVII", "sautjándi"),
        ("XVIII", "átjándi"),
        ("XIX", "nítjándi"),
    ]

    period_dict = {}

    for abbr, expansion, tail_group in weekdays:
        pattern = r"(\W|^)" + abbr + r"(\W|$)"
        period_dict[pattern] = r"\g<1>" + expansion + r"\g<" + str(tail_group) + ">"

    for abbr, expansion in months + ordinals:
        pattern = r"(\W|^)" + abbr + r"\.?(\W|$)"
        period_dict[pattern] = r"\g<1>" + expansion + r"\g<2>"

    return period_dict

In [5]:
# Load the JSON lookup tables. Each file is opened in a context manager so it
# is closed as soon as it has been parsed, and read as UTF-8 (the encoding the
# JSON specification prescribes) instead of the platform's locale default.
with open("norm-dictionaries/abbrdict.txt", encoding="utf-8") as abbr_file:
    abbr_dict = json.load(abbr_file)

# Gate pattern passed to replace_all() together with direction_dict.
direction_ptrn = "[SN]?V|N|[SN]?A|S"
with open("norm-dictionaries/directiondict.txt", encoding="utf-8") as direction_file:
    direction_dict = json.load(direction_file)

with open("norm-dictionaries/denominatordict.txt", encoding="utf-8") as denominator_file:
    denominator_dict = json.load(denominator_file)

In [34]:
# Build every substitution table once; each one is later passed to
# replace_all() together with its gate pattern.
weight_dict = make_weight_dict()
distance_dict = make_distance_dict()
area_dict = make_area_dict()
volume_dict = make_volume_dict()
time_dict = make_time_dict()
currency_dict = make_currency_dict()
electronic_dict = make_electronic_dict()
rest_dict = make_rest_measure_dict()
period_dict = make_period_dict()

In [334]:
# Sample input for the normalization steps run in the following cell.
t2_string = "SV-átt, 3-8 m/s."

In [335]:
# Each step pairs a substitution table with the gate pattern that decides
# whether replace_all() walks that table; the empty pattern always matches.
# The steps run in this order, each one on the previous step's output.
normalization_steps = [
    (abbr_dict, ""),
    (direction_dict, direction_ptrn),
    (denominator_dict, r"\/"),
    (weight_dict, weight_ptrn),
    (distance_dict, distance_ptrn),
    (area_dict, area_ptrn),
    (volume_dict, volume_ptrn),
    (time_dict, time_ptrn),
    (currency_dict, currency_ptrn),
    (electronic_dict, electronic_ptrn),
    (rest_dict, rest_ptrn),
    (period_dict, period_ptrn),
]

# Time only the substitutions themselves and print the elapsed seconds.
start = time.time()
for step_dict, gate_ptrn in normalization_steps:
    t2_string = replace_all(t2_string, step_dict, gate_ptrn)
print(time.time() - start)

0.0018696784973144531


In [336]:
# Show the string after all normalization steps have been applied.
t2_string

'suðvestanátt, 3-8 metrar á sekúndu'