In [1]:
# -*- coding: utf-8 -*-
from read_input import * 
import numpy as np
%matplotlib inline
# NOTE: this only binds a Python variable called PYTHONIOENCODING; it does not
# modify the process environment.
PYTHONIOENCODING = "utf-8"

# Cost attached to each modifier level; handed to get_distances() below.
level_cost = {
    u'': 0,
    u'Shift': 1,
    u'Alt': 2,
    u'Alt_Shift': 3,
}

# Weights of the four objective terms (P, A, F, E), all equal here.
w_P = w_A = w_F = w_E = 0.25

# ---- Read in the model ----
print("read in: characters, keyslots and letters")
azerty = get_azerty()
letters = get_letters()
characters = get_characters()
keyslots = get_keyslots()

print("read in: similarity values")
similarity_c_c = get_character_similarities()
similarity_c_l = get_character_letter_similarities()

print("read in: distance values")
distance_level_0, distance_level_1 = get_distances(level_cost)

# ---- Read in the probabilities ----
print("read in: probability values")
p_single, p_bigram = get_probabilities()

print("read in: ergonomics, performance")
ergonomics = get_ergonomics()
performance = get_performance()

print("Done reading input values.")

read in: characters, keyslots and letters
read in: similarity values
read in: distance values
read in: probability values
´d composes í
´d composes á
^d composes ê
^d composes ô
^d composes û
^d composes î
^d composes â
¨d composes ï
¨d composes ö
¨d composes ü
¨d composes ë
read in: ergonomics, performance
Done reading input values.


In [2]:
# 1. Compute the linear cost.
# For every linear variable x[c,s] (character c placed on keyslot s) accumulate
# the P, A, F and E terms that apply if that assignment is chosen.
x_P, x_A, x_F, x_E = {}, {}, {}, {}

for c in characters:
    for s in keyslots:
        P = 0
        A = 0
        # Characters that are not in azerty are looked up under the placeholder
        # "NaN"; any pair missing from distance_level_1 contributes distance 0.
        F = p_single[c] * distance_level_1.get((s, azerty.get(c, "NaN")), 0)
        E = 0
        for l in letters:
            char_then_letter = (c, l)
            letter_then_char = (l, c)

            # Performance: bigram probability times the performance value of
            # the (slot, letter-slot) pair, in both typing orders.
            if char_then_letter in p_bigram:
                P += p_bigram[char_then_letter] * performance[(s, azerty[l])]
            if letter_then_char in p_bigram:
                P += p_bigram[letter_then_char] * performance[(azerty[l], s)]

            # Association: similar character/letter pairs are charged by the
            # level-0 distance between the slot and the letter's azerty slot.
            if char_then_letter in similarity_c_l:
                A += ((p_single[c] + p_single[l])
                      * similarity_c_l[char_then_letter]
                      * distance_level_0[s, azerty[l]])

            # Ergonomics: same structure as performance, different score table.
            if char_then_letter in p_bigram:
                E += p_bigram[char_then_letter] * ergonomics[(s, azerty[l])]
            if letter_then_char in p_bigram:
                E += p_bigram[letter_then_char] * ergonomics[(azerty[l], s)]

        x_P[c, s], x_A[c, s], x_F[c, s], x_E[c, s] = P, A, F, E
#now normalize these terms such that they are all between 0 and 1
def normalize_dict_values(d):
    """Min-max rescale the values of ``d`` in place so they lie in [0, 1].

    Each value ``v`` is replaced by ``(v - min) / (max - min)``, so the
    smallest value maps to 0.0 and the largest to 1.0.

    Parameters
    ----------
    d : dict
        Mapping with numeric values. It is modified in place.

    Returns
    -------
    dict
        The same dict object ``d``, returned for convenience.

    Notes
    -----
    * An empty dict is returned unchanged.
    * If all values are equal (zero range) every value becomes 0.0 instead of
      raising ``ZeroDivisionError``.
    """
    if not d:
        return d

    values = list(d.values())
    minimum = float(np.min(values))
    value_range = float(np.max(values)) - minimum

    # Iterate over a snapshot of the keys; only existing keys are reassigned.
    for k in list(d):
        if value_range:
            d[k] = (d[k] - minimum) / value_range
        else:
            d[k] = 0.0
    return d

# Normalize each of the four term dictionaries with normalize_dict_values.
x_P, x_A, x_F, x_E = (
    normalize_dict_values(term) for term in (x_P, x_A, x_F, x_E)
)

# Weighted sum of the linear terms, one entry per (character, keyslot) pair.
linear_cost = {
    (c, s): w_P * x_P[c, s] + w_A * x_A[c, s] + w_F * x_F[c, s] + w_E * x_E[c, s]
    for c in characters
    for s in keyslots
}

In [4]:
# Writes an input file for the reformulation of the quadratic term.
#
# Sections written to reformulation_input.txt, in order:
#   1. problem size (number of keyslots)
#   2. w_A-weighted (probability * similarity) matrix between characters,
#      zero-padded to keyslots x keyslots
#   3. level-0 distance matrix between keyslots (unweighted)
#   4. spacebar fixation flag
#   5. scale used for rounding down the probabilities
#   6. linear cost matrix (characters x keyslots), zero-padded with dummy rows
#
# NOTE(review): `codecs` is not imported in any visible cell; it is presumably
# brought into scope by `from read_input import *` -- confirm.

# Number of dummy "characters" needed so the matrices are keyslots x keyslots.
n_dummies = len(keyslots) - len(characters)
zero_padding = ["0"] * n_dummies          # empty list when n_dummies <= 0
zero_row = " ".join(["0"] * len(keyslots))

# The with-block closes the file even if one of the lookups below raises.
with codecs.open("reformulation_input.txt", 'w', encoding="utf-8") as f:
    f.write("# number of letters and keys\n")
    f.write(str(len(keyslots)) + "\n")

    f.write("# w_A*probabilities*similarities\n")
    for c1 in characters:
        prob_strings = []
        for c2 in characters:
            # Direct dict membership test (no intermediate key list).
            if (c1, c2) in similarity_c_c:
                # Don't forget the weighting.
                p = w_A * (p_single[c1] + p_single[c2]) * similarity_c_c[c1, c2]
                prob_strings.append("%f" % p)
            else:
                prob_strings.append("0")
        # Pad the row with dummy columns up to the number of keyslots.
        f.write(" ".join(prob_strings + zero_padding) + "\n")
    # Pad with all-zero dummy rows up to the number of keyslots.
    for _ in range(n_dummies):
        f.write(zero_row + "\n")

    # Level-0 distances between every pair of keyslots.
    f.write("# distances\n")
    distances = distance_level_0
    for s1 in keyslots:
        f.write(" ".join("%f" % distances[(s1, s2)] for s2 in keyslots) + "\n")

    f.write("# fixation of the spacebar to the bottom\n")
    f.write("0\n")
    f.write("# scale for rounding down the probabilities\n")
    # Terminate the scale line; previously the next header was glued onto it
    # ("1e6# distances").
    f.write("1e6\n")

    # Linear cost of placing each character on each keyslot. The header used
    # to be a copy-pasted "# distances".
    f.write("# linear costs\n")
    for c in characters:
        f.write(" ".join("%f" % linear_cost[(c, s)] for s in keyslots) + "\n")
    # Pad with all-zero dummy rows up to the number of keyslots.
    for _ in range(n_dummies):
        f.write(zero_row + "\n")

print("Done.")

Done.


In [2]:
# Spot check: probability stored in p_bigram for the pair (u'´d', u'z').
p_bigram[u'´d', u'z']

5.0065385393323679e-07

In [19]:
# Number of ordered (keyslot, keyslot) pairs, i.e. entries in a full slot-by-slot matrix.
len(keyslots)*len(keyslots)

16900

In [20]:
# Number of characters in the model.
len(characters)

72

In [21]:
# Number of ordered character pairs; 72 is the len(characters) value displayed by the previous cell.
72*72

5184

In [4]:
from read_input import * 
# Spot check: re-read the ergonomics table and show the value for the keyslot pair (D01, E00).
get_ergonomics()[u'D01', u'E00']

0.3

read in: characters, keyslots and letters
