In [1]:
from math import sqrt

In [2]:
from functools import lru_cache

In [3]:
# Letter rows of a QWERTY keyboard, keyed by row index (0 is the top letter row).
# Each row is a list so that a letter's position in the list is its column index.
keyboard = {
    row_index: list(letters)
    for row_index, letters in enumerate(("qwertyuiop", "asdfghjkl", "zxcvbnm"))
}

In [4]:
# Horizontal offset added to the column index of each keyboard row, indexed by
# row number (used by coordinates() as xoffset[yi]).
xoffset = [0, 0.25, 0.75]

In [5]:
def coordinates(xi, yi):
    """Convert a keyboard grid position into real (x, y) coordinates.

    Args:
        xi: Column index of the key within its row.
        yi: Row index of the key; also used to look up the row's offset
            in `xoffset`.

    Returns:
        A tuple `(x, y)` where `x` is `xi` shifted by the row's horizontal
        offset and `y` is `yi` unchanged.
    """
    real_x = xi + xoffset[yi]
    return real_x, yi

In [6]:
def space(yi):
    """Return the distance to travel from row `yi` to the space bar.

    The distance is assumed to depend only on the row (y axis): it is the
    number of rows between `yi` and row 3, where the space bar is treated
    as sitting.
    """
    space_bar_row = 3
    return space_bar_row - yi

In [7]:
# Per-letter lookup table: letter -> ((x, y) coordinates, distance to space bar).
# Built from a generator so no loop variables are left in the notebook namespace.
key_metrics = dict(
    (letter, (coordinates(column, row), space(row)))
    for row, letters in keyboard.items()
    for column, letter in enumerate(letters)
)

In [8]:
def dist(u, v):
    """Return the Euclidean distance between two 2-D points.

    Args:
        u: First point; only `u[0]` and `u[1]` are read.
        v: Second point; only `v[0]` and `v[1]` are read.

    Returns:
        The length of the vector `u - v` as a float.
    """
    dx = u[0] - v[0]
    dy = u[1] - v[1]
    return sqrt(dx ** 2 + dy ** 2)

In [9]:
# Memoised with an unbounded LRU cache so the distance for a given
# (letter1, letter2) pair is computed only once.
@lru_cache(maxsize=None)
def travel(letter1, letter2):
    """Return the distance between the keys for `letter1` and `letter2`.

    Both letters must be keys of `key_metrics`; an unknown letter raises
    `KeyError`. The result is in the same units as the key coordinates.
    """
    start = key_metrics[letter1][0]
    end = key_metrics[letter2][0]
    return dist(start, end)

In [10]:
def distance(word):
    """Return the total typing travel for `word`, scaled to millimetres.

    The total is the sum of the key-to-key travel between each pair of
    consecutive letters, plus the average of the space-bar distances of the
    first and last letters.

    Args:
        word: Sequence of letters, each of which must be a key of
            `key_metrics` (an unknown letter raises `KeyError`).

    Returns:
        0 for an empty word, otherwise the travel multiplied by 19.05.
    """
    if not word:
        return 0

    # Travel between every pair of neighbouring letters, in key units.
    total = 0
    for previous, current in zip(word, word[1:]):
        total += travel(previous, current)

    # Mean of the space-bar distances at the start and the end of the word.
    first_space = key_metrics[word[0]][1]
    last_space = key_metrics[word[-1]][1]
    total += (first_space + last_space) / 2

    return total * 19.05  # Scale to mm

In [11]:
# Example: total travel in mm for a multi-letter word.
distance("papal")

703.3426198050789

In [12]:
# Example: a single-letter word has no key-to-key travel, so only the
# space-bar term contributes.
distance("a")

38.1

In [13]:
# NOTE(review): this value does not appear to be read by any visible cell, and
# the file-reading loop further down rebinds `word` — confirm it is still needed.
word = "papal"

Word frequencies are taken from the Kaggle dataset at
https://www.kaggle.com/rtatman/english-word-frequency.
Download the archive and unzip it to get the file unigram_freq.csv:
```sh
unzip ~/Downloads/archive.zip
```

In [14]:
# Path of the unzipped word-frequency CSV, relative to the working directory.
filename = "unigram_freq.csv"

In [15]:
# word -> (distance in mm, distance in mm * count); filled in by register().
word_norm = {}
# Running sum of the counts of every registered word.
# (A `global` declaration is only meaningful inside a function, so none is
# needed for this module-level assignment.)
total_count = 0

In [16]:
def register(word, count):
    """Record the typing distance of `word` and add `count` to the total.

    Stores `(distance, distance * count)` under `word` in `word_norm` and
    increases the module-level `total_count` by `count`.

    Args:
        word: Word to measure with `distance()`.
        count: Number of occurrences of the word.
    """
    global total_count
    length = distance(word)
    weighted_length = length * count
    word_norm[word] = (length, weighted_length)
    total_count += count

In [17]:
# Load every "word,count" row of the frequency file into word_norm / total_count.
# Start from a clean slate so that re-running this cell does not double-count.
word_norm.clear()
total_count = 0

with open(filename, 'r', encoding='UTF-8') as file:
    next(file, None)  # Skip the header row (no error if the file is empty).
    for line in file:
        line = line.rstrip()
        if not line:
            # Skip blank lines instead of treating the first one as end of file.
            continue
        (word, count) = line.split(",")
        count = int(count)
        register(word, count)

In [18]:
# (word, (distance in mm, count-weighted distance divided by the total count)).
word_items = [
    (entry, (length, weighted_length / total_count))
    for entry, (length, weighted_length) in word_norm.items()
]

In [19]:
# Rank in place by the normalised weighted distance, largest contribution first.
word_items.sort(key=lambda item: item[1][1], reverse=True)

In [20]:
# Show the ranking as (word, (distance in mm, normalised weighted distance)).
# NOTE(review): this displays the entire list; consider a slice such as
# word_items[:20] to keep the notebook output small.
word_items

[('the', (152.42188247862651, 5.9960291819576845)),
 ('and', (213.93578412382442, 4.728014549595767)),
 ('of', (140.09602333298795, 3.132900871579079)),
 ('to', (133.35, 2.751912507360592)),
 ('for', (235.34602333298795, 2.3743005668503905)),
 ('that', (256.8287611717123, 1.484764181028167)),
 ('with', (249.56987918077394, 1.3507495853957376)),
 ('is', (158.80668197257134, 1.270655987257749)),
 ('this', (227.69806367961715, 1.2499337559523382)),
 ('in', (83.02933514141957, 1.1956811837505543)),
 ('information', (735.7703442564256, 1.1667183047765384)),
 ('you', (152.4, 0.7763971836847898)),
 ('not', (171.64809418149832, 0.7686013146345245)),
 ('your', (209.55, 0.7347190101887958)),
 ('from', (188.39062568342368, 0.7289290564888206)),
 ('have', (262.84411612854575, 0.6990725346064885)),
 ('or', (152.4, 0.6713356605195756)),
 ('all', (190.5, 0.6550973209732747)),
 ('page', (348.0277252160803, 0.6403551344629932)),
 ('on', (95.4480941814983, 0.6086652010434255)),
 ('about', (284.337568406