<a href="https://colab.research.google.com/github/eunbi2000/Virtual-Keyboard-Decoder/blob/main/Virtual_Keyboard_Decoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import csv
import math
import pickle
import random

# **Read Inputs**

In [None]:
# Read user data
# Layout of Data/data.txt as parsed below: a line whose first character is '='
# separates two words; every other non-blank line is "<letter> <x> <y>" and
# describes one touch point of the word currently being read.
words = []         # one entry per word: a list of [letter, x, y] touch points
correct_list = []  # the intended word for each entry of `words`, in the same order
word = []          # touch points collected for the word currently being read
correct = ""       # intended letters collected for the word currently being read
with open('Data/data.txt', 'r') as data_file:   # avoid shadowing the builtin `input`
    for line in data_file:
        line_split = line.split()
        if not line_split:
            # Blank line (e.g. a trailing newline): nothing to parse.
            continue
        if line[0] != '=':
            word.append([line_split[0], float(line_split[1]), float(line_split[2])])
            correct += line_split[0]
        else:
            # Separator: store the finished word together with its label.
            # Empty words (leading or repeated separators) are skipped, which
            # replaces the previous "drop the first entry" slicing.
            if word:
                words.append(word)
                correct_list.append(correct)
            word = []
            correct = ""

# The last word has no separator after it, so flush it here. Its label is
# stored as well, which keeps `words` and `correct_list` the same length.
if word:
    words.append(word)
    correct_list.append(correct)

words
#correct_list

[[['i', 25.258852, 7.752883], ['f', 13.50101, 11.854456]],
 [['a', 4.9218874, 13.221646], ['t', 16.132853, 7.0692873]],
 [['f', 13.671909, 12.948208],
  ['i', 25.942448, 7.70731],
  ['r', 12.03128, 7.160434],
  ['s', 7.48537, 13.540659],
  ['t', 15.346718, 7.3427258]],
 [['y', 20.91802, 6.65913],
  ['o', 29.633863, 8.071894],
  ['u', 23.754942, 8.61877]],
 [['f', 15.244179, 12.67477],
  ['a', 4.648449, 13.084928],
  ['i', 25.703188, 7.4794445],
  ['l', 31.103592, 13.221646]],
 [['w', 5.6396623, 8.482051], ['e', 8.784202, 7.6617365]],
 [['r', 12.03128, 7.6161637],
  ['u', 24.711975, 7.7984557],
  ['n', 24.575256, 18.189108]],
 [['t', 15.551796, 8.390905],
  ['h', 21.054739, 13.768523],
  ['e', 9.262718, 8.254187]],
 [['r', 13.159212, 8.61877],
  ['i', 26.831121, 7.3427258],
  ['s', 7.3144712, 12.993782],
  ['k', 27.719795, 12.629196]],
 [['o', 29.97566, 6.385692], ['f', 14.458044, 13.039353]],
 [['f', 14.458044, 11.5810175],
  ['a', 5.4345837, 12.857063],
  ['i', 25.49811, 7.70731],
  [

In [None]:
# Read dictionary
# The unpickled object is used later as a mapping word -> probability.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load Data/unigram.dict when it comes from a trusted source.
with open('Data/unigram.dict', 'rb') as unigramModelFile:
    unigramModel = pickle.load(unigramModelFile)
# The with-statement has already closed the file; no explicit close() is needed.

# Read keyboard data
keyboard_raw = pd.read_csv("Data/keyboard.csv")
# Keep only the key label and its x/y coordinates.
keyboard = keyboard_raw[['key', 'x_mm', 'y_mm']]
keyboard

Unnamed: 0,key,x_mm,y_mm
0,a,4.02501,9.625024
1,b,18.900047,13.650034
2,c,12.950032,13.650034
3,d,9.975025,9.625024
4,e,8.487521,5.600014
5,f,12.950032,9.625024
6,g,15.925039,9.625024
7,h,18.900047,9.625024
8,i,23.362558,5.600014
9,j,21.875053,9.625024


# **Unigram Language Model Decoder**

In [None]:
# Keyboard size: extent of one key along x and y. These are added to the
# keyboard table's x_mm / y_mm values, so they share the same units.
key_width, key_height = 3, 4

# Dual Gaussian model parameters. The decoder builds its per-axis variances as
#   x: a + b * key_width**2
#   y: c + d * key_height**2
a, b = 2.403, 0.017
c, d = 2.295, 0.016

def get_likelihood(p, mu, sigma):
    """Return the multivariate normal density evaluated at ``p``.

    Args:
        p: point at which the density is evaluated.
        mu: mean of the distribution.
        sigma: covariance matrix of the distribution.

    Returns:
        The value of the probability density function at ``p``.
    """
    distribution = stats.multivariate_normal(mean=mu, cov=sigma)
    return distribution.pdf(p)

def is_letter(p, letter):
    """Tell whether the point ``p`` falls on the key labelled ``letter``.

    The key is treated as the axis-aligned rectangle that starts at the
    (x_mm, y_mm) stored for ``letter`` in ``keyboard`` and extends
    ``key_width`` along x and ``key_height`` along y. Both edges are inclusive.

    Args:
        p: indexable pair; ``p[0]`` is the x coordinate and ``p[1]`` the y.
        letter: key label to look up in ``keyboard['key']``.

    Returns:
        True when ``p`` lies inside the rectangle, False otherwise.

    Raises:
        IndexError: if ``letter`` has no row in ``keyboard``.
    """
    key_row = keyboard.loc[keyboard['key'] == letter]
    x_min = key_row['x_mm'].values.tolist()[0]
    y_min = key_row['y_mm'].values.tolist()[0]
    x_max = x_min + key_width
    y_max = y_min + key_height

    return bool(x_min <= p[0] <= x_max and y_min <= p[1] <= y_max)
#print(is_letter((5.0, 10.0), 'a'))
#print(is_letter((18.0, 14.0), 'b'))
#print(is_letter((10.0, 5.0), 'c'))

def get_literal_string(touchpoints):
    """Spell out the keys that were literally hit by a sequence of touches.

    Args:
        touchpoints: sequence of touches; index 1 of each touch is its x
            coordinate and index 2 its y coordinate.

    Returns:
        A string with one character per touch: the first key (in keyboard
        table order) whose rectangle contains the touch, or '?' when no
        candidate key contains it.
    """
    # Candidate keys are every row of the keyboard table except the last one.
    # NOTE(review): confirm what the last row holds and that skipping it is intended.
    candidate_keys = keyboard['key'].values.tolist()[:-1]

    def _key_under(tap):
        # First candidate containing the touch wins; '?' marks a miss.
        return next(
            (key for key in candidate_keys if is_letter([tap[1], tap[2]], key)),
            '?',
        )

    return ''.join(_key_under(tap) for tap in touchpoints)


In [None]:
def unigram_lm_decoder(touchpoints):
    """Decode a touch sequence into the most probable dictionary word.

    Every candidate word w is scored with p(w) * prod_j p(s_j | w_j), where
    p(w) comes from ``unigramModel`` and p(s_j | w_j) is a 2-D Gaussian
    centred on the key of the j-th letter, with variances
    ``a + b * key_width**2`` (x) and ``c + d * key_height**2`` (y).

    A dictionary word is a candidate when it has as many letters as there are
    touches and its first letter equals ``touchpoints[0][0]`` or its last
    letter equals ``touchpoints[-1][0]``.
    NOTE(review): index 0 of a touch is the labelled (intended) letter, so
    this pruning uses ground-truth information; confirm that is intended.

    Args:
        touchpoints: sequence of ``[letter, x, y]`` touches.

    Returns:
        The highest-scoring candidate (the first one in dictionary order on
        ties), or '' when there are no touches or no candidate.
    """
    if not touchpoints:
        return ''

    n_taps = len(touchpoints)
    first_label = touchpoints[0][0]
    last_label = touchpoints[-1][0]

    # Look every key centre up once instead of scanning the DataFrame twice
    # for each letter of each candidate word. setdefault keeps the first row
    # for a label, matching the previous "take element [0]" lookup.
    key_centres = {}
    for key, x_min, y_min in zip(keyboard['key'].tolist(),
                                 keyboard['x_mm'].tolist(),
                                 keyboard['y_mm'].tolist()):
        key_centres.setdefault(key, [x_min + key_width / 2, y_min + key_height / 2])

    # The covariance is the same for every key, so build it once.
    sigma = [[a + (b * (key_width * key_width)), 0],
             [0, c + (d * (key_height * key_height))]]

    # p(s_j | letter) depends only on the touch index and the letter, so it is
    # shared between all candidate words that have `letter` at position j.
    tap_likelihood = {}

    max_lik = -1
    max_word = ''
    for word, p_w in unigramModel.items():
        if len(word) != n_taps:
            continue
        if word[0] != first_label and word[-1] != last_label:
            continue
        # Words containing characters with no key on this keyboard cannot be
        # scored; skip them instead of failing on the lookup.
        if any(letter not in key_centres for letter in word):
            continue

        p_s_w = 1   # Holds p(s_1, s_2, ..., s_n|w) for the current word
        for j, letter in enumerate(word):
            cache_key = (j, letter)
            if cache_key not in tap_likelihood:
                tap_likelihood[cache_key] = get_likelihood(
                    [touchpoints[j][1], touchpoints[j][2]],
                    key_centres[letter],
                    sigma,
                )
            p_s_w *= tap_likelihood[cache_key]

        # Strict '>' keeps the earliest word when several share the top score.
        score = p_s_w * p_w
        if score > max_lik:
            max_lik = score
            max_word = word
    return max_word


In [None]:
# Run the decoder and the literal-string baseline on every recorded word and
# count how often each one reproduces the intended word.
decoded_success_count = 0
literal_success_count = 0
decoded_words = []
literal_strings = []
count = 1   # 1-based number of the word being processed; ends at (#words + 1)

# zip() pairs each touch sequence with its label and stops at the shorter of
# the two lists, so a touch sequence without a label cannot raise IndexError.
for touchpoints, target in zip(words, correct_list):
    decoded = unigram_lm_decoder(touchpoints)
    literal = get_literal_string(touchpoints)
    if decoded == target:
        decoded_success_count += 1
    if literal == target:
        literal_success_count += 1
    decoded_words.append(decoded)
    literal_strings.append(literal)
    print(f"{count} {target} {decoded} {literal}")
    count += 1

# Scale the hit counts by 100 so that dividing them by the number of words
# gives a success rate in percent.
decoded_success_count *= 100
literal_success_count *= 100

1 if of if
2 at at at
3 first first first
4 you you upi
5 fail fail fai?
6 we we we
7 run run ri?
8 the the tbe
9 risk risk rosk
10 of of pf
11 failure failure fai?ire
12 watch watch qatch
13 out out put
14 for for for
15 low low ?ow
16 objects objects o?jects
17 please please p?ease
18 provide provide pro?ode
19 your your ypyr
20 date date datw
21 circumstances circumference ?irci?asyancw
22 are are ate
23 poor poor ?por
24 a a a
25 problem problem ?rp?lem
26 with with woth
27 the the tbe
28 engine engine e?v?ne
29 my my mt
30 favorite favorite favoroye
31 subject subject sibject
32 elephants elephants e?e?hants
33 are are ate
34 large large ??ehe
35 my my my
36 favorite favorite fabprite
37 place place ?lacw
38 to to to
39 visit visit ?ozot
40 can can ??n
41 i i i
42 skate skate skate
43 with with wotb
44 sister sister sozyet
45 neither neither ?eotbwr
46 a a a
47 borrower borrower bprrower
48 a a a
49 question question questop?
50 to to tp
51 answer answer amzwet
52 three three tbre

IndexError: list index out of range

In [None]:
# Write the evaluation summary to results.txt.
# The success counts were already multiplied by 100 by the evaluation cell, so
# dividing by the number of evaluated words yields a percentage. `count` ends
# one past the number of words, so the list length is used as the denominator.
total_words = len(decoded_words)
decoded_rate = decoded_success_count / total_words if total_words else 0.0
literal_rate = literal_success_count / total_words if total_words else 0.0

with open("results.txt", 'w') as output:
    # The first line: success_rate(decoded_words), success_rate(literal_strings)
    output.write(f"success_rate(decoded_words)={decoded_rate}%, success_rate(literal_strings)={literal_rate}%\n")
    # Each line after: correct_word, decoded_word, literal_string
    for correct_word, decoded_word, literal_string in zip(correct_list, decoded_words, literal_strings):
        output.write(f"{correct_word}, {decoded_word}, {literal_string}\n")
# The with-statement closes the file; no explicit close() is needed.