In [2]:
import os
import re

# Letter keys struck by each hand when touch-typing on a QWERTY layout.
# Together the two strings cover the 26 letters with no overlap.
right_hand: str = 'yuiophjklnm'
left_hand: str = 'qwertasdfgzxcvb'

We want to load in all of the source files, so that we have as many unique words as possible to practice on.

The sources used are listed below, with links to where they were acquired.
- [dwyl_english_words.txt](https://github.com/dwyl/english-words/blob/master/words.txt)

In [3]:
# Directory holding the raw word lists, and directory receiving the generated subsets.
source_path = './source/'
output_path = './subsets/'

# One entry per source file; each entry is that file's list of lines, with the
# line terminators kept exactly as readlines() returns them.
source_files = []

# sorted() makes the load order reproducible: os.listdir() returns entries in
# arbitrary order.
for file_name in sorted(os.listdir(source_path)):
    file_path = os.path.join(source_path, file_name)
    # Skip anything that is not a regular file (for instance a sub-directory);
    # open() would raise on it.
    if not os.path.isfile(file_path):
        continue
    # Decode explicitly instead of relying on the platform default encoding.
    # Undecodable bytes become U+FFFD rather than aborting the whole load.
    with open(file_path, "r", encoding="utf-8", errors="replace") as f:
        source_files.append(f.readlines())

Create a general word bank without any punctuation or digits, to be used for the subsequent generation of subsets.

In [4]:
# Accepts lines made of ASCII letters only. `$` also matches just before a
# trailing newline, so lines taken straight from readlines() pass the check.
non_numeric = re.compile("^[A-Za-z]+$")

# Every entry is lower-case and ends with exactly one "\n": the subset cells
# further down hand these entries directly to writelines(), which adds no
# separator of its own.
word_bank = set()

for source in source_files:
    for word in source:
        if non_numeric.match(word):
            # A file's final line may lack its newline; normalise the terminator
            # so that word is not glued to its neighbour when written out.
            word_bank.add(word.rstrip("\n").lower() + "\n")

# Make sure the output directory exists before listing it or writing into it.
os.makedirs(output_path, exist_ok=True)

# Only written when missing, so an existing words_english.txt is left untouched.
if "words_english.txt" not in os.listdir(output_path):
    with open(output_path+'words_english.txt','w') as f:
        # Sets iterate in arbitrary order; sort for a reproducible file.
        f.writelines(sorted(word_bank))

With this code, we generate two word banks: one that targets the left hand, and one that targets the right hand.
While practicing with these word banks, I immediately noticed that my right hand is significantly less efficient at typing. My WPM on the left hand word bank is at roughly 70-80, whereas my WPM on the right hand word bank lies between 30 and 50.

In [30]:
# Whole-word patterns: five or more letters, all of them typed by a single hand
# on a QWERTY layout (left-hand keys first, right-hand keys second).
left_hand_qwerty, right_hand_qwerty = map(
    re.compile,
    ("^[qwertasdfgzxcvb]{5,}$", "^[yuiophjklnm]{5,}$"),
)

def create_subset_from_regex(name, regex_patt: re.Pattern):
    """Write every word of the global ``word_bank`` matched by ``regex_patt`` to a file.

    Args:
        name: File name without extension; the file is created at
            ``output_path + name + '.txt'`` and overwritten if it exists.
        regex_patt: Compiled pattern applied with ``match()``, i.e. anchored at
            the start of each word.

    Words are written exactly as stored in ``word_bank`` (no separator is
    added), in sorted order so that repeated runs produce identical files.
    """
    # Set iteration order is arbitrary; sorting makes the output reproducible.
    words = sorted(word for word in word_bank if regex_patt.match(word))
    with open(output_path+name+'.txt','w') as f:
        f.writelines(words)

# Generate the two single-hand practice files, one per hand.
create_subset_from_regex(name='lh_qwerty_english', regex_patt=left_hand_qwerty)
create_subset_from_regex(name='rh_qwerty_english', regex_patt=right_hand_qwerty)

Our right hand word bank has significantly fewer words than our left hand word bank, due to the QWERTY layout. To add some variation and practice timing my keypresses between the left and right hand, I add the rightmost column of the left hand: t, g, and b; this word bank is called rhplus.

In [33]:
# right hand + left index
# [0] whole word (five or more letters) drawn from the right-hand keys plus t/g/b;
# [1] a run of t/g/b, used to require that at least one of them is present.
rhplus_qwerty = [re.compile("^[yuiophjklnmtgb]{5,}$"),re.compile("[tgb]+")]
with open(output_path+"rhplus_qwerty_english.txt","w") as f:
    # The t/g/b check must use search(): match() only tests the start of the
    # word and would drop words such as "night" whose t/g/b comes later.
    # sorted() keeps the file reproducible (set iteration order is arbitrary).
    f.writelines(sorted(
        word for word in word_bank
        if rhplus_qwerty[0].match(word) and rhplus_qwerty[1].search(word)
    ))

Even with the previous methodology, I still feel like I don't get effective practice for my right hand typing. Therefore, I create some additional word banks for practicing with my right hand.

To be specific, I create two additional word banks.
The first word bank is composed of words that follow the rules below:
- more letters should be from the right hand than the left hand
- at least one sequence of characters from right hand of length 2 or greater

In [8]:
rh_realistic_1 = []
left_hand = 'qwertasdfgzxcvb'
right_hand = 'yuiophjklnm'

MIN_SEQ_LENGTH = 2  # shortest right-hand run that qualifies a word
RH_PERCENT = 0.5    # right-hand share of the letters that must be exceeded

# sorted() gives a reproducible file; set iteration order is arbitrary.
for word in sorted(word_bank):
    lh = 0           # left-hand letters seen
    rh = 0           # right-hand letters seen
    seq = 0          # length of the right-hand run currently being extended
    longest_seq = 0  # longest right-hand run found in this word
    for char in word:
        if char in right_hand:
            rh += 1
            seq += 1
            longest_seq = max(longest_seq, seq)
        else:
            # Anything else (a left-hand key, or the trailing newline that the
            # bank entries carry) ends the current right-hand run.
            seq = 0
            if char in left_hand:
                lh += 1

    # Measure the share against the letters only: len(word) would also count
    # the trailing newline and understate the right-hand share.
    letters = lh + rh
    if letters and longest_seq >= MIN_SEQ_LENGTH and (rh / letters) > RH_PERCENT:
        rh_realistic_1.append(word)

with open(output_path+"rh_realistic_1_qwerty.txt","w") as f:
    f.writelines(rh_realistic_1)

To further increase the accuracy and coordination between my left and right hand, I created a word bank that contains words that are primarily right-handed, but have sequences that follow the pattern of LRL, or RLR.

In [7]:
rh_alternating = []

# Three consecutive letters typed left-right-left, or right-left-right.
alt_patt_lrl = re.compile(f"[{left_hand}][{right_hand}][{left_hand}]")
alt_patt_rlr = re.compile(f"[{right_hand}][{left_hand}][{right_hand}]")

MIN_PATT_OCC = 4  # minimum number of LRL + RLR matches in a word
RH_PERCENT = 0.5  # right-hand share of the letters that must be exceeded

# sorted() gives a reproducible file; set iteration order is arbitrary.
for word in sorted(word_bank):
    lh = sum(1 for char in word if char in left_hand)
    rh = sum(1 for char in word if char in right_hand)

    # findall() returns non-overlapping matches; each pattern is counted on its own.
    patt_occ = len(alt_patt_lrl.findall(word)) + len(alt_patt_rlr.findall(word))

    # Measure the share against the letters only: len(word) would also count
    # the trailing newline that bank entries carry.
    letters = lh + rh
    if letters and patt_occ >= MIN_PATT_OCC and (rh / letters) > RH_PERCENT:
        rh_alternating.append(word)

with open(output_path+"rh_alternating_qwerty.txt","w") as f:
    f.writelines(rh_alternating)

In [21]:
# Strictly alternating words: every letter is typed by the opposite hand from
# the one before it. The optional final class admits odd-length words
# (e.g. L-R-L-R-L), which a bare ([..][..])+ would reject.
alt_patt_lrl_exclusive = re.compile(f"^([{left_hand}][{right_hand}])+[{left_hand}]?$")
alt_patt_rlr_exclusive = re.compile(f"^([{right_hand}][{left_hand}])+[{right_hand}]?$")

with open(output_path+"alternating_qwerty.txt","w") as f:
    # Bank entries end in "\n"; strip it so that "> 5" counts letters only.
    # sorted() keeps the file reproducible (set iteration order is arbitrary).
    f.writelines(sorted(
        word for word in word_bank
        if (alt_patt_rlr_exclusive.match(word) or alt_patt_lrl_exclusive.match(word))
        and len(word.rstrip("\n")) > 5
    ))