In [2]:
import pandas as pd
import numpy as np
import processing

In [3]:
# Read the processed log CSV into a DataFrame.
LOG_CSV = './data/log_valid_processed.csv'
log = pd.read_csv(LOG_CSV)

In [4]:
# Aggregate the loaded log into a `words` table using the project-local
# `processing` module.
# NOTE(review): presumably one row per typed word - confirm against
# processing.log_to_words.
words = processing.log_to_words(log)

In [5]:
# Process the word table with the project-local helper. The rest of this
# cell reads the `ite`, `ite2`, `ite_input`, `ite_input_key`,
# `ite_input_prev`, `ite_lev_dist`, `ite_len_diff` and `word` columns of the
# result; which of them process_words adds is not visible from here.
# NOTE(review): this rebinds `words` to its own processed version, so
# re-running the cell passes already-processed rows to process_words again.
# Re-run the aggregation cell first unless process_words is known to be
# idempotent.
words = processing.process_words(words)

# Refine and add substrategies: overwrite `ite2` with a more specific label
# wherever one of the patterns below matches. Every `str.contains` test
# passes `na=` explicitly, so a missing text field can never satisfy a
# pattern and every mask stays a plain boolean Series on the full index.

# Contractions: a prediction whose new text contains an apostrophe while the
# previous text did not (e.g. "dont" -> "don't").
# NOTE(review): ite_lev_dist / ite_len_diff are presumably the edit distance
# and the length difference between the previous and the new text - confirm
# in processing.process_words.
mask = words.ite == 'predict'
mask &= words.ite_lev_dist == 1
mask &= words.ite_len_diff == 1
mask &= words.ite_input_key.str.contains("'", na=False)
# Evaluated on the whole column (not on a `.loc[mask]` subset) so the result
# shares the index of `mask`. na=True makes a missing previous text count as
# "already has an apostrophe": the row is excluded instead of `~` raising
# TypeError on an object-dtype result that contains NaN.
mask &= ~words.ite_input_prev.str.contains("'", na=True)
words.loc[mask, 'ite2'] = 'contraction'

# Capitalizations: a prediction with zero distance and zero length difference
# where the previous text was only lowercase ASCII letters and the new text
# has at least one uppercase ASCII letter.
mask = words.ite == 'predict'
mask &= words.ite_lev_dist == 0
mask &= words.ite_len_diff == 0
mask &= words.ite_input_prev.str.contains('^[a-z]+$', na=False)
mask &= words.ite_input_key.str.contains('[A-Z]', na=False)
words.loc[mask, 'ite2'] = 'capitalization'

# Remove the "no_change" labels on rows whose input actually differs from the
# previous text; they become "other".
# NOTE(review): `!=` is also True when both fields are missing (NaN != NaN),
# so such rows are relabelled as well - confirm this is intended.
mask = words.ite2 == 'no_change'
mask &= words.ite_input != words.ite_input_prev
words.loc[mask, 'ite2'] = 'other'

# Added space: the new text splits into more than one whitespace-separated
# token and, once its spaces are removed, equals the previous text field.
# NOTE(review): the mask is not restricted to rows currently labelled
# "other"; any matching row is relabelled, whatever its previous `ite2`.
mask = words.ite_input_key.str.split().str.len() > 1
# regex=False: the pattern is a literal space, not a regular expression.
mask &= words.ite_input_key.str.replace(' ', '', regex=False) == words.ite_input_prev
words.loc[mask, 'ite2'] = 'add_space'

# Strategic use: the user accepted a prediction but the final word is not the
# selected suggestion, i.e. the text was changed again afterwards.
def _classify_post_prediction_edit(final_word, suggestion, typed_before):
    """Return the `strategic` label for one edited prediction row.

    The tests are substring tests (`in`), applied in priority order; the
    first one that holds decides the label.

    Args:
        final_word: value of the `word` column (the final word).
        suggestion: value of the `ite_input` column (the selected suggestion).
        typed_before: value of the `ite_input_prev` column (the previous
            text field).

    Returns:
        'remove' if the final word is contained in the suggestion,
        'add' if the suggestion is contained in the final word,
        'same_prefix' if the previous text is contained in both,
        'other' otherwise.

    Raises:
        TypeError: if a value needed by a test is not a string (for example
            a missing value read as NaN).
    """
    # Case 3: user removed keys after the prediction.
    if final_word in suggestion:
        return 'remove'
    # Case 2: user added keys after the prediction.
    if suggestion in final_word:
        return 'add'
    # Case 1: user changed the keys but kept the previously typed text.
    # NOTE(review): this is a substring test, not a strict prefix test
    # (`str.startswith`); confirm which one the analysis needs.
    if typed_before in final_word and typed_before in suggestion:
        return 'same_prefix'
    # Case 4: none of the above.
    return 'other'


words['strategic'] = None
is_prediction = words.ite == 'predict'
# Predictions default to the string 'none'; all other rows keep None.
words.loc[is_prediction, 'strategic'] = 'none'

# Predictions where the selected suggestion was not the final word.
# Multi-word `ite_input` values are deliberately not filtered out here.
edited = is_prediction & (words.word != words.ite_input)

# Guard the empty case: there is nothing to classify, and a row-wise
# DataFrame.apply on an empty selection would return an empty DataFrame
# rather than a Series of labels.
if edited.any():
    edited_rows = words.loc[edited, ['word', 'ite_input', 'ite_input_prev']]
    # Single pass over the selected rows; the list is assigned positionally,
    # so no index alignment between a subset and the full frame is involved.
    words.loc[edited, 'strategic'] = [
        _classify_post_prediction_edit(final_word, suggestion, typed_before)
        for final_word, suggestion, typed_before
        in edited_rows.itertuples(index=False, name=None)
    ]

In [6]:
# Write the word table to disk; the DataFrame index is left out of the file.
WORDS_CSV = './data/words.csv'
words.to_csv(WORDS_CSV, index=False)