In [22]:
import polars as pl 
import numpy as np 
import pickle 

import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.offline import init_notebook_mode
from plotly.subplots import make_subplots

from helpers import *

In [2]:
big_metadata = pl.read_csv("../data/preprocessed_big_metadata.csv")
big_metadata.head()

PARTICIPANT_ID,AGE,GENDER,HAS_TAKEN_TYPING_COURSE,COUNTRY,LAYOUT,NATIVE_LANGUAGE,FINGERS,TIME_SPENT_TYPING,KEYBOARD_TYPE,ERROR_RATE,AVG_WPM_15,AVG_IKI,ECPC,KSPC,ROR,SPEED,AVG_KEYPRESS
i64,i64,str,i64,str,str,str,str,i64,str,f64,f64,f64,f64,f64,f64,str,f64
5,27,"""female""",0,"""MY""","""qwerty""","""en""","""7-8""",6,"""laptop""",0.87108,72.8871,150.457375,0.031469,1.101399,0.3675,"""fast""",102.780952
7,13,"""female""",0,"""AU""","""qwerty""","""en""","""7-8""",0,"""laptop""",6.685633,24.1809,386.575303,0.092105,1.292398,0.0667,"""slow""",122.280861
23,21,"""female""",0,"""IN""","""qwerty""","""en""","""3-4""",0,"""full""",2.130493,24.7112,457.947902,0.016624,1.07289,0.0413,"""slow""",128.350417
24,21,"""female""",0,"""PH""","""qwerty""","""tl""","""7-8""",1,"""laptop""",1.893287,45.3364,223.913395,0.045296,1.1777,0.2678,"""slow""",121.406805
25,19,"""male""",0,"""IN""","""qwerty""","""en""","""7-8""",1,"""laptop""",0.747384,54.6831,190.034172,0.055389,1.146707,0.4434,"""slow""",143.912533


In [3]:
with open("../mappings/key-hand.json", "r") as f:
    key_hand = json.load(f)
    
key_hand.items()

dict_items([('48', 'R'), ('49', 'L'), ('50', 'L'), ('51', 'L'), ('52', 'L'), ('53', 'L'), ('54', 'L'), ('55', 'R'), ('56', 'R'), ('57', 'R'), ('8', 'R'), ('9', 'L'), ('13', 'L'), ('16', ['L', 'R']), ('17', ['L', 'R']), ('18', ['L', 'R']), ('19', 'R'), ('20', 'L'), ('27', 'L'), ('32', 'R'), ('33', 'R'), ('34', 'R'), ('35', 'R'), ('36', 'R'), ('37', 'R'), ('38', 'R'), ('39', 'R'), ('40', 'R'), ('44', 'R'), ('45', 'R'), ('46', 'R'), ('65', 'L'), ('66', 'R'), ('67', 'L'), ('68', 'L'), ('69', 'L'), ('70', 'L'), ('71', 'L'), ('72', 'R'), ('73', 'R'), ('74', 'R'), ('75', 'R'), ('76', 'R'), ('77', 'R'), ('78', 'R'), ('79', 'R'), ('80', 'R'), ('81', 'L'), ('82', 'L'), ('83', 'L'), ('84', 'L'), ('85', 'R'), ('86', 'L'), ('87', 'L'), ('88', 'L'), ('89', 'R'), ('90', 'L'), ('91', 'L'), ('92', 'R'), ('93', 'R'), ('96', 'R'), ('97', 'R'), ('98', 'R'), ('99', 'R'), ('100', 'R'), ('101', 'R'), ('102', 'R'), ('103', 'R'), ('104', 'R'), ('105', 'R'), ('106', 'R'), ('107', 'R'), ('109', 'R'), ('110', 'R'

In [4]:
mapper = Mapper()
participant_ids = find_all_participants(BIG_DATA_DIR)

In [5]:
participant_ids[:10]

['21833',
 '233209',
 '485024',
 '375416',
 '39734',
 '34285',
 '285679',
 '256683',
 '206052',
 '201699']

In [None]:
ignore_keys = [mapper.get_code_from_key("<SoS>"), mapper.get_code_from_key("space")]
hands = np.empty((len(participant_ids), 3, 2))

# pids = ['291017']

for i, participant_id in enumerate(participant_ids):
    if i % 1000 == 0:
        print(f"File {i:7d} of {len(participant_ids)}")
    try:
        df = read_data_for_participant(participant_id, directory=BIG_DATA_DIR, print_info=False, drop_timestamps=False, 
                                       columns_to_read=["TEST_SECTION_ID", "RELEASE_TIME", "PRESS_TIME", "KEYCODE"], quote_char=None,
                                       dtypes={"PRESS_TIME": float, "RELEASE_TIME": float})
        df = create_bigrams(df, ignore_keys=ignore_keys)
        df = (
            df.lazy()
            .with_columns([
                pl.struct(["PREV_KEYCODE", "KEYCODE"]).apply(lambda x: key_hand[str(x["PREV_KEYCODE"])] + key_hand[str(x["KEYCODE"])]).alias("HANDS"),
            ])
            .select([
                pl.col("INTER_KEY_INTERVAL"),
                pl.when(pl.col("HANDS").apply(lambda x: len(set(x))) == 1)
                    .then(pl.col("HANDS").apply(lambda x: x[0]))
                    .otherwise("Alteration")
                    .alias("BIGRAM_HAND"),
                pl.when(pl.col("KEYCODE") == pl.col("PREV_KEYCODE"))
                    .then(pl.col("INTER_KEY_INTERVAL"))
                    .otherwise(None)
                    .alias("LETTER_REPETITION")
            ])
            .groupby("BIGRAM_HAND").mean()
            .sort("BIGRAM_HAND")
            .collect()
        ) 
        
        # Output is a dataframe with the average value of IKI for Left / Right hand, Hand-alteration + Letter Repetition
        hands[i] = df[:,1:].rows() # first element - alteration, second - left, third - right
        break
    except FileNotFoundError:
        continue
    

In [17]:
with open('../data/hand-comparison-repetitions.pickle', 'rb') as handle:
    hands = pickle.load(handle)

In [19]:
# bigrams = np.array(["Alteration", "L", "R"] * len(participant_ids)).reshape(-1, 1)
# test = np.concatenate([hands.reshape(-1, 2), bigrams], axis=1)

hands_df = pl.DataFrame(data=hands.reshape(-1, 2), schema=["IKI", "IKI_REPETITION"])
hands_df = hands_df.with_columns([
    pl.Series(np.array(["Alteration", "L", "R"] * len(participant_ids))).alias("BIGRAM_HAND"),
    pl.Series(np.array(list(map(lambda x: np.repeat(x, 3).tolist(), np.array(participant_ids).reshape(-1, 1)))).ravel()).alias("PARTICIPANT_ID").cast(int)
])

### Difference between hand bigrams for all participants

In [None]:
fig = ff.create_distplot(
    [
        hands_df.filter(pl.col("BIGRAM_HAND") == "R")["IKI"].view().tolist(),
        hands_df.filter(pl.col("BIGRAM_HAND") == "L")["IKI"].view().tolist(),
        hands_df.filter(pl.col("BIGRAM_HAND") == "Alteration")["IKI"].view().tolist(),
    ], group_labels=["Right", "Left", "Hand-alteration"], show_rug=False, bin_size=5)


fig.add_vline(x=0, line_dash="dash", line_color="red")

fig.update_layout(template="none", width=1600, height=1000, xaxis=dict(dtick=50), font=dict(size=20), 
                 legend=dict(
                        yanchor="top",
                        y=0.99,
                        xanchor="left",
                        x=0.8,
                        font=dict(size=30)
                    ), xaxis_title="Inter-Key Interval Time (ms)", yaxis_title="Density")
# 
fig.update_xaxes(showgrid=True, range=[-220, 600])
fig.show()

In [26]:
hands_df = hands_df.join(big_metadata[["PARTICIPANT_ID", "SPEED", "HAS_TAKEN_TYPING_COURSE"]], on="PARTICIPANT_ID")
hands_df.head()

IKI,IKI_REPETITION,BIGRAM_HAND,PARTICIPANT_ID,SPEED,HAS_TAKEN_TYPING_COURSE
f64,f64,str,i64,str,i64
368.792857,,"""Alteration""",21833,"""slow""",1
267.073171,75.375,"""L""",21833,"""slow""",1
365.486667,133.590909,"""R""",21833,"""slow""",1
62.811912,,"""Alteration""",233209,"""slow""",1
51.433333,79.0,"""L""",233209,"""slow""",1


In [27]:
trained_hands_df = hands_df.filter(pl.col("HAS_TAKEN_TYPING_COURSE") == 1)
untrained_hands_df = hands_df.filter(pl.col("HAS_TAKEN_TYPING_COURSE") == 0)

fast_hands_df = hands_df.filter(pl.col("SPEED") == "fast")
slow_hands_df = hands_df.filter(pl.col("SPEED") == "slow")

### Bigram hands for Trained Participants

In [None]:
fig = ff.create_distplot(
    [
        trained_hands_df.filter(pl.col("BIGRAM_HAND") == "R")["IKI"].view().tolist(),
        trained_hands_df.filter(pl.col("BIGRAM_HAND") == "L")["IKI"].view().tolist(),
        trained_hands_df.filter(pl.col("BIGRAM_HAND") == "Alteration")["IKI"].view().tolist(),
    ], group_labels=["Right", "Left", "Hand-alteration"], show_rug=False, bin_size=10)


fig.update_layout(template="none", width=1600, height=1000, xaxis=dict(dtick=50), font=dict(size=18), 
                 legend=dict(
                        yanchor="top",
                        y=0.99,
                        xanchor="left",
                        x=0.8,
                        font=dict(size=30)
                    ), xaxis_title="Inter-Key Interval Time", yaxis_title="Density")
# 
fig.update_xaxes(showgrid=True, range=[-220, 600])
fig.show()

### Bigram hands for Untrained Participants

In [None]:
fig = ff.create_distplot(
    [
        untrained_hands_df.filter(pl.col("BIGRAM_HAND") == "R")["IKI"].view().tolist(),
        untrained_hands_df.filter(pl.col("BIGRAM_HAND") == "L")["IKI"].view().tolist(),
        untrained_hands_df.filter(pl.col("BIGRAM_HAND") == "Alteration")["IKI"].view().tolist(),
    ], group_labels=["Right", "Left", "Hand-alteration"], show_rug=False, bin_size=10)


fig.update_layout(template="none", width=1600, height=1000, xaxis=dict(dtick=50), font=dict(size=18), 
                 legend=dict(
                        yanchor="top",
                        y=0.99,
                        xanchor="left",
                        x=0.8,
                        font=dict(size=30)
                    ), xaxis_title="Inter-Key Interval Time", yaxis_title="Density")
# 
fig.update_xaxes(showgrid=True, range=[-220, 600])
fig.show()

### Bigram hands for Fast Participants

In [None]:
fig = ff.create_distplot(
    [
        fast_hands_df.filter(pl.col("BIGRAM_HAND") == "R")["IKI"].view().tolist(),
        fast_hands_df.filter(pl.col("BIGRAM_HAND") == "L")["IKI"].view().tolist(),
        fast_hands_df.filter(pl.col("BIGRAM_HAND") == "Alteration")["IKI"].view().tolist(),
    ], group_labels=["Right", "Left", "Hand-alteration"], show_rug=False, bin_size=10)


fig.update_layout(template="none", width=1600, height=1000, xaxis=dict(dtick=50), font=dict(size=18), 
                 legend=dict(
                        yanchor="top",
                        y=0.99,
                        xanchor="left",
                        x=0.8,
                        font=dict(size=30)
                    ), xaxis_title="Inter-Key Interval Time", yaxis_title="Density")
# 
fig.update_xaxes(showgrid=True, range=[-220, 600])
fig.show()

### Bigram hands for Slow Participants

In [None]:
fig = ff.create_distplot(
    [
        slow_hands_df.filter(pl.col("BIGRAM_HAND") == "R")["IKI"].view().tolist(),
        slow_hands_df.filter(pl.col("BIGRAM_HAND") == "L")["IKI"].view().tolist(),
        slow_hands_df.filter(pl.col("BIGRAM_HAND") == "Alteration")["IKI"].view().tolist(),
    ], group_labels=["Right", "Left", "Hand-alteration"], show_rug=False, bin_size=10)


fig.update_layout(template="none", width=1600, height=1000, xaxis=dict(dtick=50), font=dict(size=18), 
                 legend=dict(
                        yanchor="top",
                        y=0.99,
                        xanchor="left",
                        x=0.8,
                        font=dict(size=30)
                    ), xaxis_title="Inter-Key Interval Time", yaxis_title="Density")
# 
fig.update_xaxes(showgrid=True, range=[-220, 600])
fig.show()

In [33]:
schema = {"Name": str, "Median": float, "Mean": float, "Std": float, "Kurtosis": float, "Skewness": float}
statistics_df = pl.DataFrame(schema=schema)

In [34]:
def stats(df, name):
    global statistics_df, schema
    _vals = name, df.median(), df.mean(), df.std(), df.skew(), df.kurtosis()
    statistics_df = statistics_df.extend(pl.DataFrame(np.array(_vals).reshape(-1, 1), schema=schema))

### Right hand statistics

In [35]:
stats(hands_df.filter(pl.col("BIGRAM_HAND") == "R")["IKI"], "right-hand")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "R") & (pl.col("HAS_TAKEN_TYPING_COURSE") == 1))["IKI"], "right-hand-trained")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "R") & (pl.col("HAS_TAKEN_TYPING_COURSE") == 0))["IKI"], "right-hand-untrained")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "R") & (pl.col("SPEED") == "fast"))["IKI"], "right-hand-fast")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "R") & (pl.col("SPEED") == "slow"))["IKI"], "right-hand-slow")

stats(hands_df.filter(pl.col("BIGRAM_HAND") == "R")["IKI_REPETITION"].fill_nan(None).fill_null(strategy="mean"), "right-hand-r")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "R") & (pl.col("HAS_TAKEN_TYPING_COURSE") == 1))["IKI_REPETITION"].fill_nan(None).fill_null(strategy="mean"), "right-hand-r-trained")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "R") & (pl.col("HAS_TAKEN_TYPING_COURSE") == 0))["IKI_REPETITION"].fill_nan(None).fill_null(strategy="mean"), "right-hand-r-untrained")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "R") & (pl.col("SPEED") == "fast"))["IKI_REPETITION"].fill_nan(None).fill_null(strategy="mean"), "right-hand-r-fast")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "R") & (pl.col("SPEED") == "slow"))["IKI_REPETITION"].fill_nan(None).fill_null(strategy="mean"), "right-hand-r-slow")

statistics_df

Name,Median,Mean,Std,Kurtosis,Skewness
str,f64,f64,f64,f64,f64
"""right-hand""",112.284958,132.183932,112.423479,1.584739,9.975195
"""right-hand-tra…",94.362289,116.920158,108.103944,2.068344,14.86997
"""right-hand-unt…",120.58545,138.946027,113.631916,1.409576,8.478411
"""right-hand-fas…",41.081481,39.187937,35.987181,-1.189714,5.052706
"""right-hand-slo…",136.435644,155.9518,112.973915,1.53679,11.139121
"""right-hand-r""",99.0,116.548933,86.205739,16.668544,714.144893
"""right-hand-r-t…",97.222222,115.385826,88.347958,12.265427,289.964172
"""right-hand-r-u…",99.714286,117.064226,85.234828,18.834709,929.885132
"""right-hand-r-f…",80.882353,90.683573,54.656948,9.21948,150.832833
"""right-hand-r-s…",104.7,123.159285,91.392533,16.92588,700.345594


### Left hand statistics

In [36]:
stats(hands_df.filter(pl.col("BIGRAM_HAND") == "L")["IKI"], "left-hand")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "L") & (pl.col("HAS_TAKEN_TYPING_COURSE") == 1))["IKI"], "left-hand-trained")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "L") & (pl.col("HAS_TAKEN_TYPING_COURSE") == 0))["IKI"], "left-hand-untrained")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "L") & (pl.col("SPEED") == "fast"))["IKI"], "left-hand-fast")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "L") & (pl.col("SPEED") == "slow"))["IKI"], "left-hand-slow")

stats(hands_df.filter(pl.col("BIGRAM_HAND") == "L")["IKI_REPETITION"].fill_nan(None).fill_null(strategy="mean"), "left-hand-r")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "L") & (pl.col("HAS_TAKEN_TYPING_COURSE") == 1))["IKI_REPETITION"].fill_nan(None).fill_null(strategy="mean"), "left-hand-r-trained")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "L") & (pl.col("HAS_TAKEN_TYPING_COURSE") == 0))["IKI_REPETITION"].fill_nan(None).fill_null(strategy="mean"), "left-hand-r-untrained")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "L") & (pl.col("SPEED") == "fast"))["IKI_REPETITION"].fill_nan(None).fill_null(strategy="mean"), "left-hand-r-fast")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "L") & (pl.col("SPEED") == "slow"))["IKI_REPETITION"].fill_nan(None).fill_null(strategy="mean"), "left-hand-r-slow")

statistics_df

Name,Median,Mean,Std,Kurtosis,Skewness
str,f64,f64,f64,f64,f64
"""right-hand""",112.284958,132.183932,112.423479,1.584739,9.975195
"""right-hand-tra…",94.362289,116.920158,108.103944,2.068344,14.86997
"""right-hand-unt…",120.58545,138.946027,113.631916,1.409576,8.478411
"""right-hand-fas…",41.081481,39.187937,35.987181,-1.189714,5.052706
"""right-hand-slo…",136.435644,155.9518,112.973915,1.53679,11.139121
"""right-hand-r""",99.0,116.548933,86.205739,16.668544,714.144893
"""right-hand-r-t…",97.222222,115.385826,88.347958,12.265427,289.964172
"""right-hand-r-u…",99.714286,117.064226,85.234828,18.834709,929.885132
"""right-hand-r-f…",80.882353,90.683573,54.656948,9.21948,150.832833
"""right-hand-r-s…",104.7,123.159285,91.392533,16.92588,700.345594


### Hand-alteration statistics

In [37]:
stats(hands_df.filter(pl.col("BIGRAM_HAND") == "Alteration")["IKI"], "hand-alt")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "Alteration") & (pl.col("HAS_TAKEN_TYPING_COURSE") == 1))["IKI"], "hand-alt-trained")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "Alteration") & (pl.col("HAS_TAKEN_TYPING_COURSE") == 0))["IKI"], "hand-alt-untrained")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "Alteration") & (pl.col("SPEED") == "fast"))["IKI"], "hand-alt-fast")
stats(hands_df.filter((pl.col("BIGRAM_HAND") == "Alteration") & (pl.col("SPEED") == "slow"))["IKI"], "hand-alt-slow")
statistics_df

Name,Median,Mean,Std,Kurtosis,Skewness
str,f64,f64,f64,f64,f64
"""right-hand""",112.284958,132.183932,112.423479,1.584739,9.975195
"""right-hand-tra…",94.362289,116.920158,108.103944,2.068344,14.86997
"""right-hand-unt…",120.58545,138.946027,113.631916,1.409576,8.478411
"""right-hand-fas…",41.081481,39.187937,35.987181,-1.189714,5.052706
"""right-hand-slo…",136.435644,155.9518,112.973915,1.53679,11.139121
"""right-hand-r""",99.0,116.548933,86.205739,16.668544,714.144893
"""right-hand-r-t…",97.222222,115.385826,88.347958,12.265427,289.964172
"""right-hand-r-u…",99.714286,117.064226,85.234828,18.834709,929.885132
"""right-hand-r-f…",80.882353,90.683573,54.656948,9.21948,150.832833
"""right-hand-r-s…",104.7,123.159285,91.392533,16.92588,700.345594
