In [None]:
# Participant-level metadata table; later cells join it onto the per-hand
# results through its PARTICIPANT_ID column.
big_metadata = pl.read_csv(
    "data/preprocessed_big_metadata.csv",
)
big_metadata.head(5)

In [None]:
# Lookup table keyed by the string form of a key code; the loop below
# concatenates the looked-up values of both keys of a bigram.
with open("mappings/key-hand.json") as mapping_file:
    key_hand = json.load(mapping_file)

key_hand.items()

In [None]:
# Project helper; used below through mapper.get_code_from_key(...) to turn key names into key codes.
mapper = Mapper()
# Presumably the ids of every participant found under BIG_DATA_DIR — confirm against find_all_participants.
participant_ids = find_all_participants(BIG_DATA_DIR)

In [None]:
# Preview the first ten participant ids.
participant_ids[:10]

In [None]:
# Key codes that are excluded when bigrams are built: the start-of-sequence marker and the space bar.
ignore_keys = [mapper.get_code_from_key("<SoS>"), mapper.get_code_from_key("space")]

# Row of hands[i] that stores each bigram category (same alphabetical layout the
# previous sort-based assignment produced): 0 - alternation, 1 - left, 2 - right.
hand_rows = {"Alteration": 0, "L": 1, "R": 2}

# hands[i, row] = (mean IKI, mean IKI of same-key repetitions) for participant i.
# Pre-filled with NaN so that participants whose file is missing, or who have no
# bigram of some category, are marked as missing instead of keeping whatever
# uninitialised memory np.empty would have handed back.
hands = np.full((len(participant_ids), len(hand_rows), 2), np.nan)

for i, participant_id in enumerate(participant_ids):
    if i % 1000 == 0:
        print(f"File {i:7d} of {len(participant_ids)}")
    try:
        df = read_data_for_participant(participant_id, directory=BIG_DATA_DIR, print_info=False, drop_timestamps=False,
                                       columns_to_read=["TEST_SECTION_ID", "RELEASE_TIME", "PRESS_TIME", "KEYCODE"], quote_char=None,
                                       dtypes={"PRESS_TIME": float, "RELEASE_TIME": float})
        df = create_bigrams(df, ignore_keys=ignore_keys)
        df = (
            df.lazy()
            .with_columns([
                # Concatenate the hand entries of the previous and the current key.
                pl.struct(["PREV_KEYCODE", "KEYCODE"]).apply(lambda x: key_hand[str(x["PREV_KEYCODE"])] + key_hand[str(x["KEYCODE"])]).alias("HANDS"),
            ])
            .select([
                pl.col("INTER_KEY_INTERVAL"),
                # Both keys on the same hand -> that hand's label, otherwise hand alternation.
                # pl.lit makes the fallback an explicit string literal; a bare string can be
                # interpreted as a column name by polars.
                pl.when(pl.col("HANDS").apply(lambda x: len(set(x))) == 1)
                    .then(pl.col("HANDS").apply(lambda x: x[0]))
                    .otherwise(pl.lit("Alteration"))
                    .alias("BIGRAM_HAND"),
                # IKI only for bigrams that repeat the same key; null everywhere else so
                # the group mean ignores the other bigrams.
                pl.when(pl.col("KEYCODE") == pl.col("PREV_KEYCODE"))
                    .then(pl.col("INTER_KEY_INTERVAL"))
                    .otherwise(None)
                    .alias("LETTER_REPETITION")
            ])
            .groupby("BIGRAM_HAND").mean()
            .collect()
        )
    except FileNotFoundError:
        # Best effort: a participant without a data file keeps its NaN rows.
        continue

    # One row per bigram category with the mean IKI and the mean letter-repetition IKI.
    # Rows are placed by label, so a category that is absent for this participant
    # stays NaN instead of breaking the (3, 2) assignment.
    category_means = df.select(["BIGRAM_HAND", "INTER_KEY_INTERVAL", "LETTER_REPETITION"]).rows()
    for bigram_hand, mean_iki, mean_repetition_iki in category_means:
        hands[i, hand_rows[bigram_hand]] = (
            np.nan if mean_iki is None else mean_iki,
            np.nan if mean_repetition_iki is None else mean_repetition_iki,
        )
    

In [None]:
# Rebind `hands` to the array stored on disk (this replaces any value computed earlier in the session).
# NOTE(review): pickle.load can execute arbitrary code embedded in the file — only load files you trust.
with open('data/hand-comparison-repetitions.pickle', mode='rb') as cached_hands_file:
    hands = pickle.load(cached_hands_file)

In [None]:
# Flatten hands (participants x 3 categories x 2 values) into one row per
# (participant, bigram category) pair.
hands_df = pl.DataFrame(data=hands.reshape(-1, 2), schema=["IKI", "IKI_REPETITION"])
hands_df = hands_df.with_columns([
    # Category label of every row; the three labels cycle once per participant,
    # in the same order as the rows of hands[i].
    pl.Series(np.tile(np.array(["Alteration", "L", "R"]), len(participant_ids))).alias("BIGRAM_HAND"),
    # Every participant id repeated three times in a row, once per category.
    pl.Series(np.repeat(np.array(participant_ids), 3)).alias("PARTICIPANT_ID").cast(int)
])

### Difference between hand bigrams for all participants

In [None]:
# Distribution of the IKI column over all participants, one curve per bigram category.
fig = ff.create_distplot(
    [
        hands_df.filter(pl.col("BIGRAM_HAND") == hand_code)["IKI"].view().tolist()
        for hand_code in ("R", "L", "Alteration")
    ],
    group_labels=["Right", "Left", "Hand-alteration"],
    show_rug=False,
    bin_size=10,
)

fig.update_layout(
    template="none",
    width=1600,
    height=1000,
    xaxis={"dtick": 50},
    font={"size": 18},
    legend={
        "yanchor": "top",
        "y": 0.99,
        "xanchor": "left",
        "x": 0.8,
        "font": {"size": 30},
    },
    xaxis_title="Inter-Key Interval Time",
    yaxis_title="Density",
)
fig.update_xaxes(showgrid=True, range=[-220, 600])
fig.show()

In [None]:
# Attach typing speed and typing-course information to every per-hand row.
# NOTE(review): this cell rebinds hands_df, so running it twice joins the metadata twice.
hands_df = hands_df.join(
    big_metadata.select(["PARTICIPANT_ID", "SPEED", "HAS_TAKEN_TYPING_COURSE"]),
    on="PARTICIPANT_ID",
    how="inner",
)
hands_df.head()

In [None]:
# Split by typing-course flag: 1 -> trained, 0 -> untrained.
trained_hands_df, untrained_hands_df = (
    hands_df.filter(pl.col("HAS_TAKEN_TYPING_COURSE") == course_flag)
    for course_flag in (1, 0)
)

# Split by the SPEED label.
fast_hands_df, slow_hands_df = (
    hands_df.filter(pl.col("SPEED") == speed_label)
    for speed_label in ("fast", "slow")
)

### Bigram hands for Trained Participants

In [None]:
# Distribution of the IKI column for trained participants, one curve per bigram category.
fig = ff.create_distplot(
    [
        trained_hands_df.filter(pl.col("BIGRAM_HAND") == hand_code)["IKI"].view().tolist()
        for hand_code in ("R", "L", "Alteration")
    ],
    group_labels=["Right", "Left", "Hand-alteration"],
    show_rug=False,
    bin_size=10,
)

fig.update_layout(
    template="none",
    width=1600,
    height=1000,
    xaxis={"dtick": 50},
    font={"size": 18},
    legend={
        "yanchor": "top",
        "y": 0.99,
        "xanchor": "left",
        "x": 0.8,
        "font": {"size": 30},
    },
    xaxis_title="Inter-Key Interval Time",
    yaxis_title="Density",
)
fig.update_xaxes(showgrid=True, range=[-220, 600])
fig.show()

### Bigram hands for Untrained Participants

In [None]:
# Distribution of the IKI column for untrained participants, one curve per bigram category.
fig = ff.create_distplot(
    [
        untrained_hands_df.filter(pl.col("BIGRAM_HAND") == hand_code)["IKI"].view().tolist()
        for hand_code in ("R", "L", "Alteration")
    ],
    group_labels=["Right", "Left", "Hand-alteration"],
    show_rug=False,
    bin_size=10,
)

fig.update_layout(
    template="none",
    width=1600,
    height=1000,
    xaxis={"dtick": 50},
    font={"size": 18},
    legend={
        "yanchor": "top",
        "y": 0.99,
        "xanchor": "left",
        "x": 0.8,
        "font": {"size": 30},
    },
    xaxis_title="Inter-Key Interval Time",
    yaxis_title="Density",
)
fig.update_xaxes(showgrid=True, range=[-220, 600])
fig.show()

### Bigram hands for Fast Participants

In [None]:
# Distribution of the IKI column for fast participants, one curve per bigram category.
fig = ff.create_distplot(
    [
        fast_hands_df.filter(pl.col("BIGRAM_HAND") == hand_code)["IKI"].view().tolist()
        for hand_code in ("R", "L", "Alteration")
    ],
    group_labels=["Right", "Left", "Hand-alteration"],
    show_rug=False,
    bin_size=10,
)

fig.update_layout(
    template="none",
    width=1600,
    height=1000,
    xaxis={"dtick": 50},
    font={"size": 18},
    legend={
        "yanchor": "top",
        "y": 0.99,
        "xanchor": "left",
        "x": 0.8,
        "font": {"size": 30},
    },
    xaxis_title="Inter-Key Interval Time",
    yaxis_title="Density",
)
fig.update_xaxes(showgrid=True, range=[-220, 600])
fig.show()

### Bigram hands for Slow Participants

In [None]:
# Distribution of the IKI column for slow participants, one curve per bigram category.
fig = ff.create_distplot(
    [
        slow_hands_df.filter(pl.col("BIGRAM_HAND") == hand_code)["IKI"].view().tolist()
        for hand_code in ("R", "L", "Alteration")
    ],
    group_labels=["Right", "Left", "Hand-alteration"],
    show_rug=False,
    bin_size=10,
)

fig.update_layout(
    template="none",
    width=1600,
    height=1000,
    xaxis={"dtick": 50},
    font={"size": 18},
    legend={
        "yanchor": "top",
        "y": 0.99,
        "xanchor": "left",
        "x": 0.8,
        "font": {"size": 30},
    },
    xaxis_title="Inter-Key Interval Time",
    yaxis_title="Density",
)
fig.update_xaxes(showgrid=True, range=[-220, 600])
fig.show()

In [None]:
# Column layout of the summary table that stats() appends to: one label
# column followed by five float statistics.
schema = dict(
    Name=str,
    Median=float,
    Mean=float,
    Std=float,
    Kurtosis=float,
    Skewness=float,
)
# Start with an empty, correctly typed frame.
statistics_df = pl.DataFrame(schema=schema)

In [None]:
def stats(df, name):
    """Append one row of summary statistics to the global ``statistics_df``.

    Parameters
    ----------
    df
        Column of values to summarise; it must provide ``median``, ``mean``,
        ``std``, ``kurtosis`` and ``skew`` (callers pass a polars Series).
    name
        Label stored in the ``Name`` column of the new row.

    Returns
    -------
    None
        The result is recorded by rebinding the module-level ``statistics_df``.
    """
    global statistics_df, schema
    # Build the row by column name so every value lands in the column it
    # belongs to. The previous positional packing listed skew before kurtosis
    # while the schema lists Kurtosis before Skewness, which swapped the two
    # columns; it also pushed every number through a numpy string array
    # because the name was mixed into the same array.
    row = {
        "Name": [name],
        "Median": [df.median()],
        "Mean": [df.mean()],
        "Std": [df.std()],
        "Kurtosis": [df.kurtosis()],
        "Skewness": [df.skew()],
    }
    statistics_df = statistics_df.extend(pl.DataFrame(row, schema=schema))

### Right hand statistics

In [None]:
# Right-hand rows: overall, then split by typing course and by speed label.
right_hand_df = hands_df.filter(pl.col("BIGRAM_HAND") == "R")
right_hand_subsets = [
    ("right-hand", "right-hand-r", right_hand_df),
    ("right-hand-trained", "right-hand-r-trained", right_hand_df.filter(pl.col("HAS_TAKEN_TYPING_COURSE") == 1)),
    ("right-hand-untrained", "right-hand-r-untrained", right_hand_df.filter(pl.col("HAS_TAKEN_TYPING_COURSE") == 0)),
    ("right-hand-fast", "right-hand-r-fast", right_hand_df.filter(pl.col("SPEED") == "fast")),
    ("right-hand-slow", "right-hand-r-slow", right_hand_df.filter(pl.col("SPEED") == "slow")),
]

# First all IKI rows, then all letter-repetition rows, so the table keeps its row order.
for iki_name, _, subset in right_hand_subsets:
    stats(subset["IKI"], iki_name)

# NOTE(review): missing repetition values are replaced by the subset mean before the
# statistics are computed, which narrows the spread — confirm this is intended.
for _, repetition_name, subset in right_hand_subsets:
    stats(subset["IKI_REPETITION"].fill_nan(None).fill_null(strategy="mean"), repetition_name)

statistics_df

### Left hand statistics

In [None]:
# Left-hand rows: overall, then split by typing course and by speed label.
left_hand_df = hands_df.filter(pl.col("BIGRAM_HAND") == "L")
left_hand_subsets = [
    ("left-hand", "left-hand-r", left_hand_df),
    ("left-hand-trained", "left-hand-r-trained", left_hand_df.filter(pl.col("HAS_TAKEN_TYPING_COURSE") == 1)),
    ("left-hand-untrained", "left-hand-r-untrained", left_hand_df.filter(pl.col("HAS_TAKEN_TYPING_COURSE") == 0)),
    ("left-hand-fast", "left-hand-r-fast", left_hand_df.filter(pl.col("SPEED") == "fast")),
    ("left-hand-slow", "left-hand-r-slow", left_hand_df.filter(pl.col("SPEED") == "slow")),
]

# First all IKI rows, then all letter-repetition rows, so the table keeps its row order.
for iki_name, _, subset in left_hand_subsets:
    stats(subset["IKI"], iki_name)

# NOTE(review): missing repetition values are replaced by the subset mean before the
# statistics are computed, which narrows the spread — confirm this is intended.
for _, repetition_name, subset in left_hand_subsets:
    stats(subset["IKI_REPETITION"].fill_nan(None).fill_null(strategy="mean"), repetition_name)

statistics_df

### Hand-alternation statistics

In [None]:
# Rows labelled "Alteration": overall, then split by typing course and by speed label.
# Only the IKI column is summarised in this cell.
hand_alt_df = hands_df.filter(pl.col("BIGRAM_HAND") == "Alteration")
hand_alt_subsets = [
    ("hand-alt", hand_alt_df),
    ("hand-alt-trained", hand_alt_df.filter(pl.col("HAS_TAKEN_TYPING_COURSE") == 1)),
    ("hand-alt-untrained", hand_alt_df.filter(pl.col("HAS_TAKEN_TYPING_COURSE") == 0)),
    ("hand-alt-fast", hand_alt_df.filter(pl.col("SPEED") == "fast")),
    ("hand-alt-slow", hand_alt_df.filter(pl.col("SPEED") == "slow")),
]
for iki_name, subset in hand_alt_subsets:
    stats(subset["IKI"], iki_name)
statistics_df