In [None]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.offline import init_notebook_mode
from plotly.subplots import make_subplots

import warnings

import polars as pl

from helpers import *

from IPython.core.display import display, HTML

# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Inject a CSS rule so elements with the `container` class use 95% of the width.
page_width_css = HTML("<style>.container { width:95% !important; }</style>")
display(page_width_css)

In [None]:
# Read the preprocessed metadata CSV into a polars DataFrame and preview
# its first rows.
metadata_csv_path = "data/preprocessed_big_metadata.csv"
big_metadata = pl.read_csv(metadata_csv_path)
big_metadata.head()

**Things to do:**
- FINGERS vs WPM
- ROR vs WPM
- KSPC vs WPM
- FINGERS vs ERROR_RATE
- ROR vs ERROR_RATE
- KSPC vs ERROR_RATE

**High-Performers:**
- Compare IKIs (inter-key intervals) with other people
- Compare RORs (rollover ratios) with other people
- Compare Keypresses with other people

In [None]:
# Peek at the 20th-percentile error rate and the 75th-percentile WPM.
error_rate_q20 = big_metadata["ERROR_RATE"].quantile(0.2)
wpm_q75 = big_metadata["AVG_WPM_15"].quantile(0.75)
error_rate_q20, wpm_q75

In [None]:
# Rows that are both fast and accurate, using hard-coded cut-offs.
# NOTE(review): 64 and 0.24 look like rounded quantile values -- confirm.
is_fast = pl.col("AVG_WPM_15") >= 64
is_accurate = pl.col("ERROR_RATE") <= 0.24
big_metadata.filter(is_fast & is_accurate)

### WPM vs Fingers

In [None]:
# Drop the "10+" finger group, printing the frame shape before and after so
# the number of removed rows is visible.
shape_before = big_metadata.shape
print(shape_before)

not_ten_plus = pl.col("FINGERS") != "10+"
big_metadata = big_metadata.filter(not_ten_plus)

print(big_metadata.shape)

In [None]:
# Mean WPM per finger group, ordered from slowest to fastest group.
mean_wpm_expr = pl.col("AVG_WPM_15").mean()
wpm_fingers = (
    big_metadata
    .groupby("FINGERS")
    .agg(mean_wpm_expr)
    .sort(by="AVG_WPM_15")
)
wpm_fingers

In [None]:
# Mean WPM per finger group, drawn as bars with a line/marker overlay.
# Both traces read the same two columns of `wpm_fingers`.
finger_axis = wpm_fingers["FINGERS"]
mean_wpm_axis = wpm_fingers["AVG_WPM_15"]

trace_1 = go.Bar(x=finger_axis, y=mean_wpm_axis)
trace_2 = go.Scatter(
    x=finger_axis,
    y=mean_wpm_axis,
    line={"width": 5},
    marker={"size": 10},
)

fig = make_subplots()
fig.add_trace(trace_1)
fig.add_trace(trace_2)

bar_layout = {
    "template": "none",
    "width": 1600,
    "height": 1000,
    "font": {"size": 18},
    "showlegend": False,
    "xaxis_title": "Fingers",
    "yaxis_title": "Average WPM",
}
fig.update_layout(**bar_layout)

# Kept as the cell's final expression: there is no explicit fig.show() here.
fig.update_yaxes(range=[30, 60])


In [None]:
# Distribution of average WPM for each finger-count group.
# The label list drives both the row filter and the legend, so the two
# cannot drift apart.
finger_labels = ["1-2", "3-4", "5-6", "7-8", "9-10"]

fig = ff.create_distplot(
    [
        big_metadata.filter(pl.col("FINGERS") == label)["AVG_WPM_15"].view().tolist()
        for label in finger_labels
    ],
    group_labels=finger_labels,
    bin_size=2,
    show_rug=False,
)

fig.update_layout(
    template="none",
    width=1600,
    height=1000,
    xaxis=dict(dtick=10),
    font=dict(size=18),
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.8,
        font=dict(size=30),
    ),
    # Axis titles were missing on this figure only; the other distribution
    # plots in the notebook all label both axes.
    xaxis_title="Words per Minute",
    yaxis_title="Density",
)

fig.update_xaxes(showgrid=True, range=[0, 170])
fig.show()

### WPM vs ROR

In [None]:
# Display the metadata frame as it stands at this point in the notebook
# (the "10+" finger group has already been filtered out in an earlier cell).
big_metadata

In [None]:
# Rollover rate against WPM, with a red OLS trendline.
metadata_pd = big_metadata.to_pandas()
fig = px.scatter(
    metadata_pd,
    x="AVG_WPM_15",
    y="ROR",
    trendline="ols",
    trendline_color_override="red",
)

ror_layout = {
    "template": "none",
    "width": 1500,
    "height": 900,
    "font": {"size": 18},
    "yaxis": {"dtick": 0.05},
    "xaxis": {"dtick": 10},
    "showlegend": False,
    "xaxis_title": "Words per Minute",
    "yaxis_title": "Rollover Rate",
}
fig.update_layout(**ror_layout)

# Pin the y axis to the fixed range 0-1.
fig.update_yaxes(range=[0, 1])

fig.show()

### WPM vs KSPC

In [None]:
# Keystrokes per character against WPM, with a red OLS trendline.
fig = px.scatter(
    big_metadata.to_pandas(),
    x="AVG_WPM_15",
    y="KSPC",
    trendline="ols",
    trendline_color_override="red",
)

fig.update_layout(
    template="none",
    width=1500,
    height=900,
    font=dict(size=18),
    xaxis=dict(dtick=10),
    yaxis=dict(dtick=0.25),
    showlegend=False,
    xaxis_title="Words per Minute",
    # Was `y="Keystrokes per Character"`: `y` is not a layout property, so
    # plotly raised ValueError and the figure was never shown.
    yaxis_title="Keystrokes per Character",
)

fig.show()

### Error Rate vs Fingers

In [None]:
# Distribution of error rate for each finger-count group.
finger_labels = ["1-2", "3-4", "5-6", "7-8", "9-10"]
error_rate_samples = [
    big_metadata.filter(pl.col("FINGERS") == label)["ERROR_RATE"].view().tolist()
    for label in finger_labels
]

fig = ff.create_distplot(
    error_rate_samples,
    group_labels=finger_labels,
    show_rug=False,
    bin_size=0.2,
)

legend_style = {
    "yanchor": "top",
    "y": 0.99,
    "xanchor": "left",
    "x": 0.8,
    "font": {"size": 30},
}
fig.update_layout(
    template="none",
    width=1600,
    height=1000,
    xaxis={"dtick": 0.5},
    font={"size": 18},
    legend=legend_style,
    xaxis_title="Error Rate",
    yaxis_title="Density",
)

# Show the x grid and limit the view to error rates between 0 and 8.
fig.update_xaxes(showgrid=True, range=[0, 8])
fig.show()

### Error Rate vs ROR

In [None]:
# Rollover rate against error rate. No trendline is drawn on this figure.
metadata_pd = big_metadata.to_pandas()
fig = px.scatter(metadata_pd, x="ERROR_RATE", y="ROR")

error_ror_layout = {
    "template": "none",
    "width": 1500,
    "height": 900,
    "font": {"size": 18},
    "yaxis": {"dtick": 0.05},
    "xaxis": {"dtick": 1},
    "showlegend": False,
    "xaxis_title": "Error Rate",
    "yaxis_title": "Rollover Rate",
}
fig.update_layout(**error_ror_layout)

# Fixed viewing window: ROR in [0, 1], error rate in [0, 25].
fig.update_yaxes(range=[0, 1])
fig.update_xaxes(range=[0, 25])

fig.show()

### Error Rate vs KSPC

In [None]:
# Keystrokes per character against error rate.
metadata_pd = big_metadata.to_pandas()
fig = px.scatter(metadata_pd, x="ERROR_RATE", y="KSPC")

error_kspc_layout = {
    "template": "none",
    "width": 1500,
    "height": 900,
    "font": {"size": 18},
    "xaxis": {"dtick": 1},
    "yaxis": {"dtick": 0.25},
    "showlegend": False,
    "xaxis_title": "Error Rate",
    "yaxis_title": "Keystrokes per Character",
}
fig.update_layout(**error_kspc_layout)

# Fixed viewing window: KSPC in [1, 5], error rate in [0, 25].
fig.update_yaxes(range=[1, 5])
fig.update_xaxes(range=[0, 25])

fig.show()

### High-performers

In [None]:
# High-performer cut-offs: 75th-percentile WPM and 25th-percentile error rate.
wpm_column = big_metadata["AVG_WPM_15"]
error_rate_column = big_metadata["ERROR_RATE"]

wpm_hp = wpm_column.quantile(0.75)
error_hp = error_rate_column.quantile(0.25)

(wpm_hp, error_hp)

In [None]:
# Keep every row and add a 0/1 HIGH_PERFORMER column instead of filtering:
# 1 when WPM is at or above `wpm_hp` AND error rate is at or below `error_hp`.
meets_wpm = pl.col("AVG_WPM_15") >= wpm_hp
meets_error_rate = pl.col("ERROR_RATE") <= error_hp
high_performer_flag = (
    pl.when(meets_wpm & meets_error_rate)
    .then(1)
    .otherwise(0)
    .alias("HIGH_PERFORMER")
)

big_metadata_1 = big_metadata.select([pl.col("*"), high_performer_flag])

# Show how many rows fall in each group.
big_metadata_1["HIGH_PERFORMER"].value_counts()

### Compare IKIs with other people

In [None]:
# Average inter-key interval: high performers ("Yes") against everyone else ("No").
performer_flags = {"No": 0, "Yes": 1}
iki_samples = [
    big_metadata_1.filter(pl.col("HIGH_PERFORMER") == flag)["AVG_IKI"].view().tolist()
    for flag in performer_flags.values()
]

fig = ff.create_distplot(
    iki_samples,
    group_labels=list(performer_flags),
    show_rug=False,
)

legend_style = {
    "yanchor": "top",
    "y": 0.99,
    "xanchor": "left",
    "x": 0.8,
    "font": {"size": 30},
}
fig.update_layout(
    template="none",
    width=1600,
    height=1000,
    xaxis={"dtick": 50},
    font={"size": 18},
    legend=legend_style,
    xaxis_title="Inter-Key Interval Time",
    yaxis_title="Density",
)

# Show the x grid and limit the view to the 0-600 range.
fig.update_xaxes(showgrid=True, range=[0, 600])
fig.show()

### Compare RORs with other people

In [None]:
# Rollover ratio: high performers ("Yes") against everyone else ("No").
performer_flags = {"No": 0, "Yes": 1}
ror_samples = [
    big_metadata_1.filter(pl.col("HIGH_PERFORMER") == flag)["ROR"].view().tolist()
    for flag in performer_flags.values()
]

fig = ff.create_distplot(
    ror_samples,
    group_labels=list(performer_flags),
    show_rug=False,
    bin_size=0.01,
)

legend_style = {
    "yanchor": "top",
    "y": 0.99,
    "xanchor": "left",
    "x": 0.8,
    "font": {"size": 30},
}
fig.update_layout(
    template="none",
    width=1600,
    height=1000,
    xaxis={"dtick": 0.05},
    font={"size": 18},
    legend=legend_style,
    xaxis_title="Rollover Ratio",
    yaxis_title="Density",
)

# Show the x grid and limit the view to the 0-1 range.
fig.update_xaxes(showgrid=True, range=[0, 1])
fig.show()

### Compare Keypresses with other people

In [None]:
# Average keypress time: high performers ("Yes") against everyone else ("No").
performer_flags = {"No": 0, "Yes": 1}
keypress_samples = [
    big_metadata_1.filter(pl.col("HIGH_PERFORMER") == flag)["AVG_KEYPRESS"].view().tolist()
    for flag in performer_flags.values()
]

fig = ff.create_distplot(
    keypress_samples,
    group_labels=list(performer_flags),
    show_rug=False,
)

legend_style = {
    "yanchor": "top",
    "y": 0.99,
    "xanchor": "left",
    "x": 0.8,
    "font": {"size": 30},
}
fig.update_layout(
    template="none",
    width=1600,
    height=1000,
    xaxis={"dtick": 25},
    font={"size": 18},
    legend=legend_style,
    xaxis_title="Average Keypress Time",
    yaxis_title="Density",
)

# Show the x grid and limit the view to the 0-300 range.
fig.update_xaxes(showgrid=True, range=[0, 300])
fig.show()