In [None]:
from pathlib import Path
import pandas as pd
import altair as alt
from altair import datum

import ideafast_deviceselection as ifds

# The parsed keyboard exports are looked up two directory levels above the
# installed package's __file__, under local/keyboard/parsed.
source = Path(ifds.__file__).parent.parent.absolute().joinpath('local/keyboard/parsed')

# `raw` carries a `quality` column (used in the overview below); `df` is the
# export named "only_valid" -- presumably the quality-filtered subset, confirm
# against the parsing step.
raw = pd.read_csv(source.joinpath('implicit_raw.csv'))
df = pd.read_csv(source.joinpath('implicit_only_valid.csv'))

# Show which columns are available in the valid-only frame.
df.columns.tolist()
    

## Overviews of the data

In [None]:
# Number of recorded rows per participant, broken down by quality label.

# Count the non-null `day` entries in every (participant, quality) group and
# expose them as a single `count` column.
quality_by_patient = (
    raw.groupby(['participant', 'quality'])['day']
    .count()
    .to_frame('count')
)

# Legend-bound selection: clicking a legend entry filters the bars to that
# quality value.
selection = alt.selection_multi(bind='legend', fields=['quality'])

alt.Chart(quality_by_patient.reset_index()).mark_bar().encode(
    alt.X('participant:N'),
    alt.Y('count'),
    alt.Color('quality'),
).properties(
    title='Total recorded implicit datapoints'
).add_selection(selection).transform_filter(selection)

In [None]:
# VALID metrics per participant per day

# Sum only the numeric columns. `df` is read straight from CSV, so columns such
# as `timestamp` are strings; on pandas >= 2.0 a bare .sum() would concatenate
# them (or raise TypeError) and the bloated frame would then be embedded in the
# chart. numeric_only=True reproduces what older pandas did implicitly.
summed_metrics_by_day = df.groupby(['participant', 'day_relative']).sum(numeric_only=True)

# Legend-bound selection: clicking a participant in the legend isolates its line.
selection = alt.selection_multi(fields=['participant'], bind='legend')

alt.Chart(summed_metrics_by_day.reset_index()).mark_line().encode(
    # Fixed 0-13 domain with 14 ticks, i.e. one tick per relative study day.
    alt.X('day_relative:Q', axis=alt.Axis(title='Days into the study', tickCount=14), scale=alt.Scale(domain=(0, 13))),
    alt.Y('action-count:Q', axis=alt.Axis(title='Total recorded actions')),
    color='participant:N',
).add_selection(
    selection
).transform_filter(
    selection
).properties(
    width = 800
)

In [None]:
# Overview of error rate and typing speed over time, one chart per participant
subdf = df[['participant','timestamp','total-error-rate','words-per-minute']]

print("Double click to reset zoom level.")

charts = []

# Iterate over the participant ids that actually occur in the data instead of a
# hard-coded range(1, 21): an id without valid rows would yield an empty frame
# whose minimum timestamp is NaN, producing an invalid scale domain.
for p in sorted(subdf['participant'].dropna().unique()):

    # Long format: one row per (timestamp, metric) so both metrics share a chart.
    data = (
        subdf[subdf['participant'] == p]
        .drop(columns=['participant'])
        .melt('timestamp', var_name='metric', value_name='value')
    )

    # Every chart shows a 14-day window starting at the participant's first
    # recorded timestamp (presumably the study length -- confirm).
    first_seen = data['timestamp'].min()
    window = [first_seen, pd.to_datetime(first_seen) + pd.Timedelta("14 days")]

    base = alt.Chart(data).mark_line(opacity=.7).encode(
        alt.X('timestamp:T', scale=alt.Scale(domain=window)),
        y='value:Q',
        color='metric:N'
    ).properties(
        width=800,
        height=100,
        title= f'Participant {p}'
    ).interactive(bind_y=False)  # zoom/pan along the time axis only

    charts.append(base)

alt.vconcat(*charts)


## Overall Metrics

Overall, we want to report on the following metrics - at least for the Device Selection report for both WP3 and WP4:

### Implicit/Passive text-entry behaviour collection
These metrics are based on data combined from both studies (FCID and UNEW)
- Mean, SD, min-max of implicit text-entry sessions per day
- Intra-/Individual variation on average implicit sessions per day per participant (i.e., consistency)
- Mean, SD, min-max duration in between implicit text-entry sessions (i.e., consistency) [optional, @André what do you think?]
    
### Explicit text-entry behaviour collection (composition and transcription tasks)
These metrics are split per-study (due to differences in setup)
- composition compliance: % of at least one response for a composition task
- transcription compliance: % of at least one response for a transcription task

### Correlations between fatigue measurement and any calculated metrics
The below repeated measure correlation is mimicked from [WP4's approach to correlating with SMA data](https://github.com/ideafast/ideafast-pipeline/blob/pipe-dev-Rana/src/Functions_RepeatedMeasureCorrelation.py).
- 


In [None]:
# 