In [1]:
import pandas as pd
from os import walk
from copy import copy
import math, pywt, numpy as np
#%matplotlib widget

pd.set_option('display.max_rows', 1000)

In [2]:
# data can be found smb://nas-weber01.unisg.ch/data/Nassy/03_Online_Model/data
_, _, file_names = next(walk('./split'))

files = [(name.split('_')[0], name) for name in file_names]

files[0:20]

[('005', '005_t1.csv'),
 ('002', '002_t6.csv'),
 ('002', '002_t4.csv'),
 ('002', '002_t2.csv'),
 ('005', '005_t5.csv'),
 ('006', '006_t1.csv'),
 ('001', '001_t4.csv'),
 ('007', '007_t2.csv'),
 ('004', '004_t6.csv'),
 ('006', '006_t4.csv'),
 ('001', '001_t2.csv'),
 ('003', '003_t8.csv'),
 ('002', '002_t1.csv'),
 ('001', '001_t6.csv'),
 ('007', '007_t7.csv'),
 ('005', '005_t6.csv'),
 ('007', '007_t5.csv'),
 ('008', '008_t8.csv'),
 ('003', '003_t1.csv'),
 ('008', '008_t5.csv')]

In [3]:
tasks = [
    ('t1', 't1_gsr', 't1_eclipse (drawing_a_1.java)'),
    ('t2', 't2_gsr', 't2_eclipse (drawing_a_2.java)'),
    ('t3', 't3_gsr', 't3_eclipse (drawing_b_1.java)'),
    ('t4', 't4_gsr', 't4_eclipse (drawing_b_2.java)'),
    ('t5', 't5_gsr', 't5_eclipse (drawing_b_3.java)'),
    ('t6', 't6_gsr', 't6_eclipse (drawing_b_6.java)'),
    ('t7', 't7_gsr', 't7_eclipse (drawing_b_9.java)'),
    ('t8', 't8_gsr', 't8_eclipse (drawing_b_11.java)'),
]

tasks

[('t1', 't1_gsr', 't1_eclipse (drawing_a_1.java)'),
 ('t2', 't2_gsr', 't2_eclipse (drawing_a_2.java)'),
 ('t3', 't3_gsr', 't3_eclipse (drawing_b_1.java)'),
 ('t4', 't4_gsr', 't4_eclipse (drawing_b_2.java)'),
 ('t5', 't5_gsr', 't5_eclipse (drawing_b_3.java)'),
 ('t6', 't6_gsr', 't6_eclipse (drawing_b_6.java)'),
 ('t7', 't7_gsr', 't7_eclipse (drawing_b_9.java)'),
 ('t8', 't8_gsr', 't8_eclipse (drawing_b_11.java)')]

In [4]:
import json
import requests
import random, string
import pandas as pd
import io
import time

DEFAULT_SETTINGS = {
    "authorisation": {
        'username': 'admin@dcap.local',
        'password': 'admin'
    },
    "url": 'http://localhost:8989'
}

DEFAULT_FILTERS = [
    {
        'name': "SubstitutePupilFilter",
        'actualParameters': {
            'left_pupil': 'ET_PupilLeft',
            'right_pupil': 'ET_PupilRight',
            'timestampcolumn': 'Timestamp',
        },
        'columns': {
            'left_pupil': 'ET_PupilLeft',
            'right_pupil': 'ET_PupilRight',
        }

    },
    {
        'name': "SubstituteGazePointFilter",
        'actualParameters': {
            'leftPupilGazeXName': 'ET_GazeLeftx',
            'leftPupilGazeYName': 'ET_GazeLefty',
            'rightPupilGazeXName': 'ET_GazeRightx',
            'rightPupilGazeYName': 'ET_GazeRighty',
            'timestampcolumn': 'Timestamp',
        },
        'columns': {}

    },
    {
        'name': "ButterworthFilter",
        'actualParameters': {
            'hertz': 4,
            'sampleRate': 300,
            'timestampcolumn': 'Timestamp',
        },
        'columns': {
            'left_pupil': 'ET_PupilLeft',
            'right_pupil': 'ET_PupilRight',
        },
        'decimalSeparator': '.'
    },
    {
        'name': "LinearInterpolationFilter",
        'actualParameters': {
            'left_pupil': 'ET_PupilLeft',
            'right_pupil': 'ET_PupilRight',
            'timestampcolumn': 'Timestamp',
        },
        'columns': {
            'left_pupil': 'ET_PupilLeft',
            'right_pupil': 'ET_PupilRight',
        }
    }
]
"""
Broken Filter
{
    'name': "BlinkDetectionFilter",
    'actualParameters': {
        'leftPupilGazeXColumnName': 'ET_GazeLeftx',
        'leftPupilGazeYColumnName': 'ET_GazeLefty',
        'rightPupilGazeXColumnName': 'ET_GazeRightx',
        'rightPupilGazeYColumnName': 'ET_GazeRighty',
        'blinkDetectionTimeThreashold': 60,
        'left_pupil': 'ET_PupilLeft',
        'right_pupil': 'ET_PupilRight',
        'timestampcolumn': 'Timestamp',
    },
    'columns': {}
},
"""

def randomword(length):
   letters = string.ascii_lowercase
   return ''.join(random.choice(letters) for i in range(length))

def clean(df, settings=DEFAULT_SETTINGS, filters=DEFAULT_FILTERS):
    with requests.Session() as s:
        # login
        r = s.post(
            url=settings.get('url') + "/cheetah/login", 
            data=settings.get('authorisation')
        )
        # create study
        try:
            data = json.loads(s.post(
                url=settings.get('url') + "/cheetah/api/user/study",
                headers={'content-type': 'application/json'},
                data=json.dumps({
                    'comment': '',
                    'name': randomword(20)
                })
            ).content.decode('UTF-8'))
            study_id = data['resBody']["id"]
        except Exception as e:
            print('Could not create study')
            raise e


        # create subject
        try:
            data = json.loads(s.post(
                url=settings.get('url') + "/cheetah/api/user/addsubject", 
                headers={'content-type': 'application/json'},
                data=json.dumps({
                    'comment': '',
                    'email': f'{randomword(20)}@dcap.local',
                    'id': 0,
                    'studyId': study_id,
                    'studyName': randomword(20),
                    'subject_id': randomword(20),
                    'synchronized_from': 0,
                })
            ).content.decode('UTF-8'))
            subject_id = data['resBody']["id"]
        except Exception as e:
            print('Could not create subject')
            raise e

        # upload data
        try:
            stream = io.StringIO()
            df.to_csv(stream, sep='\t', index=False)

            data = json.loads(s.post(
                url=settings.get('url') + "/cheetah/api/user/uploadFile",
                files=[('files', ('file.tsv', stream.getvalue(), 'application/octet-stream'))],
                data={
                    'subjectIds': [subject_id]
                }
            ).content.decode('UTF-8'))
            file_id = data['resBody']['easyUserDataListSuccessfultMapped'][0]["id"]
        except Exception as e:
            print('Could not upload data')
            raise e

        # filter data
        try:
            # request filter
            data = json.loads(s.post(
                url=settings.get('url') + "/cheetah/api/user/filterrequest", 
                headers={'content-type': 'application/json'},
                data=json.dumps({
                    "files": [file_id],
                    "filters": filters
                })
            ).content.decode('UTF-8'))
            task_id = data['resBody']

            # wait for task to finish
            while True:
                data = json.loads(s.get(
                    url=settings.get('url') + "/cheetah/api/user/taskFinished/" + task_id, 
                    headers={'content-type': 'application/json'},
                ).content.decode('UTF-8'))
                if data['resBody'] == True:
                    break
                time.sleep(0.1)

            # get filtered file id
            data = json.loads(s.get(
                url=settings.get('url') + "/cheetah/api/user/taskid/" + task_id, 
            ).content.decode('UTF-8'))
            filtered_file_id = data['resBody']["id"]

            # download filtered data
            data = s.get(
                url=settings.get('url') + "/cheetah/api/user/download/" + str(filtered_file_id), 
            ).content.decode("utf-8")

        except Exception as e:
            print('Could not filter data')
            raise e

        # cleanup
        # TODO: also delete study and subject
        s.delete(
            url=settings.get('url') + "/cheetah/api/user/file/"+str(file_id)
        )
        s.delete(
            url=settings.get('url') + "/cheetah/api/user/file/"+str(filtered_file_id)
        )

        return pd.read_csv(io.StringIO(data), sep='\t')

In [5]:

def modmax(d):
    m = [0.0]*len(d)
    for i in iter(range(len(d))):
        m[i] = math.fabs(d[i])

        t = [0.0]*len(d)
        for i in iter(range(len(d))):
            ll = m[i-1] if i >= 1 else m[i]
            oo = m[i]
            rr = m[i+1] if i < len(d) - 2 else m[i]

            if (ll <= oo and oo >= rr) and (ll < oo or oo > rr):
                t[i] = math.sqrt(d[i]**2)
            else:
                t[i] = 0.0

    return t

def lhipa(d):
    w = pywt.Wavelet('sym16')
    maxlevel = pywt.dwt_max_level(len(d), filter_len=w.dec_len)

    hif, lof = 1, int(maxlevel/2)

    if hif <=0 or lof <=0:
        return 0.0

    cD_H = pywt.downcoef('d', d, 'sym16', 'per', level=hif)
    cD_L = pywt.downcoef('d', d, 'sym16', 'per', level=lof)

    cD_H[:] = [x/math.sqrt(2**hif) for x in cD_H]
    cD_L[:] = [x/math.sqrt(2**lof) for x in cD_L]

    cD_LH = cD_L
    for i in range(len(cD_L)):
        cD_LH[i] = cD_L[i] / cD_H[int(((2**lof)/(2**hif))*i)]

    cD_LHm = modmax(cD_LH)

    luniv = np.std(cD_LHm) * math.sqrt(2.0*np.log2(len(cD_LHm)))
    cD_LHt = pywt.threshold(cD_LHm, luniv, mode="less")

    tt = 10#d[-1].timestamp() - d[0].timestamp()

    ctr = 0
    for i in iter(range(len(cD_LHt))):
        if math.fabs(cD_LHt[i]) > 0: ctr += 1

    return float(ctr)/tt

In [6]:
def extract_features(df):
    df_min, df_max, df_mean, df_median, df_std = df['ET_PubilAvg'].agg([pd.np.min, pd.np.max, pd.np.mean, pd.np.median, pd.np.std])
    df['rolling_mean'] = df['ET_PubilAvg'].rolling('8s', min_periods=1).mean()
    df_resampled = df[~df.index.duplicated(keep='first')].resample('1ms').pad()
    df_resampled['rolling_mean'] = df_resampled['rolling_mean'].shift(periods=-4000)
    df_resampled['phasic'] = df_resampled['ET_PubilAvg'] - df_resampled['rolling_mean']
    df_resampled['rolling_phasic'] = df_resampled['phasic'].rolling('1s', min_periods=1).mean().shift(periods=-500)
    df_resampled['peaks'] = df_resampled['rolling_phasic'][(df_resampled['rolling_phasic'].shift(1) < df_resampled['rolling_phasic']) & (df_resampled['rolling_phasic'].shift(-1) < df_resampled['rolling_phasic']) & (df_resampled['rolling_phasic'] > 0.1)]
    peak_count = df_resampled['peaks'].dropna().count()

    duration = (df_resampled.index[-1] - df_resampled.index[0]).seconds / 60
    peak_count_pm = peak_count / duration

    return (df_min, df_max, df_mean, df_median, df_std, peak_count, peak_count_pm, lhipa(df['ET_PubilAvg'].dropna().to_numpy()))

In [7]:
files[52:]

[('008', '008_t1.csv'),
 ('002', '002_t8.csv'),
 ('003', '003_t3.csv'),
 ('005', '005_t7.csv'),
 ('002', '002_t5.csv'),
 ('008', '008_t7.csv'),
 ('001', '001_t5.csv'),
 ('008', '008_t2.csv'),
 ('005', '005_t3.csv'),
 ('003', '003_t5.csv'),
 ('001', '001_t8.csv'),
 ('003', '003_t2.csv')]

In [8]:
for file_name in files[52:53]:
    print(file_name)
    df = pd.read_csv(f'./split/{file_name[1]}', comment='#')
    #df = df.set_index(pd.TimedeltaIndex(df['timestamp'].values))
    df['ET_PupilRight'] = df['ET_PupilRight'].apply(lambda x: x if x > 0 else np.nan)
    df['ET_PupilLeft'] = df['ET_PupilLeft'].apply(lambda x: x if x > 0 else np.nan)
    df['ET_PubilAvg'] = df.apply(lambda x: (x['ET_PupilLeft'] + x['ET_PupilRight']) / 2, axis=1)
    df['Timestamp'] = df['Timestamp'].astype('int64')
    df['Timestamp'] = df.apply(lambda x: int(x['Timestamp'] * 1000), axis=1).astype('int64')
    baseline = clean(copy(df[df['type'] == 'B']))
    baseline = baseline.set_index(pd.to_datetime(baseline['Timestamp'].values))
    b_min, b_max, b_mean, b_median, b_std, b_peak_count, b_peak_count_pm, b_lhipa = extract_features(baseline)

    experiment = clean(copy(df[df['type'] == 'M']))
    experiment = experiment.set_index(pd.to_datetime(experiment['Timestamp'].values))
    e_min, e_max, e_mean, e_median, e_std, e_peak_count, e_peak_count_pm, e_lhipa = extract_features(experiment)

    d_mean = b_mean - e_mean
    d_median = b_median - e_median
    d_std = b_std - e_std

    f = open(f'dataset/{file_name[1]}', "a")
    f.write(f'task,subject,d_mean,d_median,d_std,e_min,e_max,e_mean,e_median,e_std,e_peak_count,e_peak_count_pm,b_min,b_max,b_mean,b_median,b_std,b_peak_count,b_peak_count_pm,e_lhipa,b_lhipa\n')
    f.write(f'{file_name[1].split("_")[1].split(".")[0]},{file_name[0]},{d_mean},{d_median},{d_std},{e_min},{e_max},{e_mean},{e_median},{e_std},{e_peak_count},{e_peak_count_pm},{b_min},{b_max},{b_mean},{b_median},{b_std},{b_peak_count},{b_peak_count_pm},{e_lhipa},{b_lhipa}')
    f.close()

    


('008', '008_t1.csv')


  interactivity = "none" if silent else self.ast_node_interactivity
  df_min, df_max, df_mean, df_median, df_std = df['ET_PubilAvg'].agg([pd.np.min, pd.np.max, pd.np.mean, pd.np.median, pd.np.std])
  df_min, df_max, df_mean, df_median, df_std = df['ET_PubilAvg'].agg([pd.np.min, pd.np.max, pd.np.mean, pd.np.median, pd.np.std])
  peak_count_pm = peak_count / duration
