# Generating .pkl files that contain only the dragging tasks

In [1]:
import os
import ast

import numpy as np
import pandas as pd
from pandarallel import pandarallel
# Initialization
pandarallel.initialize()

import matplotlib.pyplot as plt

import scipy
from scipy import interpolate, stats

%run py/constants.py

INFO: Pandarallel will run on 96 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [2]:
def sqrt_sum(tileX,tileY,targetX,targetY):
    """Return the Euclidean distance between (tileX, tileY) and (targetX, targetY).

    The two points are stacked into arrays and reduced along axis 0, so
    equally shaped array arguments yield one distance per element.
    """
    delta = np.array([tileX, tileY]) - np.array([targetX, targetY])
    squared_norm = np.sum(delta * delta, axis=0)
    return np.sqrt(squared_norm)

In [3]:
def getFirstTime(timestamp):
    """Return the first entry of a timestamp array, or the value itself.

    Parameters
    ----------
    timestamp : np.ndarray or scalar
        Either an array of timestamps or a single timestamp.

    Returns
    -------
    The element at index 0 for arrays with at least one dimension, the
    contained scalar for 0-d arrays, and ``timestamp`` unchanged for
    anything that is not an ndarray.

    Raises
    ------
    IndexError
        If ``timestamp`` is an empty array.
    """
    # isinstance also accepts ndarray subclasses (e.g. masked arrays).
    if isinstance(timestamp, np.ndarray):
        # A 0-d array holds exactly one value and cannot be indexed with [0].
        if timestamp.ndim == 0:
            return timestamp[()]
        return timestamp[0]
    return timestamp

In [4]:
def getLastTime(timestamp):
    """Return the last entry of a timestamp array, or the value itself.

    Parameters
    ----------
    timestamp : np.ndarray or scalar
        Either an array of timestamps or a single timestamp.

    Returns
    -------
    The element at index -1 for arrays with at least one dimension, the
    contained scalar for 0-d arrays, and ``timestamp`` unchanged for
    anything that is not an ndarray.

    Raises
    ------
    IndexError
        If ``timestamp`` is an empty array.
    """
    # isinstance also accepts ndarray subclasses (e.g. masked arrays).
    if isinstance(timestamp, np.ndarray):
        # A 0-d array holds exactly one value and cannot be indexed with [-1].
        if timestamp.ndim == 0:
            return timestamp[()]
        return timestamp[-1]
    return timestamp

In [5]:
def applyChanges(f, np_arr):
    """Shift every entry of ``np_arr`` by the rounded correction ``f(entry)``.

    Parameters
    ----------
    f : callable
        Maps an entry (array or NumPy scalar) to the correction that is
        added to it; the correction is rounded before it is added.
    np_arr : iterable
        The entries to correct, e.g. a pandas Series of timestamp arrays.

    Returns
    -------
    list
        One corrected entry per input entry, cast to 64-bit integers.
    """
    # np.int64 instead of the builtin ``int``: the builtin maps to a 32-bit
    # integer on some platforms (Windows with NumPy < 2), which is too small
    # for large values such as millisecond timestamps.
    return [(x + np.round(f(x))).astype(np.int64) for x in np_arr]

In [6]:
%%time
# Load every study log of the selected task, grouped by device, and derive
# the per-trial distance plus the first/last timestamp of each trial.

# Substring a log file name must contain in order to be loaded.
task = "DRAG"

# Per device: first a list of per-file DataFrames, afterwards one combined
# DataFrame (see the loops below).
phone_dict = {"S3":[], "S4":[], "OPO":[], "N6":[]}

for path, subdirs, files in os.walk("./raw_data_phone/"):
    for file in files:
        if "StudyRun" not in file or task not in file:
            continue

        file_path = os.path.join(path, file)

        # With "StudyRun" and ".txt" removed, the name has to split into
        # exactly four "_"-separated fields. The last field is bound to
        # `file_task` and NOT to `task`: rebinding `task` here would silently
        # change the filter above for every file that follows.
        pid, phone, cond, file_task = file.replace("StudyRun","").replace(".txt","").split("_")
        pid = int(pid)
        device = conv_phone_name(phone)

        df = pd.read_csv(file_path, sep=";")
        df["Phone"] = device
        df["Participant"] = pid
        df["Cond"] = cond
        phone_dict[device].append(df)

# Collapse the per-file frames of each device into a single DataFrame.
# pd.concat raises ValueError for a device without any loaded file.
for phone, frames in phone_dict.items():
    phone_dict[phone] = pd.concat(frames)


for phone, phones_df in phone_dict.items():
    # Tile-to-target distance in pixels, scaled by the device's pixel size and
    # divided by 10 (presumably mm -> cm; confirm against py/constants.py).
    phones_df["Distance"] = phones_df.apply(
        lambda row: (
            sqrt_sum(row['tileX'], row['tileY'], row['targetX'], row['targetY'])
            * get_pixelsize_for_device(row['Phone'])
        ) / 10,
        axis=1,
    )
    phones_df = phones_df.reset_index(drop=True)
    phones_df = phones_df.sort_values(by=["Distance"])
    print(phone,int(len(phones_df)*0.75), len(phones_df))
    #phones_df = phones_df[int(len(phones_df)*0.75):]

    # The timestamp column is stored as the text of a Python literal; parse it
    # into a NumPy array per row.
    phones_df["timestamp"] = phones_df["timestamp"].apply(lambda x: np.array(ast.literal_eval(x)))
    phones_df['FirstTime'] = phones_df["timestamp"].apply(getFirstTime)
    phones_df['LastTime'] = phones_df["timestamp"].apply(getLastTime)

    # Stored sorted by LastTime; syncS3Times uses LastTime as a merge_asof key,
    # which requires sorted input.
    phone_dict[phone] = phones_df.sort_values(by=["LastTime"])

S3 288 384
S4 288 384
OPO 288 384
N6 288 384
CPU times: user 974 ms, sys: 0 ns, total: 974 ms
Wall time: 972 ms


In [7]:
def syncS3Times(pid, c, df):
    """Shift the S3 phone timestamps of one participant/condition onto the PC clock.

    Reads the timestamp-matching file for ``pid`` and ``c``, builds a
    piecewise-linear mapping from phone timestamp to the offset
    (PC stamp - phone stamp) and adds the rounded, interpolated offset to
    every entry of ``df["timestamp"]``.

    Parameters
    ----------
    pid : int or str
        Participant id, inserted into the matching-file name.
    c : str
        Condition name, inserted into the matching-file name.
    df : pandas.DataFrame
        Trials of that participant/condition with the columns ``timestamp``
        and ``LastTime``. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object as ``df`` with a new ``interpol`` column (corrected
        timestamps) and ``FirstTime``/``LastTime`` recomputed from it.

    Raises
    ------
    ValueError
        From ``pd.merge_asof`` if a merge key is not sorted, or from the
        interpolator if a timestamp lies outside ``[0, upper_border]``.
    """
    filepath = "./raw_data_phone/timestamp_adjusted/timestamp_matching_s3_P%s_%s.txt"%(str(pid),c)
    # all S3 trials for participant pid combined
    concatDf = df
    # UDP file with PC timestamps and the related phone timestamps
    matching_df = pd.read_csv(filepath, delimiter=",")
    # name the columns: first column = phone stamp, second column = PC stamp
    matching_df = matching_df.rename(
        index=str,
        columns={matching_df.columns[0]: "Phonestamp", matching_df.columns[1]: "Motivestamp"},
    )
    # offset between both clocks (presumably in ms - confirm against the logs)
    matching_df["diff"] = matching_df["Motivestamp"] - matching_df["Phonestamp"]
    # merge the matching df with the S3 df (Phonestamp X LastTime)
    merged_df = pd.merge_asof(matching_df, concatDf, left_on='Phonestamp', right_on='LastTime', direction='nearest')

    # Support points (X = phone timestamp, Y = offset) for the 1D
    # interpolation. The first and last offset are repeated at the two borders
    # so that timestamps before the first / after the last matched stamp still
    # fall inside the interpolation range.
    upper_border = 1600000000000  # NOTE(review): looks like an epoch-ms upper bound - confirm
    support_points = pd.DataFrame(
        data={"interpolTime": merged_df["Phonestamp"], "diff": merged_df["diff"]})
    border_points = pd.DataFrame(
        data={"interpolTime": [0, upper_border],
              "diff": [merged_df["diff"].iloc[0], merged_df["diff"].iloc[-1]]})
    # pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
    interpol_df = pd.concat(
        [support_points, border_points], ignore_index=True).sort_values(by=['interpolTime'])

    # interpolate along the X (timestamps) and Y (diff) axes
    f = interpolate.interp1d(interpol_df["interpolTime"], interpol_df["diff"])
    # add new column that holds the corrected timestamps
    concatDf["interpol"] = applyChanges(f, concatDf["timestamp"])
    concatDf['FirstTime'] = concatDf["interpol"].apply(getFirstTime)
    concatDf['LastTime'] = concatDf["interpol"].apply(getLastTime)

    return concatDf

In [8]:
def getDraggingTimes(pid,phone,cond):
    """Return the start and end times of one participant's trials.

    Selects the rows of ``phone_dict[phone]`` that match ``phone``, ``cond``
    and ``pid``. For the "S3" the times are first passed through
    ``syncS3Times``.

    Returns
    -------
    tuple
        ``(FirstTime list, LastTime list)``, or ``(None, None)`` when no row
        matches.
    """
    device_trials = phone_dict[phone]
    same_phone = device_trials.Phone == phone
    same_cond = device_trials.Cond == cond
    same_participant = device_trials.Participant == pid
    # Work on a deep copy so the shared per-device frame is never modified.
    trials = device_trials[same_phone & same_cond & same_participant].copy(deep=True)

    if len(trials) == 0:
        return None, None

    if phone == "S3":
        trials = syncS3Times(pid, cond, trials)

    first_times = trials.FirstTime.tolist()
    last_times = trials.LastTime.tolist()
    return first_times, last_times

In [9]:
def doJob(pickle):
    """Cut the dragging trials out of one transformed pickle and save them.

    Parameters
    ----------
    pickle : str
        File name inside ``./TransformedPickles/``. After removing ".pkl" it
        must split on "_" into ``P<pid>``, phone and condition.

    The "Fitts" rows of the pickle whose ``Time`` lies inside one of the
    trial intervals returned by ``getDraggingTimes`` are labelled
    ``P<pid>_<i>`` in a new ``Drag`` column (``i`` is the interval index; a
    row inside several intervals keeps the label of the last one). All other
    rows are dropped and the result is written to
    ``./DraggingPickles/P<pid>_<phone>_<cond>.pkl``.

    Returns ``None``. Nothing is written when no trials exist for the file.
    """
    path = "./TransformedPickles/"
    pid,phone,cond = pickle.replace(".pkl","").split("_")
    pid = int(pid.replace("P",""))
    filepath = path+pickle
    print("Working on file %s"%(filepath))

    # first and last times of the trials for this participant/phone/condition
    firstTimes, lastTimes = getDraggingTimes(pid,phone,cond)
    if firstTimes is None:
        return

    df = pd.read_pickle(filepath)
    df = df[df.Task == "Fitts"].copy(deep=True)
    df["Drag"] = None
    for i, (firstTime, lastTime) in enumerate(zip(firstTimes, lastTimes)):
        df.loc[(df.Time>=firstTime)&(df.Time<=lastTime),'Drag'] = 'P%i_%i' % (pid, i)
    # rows outside every interval still carry None and are removed here
    df = df.dropna(subset=["Drag"])

    pickle_path = "./DraggingPickles/"
    pickle_name = 'P%i'%pid
    pickle_path = pickle_path+pickle_name + ("_%s_%s.pkl"%(phone, cond))
    df.to_pickle(pickle_path)
    return

In [10]:
# Every entry of ./TransformedPickles/ except those whose name contains
# "checkpoints".
files = [
    entry
    for entry in os.listdir("./TransformedPickles/")
    if "checkpoints" not in entry
]

In [11]:
%%time
# One row per pickle file name; doJob is run on each name through
# pandarallel's parallel_apply.
dfFiles = pd.DataFrame(files)
dfFiles.columns = ["File"]
ret = dfFiles["File"].parallel_apply(doJob)

Working on file ./TransformedPickles/P3_N6_walking.pkl
Working on file ./TransformedPickles/P5_OPO_seated.pkl
Working on file ./TransformedPickles/P14_S4_seated.pkl
Working on file ./TransformedPickles/P10_OPO_seated.pkl
Working on file ./TransformedPickles/P6_S3_walking.pkl
Working on file ./TransformedPickles/P11_N6_seated.pkl
Working on file ./TransformedPickles/P8_OPO_seated.pkl
Working on file ./TransformedPickles/P10_S4_seated.pkl
Working on file ./TransformedPickles/P21_OPO_seated.pkl
Working on file ./TransformedPickles/P19_S3_walking.pkl
Working on file ./TransformedPickles/P19_S4_seated.pkl
Working on file ./TransformedPickles/P7_N6_seated.pkl
Working on file ./TransformedPickles/P7_OPO_seated.pkl
Working on file ./TransformedPickles/P12_S4_seated.pkl
Working on file ./TransformedPickles/P10_S3_seated.pkl
Working on file ./TransformedPickles/P12_S4_walking.pkl
Working on file ./TransformedPickles/P6_S4_walking.pkl
Working on file ./TransformedPickles/P7_S4_walking.pkl
Working