# Process Raw Files

This Notebook provides the code to process the raw _.txt_ files collected by the Psytoolkit Experiments.

Experiments:
 - Navon
 - Fitts
 - N-Back
 - WCST
 - Corsi Block Span





----------

```
Author: Zach Wolpe 

Email:  zachcolinwolpe@gmail.com
```

In [133]:
import pandas as pd
import numpy as np
import os

## Automatic Processing

Use the _data.csv_ file as the mapping between files.

File Types:
 - _Corsi_Block_Span_Task_
 - _Navon_Task_
 - _N_Back_Task_
 - _WCST_Task_
 - _Fitts_Law_
 - _survey_: psytoolkit abstracted code
 - _welcome_screen_: instruction set
 - _s_: user ID and metadata about the user.
 - _data_: mapping between files
 - _data_times_: metadata on experiments

In [134]:
# Directory that holds the sample export (raw task files plus the mapping tables).
path = '../data/data_sample'

# Show what the export contains, one file name per line.
for file_name in os.listdir(path):
    print(file_name)

# Mapping table: one row per participant, one column per task file.
mapping = pd.read_csv(f'{path}/data.csv', index_col=False)
mapping.head()

Corsi_Block_Span_Task.2021-05-01-1605.data.32ff642a-efe0-436f-8075-fa703d677fed.txt
Navon_Task.2021-05-10-0657.data.d3b74af9-3b24-4820-83a0-67986b3ec0bf.txt
N_Back_Task.2021-05-10-0652.data.d3b74af9-3b24-4820-83a0-67986b3ec0bf.txt
Corsi_Block_Span_Task.2021-05-10-0654.data.d3b74af9-3b24-4820-83a0-67986b3ec0bf.txt
survey.txt
Navon_Task.2021-05-01-1609.data.32ff642a-efe0-436f-8075-fa703d677fed.txt
WCST_Task.2021-05-10-0647.data.d3b74af9-3b24-4820-83a0-67986b3ec0bf.txt
data.xlsx
data.csv
data_times.xlsx
s.d3b74af9-3b24-4820-83a0-67986b3ec0bf.txt
WCST_Task.2021-05-01-1558.data.32ff642a-efe0-436f-8075-fa703d677fed.txt
data_times.csv
welcome_screen.2021-05-10-0639.data.d3b74af9-3b24-4820-83a0-67986b3ec0bf.txt
Fitts_Law.2021-05-10-0655.data.d3b74af9-3b24-4820-83a0-67986b3ec0bf.txt
s.32ff642a-efe0-436f-8075-fa703d677fed.txt
welcome_screen.2021-05-01-1551.data.32ff642a-efe0-436f-8075-fa703d677fed.txt
N_Back_Task.2021-05-01-1604.data.32ff642a-efe0-436f-8075-fa703d677fed.txt
Fitts_Law.2021-05-01-

Unnamed: 0,participant,participant_code:1,Welcome_Screen:1,wcst_task:1,n_back_task:1,corsi_block_span_task:1,fitts_law:1,navon_task:1,TIME_start,TIME_end,TIME_total
0,s.32ff642a-efe0-436f-8075-fa703d677fed.txt,851366,welcome_screen.2021-05-01-1551.data.32ff642a-e...,WCST_Task.2021-05-01-1558.data.32ff642a-efe0-4...,N_Back_Task.2021-05-01-1604.data.32ff642a-efe0...,Corsi_Block_Span_Task.2021-05-01-1605.data.32f...,Fitts_Law.2021-05-01-1606.data.32ff642a-efe0-4...,Navon_Task.2021-05-01-1609.data.32ff642a-efe0-...,2021-05-01-15-50,2021-05-01-16-09,19
1,s.d3b74af9-3b24-4820-83a0-67986b3ec0bf.txt,490901,welcome_screen.2021-05-10-0639.data.d3b74af9-3...,WCST_Task.2021-05-10-0647.data.d3b74af9-3b24-4...,N_Back_Task.2021-05-10-0652.data.d3b74af9-3b24...,Corsi_Block_Span_Task.2021-05-10-0654.data.d3b...,Fitts_Law.2021-05-10-0655.data.d3b74af9-3b24-4...,Navon_Task.2021-05-10-0657.data.d3b74af9-3b24-...,2021-05-10-06-39,2021-05-10-06-57,18


In [135]:
# first participant (row 0 of the mapping table)
mapping.iloc[0]

participant                       s.32ff642a-efe0-436f-8075-fa703d677fed.txt
participant_code:1                                                    851366
Welcome_Screen:1           welcome_screen.2021-05-01-1551.data.32ff642a-e...
wcst_task:1                WCST_Task.2021-05-01-1558.data.32ff642a-efe0-4...
n_back_task:1              N_Back_Task.2021-05-01-1604.data.32ff642a-efe0...
corsi_block_span_task:1    Corsi_Block_Span_Task.2021-05-01-1605.data.32f...
fitts_law:1                Fitts_Law.2021-05-01-1606.data.32ff642a-efe0-4...
navon_task:1               Navon_Task.2021-05-01-1609.data.32ff642a-efe0-...
TIME_start                                                  2021-05-01-15-50
TIME_end                                                    2021-05-01-16-09
TIME_total                                                                19
Name: 0, dtype: object

# Process _.txt_ Files

Now that the text files have been located through the mapping table, their contents can be read.

In [136]:
# ---- paths ----x
path = '../data/data_sample'
# mapping-table columns that hold a file name for each participant
keys = ['participant', 'Welcome_Screen:1', 'wcst_task:1', 'n_back_task:1', 'corsi_block_span_task:1', 'fitts_law:1', 'navon_task:1']

# timing summary recorded for the first participant
first_row = mapping.iloc[0]
print('start time: ', first_row['TIME_start'])
print('end time:   ', first_row['TIME_end'])
print('total time: ', first_row['TIME_total'])

# example: read files
for key in keys:

    print('----------------------------------------------')
    print('Participant: ', first_row['participant'])
    print('')
    print('key: ', key)
    print('')
    print('----------------------------------------------')
    print('')

    # `cur_path` is left at module level on purpose: the next cell displays it.
    cur_path = path + '/' + first_row[key]

    # `with` closes each file as soon as it has been printed; opening the files
    # without closing them leaked one handle per key.
    with open(cur_path, 'r') as handle:
        for line in handle.readlines():
            # each line keeps its trailing newline, so the output is double-spaced
            print(line)
 

start time:  2021-05-01-15-50
end time:    2021-05-01-16-09
total time:  19
----------------------------------------------
Participant:  s.32ff642a-efe0-436f-8075-fa703d677fed.txt

key:  participant

----------------------------------------------

version: 3.3.0

server-time: 2021-05-01-15-50

user_agent: mozilla/5.0 (macintosh; intel mac os x 10_15_7) applewebkit/537.36 (khtml, like gecko) chrome/90.0.4430.93 safari/537.36

screen_width: 0

screen_height: 0

html_width: 0

html_height: 0

user_time: 0

T: 0

l: participant_code

stime: 2021-05-01-15-50

t: set

a: 851366

l: Welcome_Screen

stime: 2021-05-01-15-51

T: 1619877061490

t: experiment

l: wcst_task

stime: 2021-05-01-15-58

T: 1619877532192

t: experiment

l: n_back_task

stime: 2021-05-01-16-04

T: 1619877847954

t: experiment

l: corsi_block_span_task

stime: 2021-05-01-16-05

T: 1619877955361

t: experiment

l: fitts_law

stime: 2021-05-01-16-06

T: 1619878012377

t: experiment

l: navon_task

stime: 2021-05-01-16-09

T

In [137]:
# `cur_path` still holds the last path assigned by the loop in the previous cell
cur_path

'../data/data_sample/Navon_Task.2021-05-01-1609.data.32ff642a-efe0-436f-8075-fa703d677fed.txt'

In [138]:
# ---- for participant at row position `pos` ----x
for pos in range(len(mapping)):
    record = mapping.iloc[pos]

    print('')
    print('------------------ User', record.iloc[0], '------------------')
    print()

    # ---- for each task column ----x
    for task_key in keys:
        print(record[task_key])
        


------------------ User s.32ff642a-efe0-436f-8075-fa703d677fed.txt ------------------

s.32ff642a-efe0-436f-8075-fa703d677fed.txt
welcome_screen.2021-05-01-1551.data.32ff642a-efe0-436f-8075-fa703d677fed.txt
WCST_Task.2021-05-01-1558.data.32ff642a-efe0-436f-8075-fa703d677fed.txt
N_Back_Task.2021-05-01-1604.data.32ff642a-efe0-436f-8075-fa703d677fed.txt
Corsi_Block_Span_Task.2021-05-01-1605.data.32ff642a-efe0-436f-8075-fa703d677fed.txt
Fitts_Law.2021-05-01-1606.data.32ff642a-efe0-436f-8075-fa703d677fed.txt
Navon_Task.2021-05-01-1609.data.32ff642a-efe0-436f-8075-fa703d677fed.txt

------------------ User s.d3b74af9-3b24-4820-83a0-67986b3ec0bf.txt ------------------

s.d3b74af9-3b24-4820-83a0-67986b3ec0bf.txt
welcome_screen.2021-05-10-0639.data.d3b74af9-3b24-4820-83a0-67986b3ec0bf.txt
WCST_Task.2021-05-10-0647.data.d3b74af9-3b24-4820-83a0-67986b3ec0bf.txt
N_Back_Task.2021-05-10-0652.data.d3b74af9-3b24-4820-83a0-67986b3ec0bf.txt
Corsi_Block_Span_Task.2021-05-10-0654.data.d3b74af9-3b24-4820-8

# Formalise

Now that the logic has been worked out, a class can be written to formalise the data acquisition in an object-oriented structure.

In [184]:
# !conda activate dynocog
# !conda init
# !conda install pandas -y
# !conda install -c pytorch pytorch -y
# !conda install -c conda-forge numpyro
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import sys
from tqdm import tqdm
import plotly.express as px
import plotly.graph_objects as go
import os
import re
from tqdm import tqdm

Unnamed: 0,participant,participant_code:1,Welcome_Screen:1,wcst_task:1,n_back_task:1,corsi_block_span_task:1,fitts_law:1,navon_task:1,TIME_start,TIME_end,TIME_total
0,s.32ff642a-efe0-436f-8075-fa703d677fed.txt,851366,welcome_screen.2021-05-01-1551.data.32ff642a-e...,WCST_Task.2021-05-01-1558.data.32ff642a-efe0-4...,N_Back_Task.2021-05-01-1604.data.32ff642a-efe0...,Corsi_Block_Span_Task.2021-05-01-1605.data.32f...,Fitts_Law.2021-05-01-1606.data.32ff642a-efe0-4...,Navon_Task.2021-05-01-1609.data.32ff642a-efe0-...,2021-05-01-15-50,2021-05-01-16-09,19
1,s.d3b74af9-3b24-4820-83a0-67986b3ec0bf.txt,490901,welcome_screen.2021-05-10-0639.data.d3b74af9-3...,WCST_Task.2021-05-10-0647.data.d3b74af9-3b24-4...,N_Back_Task.2021-05-10-0652.data.d3b74af9-3b24...,Corsi_Block_Span_Task.2021-05-10-0654.data.d3b...,Fitts_Law.2021-05-10-0655.data.d3b74af9-3b24-4...,Navon_Task.2021-05-10-0657.data.d3b74af9-3b24-...,2021-05-10-06-39,2021-05-10-06-57,18


In [316]:
class batch_processing:
    """
    Parse the raw task `.txt` files of every participant into pandas dataframes.

    Input:  path to the directory holding `data.csv`, `data_times.csv` and the
            task files named in `data.csv`
    Return: tools to write pandas dataframes of the data to a specified location

    Each `create_*_data` method reads one task's file for every row of the
    mapping table, splits every non-blank line on single spaces and stores the
    result (one row per line, parsed fields kept as strings) on the instance.
    `convert_data_to_int` then casts the numeric columns to `int`.

    Every task dataframe starts with two identifier columns:
      - `participant`:      value of the mapping's `participant` column
      - `participant_code`: value of the mapping's `participant_code:1` column
    """

    # Banner printed by each `create_*_data` method; `{}` receives the title line.
    _BANNER = """

        ------------------------------------------------------------------
                                {}
        ------------------------------------------------------------------

        """

    # Identifier columns placed in front of every task dataframe.
    _ID_COLUMNS = ['participant', 'participant_code']

    # Task prefixes of the `<task>_data` attributes, in the order they are pickled.
    _TASKS = ('fitts', 'wcst', 'nback', 'corsi', 'navon')

    # Columns cast to `int` by `convert_data_to_int`, keyed by task prefix.
    _INT_COLUMNS = {
        'fitts': ['x_loc', 'y_loc', 'size', 'distance', 'fitts_prediction', 'reaction_time_ms', 'status'],
        'corsi': ['highest_span', 'n_items', 'status'],
        'nback': ['block_number', 'score', 'status', 'miss', 'false_alarm', 'reaction_time_ms', 'match',
                  'stimuli', 'stimuli_n_1', 'stimuli_n_2'],
        'wcst':  ['card_no', 'correct_card', 'correct_persevering', 'seq_no', 'card_number', 'reaction_time_ms',
                  'status', 'card_selected', 'error', 'perseverance_error', 'not_perseverance_error'],
        'navon': ['level_of_target_n', 'status', 'reaction_time_ms'],
    }

    def __init__(self, path_to_data):
        """Load the mapping tables found in `path_to_data` and derive the per-task file paths."""
        self.path          = path_to_data
        # Read from the constructor argument (not from a notebook-level `path` variable).
        self.mapping       = pd.read_csv(os.path.join(self.path, 'data.csv'), index_col=False)
        self.data_times    = pd.read_csv(os.path.join(self.path, 'data_times.csv'), index_col=False)
        self.participants  = self.mapping['participant'].tolist()
        self.parti_code    = self.mapping['participant_code:1'].tolist()
        self.n             = self.mapping.shape[0]
        self.wcst_paths    = self._task_paths('wcst_task:1')
        self.nback_paths   = self._task_paths('n_back_task:1')
        self.corsi_paths   = self._task_paths('corsi_block_span_task:1')
        self.fitts_paths   = self._task_paths('fitts_law:1')
        self.navon_paths   = self._task_paths('navon_task:1')
        self.wcst_data     = None
        self.nback_data    = None
        self.corsi_data    = None
        self.fitts_data    = None
        self.navon_data    = None

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    def _task_paths(self, column):
        """Return the full path of every file named in mapping column `column`."""
        # os.path.join works whether or not `self.path` ends with a separator.
        return [os.path.join(self.path, name) for name in self.mapping[column].tolist()]

    def _build_task_frame(self, paths, columns, parse_fields):
        """
        Read one file per participant and return a dataframe with one row per line.

        paths:        one file path per participant, in mapping-table order
        columns:      names of the task-specific columns
        parse_fields: callable mapping the space-split fields of a line to the
                      list of values for `columns`
        """
        rows = []
        for idx, file_path in enumerate(paths):
            ids = [self.participants[idx], self.parti_code[idx]]
            # `with` closes every participant's file, not only the last one opened.
            with open(file_path, 'r') as handle:
                for line in handle:
                    line = line.rstrip('\n')
                    if not line.strip():
                        continue  # blank lines carry no trial data
                    rows.append(ids + parse_fields(line.split(' ')))
        # Build the frame once from all rows instead of appending row by row
        # (`DataFrame.append` is quadratic in a loop and absent from pandas >= 2.0).
        return pd.DataFrame(rows, columns=self._ID_COLUMNS + columns)

    @staticmethod
    def _take(fields, count):
        """Return the first `count` fields; raises IndexError when the line is too short."""
        return [fields[i] for i in range(count)]

    @staticmethod
    def _parse_wcst(st):
        """Map the fields of one WCST line to the WCST column values."""
        # Field 5 is split around its digits into (shape, number, colour) parts.
        crd = re.split(r'(\d+)', st[5])
        return [st[0], st[1], st[2], st[3], st[4], crd[0], crd[1], crd[2],
                st[6], st[7], st[8], st[9], st[10], st[11]]

    @staticmethod
    def _parse_navon(st):
        """Map the fields of one Navon line to the Navon column values."""
        # NOTE(review): assumes the first field is a two-character stimulus code
        # (large letter, then small letter) - confirm against the task's output format.
        # `[1:2]` yields '' rather than raising when the code has a single character.
        return [st[0][0], st[0][1:2], st[1], st[2], st[3], st[4]]

    # ------------------------------------------------------------------
    # dataframe builders
    # ------------------------------------------------------------------

    def create_wcst_data(self):
        """Parse every participant's WCST file into `self.wcst_data`."""
        print(self._BANNER.format('WCST data created'))
        self.wcst_data = self._build_task_frame(
            self.wcst_paths,
            ['card_no', 'correct_card', 'correct_persevering', 'seq_no', 'rule',
             'card_shape', 'card_number', 'card_colour', 'reaction_time_ms', 'status',
             'card_selected', 'error', 'perseverance_error', 'not_perseverance_error'],
            self._parse_wcst)

    def create_navon_data(self):
        """Parse every participant's Navon file into `self.navon_data`."""
        print(self._BANNER.format('Navon data created'))
        self.navon_data = self._build_task_frame(
            self.navon_paths,
            ['large_letter', 'small_letter', 'level_of_target', 'level_of_target_n',
             'status', 'reaction_time_ms'],
            self._parse_navon)

    def create_nback_data(self):
        """Parse every participant's N-back file into `self.nback_data`."""
        print(self._BANNER.format('N back data created'))
        self.nback_data = self._build_task_frame(
            self.nback_paths,
            ['block_number', 'score', 'status', 'miss', 'false_alarm', 'reaction_time_ms',
             'match', 'stimuli', 'stimuli_n_1', 'stimuli_n_2'],
            lambda st: self._take(st, 10))

    def create_corsi_data(self):
        """Parse every participant's Corsi file into `self.corsi_data`."""
        print(self._BANNER.format('Corsi data created'))
        self.corsi_data = self._build_task_frame(
            self.corsi_paths,
            ['highest_span', 'n_items', 'status'],
            lambda st: self._take(st, 3))

    def create_fitts_data(self):
        """Parse every participant's Fitts file into `self.fitts_data`."""
        print(self._BANNER.format('Fitts data created'))
        self.fitts_data = self._build_task_frame(
            self.fitts_paths,
            ['x_loc', 'y_loc', 'size', 'distance', 'fitts_prediction', 'reaction_time_ms', 'status'],
            lambda st: self._take(st, 7))

    # ------------------------------------------------------------------
    # schema conversion and persistence
    # ------------------------------------------------------------------

    def convert_data_to_int(self):
        """Change the schema of the dataframes to include integers.

        All five `create_*_data` methods must have been called first; the
        dataframes are modified in place.
        """
        for task, columns in self._INT_COLUMNS.items():
            frame = getattr(self, task + '_data')
            for c in columns:
                frame[c] = frame[c].astype(int)
        message="""
        ------------------------------------------------------------------
        Schemas Converted!
        ------------------------------------------------------------------
        """
        print(message)


    def write_to_pickle(self, path):
        """Write the five task dataframes to `<task>_data.pkl` files in directory `path`."""
        # Creates missing parent directories and accepts an existing directory;
        # any other failure is raised instead of being silently swallowed.
        os.makedirs(path, exist_ok=True)

        for task in self._TASKS:
            getattr(self, task + '_data').to_pickle(os.path.join(path, task + '_data.pkl'))
        message="""
        ------------------------------------------------------------------
        Dataframes successfully written to path {}!
        ------------------------------------------------------------------
        """.format(path)
        print(message)


    def read_from_pickle(self, path):
        """Read the five task dataframes from the `<task>_data.pkl` files in directory `path`."""
        # NOTE: unpickling executes code from the file - only read pickles you created.
        for task in self._TASKS:
            setattr(self, task + '_data', pd.read_pickle(os.path.join(path, task + '_data.pkl')))
        message="""
        ------------------------------------------------------------------
        Dataframes:

            - fitts_data
            - wcst_data
            - nback_data
            - corsi_data
            - navon_data

        Successfully read from path: \'{}\'!
        ------------------------------------------------------------------
        """.format(path)
        print(message)


    def write_class_to_pickle(self, path):
        """Serialize this object to `batch_processing_object.pkl` in directory `path`."""
        # Imported here so the method works even when the notebook's import
        # cell does not provide `pickle`.
        import pickle

        filename = os.path.join(path, 'batch_processing_object.pkl')
        with open(filename, 'wb') as file:
            # Dump this instance, not whatever a global named `bp` happens to hold.
            pickle.dump(self, file)

        message="""
        ------------------------------------------------------------------
        Object successfully written to path: \'{}\'!

        To retrieve run:
            with open(\'{}\', 'rb') as file2:
                bp = pickle.load(file2)
        ------------------------------------------------------------------
        """.format(filename, filename)
        print(message)

In [317]:
# Input directory (raw export) and output directory (pickled dataframes).
path  = '../data/data_sample/'
path2 = '../data/data_samples_pandas/'
bp    = batch_processing(path)

# Parse every task's raw files into its dataframe.
for build_frame in (
    bp.create_wcst_data,
    bp.create_navon_data,
    bp.create_nback_data,
    bp.create_corsi_data,
    bp.create_fitts_data,
):
    build_frame()

# Cast numeric columns, then persist the dataframes and the object itself.
bp.convert_data_to_int()
bp.write_to_pickle(path2)
bp.read_from_pickle(path2)
bp.write_class_to_pickle(path2)



        ------------------------------------------------------------------
                                WCST data created
        ------------------------------------------------------------------

        


        ------------------------------------------------------------------
                                Navon data created
        ------------------------------------------------------------------

        


        ------------------------------------------------------------------
                                N back data created
        ------------------------------------------------------------------

        


        ------------------------------------------------------------------
                                Corsi data created
        ------------------------------------------------------------------

        


        ------------------------------------------------------------------
                                Fitts data created
        ------------