# JATOS oMST Analysis
JATOS exports a large text file that is close to, but not quite, a JSON file: each line is itself a JSON-formatted array or object. Even knowing this, though, there's a ton of data in here.

The first code block here sets up a bunch of functions that try to parse out the log files. It's likely more complex than you'll need, but the added complexity shouldn't hurt you.  The idea is that there can be multiple MST runs inside of a single subject's session and that this can include both the traditional study-test format and continuous format data. It'll create output files for each of these.

The basic steps are:
- Update the `fname` variable in the last panel to let this code find your JATOS output
- Update the output filename you want to use for the large trial-by-trial, all-subjects output on both the `extract_data` and `compute_std_OSNmetrics` lines
- Run both cells below

First, `load_data` loads and cleans up the JATOS output. Next, `extract_data` runs through that output, looking for tasks whose data you want analyzed. Use the flags on `extract_data` to choose whether the output includes study sessions and test sessions (both from study-test format data) and continuous-format data (e.g., the oMST). If you want to remove any subjects (e.g., your testing subjects), remove them from the list of subject IDs before calling `extract_data`. The output is a large CSV file with trialwise data for all subjects. You can process this on your own if you like, but the next step is typically to have `compute_std_OSNmetrics` load that file and compute the LDI and REC measures from the data.

In [10]:
import pandas as pd
import numpy as np
import sys, os, json
from datetime import date
#from scipy.stats import norm

def load_data(filename):
    """Parse a JATOS results text file into a list of per-line records.

    Each line of the file is handled on its own:

    * a line starting with ``[`` is parsed as a JSON array; the parsed list is
      kept and the ``'subject'`` value of its first element is recorded as a
      subject id (an empty array carries no data and is skipped);
    * a line starting with ``{`` is parsed as a single JSON object and wrapped
      in a one-element list, but contributes no subject id;
    * any other line is ignored.

    Parameters
    ----------
    filename : path of the JATOS results file to read.

    Returns
    -------
    (jsdata, sidlist) : ``jsdata`` is the list of parsed lines (each a list),
        ``sidlist`` is the list of subject ids found on the array lines, in
        file order (duplicates are kept).

    Raises
    ------
    json.JSONDecodeError if a ``[``/``{`` line is not valid JSON; KeyError if
    the first element of an array line has no ``'subject'`` entry.
    """
    jsdata=[]
    sidlist=[]
    # JSON text is UTF-8, so do not rely on the platform's default encoding;
    # the context manager guarantees the handle is closed even on a parse error.
    with open(filename,"r",encoding="utf-8") as fp:
        print(filename,os.getcwd())
        for d in fp:
            if d.startswith('['):  # A JSON array: one component's list of records
                tmpdata=json.loads(d)
                if not tmpdata:  # Nothing recorded on this line
                    continue
                jsdata.append(tmpdata)
                sidlist.append(tmpdata[0]['subject'])
            elif d.startswith('{'):  # A lone JSON object: keep it, but as a list
                jsdata.append(json.loads('[ ' + d + ' ]'))
    return jsdata,sidlist

# Header line written at the top of the trial-by-trial CSV.
_CSV_HEADER = 'sid,task,subtask,orderf,trial,stimulus,selfpaced,set,type,response,rt,correct,correct_resp,truth,resp,lureBin'

# Value of the first record's 'task' field -> kind of MST run.
_TASK_KINDS = {
    'msts': 'study',
    'mstt': 'test',
    'cMSTCont': 'continuous',
    'contmst': 'continuous',
    'oMSTCont': 'continuous',
}

# Kind of run -> label written in the 'task' column.
_TASK_LABELS = {'study': 'MST_Study', 'test': 'MST_Test', 'continuous': 'ContMST'}

# Numeric coding shared by 'truth' and 'resp':
# 1=old/target, 2=new/foil, 3=similar/lure, 0=anything else.
_RESP_CODES = {'o': 1, 'n': 2, 's': 3}
_TRUTH_CODES = {
    'test': {'TR': 1, 'TF': 2, 'TL': 3},
    'continuous': {'target': 1, 'foil': 2, 'lure': 3},
}


def _known_subtask(kind, nrecords):
    """Return the subtask label for a run with `nrecords` records, or None if the length is not recognised."""
    if kind == 'study':
        if nrecords >= 130:  # Full length - instructions included
            return "Full-128"
        if nrecords == 66:  # Short version
            return "Reduced-64"
    elif kind == 'test':
        if nrecords >= 194:  # Full length - instructions included
            return "Full-192"
        if nrecords == 86:  # Short version
            return "Reduced-84"
    elif kind == 'continuous':
        if nrecords == 258:  # Full length - instructions included
            return "Full-256"
        if nrecords <= 131:  # Short version
            return "oMST"
    return None


def _lookup_code(table, key):
    """Return table[key], or 0 when key is missing, None or not a string."""
    return table.get(key, 0) if isinstance(key, str) else 0


def _format_row(line, kind, d_task, d_subtask, trial):
    """Build one CSV row (without newline) for a trial record; raises KeyError/TypeError/ValueError on bad records."""
    rt = line.get('rt')
    if rt is None:  # No response time recorded
        rt = -1
    lead = [line['subject'], d_task, d_subtask, line['orderfile'], trial,
            line['stimulus'], line.get('selfpaced', -1), line['set'], line['condition']]
    if kind == 'study':
        response = line.get('response')
        if response is None:
            response = -1
        # Study rows stop after rt: they have no correctness/truth columns
        tail = []
    else:
        response = line.get('resp')
        d_resp = _lookup_code(_RESP_CODES, response)
        if d_resp == 0:
            response = ''  # This helps with no-responses from timeouts
        d_truth = _lookup_code(_TRUTH_CODES[kind], line['condition'])
        tail = [line['correct'], line['correct_response'], d_truth, d_resp, line.get('lbin', 0)]
    cells = [str(v) for v in lead + [response]]
    cells.append(format(rt, '.0f'))
    cells.extend(str(v) for v in tail)
    return ','.join(cells)


def extract_data(jsd,sidlist,outfile,verbose=False,include_study=True,include_test=True,include_continuous=True):
    """Write the trial-by-trial data of the selected MST runs to a CSV file.

    Every entry of `jsd` is a list of records. An entry is processed when its
    first record has a 'task' field naming a known run ('msts' = study,
    'mstt' = test, 'cMSTCont'/'contmst'/'oMSTCont' = continuous), the matching
    include_* flag is set, and the first record's 'subject' is in `sidlist`.
    Within such an entry, every record that has a 'condition' field is a trial
    and produces one row; trials are numbered from 1 within the entry.

    Test and continuous rows carry all 16 header columns. Study rows stop after
    'rt'. Missing values are written as: rt -1, selfpaced -1, study response
    -1, lureBin 0. A missing or unrecognised old/similar/new response is
    written as an empty 'response' cell with resp code 0.

    The records in `jsd` are only read, never modified.

    Parameters
    ----------
    jsd : list of lists of dicts, as returned by load_data.
    sidlist : subject ids to keep; entries for other subjects are skipped.
    outfile : path of the CSV file to create (overwritten if present).
    verbose : if True, echo progress and every written row to stdout.
    include_study, include_test, include_continuous : which kinds of run to
        export.

    Returns
    -------
    None. A trial whose record lacks a required field (e.g. 'stimulus') is
    reported on stdout and skipped rather than aborting the export.
    """
    wanted = {'study': include_study, 'test': include_test, 'continuous': include_continuous}
    with open(outfile, "w") as fout:
        fout.write(_CSV_HEADER + '\n')
        for i,taskdata in enumerate(jsd):
            # Entries that are empty or do not start with a task record hold nothing to export
            if not taskdata or not isinstance(taskdata[0], dict) or 'task' not in taskdata[0]:
                continue
            first = taskdata[0]
            d_sid = first.get('subject')
            if verbose:
                print(d_sid,len(taskdata))
            if d_sid not in sidlist:
                if verbose:
                    print('Found data for ',i,d_sid, 'but skipping - not in list')
                continue
            task = first['task']
            kind = _TASK_KINDS.get(task) if isinstance(task, str) else None
            if kind is None or not wanted[kind]:
                continue
            d_task = _TASK_LABELS[kind]
            d_subtask = _known_subtask(kind, len(taskdata))
            if d_subtask is None:
                d_subtask = "Unk-"+str(len(taskdata))
                print(d_subtask)
            if verbose:
                print(_CSV_HEADER)
            trial = 0
            for line in taskdata:
                if not isinstance(line, dict) or 'condition' not in line:
                    continue  # Only actual trials have a condition
                trial += 1
                try:
                    outln = _format_row(line, kind, d_task, d_subtask, trial)
                except (KeyError, TypeError, ValueError) as e:
                    print('ERROR: ',line)
                    print(e)
                    continue
                fout.write(outln + '\n')
                if verbose:
                    print(outln)

def compute_std_OSNmetrics(fname):
    """Compute the standard old/similar/new summary metrics for each subject.

    Reads the long-format (one row per trial) CSV `fname`, which must provide
    the columns 'sid', 'truth' and 'resp' (1=old/target, 2=new/foil,
    3=similar/lure), either directly or through one of the heading variants
    remapped below. Trials with resp < 1 (no response) are ignored. For every
    subject it computes:

    * rec  = p(old | target) - p(old | foil)
    * ldi  = p(similar | lure) - p(similar | foil)
    * nsim = number of 'similar' responses

    A subject with no responded target, foil or lure trials is reported on
    stdout and left with empty metrics.

    The summary is also written to 'OrigMetrics_<basename of fname>' in the
    same directory as `fname`.

    Parameters
    ----------
    fname : path of the trial-by-trial CSV file.

    Returns
    -------
    pandas.DataFrame indexed by subject id with columns sid, rec, ldi, nsim.

    Raises
    ------
    ValueError if the file cannot be read or parsed.
    """
    try:
        df=pd.read_csv(fname)
    except (OSError, ValueError) as err:  # pandas parser errors derive from ValueError
        raise ValueError('Error reading file: ' + str(fname)) from err
    # Remap common variants on headings
    if ('bin' in df.columns) and ('lureBin' not in df.columns):
        df.rename(columns={'bin':'lureBin'},inplace=True)
    if ('resp' in df.columns) and ('decisionOSN' in df.columns): # resp here is osn and decisionOSN is our code
        df.rename(columns={'resp':'resp_txt'},inplace=True)
        df.rename(columns={'decisionOSN':'resp'},inplace=True)
    if ('participant' in df.columns) and ('sid' not in df.columns):
        df.rename(columns={'participant':'sid'},inplace=True)
    outname=os.path.join(os.path.dirname(fname), 'OrigMetrics_'+os.path.basename(fname))
    sids=df['sid'].unique()
    summarydf=pd.DataFrame(columns=['sid','rec','ldi','nsim'],index=sids)
    summarydf['sid']=sids
    responded=df[df['resp'] >=1]  # Factor out no-response trials (once, for all subjects)
    for s in sids:
        subjdf=responded[responded['sid']==s]
        targ=subjdf[subjdf['truth']==1]
        foil=subjdf[subjdf['truth']==2]
        lure=subjdf[subjdf['truth']==3]
        if len(targ) == 0 or len(foil) == 0 or len(lure) == 0:
            # A proportion over an empty category is undefined
            print('issue on ',s,'skipping')
            print('len',len(targ),len(foil),len(lure))
            continue
        p_O_T= np.count_nonzero(targ['resp']==1) / len(targ)
        p_O_F= np.count_nonzero(foil['resp']==1) / len(foil)
        p_S_L= np.count_nonzero(lure['resp']==3) / len(lure)
        p_S_F= np.count_nonzero(foil['resp']==3) / len(foil)
        summarydf.at[s,'rec']=p_O_T-p_O_F
        summarydf.at[s,'ldi']=p_S_L-p_S_F
        summarydf.at[s,'nsim']=np.count_nonzero(subjdf['resp']==3)

    summarydf.to_csv(outname)
    return summarydf




In [12]:
# Path (absolute or relative) to your JATOS results file.
# Windows users: write the path with / rather than \, or build it with os.path.join().
fname="C:/Users/craig/Downloads/jatos_results_data_20231011225544.txt"

# Load the results file: "jsdata" holds the parsed JSON records and "allsids"
# lists the subject IDs that were found.
jsdata,allsids=load_data(fname)

# Extract the requested runs and save them as one trial-by-trial CSV file.
# For the oMST only include_continuous needs to be enabled.
extract_data(
    jsdata,
    allsids,
    'C:/Users/craig/Downloads/jatos_results_oMST.csv',
    include_study=False,
    include_test=False,
    include_continuous=True,
)

# Load that CSV file again and compute the standard old/similar/new metrics
# for every subject.
compute_std_OSNmetrics('C:/Users/craig/Downloads/jatos_results_oMST.csv')


C:/Users/craig/Downloads/jatos_results_data_20231011225544.txt c:\Users\craig\dev\jatos_win_java\study_assets_root\oMST\misc


Unnamed: 0,sid,rec,ldi,nsim
1,1,0.2,0.491477,34
