In [1]:
import os
import re
import pandas as pd
import numpy as np


In [2]:
# Configuration for the log scan.
# Alternative job selection: an explicit alternation of job ids.
#job_number = '(2546846|2547047|2545083|2545195|2538358|2538714|2538714|2538714)'
# Regex fragment identifying the jobs of interest; it is spliced into job_re.
# Raw string: '\d' inside a normal string literal is an invalid escape sequence.
job_number = r'hcp1st-25\d{5}_'
log_dir = '../log/'
# Matches any file name that contains the job pattern.
job_re = re.compile(r'.*{}.*'.format(job_number))
# Group 1 is the scan name, tfMRI_<CARIT|GUESSING>_<AP|PA>, on a "LevelOnefMRIName:" line.
scan_re = re.compile(r'.*LevelOnefMRIName:.*(tfMRI_(?:CARIT|GUESSING)_(?:AP|PA)).*')
# Group 1 is an "HCD..." token delimited on both sides by "_" or "/".
PID_re = re.compile(r'.*[_/](HCD.*?)[_/].*')
# Matches the exact line "Running parcellated analysis"; group 1 is "parcellated".
parcellated_re = re.compile(r'^Running (parcellated) analysis$')
# Paths (log_dir joined with the file name) of every matching log file.
logfiles = [os.path.join(log_dir, f) for f in os.listdir(log_dir) if job_re.match(f)]

In [3]:
# Number of log files whose name matched job_re.
len(logfiles)

7260

In [4]:
# Substrings that flag a log line as an error, keyed by a short label.
# The labels are used as DataFrame column names and the order of the values
# determines the order of the counts returned by error_types, so keep both
# in sync.
error_strings = {
    "F-test": "F-test 2 isn't valid",
    "dtseries": "ERROR: failed to open file",
    "csv": "ls: cannot access",
}
def error_info(error_strings, f):
    """Scan log lines for known error messages and run metadata.

    Parameters
    ----------
    error_strings : dict or list
        Substrings that mark a line as an error. When a dict is given only
        its values are used.
    f : iterable of str
        Lines to scan, e.g. an open log file.

    Returns
    -------
    tuple
        ``(errors, scan, parcellated, PID)``:

        * ``errors`` -- list of the lines containing at least one error
          substring, in the order encountered.
        * ``scan`` -- group 1 of the last line matching ``scan_re``.
        * ``parcellated`` -- group 1 of the last line matching
          ``parcellated_re``.
        * ``PID`` -- group 1 of the first line matching ``PID_re``.

        The last three are ``''`` when no line matched.

    Raises
    ------
    TypeError
        If ``error_strings`` is neither a dict nor a list.
    """
    # isinstance (rather than an exact type() comparison) also accepts
    # dict/list subclasses such as OrderedDict.
    if not isinstance(error_strings, (dict, list)):
        raise TypeError("error_strings is not a list")
    if isinstance(error_strings, dict):
        error_strings = list(error_strings.values())
    errors = []
    scan = ''
    PID = ''
    parcellated = ''
    for line in f:
        # Keep only the first PID found. Emptiness is tested by truthiness;
        # `len(PID) is 0` compared identity against an int literal.
        if not PID:
            PID_match = PID_re.match(line)
            if PID_match:
                PID = PID_match.group(1)
        # Test membership directly instead of relying on the truthiness of
        # the matched substrings.
        if any(e in line for e in error_strings):
            errors.append(line)
        # scan and parcellated are overwritten on every match, so the last
        # matching line wins.
        m = scan_re.match(line)
        if m:
            scan = m.group(1)
        parcellated_match = parcellated_re.match(line)
        if parcellated_match:
            parcellated = parcellated_match.group(1)
    return errors, scan, parcellated, PID
def error_types(error_strings, errors):
    """Count how many error lines contain each error substring.

    Parameters
    ----------
    error_strings : dict or list
        Substrings to look for. When a dict is given only its values are
        used, in iteration order.
    errors : iterable of str
        Error lines to classify.

    Returns
    -------
    list of int
        One counter per entry of ``error_strings``, in the same order. A line
        containing several substrings is counted once for each of them.

    Raises
    ------
    TypeError
        If ``error_strings`` is neither a dict nor a list.
    """
    if not isinstance(error_strings, (dict, list)):
        raise TypeError("error_strings is not a list")
    if isinstance(error_strings, dict):
        error_strings = list(error_strings.values())
    # Size the counters from the input instead of assuming exactly three
    # patterns, so any number of error strings works.
    counts = [0] * len(error_strings)
    for error in errors:
        for i, e in enumerate(error_strings):
            if e in error:
                counts[i] += 1
    return counts

In [5]:
# Build one summary row per log file:
# [path, error count, error lines, <one count per error string>, scan, parcellated, PID]
errors = []
for logfile in logfiles:
    with open(logfile) as f:
        # error_info iterates over the open handle line by line.
        these_errors, scan_with_error, parcellated, PID_with_error = error_info(error_strings, f)
    these_error_types = error_types(error_strings, these_errors)
    row = [logfile, len(these_errors), these_errors]
    row += these_error_types
    row += [scan_with_error, parcellated, PID_with_error]
    errors.append(row)

In [6]:
# Column layout mirrors the rows built in the scanning loop: the per-type
# count columns are named after the keys of error_strings.
column_names = ['logfile', 'n_errors', 'errors'] + list(error_strings.keys()) + ['scan', 'parcellated', 'PID']
df = pd.DataFrame(errors, columns=column_names)
# Replace the raw line count with the number of distinct error lines.
df['n_errors'] = [len(np.unique(err)) for err in df['errors']]
# Discard log files in which no error line was found.
df = df.drop(df[df['n_errors'] == 0].index)

In [7]:
# Persist the filtered error summary as CSV in the current working directory.
df.to_csv('l1_errors.csv')

In [8]:
# Error lists of every row belonging to one participant of interest.
errs = df.loc[df['PID'] == 'HCD2996590', 'errors']

In [9]:
# First error line of the first selected row (positional access via .array).
errs.array[0][0]

"ls: cannot access '/ncf/hcp/data/HCD-tfMRI-MultiRunFix/HCD2996590_V1_MR/MNINonLinear/Results/tfMRI_CARIT_PA/tfMRI_CARIT_PREPOT_hp200_s4_level1_hp0_clean_ColeAnticevic.feat/GrayordinatesStats/cope*.dtseries.nii': No such file or directory\n"

In [10]:
# Display the error summary table (one row per log file with at least one error).
df

Unnamed: 0,logfile,n_errors,errors,F-test,dtseries,csv,scan,parcellated,PID
90,../log/hcp1st-2536947_85.out,1,[ls: cannot access '/ncf/hcp/data/intradb_mult...,0,0,1,,,HCD1324839
324,../log/hcp1st-2536947_108.out,1,[ls: cannot access '/ncf/hcp/data/intradb_mult...,0,0,1,,,HCD1569970
326,../log/hcp1st-2536947_110.out,1,[ls: cannot access '/ncf/hcp/data/intradb_mult...,0,0,1,,,HCD1646760
334,../log/hcp1st-2536947_118.out,1,[ls: cannot access '/ncf/hcp/data/intradb_mult...,0,0,1,,,HCD1742251
382,../log/hcp1st-2536947_151.out,1,[ls: cannot access '/ncf/hcp/data/intradb_mult...,0,0,1,,,HCD2111625
...,...,...,...,...,...,...,...,...,...
7255,../log/hcp1st-2546846_1424.out,2,[ls: cannot access '/ncf/hcp/data/HCD-tfMRI-Mu...,0,0,2,tfMRI_CARIT_AP,parcellated,HCD2990073
7256,../log/hcp1st-2546846_1425.out,2,[ls: cannot access '/ncf/hcp/data/HCD-tfMRI-Mu...,0,0,2,tfMRI_CARIT_AP,parcellated,HCD2992784
7257,../log/hcp1st-2546846_1426.out,2,[ls: cannot access '/ncf/hcp/data/HCD-tfMRI-Mu...,0,0,2,tfMRI_CARIT_AP,parcellated,HCD2993079
7258,../log/hcp1st-2546846_1427.out,2,[ls: cannot access '/ncf/hcp/data/HCD-tfMRI-Mu...,0,0,2,tfMRI_CARIT_AP,parcellated,HCD2995083
