In [None]:
import configparser
import glob
import gzip
import os
import re
import shutil

import matplotlib
import matplotlib.pyplot as plt
import pandas
%matplotlib inline

# Global matplotlib font settings for all figures in this notebook.
# 'normal' is not a font family matplotlib knows (it is a font *style*/*weight*
# value), which makes every text render emit a "font family not found" warning
# before falling back to the default; use the generic 'sans-serif' family instead.
font = {'family' : 'sans-serif',
        'size'   : 18}

matplotlib.rc('font', **font)

# show all columns in outputs
pandas.set_option('display.max_columns', None)

In [None]:
# Work directory of the processing task whose job results are analysed below;
# job folders are looked up in its "output" sub-directory.
# The commented-out line is an alternative task directory that can be swapped in.
#gc_workdir = "../hlt_raw_processing_evp_scripts_phase2_cosmics_reco_monitor_novxd/work.hlt_raw_processing/"
gc_workdir = "../hlt_raw_processing_expressreco_phase2_cosmics_reco/work.hlt_raw_processing/"

In [None]:
def read_app_log(fname, fname_uncomp):
    """Return the content of an application log file as one string.

    If the uncompressed log ``fname_uncomp`` does not exist yet but the gzip
    compressed log ``fname`` does, the archive is unpacked to ``fname_uncomp``
    first, so later calls can read the uncompressed copy directly.

    Args:
        fname: path of the gzip-compressed log file (may not exist).
        fname_uncomp: path of the uncompressed log file (created on demand).

    Returns:
        The full text of the log, or an empty string if neither file exists.

    Raises:
        OSError: if the compressed file exists but cannot be unpacked.
    """
    # extract app log files, if present
    if not os.path.isfile(fname_uncomp) and os.path.isfile(fname):
        print("Uncompressing {}".format(fname))
        # Unpack into a temporary file and rename it only after a complete copy.
        # Otherwise an interrupted or failed decompression would leave a
        # truncated log behind that every later call would pick up as valid.
        tmp_name = fname_uncomp + ".partial"
        try:
            with gzip.open(fname, 'rb') as f_in, open(tmp_name, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            os.replace(tmp_name, fname_uncomp)
        finally:
            if os.path.exists(tmp_name):
                os.remove(tmp_name)

    if not os.path.isfile(fname_uncomp):
        return ""

    with open(fname_uncomp, 'r') as f:
        return f.read()

def gc_parse_jobresults(folder):
    """Collect the results of all jobs found below ``folder`` into a DataFrame.

    Every ``output/job_*/`` directory inside ``folder`` contributes one row.
    Per job the following files are read:

    * ``job.info``: ``KEY=VALUE`` lines; ``JOBID`` and ``EXITCODE`` are required.
    * ``gc.stdout`` (optional): the value of the ``export FILE_NAMES`` line is
      stored as the job's input file name.
    * ``job.stdout[.gz]`` / ``job.stderr[.gz]`` (optional): application logs,
      read via ``read_app_log``.

    Args:
        folder: work directory that contains the ``output`` folder.

    Returns:
        pandas.DataFrame with the columns ``exitcode``, ``jobid``,
        ``input_file_name``, ``app_stderr_lines``, ``app_stderr``,
        ``app_stdout_lines`` and ``app_stdout``. ``input_file_name`` is None
        for jobs without a ``FILE_NAMES`` entry.

    Raises:
        OSError: if a job folder has no readable ``job.info``.
        KeyError / ValueError: if ``JOBID`` or ``EXITCODE`` is missing or not
            an integer.
    """
    # Use the folder that was passed in (not the notebook-level global) and
    # sort the result, because glob returns entries in arbitrary order.
    job_folders = sorted(glob.glob(os.path.join(folder, "output") + "/job_*/"))

    # Collect plain dicts and build the frame once at the end; appending to a
    # DataFrame row by row is quadratic and DataFrame.append no longer exists
    # in pandas 2.x.
    records = []

    for job_folder in job_folders:
        job_info_file = os.path.join(job_folder, "job.info")
        gc_stdout_file = os.path.join(job_folder, "gc.stdout")
        app_stdout_file_uncomp = os.path.join(job_folder, "job.stdout")
        app_stdout_file = app_stdout_file_uncomp + ".gz"
        app_stderr_file_uncomp = os.path.join(job_folder, "job.stderr")
        app_stderr_file = app_stderr_file_uncomp + ".gz"

        # job.info: one KEY=VALUE pair per line
        myvars = {}
        with open(job_info_file) as myfile:
            for line in myfile:
                name, var = line.partition("=")[::2]
                myvars[name.strip()] = var.strip()

        job_id = int(myvars["JOBID"])
        job_exitcode = int(myvars["EXITCODE"])

        input_file_name = None
        # read GC output file
        if os.path.isfile(gc_stdout_file):
            with open(gc_stdout_file) as myfile:
                for line in myfile:
                    if line.startswith("export FILE_NAMES"):
                        # NOTE: the line read from the file still carries its
                        # line ending, so strip('"') only removes a leading
                        # quote. This is kept as is because the cell listing
                        # the file names per fail reason removes the remaining
                        # quotes itself and relies on the trailing newline.
                        input_file_name = line.partition("=")[2].strip('"')

        app_stderr = read_app_log(app_stderr_file, app_stderr_file_uncomp)
        app_stdout = read_app_log(app_stdout_file, app_stdout_file_uncomp)

        records.append({"exitcode" : job_exitcode,
                        "jobid": job_id,
                        "input_file_name": input_file_name,
                        "app_stderr_lines" : app_stderr.count("\n"),
                        "app_stderr" : app_stderr,
                        "app_stdout_lines" : app_stdout.count("\n"),
                        "app_stdout" : app_stdout})

    # Passing the column list keeps the expected columns present even when no
    # job folder was found.
    return pandas.DataFrame(records, columns=["exitcode",
                                              "jobid",
                                              "input_file_name",
                                              "app_stderr_lines",
                                              "app_stderr",
                                              "app_stdout_lines",
                                              "app_stdout"])

In [None]:
# Parse all job folders of the configured work directory into one DataFrame
# (one row per job, including the full application stdout/stderr text).
df = gc_parse_jobresults(gc_workdir)

In [None]:
# Derive numeric "experiment" and "run" columns from the input file names.
# For each column: pull the tagged number (e0... / r0...) out of the name,
# drop the tag letter and convert the remaining digits to float
# (float, so that rows without a match can hold NaN).
for column, pattern, tag_letter in (
        ("experiment", '(?P<experiment>e0[0-9]*)', "e"),
        ("run", '(?P<run>r0[0-9]*)', "r")):
    tagged = df.input_file_name.str.extract(pattern, expand=True)[column]
    df[column] = tagged.str.replace(tag_letter, "").astype('float')

In [None]:
# Distribution of the run numbers over all parsed jobs.
df.run.hist(bins=50)

In [None]:
# Distribution of the job exit codes over all parsed jobs.
df.exitcode.hist(bins=50)

In [None]:
# Number of jobs per exit code.
df.exitcode.value_counts()
# Exit code reference:
# https://ekptrac.physik.uni-karlsruhe.de/trac/grid-control/wiki/ErrorCodes
# 107 -> killed by batch system
# 121 -> HUGE log file (observation noted for the jobs seen here)

In [None]:
# Distribution of the number of lines in the application stderr per job.
df.app_stderr_lines.hist()

In [None]:
# Distribution of the number of lines in the application stdout per job.
df.app_stdout_lines.hist(bins=50)

In [None]:
# Jobs treated as failed: every exit code except 0 and 107
# (107 = killed by batch system, see the exit code notes above).
# .copy() gives an independent frame, so adding the bookkeeping columns below
# neither triggers SettingWithCopyWarning nor depends on view/copy semantics.
df_failed = df[~df.exitcode.isin([0, 107])].copy()
df_failed["explained"] = False      # set to True once a known error signature matches
df_failed["fail_reason"] = "Unknown"  # name of the matching error class

In [None]:
# Known failure signatures:
#   class name -> [text searched for in the job logs, failure is basf2's fault]
error_class = {"trackfit_consistency" : ["fi->checkConsistency()", True],
               "time_extract_crash" : ["extractTrackTimeFrom", True],
                "cant_open_input" : ["Couldn't open input file", False]}

for (name, val) in error_class.items():
    check_string, basf2_fault = val

    # Series.str.count() interprets its argument as a regular expression.
    # Escape the text so that characters like "(", ")" or "." are matched
    # literally instead of acting as regex syntax.
    pattern = re.escape(check_string)

    # occurrences of the signature per failed job, in stderr and stdout
    hits_err = df_failed.app_stderr.str.count(pattern)
    hits_out = df_failed.app_stdout.str.count(pattern)
    this_count_err = hits_err.sum()
    this_count_out = hits_out.sum()

    # Jobs showing the signature in either log. The comparisons are
    # parenthesised explicitly: "|" binds tighter than ">" in Python.
    matched = (hits_err > 0) | (hits_out > 0)

    df_failed["explained"] = df_failed["explained"] | matched
    # store the failure reason (a later class overwrites an earlier one if
    # several signatures match the same job)
    df_failed.loc[matched, "fail_reason"] = name

    print ("{} occurs {} times in stderr and {} in stdout. This is basf2's fault: {}".format(name, this_count_err, this_count_out, basf2_fault))

print("NOTE: there might be double counting, if the string which is searched for is two times in the strack trace.")

In [None]:
# List the input files of the failed jobs, grouped by failure reason.
for fr in df_failed.fail_reason.unique():
    print ("## File names for fail reason {} ##".format(fr))
    names = df_failed.loc[df_failed.fail_reason == fr, "input_file_name"]
    # Jobs without a FILE_NAMES entry have no input file name (None); calling
    # .replace() on them would raise, so they are skipped and counted instead.
    for fname in names.dropna():
        # end='' because the stored name still carries its own line ending
        print(fname.replace('"',""), end='', flush=True)
    n_missing = int(names.isnull().sum())
    if n_missing:
        print("({} job(s) without a recorded input file name)".format(n_missing))

In [None]:
# Run number distribution of all jobs compared to the failed ones.
success_range = (0, 360)
success_bins = 80

# df_failed_basf2fault was used here without ever being defined, so this cell
# failed with a NameError on a fresh kernel.
# NOTE(review): it is assumed to mean the failed jobs whose failure class is
# flagged as basf2's fault in error_class - confirm this is the intended set.
basf2_fault_reasons = [name for name, val in error_class.items() if val[1]]
df_failed_basf2fault = df_failed[df_failed.fail_reason.isin(basf2_fault_reasons)]

# Draw both histograms on one explicit Axes instead of relying on the
# implicit "current axes" of pyplot.
fig, ax = plt.subplots(figsize=(10,10))
df.run.hist(ax=ax, label="All completed input files", range=success_range, bins=success_bins)
df_failed_basf2fault.run.hist(ax=ax, label="Failed Reconstruction", range=success_range, bins=success_bins)
ax.legend()
ax.set_title("HLT Reco NoVXD Release 1.1 Experiment 2")
ax.set_xlabel("Run Number")