In [2]:
import pathlib
import datetime
import sys
import re
import collections

import IPython.display
print_md = IPython.display.Markdown

import pandas as pd
import numpy as np
import bokeh
import bokeh.plotting as bkh
import bokeh.models as bkhmodels
bkh.output_notebook()

# Run1.2i Initial Processing

A look at the logs from coadd and multiband run on Edison (coadd, multiband) and KNL (calexp) in October, 2018

The coadd jobs ran on 16 cores.  Each coadd job ran on one tract and one filter.

Multiband jobs were run on 20 cores.  Each multiband job, ran on 4 patches of one tract, across all filters.

In [3]:
# Location at NERSC

df = pd.DataFrame()

#df = df.append(pd.read_json('/global/cscratch1/sd/heatherk/Run1.2i/processinglogs/edison/coadd/orglogs/coadd.json', lines=True))
#df = df.append(pd.read_json('/global/cscratch1/sd/heatherk/Run1.2i/processinglogs/edison/multi/logs/multi.json', lines=True))
df = df.append(pd.read_json('./coadd.json', lines=True))
df = df.append(pd.read_json('./multi.json', lines=True))
df = df.append(pd.read_json('./calexp.json', lines=True))

    
rows = df.shape[0]
if rows == 0:
    raise Exception("no job records could be loaded")
    
print_md(f'Loaded **{rows}** job accounting records')

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  sort=sort)


Loaded **2148** job accounting records

In [4]:
num_rows, num_columns = df.shape
columns = sorted(list(df))
print_md(f'Dataframe size: **{num_rows} rows, {num_columns} columns:**\n\n `{", ".join(columns)}`')

Dataframe size: **2148 rows, 14 columns:**

 `band, cores, elapsed_time, elapsed_time_seconds, exit_status, job, max_mem_gbytes, max_mem_kbytes, patch, run, system_time_sec, tract, user_time_sec, visit`

# Looking at min, median, and max elapsed times and memory

## Total elapsed time in hours for calexp + coadd + multiband

In [5]:
(df.loc[(df['exit_status']==0)&(df['job']=='calexp'),"elapsed_time_seconds"].sum() + df.loc[(df['exit_status']==0)&(df['job']=='coadd'),"elapsed_time_seconds"].sum()+df.loc[(df['exit_status']==0)&(df['job']=='multiband'),"elapsed_time_seconds"].min())/3600

5516.9183333333331

## Calexp elapsed time in hours

In [6]:
df.loc[(df['exit_status']==0)&(df['job']=='calexp'),"elapsed_time_seconds"].min()/3600

0.035277777777777776

In [7]:
df.loc[(df['exit_status']==0)&(df['job']=='calexp'),"elapsed_time_seconds"].median()/3600

2.117777777777778

In [8]:
df.loc[(df['exit_status']==0)&(df['job']=='calexp'),"elapsed_time_seconds"].max()/3600

14.396944444444445

In [9]:
df.loc[(df['exit_status']==0)&(df['job']=='calexp'),"elapsed_time_seconds"].sum()/3600

5220.745277777778

## calexp memory use in gigabytes

In [10]:
df.loc[(df['exit_status']==0)&(df['job']=='calexp'),"max_mem_gbytes"].min()

0.42854799999999998

In [11]:
df.loc[(df['exit_status']==0)&(df['job']=='calexp'),"max_mem_gbytes"].median()

0.45078

In [12]:
df.loc[(df['exit_status']==0)&(df['job']=='calexp'),"max_mem_gbytes"].max()

1.7376960000000001

## Coadd elapsed time in hours

In [13]:
df.loc[(df['exit_status']==0)&(df['job']=='coadd'),"elapsed_time_seconds"].min()/3600

0.12722222222222221

In [14]:
df.loc[(df['exit_status']==0)&(df['job']=='coadd'),"elapsed_time_seconds"].median()/3600

2.144722222222222

In [15]:
df.loc[(df['exit_status']==0)&(df['job']=='coadd'),"elapsed_time_seconds"].max()/3600

8.8755555555555556

In [16]:
df.loc[(df['exit_status']==0)&(df['job']=='coadd'),"elapsed_time_seconds"].sum()/3600

296.14916666666664

## coadd memory use in gigabytes

In [17]:
df.loc[(df['exit_status']==0)&(df['job']=='coadd'),"max_mem_gbytes"].min()

3.411292

In [18]:
df.loc[(df['exit_status']==0)&(df['job']=='coadd'),"max_mem_gbytes"].median()

4.05598

In [19]:
df.loc[(df['exit_status']==0)&(df['job']=='coadd'),"max_mem_gbytes"].max()

6.1644480000000001

## Multiband elapsed time in hours

In [20]:
df.loc[(df['exit_status']==0)&(df['job']=='multiband'),"elapsed_time_seconds"].median()/3600

0.4865277777777778

In [21]:
df.loc[(df['exit_status']==0)&(df['job']=='multiband'),"elapsed_time_seconds"].max()/3600

2.013611111111111

In [22]:
df.loc[(df['exit_status']==0)&(df['job']=='multiband'),"elapsed_time_seconds"].min()/3600

0.02388888888888889

In [23]:
df.loc[(df['exit_status']==0)&(df['job']=='multiband'),"elapsed_time_seconds"].sum()/3600

175.85166666666666

## Multiband memory use in gigabytes

In [24]:
df.loc[(df['exit_status']==0)&(df['job']=='multiband'),"max_mem_gbytes"].min()

0.52136799999999994

In [25]:
df.loc[(df['exit_status']==0)&(df['job']=='multiband'),"max_mem_gbytes"].median()

1.181486

In [26]:
df.loc[(df['job']=='multiband'),"max_mem_gbytes"].max()

2.2215639999999999

In [27]:
def format_pct_hours(v):
    """Format v to be displayed as a percentile, in hours. Argument v is expected to be seconds.
    """
    return "<1" if v < 3600.0 else f'{int(v/3600.0)}'

def format_pct_min(v):
    """Format v to be displayed as a percentile, in minutes. Argument v is expected to be seconds.
    """
    return "<1" if v < 60.0 else f'{int(v/60.0)}'

def style_figure(fig, x_font_style='bold', x_label_standoff=15, y_font_style='bold', y_label_standoff=15):
    """Style some attributes of a Bokeh figure
    """
    # Figure size (in screen units)
    fig.plot_width = 800
    fig.plot_height = 600
    
    # Autohide figure toolbar
    #fig.toolbar.autohide = True

    # Figure background fill color and alpha
    fig.background_fill_color = 'whitesmoke'
    fig.background_fill_alpha = 0.8
    
    # Axis properties
    fig.xaxis.axis_label_text_font_style = x_font_style
    fig.xaxis.axis_label_standoff =  x_label_standoff
    fig.yaxis.axis_label_text_font_style = y_font_style
    fig.yaxis.axis_label_standoff =  y_label_standoff

In [28]:
def format_failed_pct(v):
    return '{:.0%}'.format(v) if v > 0.0 else 'n/a'

def format_job_count(count):
    return '{:,}'.format(count)

d = {}
bands = df.groupby(['band']).size().sort_values(ascending=False)
for b in bands.index:
    job_count = bands[b]
    succeeded = df.loc[(df['band'] == b) & (df['exit_status'] == 0)].shape[0]
    d[b] = {
        'job_count': job_count,
        'succeeded': succeeded/job_count if job_count > 0 else 0,
        'failed':    (job_count - succeeded)/job_count if job_count > 0 else 0,
    }

table = """
### Percentage of failed jobs (`exit_status != 0`)

| Band | Job Count | Succeeded | Failed |
| ----: | --------: | --------: | -----: |
"""

for k, v in d.items():
    table += f'| **{k}** | {format_job_count(v["job_count"])} | {format_failed_pct(v["succeeded"])} | {format_failed_pct(v["failed"])} |\n'

table += """
*The table above shows the fraction of succeeded and failed jobs per band. A job is considered failed if its `exit_status` is not zero.*
*g^i^r^u^y^z indicates multiband, while the single band jobs were coadds.*
* where no band is indicated, it is a calexp job.*
"""    
print_md(table)


### Percentage of failed jobs (`exit_status != 0`)

| Band | Job Count | Succeeded | Failed |
| ----: | --------: | --------: | -----: |
| **NA** | 1,726 | 99% | 1% |
| **g^i^r^u^y^z** | 320 | 100% | n/a |
| **y** | 21 | 100% | n/a |
| **r** | 21 | 100% | n/a |
| **z** | 20 | 100% | n/a |
| **u** | 20 | 95% | 5% |
| **i** | 20 | 100% | n/a |

*The table above shows the fraction of succeeded and failed jobs per band. A job is considered failed if its `exit_status` is not zero.*
*g^i^r^u^y^z indicates multiband, while the single band jobs were coadds.*
* where no band is indicated, it is a calexp job.*


In [29]:
fig = bkh.figure(
    title = 'DISTRIBUTION OF JOB EXECUTION WALLCLOCK TIME CALEXP ON KNL',
    x_axis_label = 'execution wallclock time (hours)',
    y_axis_label = 'job count'
)


style_figure(fig)
color, alpha = 'darkseagreen', 0.7
hist, edges = np.histogram(df.loc[(df['exit_status']==0)&(df['job']=='calexp'),'elapsed_time_seconds']/3600, density=False, bins=40)
fig.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], alpha=alpha, fill_color=color, line_color=color)
fig.yaxis.formatter = bkhmodels.formatters.NumeralTickFormatter(format="0,0")
fig.add_tools(bkhmodels.HoverTool(
    tooltips = [
        ('jobs', '@top{0,000}'),
        ('low',  '@left{0.0} hours'),
        ('high', '@right{0.0} hours'),
    ],
    mode = 'mouse',
))
bkh.show(fig)

In [30]:
fig = bkh.figure(
    title = 'DISTRIBUTION OF JOB EXECUTION WALLCLOCK TIME COADDS ON EDISON',
    x_axis_label = 'execution wallclock time (hours)',
    y_axis_label = 'job count'
)


style_figure(fig)
color, alpha = 'darkseagreen', 0.7
df.loc[(df['exit_status']==0)&(df['job']=='coadd'),"elapsed_time_seconds"]
hist, edges = np.histogram(df.loc[(df['exit_status']==0)&(df['job']=='coadd'),'elapsed_time_seconds']/3600, density=False, bins=40)
fig.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], alpha=alpha, fill_color=color, line_color=color)
fig.yaxis.formatter = bkhmodels.formatters.NumeralTickFormatter(format="0,0")
fig.add_tools(bkhmodels.HoverTool(
    tooltips = [
        ('jobs', '@top{0,000}'),
        ('low',  '@left{0.0} hours'),
        ('high', '@right{0.0} hours'),
    ],
    mode = 'mouse',
))
bkh.show(fig)

In [31]:
fig = bkh.figure(
    title = 'DISTRIBUTION OF JOB EXECUTION WALLCLOCK TIME MULTIBAND ON EDISON',
    x_axis_label = 'execution wallclock time (hours)',
    y_axis_label = 'job count'
)


style_figure(fig)
color, alpha = 'darkseagreen', 0.7
hist, edges = np.histogram(df.loc[(df['exit_status']==0)&(df['job']=='multiband'),'elapsed_time_seconds']/3600, density=False, bins=40)
fig.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], alpha=alpha, fill_color=color, line_color=color)
fig.yaxis.formatter = bkhmodels.formatters.NumeralTickFormatter(format="0,0")
fig.add_tools(bkhmodels.HoverTool(
    tooltips = [
        ('jobs', '@top{0,000}'),
        ('low',  '@left{0.0} hours'),
        ('high', '@right{0.0} hours'),
    ],
    mode = 'mouse',
))
bkh.show(fig)

In [32]:
successful_jobs = df.loc[(df['exit_status'] == 0)&(df['job']!='calexp')]

# Create the figure
fig = bkh.figure(
    title = 'DISTRIBUTION OF MEMORY UTILIZATION OF COADD AND MULTIBAND',
    x_axis_label = 'gigabyte',
    y_axis_label = 'job count'
)
style_figure(fig)

# Build a histogram per band
colors = ('dodgerblue', 'darkmagenta', 'mediumseagreen', 'orange', 'crimson', 'sienna')
bands = successful_jobs.groupby(['band']).size().sort_values(ascending=False)
for b, color in zip(bands.index, colors):
    # Select the jobs in this band which wallclock time was higher than the 5th percentile,
    # to exclude jobs which may have terminated early
    job_set = successful_jobs.loc[successful_jobs['band'] == b]
    wallclock_threshold = np.percentile(job_set['elapsed_time_seconds'], [5])[0]
    memory = job_set.loc[job_set['elapsed_time_seconds'] >= wallclock_threshold, 'max_mem_gbytes']
    hist, edges = np.histogram(memory, density=False, bins=20)
    fig.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], alpha= 0.5, fill_color=color, line_color=color, legend=b)

# Finalize the figure
fig.legend.click_policy = 'hide'
fig.yaxis.formatter = bkhmodels.formatters.NumeralTickFormatter(format="0,0")
fig.add_tools(bkhmodels.HoverTool(
    tooltips = [
        ('jobs', '@top{0,000}'),
        ('low',  '@left{0.0} GB'),
        ('high', '@right{0.0} GB'),
    ],
    mode = 'mouse',
))
bkh.show(fig)
print_md(f"""*The figure above shows the distribution of RAM the jobs (in gigabyte). Click on the legend to hide / show information.*""")

*The figure above shows the distribution of RAM the jobs (in gigabyte). Click on the legend to hide / show information.*

In [None]:
def pandas_df_to_markdown_table(df):
    from IPython.display import Markdown, display
    fmt = ['---' for i in range(len(df.columns))]
    df_fmt = pd.DataFrame([fmt], columns=df.columns)
    df_formatted = pd.concat([df_fmt, df])
    display(Markdown(df_formatted.to_csv(sep="|", index=False)))

pandas_df_to_markdown_table(df)