In [1]:
import pathlib
import datetime
import sys
import re
import collections
import json

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import bokeh
import bokeh.plotting as bkh
import bokeh.models as bkhmodels
from bokeh.plotting import figure
from bokeh.io import output_notebook, show, output_file
from bokeh.models import ColumnDataSource, HoverTool, Panel
bkh.output_notebook()

# Metacal Log JSON data

| columns             | Description                                                |
|---------------------|------------------------------------------------------------|
| tract               |                                                            |
| patch               |                                                            |
| cputime             | CPU time returned by SRS                                   |
| cputimeseconds      | CPU time returned by SRS in seconds                        |
| deblendedsources    | Number of Deblended Sources                                |
| metcalmax_success   | True if processDeblendedCoaddsMetacalMax ended successfully| 
| metacalmax_time     | Running time (minutes) of processDeblendedCoaddsMetacalMax |
| metacalmax_timeper  | Running time (seconds) per source                          |
| ngmixmax_success    | True if processDeblendedCoaddsNGMixMax ended successfully  |
| ngmixmax_time       | Running time (minutes) of processDeblendedCoaddsNGMixMax   |
| ngmixmax_timeper    | Running time (seconds) per source                          |
| maxfev              | Set to True if calls to functin has reached maxfev logged  |
| maxfevstr           | if maxfev==True, stores the full log message               |
| slots               | Number of cores used for this job, expect it is alwasy 1   |
| skiptract           | Set to True if Skipping tract message was logged           |
| skiptracttstr       | if skiptract==True, stores the full log message            |

In [3]:
# Read metacal log data
df = pd.DataFrame()
#df = pd.read_json('/global/cfs/cdirs/lsst/groups/CO/heatherk/Run2.2i/metacal/metacalEval/data/metacal_logs.json', convert_dates=False)
df = pd.read_json('../data/metacal_logs.json', convert_dates=False)

In [4]:
# Read coadd ?,?_nImage.fits data
df_coadds = pd.DataFrame()
df_coadds.append(pd.read_json('../data/g_band.json'))
df_coadds.append(pd.read_json('../data/i_band.json'))
df_coadds.append(pd.read_json('../data/r_band.json'))
df_coadds.append(pd.read_json('../data/i_band.json'))

Unnamed: 0,file,tract,patch,band,min,max,mean,median
0,/sps/lssttest/dataproducts/desc/DC2/Run2.2i/v1...,3451,64,i,0,117,104.800922,105
1,/sps/lssttest/dataproducts/desc/DC2/Run2.2i/v1...,3451,23,i,0,116,103.145989,103
2,/sps/lssttest/dataproducts/desc/DC2/Run2.2i/v1...,3451,66,i,0,121,107.714958,108
3,/sps/lssttest/dataproducts/desc/DC2/Run2.2i/v1...,3451,36,i,0,110,98.205193,98
4,/sps/lssttest/dataproducts/desc/DC2/Run2.2i/v1...,3451,03,i,0,117,103.617903,104
...,...,...,...,...,...,...,...,...
6997,/sps/lssttest/dataproducts/desc/DC2/Run2.2i/v1...,4858,52,i,0,127,110.580529,111
6998,/sps/lssttest/dataproducts/desc/DC2/Run2.2i/v1...,4858,22,i,0,110,96.151189,96
6999,/sps/lssttest/dataproducts/desc/DC2/Run2.2i/v1...,4858,44,i,0,119,103.577022,104
7000,/sps/lssttest/dataproducts/desc/DC2/Run2.2i/v1...,4858,21,i,0,114,101.456103,102


In [5]:
#with open('/global/cfs/cdirs/lsst/groups/CO/heatherk/Run2.2i/metacal/metacalEval/data/metacal_logs.json') as f:
#  data = json.load(f)
#  print(data)

In [6]:
print(df)
df['cpuminutes'] = df.apply(lambda row: row.cpuseconds/60.0, axis = 1) 
columns = sorted(list(df))
print(columns)

                                                logfile  \
0     /global/cfs/cdirs/lsst/groups/CO/heatherk/Run2...   
1     /global/cfs/cdirs/lsst/groups/CO/heatherk/Run2...   
2     /global/cfs/cdirs/lsst/groups/CO/heatherk/Run2...   
3     /global/cfs/cdirs/lsst/groups/CO/heatherk/Run2...   
4     /global/cfs/cdirs/lsst/groups/CO/heatherk/Run2...   
...                                                 ...   
3501  /global/cfs/cdirs/lsst/groups/CO/heatherk/Run2...   
3502  /global/cfs/cdirs/lsst/groups/CO/heatherk/Run2...   
3503  /global/cfs/cdirs/lsst/groups/CO/heatherk/Run2...   
3504  /global/cfs/cdirs/lsst/groups/CO/heatherk/Run2...   
3505  /global/cfs/cdirs/lsst/groups/CO/heatherk/Run2...   

                               date  deblendedsources  metacalmax_success  \
0     Wed May 27 12:40:17 CEST 2020            1248.0                True   
1     Wed May 27 12:40:06 CEST 2020            1010.0                True   
2     Thu Apr 16 13:19:37 CEST 2020             962.0       

In [7]:
print(df.shape)

(3506, 32)


# CPU Time vs Number of Deblended Sources

In [8]:
df.loc[(df['metacalmax_success']==True)&(df['ngmixmax_success']==True),"deblendedsources"].max()

26383.0

In [9]:
# Focus on jobs where both processDeblendedCoaddsMetacalMax and processDeblendedCoaddsNGMixMax ran to completion successfully

successful_jobs = df.loc[(df['metacalmax_success'] == True)&(df['ngmixmax_success']==True)]

successful_jobs.loc[(successful_jobs['metacalmax_success']==True)&(successful_jobs['ngmixmax_success']==True),"metacalmax_time"].max()


824.826

In [10]:
successful_jobs.loc[(successful_jobs['metacalmax_success']==True)&(successful_jobs['ngmixmax_success']==True),"metacalmax_time"].min()


0.000828858

In [11]:
successful_jobs.loc[(successful_jobs['metacalmax_success']==True)&(successful_jobs['ngmixmax_success']==True),"metacalmax_time"].median()


2.34502

In [12]:
# shamelessly "borrowed" 

def hist_hover(dataframe, column, colors=["SteelBlue", "Tan"], bins=30, log_scale=False, show_plot=True):

    # build histogram data with Numpy
    hist, edges = np.histogram(dataframe[column], bins = bins)
    hist_df = pd.DataFrame({column: hist,
                             "left": edges[:-1],
                             "right": edges[1:]})
    hist_df["interval"] = ["%d to %d" % (left, right) for left, 
                           right in zip(hist_df["left"], hist_df["right"])]

    # bokeh histogram with hover tool
    if log_scale == True:
        hist_df["log"] = np.log(hist_df[column])
        src = ColumnDataSource(hist_df)
        plot = figure(plot_height = 600, plot_width = 600,
              title = "Histogram of {}".format(column.capitalize()),
              x_axis_label = column.capitalize(),
              y_axis_label = "Log Count")    
        plot.quad(bottom = 0, top = "log",left = "left", 
            right = "right", source = src, fill_color = colors[0], 
            line_color = "black", fill_alpha = 0.7,
            hover_fill_alpha = 1.0, hover_fill_color = colors[1])
    else:
        src = bkh.ColumnDataSource(hist_df)
        plot = bkh.figure(plot_height = 600, plot_width = 600,
              title = "Histogram of {}".format(column.capitalize()),
              x_axis_label = column.capitalize(),
              y_axis_label = "Count")    
        plot.quad(bottom = 0, top = column,left = "left", 
            right = "right", source = src, fill_color = colors[0], 
            line_color = "black", fill_alpha = 0.7,
            hover_fill_alpha = 1.0, hover_fill_color = colors[1])
    # hover tool
    hover = bkhmodels.HoverTool(tooltips = [('Interval', '@interval'),
                              ('Count', str("@" + column))])
    plot.add_tools(hover)
    # output
    if show_plot == True:
        bkh.show(plot)
    else:
        return plot



In [13]:
# There were some jobs where number of deblended sources was NaN - need to look at that, but for now, just discarding

nonan_jobs = successful_jobs.loc[(successful_jobs['deblendedsources'].notna())]

hist_hover(nonan_jobs.fillna(value=-1,axis=1),"deblendedsources", bins=100)

In [14]:
hist_hover(nonan_jobs, "cpuseconds", bins=100)
hist_hover(nonan_jobs, "cpuminutes", bins=100)

In [15]:
hist_hover(nonan_jobs, "metacalmax_time", bins=100)

In [16]:
def hist2d_hover(dataframe, xcol, ycol, title, xaxis, yaxis, colors=["SteelBlue", "Tan"], bins=30, show_plot=True):
    p = bkh.figure()
    p.scatter(x=xcol, y=ycol,
         source=dataframe,
         size=10, color='green')
    p.title.text = title
    p.xaxis.axis_label = xaxis
    p.yaxis.axis_label = yaxis
    hover = HoverTool()
    hover.tooltips=[
        ('CPUseconds', '@cpuseconds'),
        ('tract', '@tract'),
        ('patch', '@patch'),
        ('metacalMax Time (min)', '@metacalmax_time'),
        ('ngmixMax Time (min)', '@ngmixmax_time') 
    ]

    p.add_tools(hover)
    if show_plot == True:
        show(p)
    else:
        return p

In [17]:
hist2d_hover(nonan_jobs,'metacalmax_time', 'deblendedsources', "metacalMax Time vs Deblended Sources", "metacalMax Time (min)", "Number of Deblended Sources" )

In [18]:
hist2d_hover(nonan_jobs,'ngmixmax_time', 'deblendedsources', "ngmixMax Time vs Deblended Sources", "ngmixMax Time (min)", "Number of Deblended Sources" )

In [19]:
hist2d_hover(nonan_jobs,'cpuminutes', 'deblendedsources', "Total CPU Time vs Deblended Sources", "CPU Time (min)", "Number of Deblended Sources" )