# Setup 

This notebook requires the following job(s) to be present in the database. 
```
615503
```

If you haven't done so yet, please **epmt submit** those directories to the database of your choice from the shell. See **README.md**:

```
$ epmt -v submit sample/615503.tgz
INFO:epmt_cmds:submit_to_db(sample/615503.tgz,*-papiex-[0-9]*-[0-9]*.csv,False)
INFO:epmt_cmds:./job_metadata is 6772 bytes in archive
INFO:epmt_cmds:1 files to submit
INFO:epmt_cmds:1 hosts found: ['pp053-collated-']
INFO:epmt_cmds:host pp053-collated-: 1 files to import
INFO:epmt_job:Binding to DB: {'filename': 'database.sqlite', 'create_db': True, 'provider': 'sqlite'}
INFO:epmt_job:Generating mapping from schema...
INFO:epmt_job:job_tags: {'atm_res': 'c96l49', 'exp_component': 'atmos', 'exp_name': 'ESM4_historical_D151', 'script_name': 'ESM4_historical_D151_atmos_18540101', 'ocn_res': '0.5l75', 'exp_time': '18540101'}
INFO:epmt_job:Processing job id 615503
INFO:epmt_job:Creating user Jeffrey.Durachta
INFO:epmt_job:Creating job 615503
INFO:epmt_job:Creating host pp053
INFO:epmt_job:Did 1000 (37734 in file)...236.49/sec
INFO:epmt_job:Did 2000 (37734 in file)...279.28/sec
INFO:epmt_job:Did 3000 (37734 in file)...293.66/sec
```

In [1]:
# Import the needed modules
import pandas as pd
import epmt_query as eq
import epmt_outliers as eod

{'host': 'localhost', 'password': 'example', 'user': 'postgres', 'dbname': 'EPMT', 'provider': 'postgres'}


In [2]:
jobs = eq.get_jobs(fmt='pandas')
print(jobs.columns.values)
display(jobs[['jobid', 'duration', 'cpu_time', 'num_procs']])

newdf = eod.detect_outlier_jobs(jobs)
print("Outlier jobs")
display(newdf)

[u'PERF_COUNT_SW_CPU_CLOCK' 'account' u'all_proc_tags'
 u'cancelled_write_bytes' 'cpu_time' u'delayacct_blkio_time' 'duration'
 'end' 'env_changes_dict' 'env_dict' 'exitcode' u'guest_time' u'inblock'
 'info_dict' u'invol_ctxsw' 'jobid' 'jobname' 'jobscriptname' u'majflt'
 u'minflt' u'num_procs' u'num_threads' u'outblock' 'ppr' u'processor'
 'queue' u'rchar' u'rdtsc_duration' u'read_bytes' u'rssmax' 'sessionid'
 'start' 'submit' u'syscr' u'syscw' u'systemtime' 'tags' u'time_oncpu'
 u'time_waiting' u'timeslices' 'updated_at' 'user' u'user+system'
 u'usertime' u'vol_ctxsw' u'wchar' u'write_bytes']


Unnamed: 0,jobid,duration,cpu_time,num_procs
0,615503,2266020000.0,1327237000.0,35483
1,kernel-build-20190610-081150,232336400.0,591514700.0,10601


Outlier jobs


Unnamed: 0,jobid,duration,cpu_time,num_procs
0,615503,False,False,False
1,kernel-build-20190610-081150,False,False,False


In [14]:
jobs = eq.get_jobs(fmt='terse')
print('jobs: ' + str(jobs))

tags = eq.get_unique_process_tags(exclude = ['op_instance', 'op_sequence', 'operation_count', 'instance'], fold=False)
print('tags:' + str(tags))
print

ops = eq.agg_metrics_by_tags(jobs=jobs,tags=tags,fmt='pandas')
print(ops.columns.values)

# Should be jobid
ops.rename(columns={'job':'jobid'},inplace=True)
                
display(ops[['jobid','tags', 'duration','exclusive_cpu_time','num_procs']])


newdfo = eq.detect_outlier_ops(ops)
print("Outlier operations")
display(newdfo)

jobs: [u'615503', u'kernel-build-20190610-081150']
tags:[{u'op': u'cp'}, {u'op': u'dmput'}, {u'op': u'fregrid'}, {u'op': u'hsmget'}, {u'op': u'mv'}, {u'op': u'ncatted'}, {u'op': u'ncks'}, {u'op': u'ncrcat'}, {u'op': u'plevel'}, {u'op': u'rm'}, {u'op': u'splitvars'}, {u'op': u'timavg'}, {u'op': u'untar'}, {u'operation': u'build'}, {u'operation': u'configure'}, {u'operation': u'download'}, {u'operation': u'extract'}]

[u'PERF_COUNT_SW_CPU_CLOCK' u'cancelled_write_bytes'
 u'delayacct_blkio_time' 'duration' 'exclusive_cpu_time' u'guest_time'
 u'inblock' u'invol_ctxsw' 'job' u'majflt' u'minflt' 'num_procs'
 'num_tids' u'outblock' u'processor' u'rchar' u'rdtsc_duration'
 u'read_bytes' u'rssmax' u'syscr' u'syscw' u'systemtime' 'tags'
 u'time_oncpu' u'time_waiting' u'timeslices' u'user+system' u'usertime'
 u'vol_ctxsw' u'wchar' u'write_bytes']


Unnamed: 0,jobid,tags,duration,exclusive_cpu_time,num_procs
0,615503,{u'op': u'cp'},122453800.0,40980648.0,3902
1,615503,{u'op': u'dmput'},2286065000.0,12167869.0,272
2,615503,{u'op': u'fregrid'},399781300.0,396146734.0,38
3,615503,{u'op': u'hsmget'},2863656000.0,270022419.0,15948
4,615503,{u'op': u'mv'},1142269000.0,129375864.0,3393
5,615503,{u'op': u'ncatted'},33003460.0,33910501.0,3210
6,615503,{u'op': u'ncks'},51725280.0,45118505.0,720
7,615503,{u'op': u'ncrcat'},107993500.0,93014605.0,282
8,615503,{u'op': u'plevel'},208466600.0,105212285.0,2592
9,615503,{u'op': u'rm'},59274200.0,27965861.0,2772


AttributeError: 'module' object has no attribute 'detect_outlier_ops'

In [55]:
procs = eq.get_procs(list(jobs.jobid), fmt='pandas')
print(procs.columns.values)
print(procs['exename'].count())

def detect_outlier_processes(processes, trained_model=None, features=['duration','exclusive_cpu_time']):
    retval = pd.DataFrame(columns=features, index=processes.index)
    for c in features:
        outlier_rows = eod.outliers_iqr(processes[c])[0]
        print(c,outlier_rows)
        retval.loc[outlier_rows,c] = True
    retval['id'] = processes['id']
    retval['exename'] = processes['exename']
    retval['tags'] = processes['tags']
    retval = retval[['id','exename','tags']+features]
    return retval

olps = detect_outlier_processes(procs)
display(procs[['id','exename','tags','duration','exclusive_cpu_time']].tail(20))
olps[['id','exename','tags','duration','exclusive_cpu_time']].tail(20)


['PERF_COUNT_SW_CPU_CLOCK' 'args' 'cancelled_write_bytes'
 'delayacct_blkio_time' 'duration' 'end' 'exclusive_cpu_time' 'exename'
 'exitcode' 'gen' 'group' 'guest_time' 'host' 'id' 'inblock'
 'inclusive_cpu_time' 'invol_ctxsw' 'job' 'majflt' 'minflt' 'numtids'
 'outblock' 'parent' 'path' 'pgid' 'pid' 'ppid' 'processor' 'rchar'
 'rdtsc_duration' 'read_bytes' 'rssmax' 'sid' 'start' 'syscr' 'syscw'
 'systemtime' 'tags' 'time_oncpu' 'time_waiting' 'timeslices' 'updated_at'
 'user' 'user+system' 'usertime' 'vol_ctxsw' 'wchar' 'write_bytes']
35483
duration [    0     3    11 ..., 35391 35396 35465]
exclusive_cpu_time [    0     3    11 ..., 35396 35465 35467]


Unnamed: 0,id,exename,tags,duration,exclusive_cpu_time
35463,35464,tcsh,"{'op': 'hsmget', 'op_instance': '7', 'op_seque...",148.0,999.0
35464,35465,getopt,"{'op': 'cp', 'op_instance': '6', 'op_sequence'...",74.0,5998.0
35465,35466,globus-url-copy,"{'op': 'hsmget', 'op_instance': '1', 'op_seque...",2219768.0,2017692.0
35466,35467,tcsh,"{'op': 'hsmget', 'op_instance': '7', 'op_seque...",157.0,1998.0
35467,35468,globus-url-copy,"{'op': 'cp', 'op_instance': '6', 'op_sequence'...",167603.0,105983.0
35468,35469,tcsh,"{'op': 'hsmget', 'op_instance': '7', 'op_seque...",156.0,1999.0
35469,35470,tcsh,"{'op': 'hsmget', 'op_instance': '7', 'op_seque...",151.0,999.0
35470,35471,cat,"{'op': 'timavg', 'op_instance': '11', 'op_sequ...",111.0,4998.0
35471,35472,mv,"{'op': 'ncks', 'op_instance': '11', 'op_sequen...",134.0,8998.0
35472,35473,which,"{'op': 'mv', 'op_instance': '9', 'op_sequence'...",172.0,5998.0


Unnamed: 0,id,exename,tags,duration,exclusive_cpu_time
35463,35464,tcsh,"{'op': 'hsmget', 'op_instance': '7', 'op_seque...",,
35464,35465,getopt,"{'op': 'cp', 'op_instance': '6', 'op_sequence'...",,
35465,35466,globus-url-copy,"{'op': 'hsmget', 'op_instance': '1', 'op_seque...",True,True
35466,35467,tcsh,"{'op': 'hsmget', 'op_instance': '7', 'op_seque...",,
35467,35468,globus-url-copy,"{'op': 'cp', 'op_instance': '6', 'op_sequence'...",,True
35468,35469,tcsh,"{'op': 'hsmget', 'op_instance': '7', 'op_seque...",,
35469,35470,tcsh,"{'op': 'hsmget', 'op_instance': '7', 'op_seque...",,
35470,35471,cat,"{'op': 'timavg', 'op_instance': '11', 'op_sequ...",,
35471,35472,mv,"{'op': 'ncks', 'op_instance': '11', 'op_sequen...",,
35472,35473,which,"{'op': 'mv', 'op_instance': '9', 'op_sequence'...",,
