## Dependencies

In [18]:
# System dependencies
from os import listdir
import time as clock
from datetime import timedelta
from IPython.display import clear_output
import pickle

In [2]:
# Data manipulation dependencies
import pandas as pd
import numpy as np
import datetime as dt

In [3]:
# Custom data handling methods
import prep_IO

In [16]:
# Directory of pre-determined lustre failed jobs
src = './lustre_fail_set.pkl'

## Data

- first level keys: search labels
- second level keys: ['Acct Info', 'Host Info', 'Source Files']
- 'Host Info' keys: ['Timely Data', 'Schemas', 'Specs']


In [22]:
# read in search_out from saved dict in src_file
search_out = pickle.load( open( src, "rb" ) )
keys = list(search_out.keys())
len(keys)

53

In [24]:
search_out[ keys[0] ]['Host Info']['Timely Data']

[('block', 'io_ticks(U=ms)', 'md0', 0, '2017-04-04T16:37:31'),
 ('block', 'io_ticks(U=ms)', 'md1', 0, '2017-04-04T16:37:31'),
 ('block', 'io_ticks(U=ms)', 'sda', 47379147, '2017-04-04T16:37:31'),
 ('block', 'io_ticks(U=ms)', 'sdb', 47371705, '2017-04-04T16:37:31'),
 ('cpu', 'irq(U=cs)', '22', 0, '2017-04-04T16:37:31'),
 ('cpu', 'irq(U=cs)', '20', 0, '2017-04-04T16:37:31'),
 ('cpu', 'irq(U=cs)', '21', 0, '2017-04-04T16:37:31'),
 ('cpu', 'irq(U=cs)', '11', 3, '2017-04-04T16:37:31'),
 ('cpu', 'irq(U=cs)', '10', 0, '2017-04-04T16:37:31'),
 ('cpu', 'irq(U=cs)', '13', 0, '2017-04-04T16:37:31'),
 ('cpu', 'irq(U=cs)', '12', 24, '2017-04-04T16:37:31'),
 ('cpu', 'irq(U=cs)', '15', 0, '2017-04-04T16:37:31'),
 ('cpu', 'irq(U=cs)', '14', 0, '2017-04-04T16:37:31'),
 ('cpu', 'irq(U=cs)', '17', 0, '2017-04-04T16:37:31'),
 ('cpu', 'irq(U=cs)', '16', 0, '2017-04-04T16:37:31'),
 ('cpu', 'irq(U=cs)', '19', 0, '2017-04-04T16:37:31'),
 ('cpu', 'irq(U=cs)', '18', 0, '2017-04-04T16:37:31'),
 ('cpu', 'irq(U=cs

## Temp A

In [63]:
def get_stats( data ):         
    stats = {
        'Max' : max(data),
        'Min' : min(data),
        'Mean' : np.mean(data),
        'Std. Dev' : np.std(data),
        'Count' : len(data)
    }
    return stats

#def cpicore ( timely_data ):
#    data = [ item for item in timely_data if item[2] == 'CLOCKS_UNHALTED_CORE' or item[2] == 'INSTRUCTIONS_RETIRED']
    #times = job_df.columns.tolist()
    #cpicore_list = []
    #
    #for i in range(1, len(times)):
    #    chunk = data[times[:i+1]]
    #    devices = { row : np.mean(col.values) for row,col in chunk.iterrows() }
    #    avg_c = { key[0]:0 for key,val in devices.items() }
    #    sum_avgs = 0
    #    
    #    for key,val in avg_c.items():
    #        avg_c[ key ] = devices[ (key, 'CLOCKS_UNHALTED_CORE') ] / devices[ (key, 'INSTRUCTIONS_RETIRED') ]
    #
    #    for key,val in avg_c.items():
    #        sum_avgs += val
    #        
    #    cpicore_list.append(sum_avgs/24)
    #
    #if monitor:
    #    return cpicore_list
    #
    #return data#sum_avgs/24

## Setup Structures for Analysis

In [76]:
# All valid 'Timely Data' occurences in dataset
list_of_timelys = []
dropped = []

for i in range(len(keys)):
    try:
        key = keys[i]
        list_of_timelys.append( search_out[key]['Host Info']['Timely Data'] )
    except:
        host_file = search_out[key]['Host Info']
        dropped.append( ( key, host_file ) )
    
len(list_of_timelys)

50

In [77]:
# data points organized by label reported
data_by_label = {}

for i in range(len( list_of_timelys )):
    timely_i = list_of_timelys[i]
    
    for j in range(len( timely_i )):
        tup_j = timely_i[j]
        label = ( tup_j[:3] )
        
        if label in data_by_label:
            data_by_label[label].append( tup_j[3] )
        else:
            data_by_label[label] = [ tup_j[3] ]

In [78]:
# labels of all reported metrics
data_keys = list(data_by_label.keys())
data_keys

[('block', 'io_ticks(U=ms)', 'md0'),
 ('block', 'io_ticks(U=ms)', 'md1'),
 ('block', 'io_ticks(U=ms)', 'sda'),
 ('block', 'io_ticks(U=ms)', 'sdb'),
 ('cpu', 'irq(U=cs)', '22'),
 ('cpu', 'irq(U=cs)', '20'),
 ('cpu', 'irq(U=cs)', '21'),
 ('cpu', 'irq(U=cs)', '11'),
 ('cpu', 'irq(U=cs)', '10'),
 ('cpu', 'irq(U=cs)', '13'),
 ('cpu', 'irq(U=cs)', '12'),
 ('cpu', 'irq(U=cs)', '15'),
 ('cpu', 'irq(U=cs)', '14'),
 ('cpu', 'irq(U=cs)', '17'),
 ('cpu', 'irq(U=cs)', '16'),
 ('cpu', 'irq(U=cs)', '19'),
 ('cpu', 'irq(U=cs)', '18'),
 ('cpu', 'irq(U=cs)', '23'),
 ('cpu', 'irq(U=cs)', '1'),
 ('cpu', 'irq(U=cs)', '0'),
 ('cpu', 'irq(U=cs)', '3'),
 ('cpu', 'irq(U=cs)', '2'),
 ('cpu', 'irq(U=cs)', '5'),
 ('cpu', 'irq(U=cs)', '4'),
 ('cpu', 'irq(U=cs)', '7'),
 ('cpu', 'irq(U=cs)', '6'),
 ('cpu', 'irq(U=cs)', '9'),
 ('cpu', 'irq(U=cs)', '8'),
 ('ib', 'symbol_error(W=32)', 'mlx4_0.1'),
 ('ib_ext', 'port_multicast_xmit_pkts', 'mlx4_0/1'),
 ('ib_sw', 'tx_bytes(U=4B)', 'mlx4_0/1'),
 ('intel_hsw', 'FIXED_CTR1(W

## Descriptive Statistics

In [79]:
# generating general stats for each list in data_by_label
descriptives = { }
static = []

for i in range(len( data_keys )):
    key = data_keys[i]
    data_list = data_by_label[ key ]
    
    if sum(data_list) == 0:
        static.append( key )
    else:
        stats = get_stats( data_list )
        descriptives[ key ] = stats

descriptives

{('block', 'io_ticks(U=ms)', 'sda'): {'Max': 171816728,
  'Min': 17432,
  'Mean': 22129970.74528675,
  'Std. Dev': 35249062.212975614,
  'Count': 7585},
 ('block', 'io_ticks(U=ms)', 'sdb'): {'Max': 172456547,
  'Min': 22209,
  'Mean': 22237056.575477917,
  'Std. Dev': 35283712.03987915,
  'Count': 7585},
 ('cpu', 'irq(U=cs)', '11'): {'Max': 55,
  'Min': 0,
  'Mean': 3.33843111404087,
  'Std. Dev': 9.36820417600003,
  'Count': 7585},
 ('cpu', 'irq(U=cs)', '10'): {'Max': 65,
  'Min': 0,
  'Mean': 4.168490441661174,
  'Std. Dev': 12.647185763679186,
  'Count': 7585},
 ('cpu', 'irq(U=cs)', '12'): {'Max': 26,
  'Min': 0,
  'Mean': 4.778510217534608,
  'Std. Dev': 6.237339421108527,
  'Count': 7585},
 ('cpu', 'irq(U=cs)', '1'): {'Max': 359,
  'Min': 0,
  'Mean': 20.29492419248517,
  'Std. Dev': 60.93858634569444,
  'Count': 7585},
 ('cpu', 'irq(U=cs)', '0'): {'Max': 55118,
  'Min': 197,
  'Mean': 14531.799208965063,
  'Std. Dev': 12734.567617187862,
  'Count': 7585},
 ('cpu', 'irq(U=cs)', '3

#### Standard Deviation for each Label

In [80]:
all_devs = []

for label,stat_dict in descriptives.items():
    all_devs.append( (label,stat_dict['Std. Dev']) )

all_devs

[(('block', 'io_ticks(U=ms)', 'sda'), 35249062.212975614),
 (('block', 'io_ticks(U=ms)', 'sdb'), 35283712.03987915),
 (('cpu', 'irq(U=cs)', '11'), 9.36820417600003),
 (('cpu', 'irq(U=cs)', '10'), 12.647185763679186),
 (('cpu', 'irq(U=cs)', '12'), 6.237339421108527),
 (('cpu', 'irq(U=cs)', '1'), 60.93858634569444),
 (('cpu', 'irq(U=cs)', '0'), 12734.567617187862),
 (('cpu', 'irq(U=cs)', '3'), 56.4600362046807),
 (('cpu', 'irq(U=cs)', '2'), 21.371751334593075),
 (('cpu', 'irq(U=cs)', '5'), 11.5631483806886),
 (('cpu', 'irq(U=cs)', '4'), 13.7010546204813),
 (('cpu', 'irq(U=cs)', '7'), 7.089671018065),
 (('cpu', 'irq(U=cs)', '6'), 14.409687680912418),
 (('cpu', 'irq(U=cs)', '9'), 6.194154306987354),
 (('cpu', 'irq(U=cs)', '8'), 7.988968304747921),
 (('ib_ext', 'port_multicast_xmit_pkts', 'mlx4_0/1'), 16565.003807965797),
 (('ib_sw', 'tx_bytes(U=4B)', 'mlx4_0/1'), 667083981744596.0),
 (('intel_hsw', 'FIXED_CTR1(W=48)', '22'), 82581442620525.69),
 (('intel_hsw', 'FIXED_CTR1(W=48)', '20'), 78

In [None]:
test_sort = all_devs.sort(key=lambda tup: tup[1], reverse=True)

#### Occurences for each Label

In [81]:
all_counts = []

for label,stat_dict in descriptives.items():
    all_counts.append( (label,stat_dict['Count']) )

all_counts

[(('block', 'io_ticks(U=ms)', 'sda'), 7585),
 (('block', 'io_ticks(U=ms)', 'sdb'), 7585),
 (('cpu', 'irq(U=cs)', '11'), 7585),
 (('cpu', 'irq(U=cs)', '10'), 7585),
 (('cpu', 'irq(U=cs)', '12'), 7585),
 (('cpu', 'irq(U=cs)', '1'), 7585),
 (('cpu', 'irq(U=cs)', '0'), 7585),
 (('cpu', 'irq(U=cs)', '3'), 7585),
 (('cpu', 'irq(U=cs)', '2'), 7585),
 (('cpu', 'irq(U=cs)', '5'), 7585),
 (('cpu', 'irq(U=cs)', '4'), 7585),
 (('cpu', 'irq(U=cs)', '7'), 7585),
 (('cpu', 'irq(U=cs)', '6'), 7585),
 (('cpu', 'irq(U=cs)', '9'), 7585),
 (('cpu', 'irq(U=cs)', '8'), 7585),
 (('ib_ext', 'port_multicast_xmit_pkts', 'mlx4_0/1'), 7585),
 (('ib_sw', 'tx_bytes(U=4B)', 'mlx4_0/1'), 7585),
 (('intel_hsw', 'FIXED_CTR1(W=48)', '22'), 7585),
 (('intel_hsw', 'FIXED_CTR1(W=48)', '20'), 7585),
 (('intel_hsw', 'FIXED_CTR1(W=48)', '21'), 7585),
 (('intel_hsw', 'FIXED_CTR1(W=48)', '11'), 7585),
 (('intel_hsw', 'FIXED_CTR1(W=48)', '10'), 7585),
 (('intel_hsw', 'FIXED_CTR1(W=48)', '13'), 7585),
 (('intel_hsw', 'FIXED_CTR1(

#### Cpicore Check Across All Labels

In [90]:
dict_of_timelys = {}
count = 0

for i in range(len(keys)):
    try:
        key = keys[i]
        dict_of_timelys[ count ] = search_out[key]['Host Info']['Timely Data']
        count += 1
    except:
        next

In [91]:
# search for metrics used in previously tested cpicore() measure
for i in range(len( dict_of_timelys )):
    test_list = dict_of_timelys[i]
    
    for item in test_list:
        for val in item:
            try:
                if 'CLOCKS_UNHALTED_CORE' in val or 'INSTRUCTIONS_RETIRED' in val:
                    print(item)
            except:
                next

## What is the typical behavior of ___label___?
#### Can cpicore be accurately predicted at t_1?

In [None]:
def quartile_dict( data_list ):

    first_q = np.round( np.quantile(all_cpi, 0.25), 4 )
    second_q = np.round( np.quantile(all_cpi, 0.5), 4 )
    third_q = np.round( np.quantile(all_cpi, 0.75), 4 )
    fourth_q = np.round( np.quantile(all_cpi, 0.99), 4 )
    
    

In [14]:
test_quart_dict = quartile_dict(  )

print "----------------------"
print "\tQuartiles"
print "----------------------" 
print "First:\t\t", 
print "Second:\t\t", 
print "Third:\t\t", 
print "Fourth:\t\t", 

----------------------
	Quartiles
----------------------
First:		1.7146
Second:		1.9624
Third:		2.4735
Fourth:		5.0981


In [19]:
print "High CPI:\t\t", len(high_cpi)
print "Med. CPI:\t\t", len(med_cpi)
print "Low CPI:\t\t", len(low_cpi)
print "=============================="
print "Total Jobs:\t\t", total
print "% High Performing:\t{:.1%}".format((len(high_cpi))/total)

High CPI:		6
Med. CPI:		247
Low CPI:		2044
Total Jobs:		2297
% High Performing:	0.0%


In [20]:
high_cpi

[(('4711272', 'comet-07-53'), 13.400145829553407),
 (('4718522', 'comet-21-70'), 30.57388955872469),
 (('4702365', 'comet-07-22'), 25030.569254107602),
 (('4717632', 'comet-10-69'), 978.174678773496),
 (('4720026', 'comet-07-22'), 13.236789002823942),
 (('4714085', 'comet-26-21'), 21.716839497218484)]

In [28]:
med_cpi.sort(key=lambda tup: tup[1])
med_cpi[-14:]

[(('4684830', 'comet-03-50'), 5.262090885629687),
 (('4721144', 'comet-12-35'), 5.287045936974469),
 (('4724829', 'comet-07-20'), 5.4479779176300385),
 (('4731534', 'comet-12-02'), 5.474687887725914),
 (('4729920', 'comet-06-05'), 5.664439570633658),
 (('4721129', 'comet-12-35'), 5.750437729751802),
 (('4702820', 'comet-06-20'), 5.960345555952602),
 (('4709032', 'comet-10-69'), 5.987278048286104),
 (('4709383', 'comet-10-56'), 6.021832300460242),
 (('4721218', 'comet-10-69'), 6.481195792558062),
 (('4700402', 'comet-04-15'), 6.591559441529338),
 (('4721087', 'comet-10-69'), 7.180496023899781),
 (('4719398', 'comet-04-08'), 7.991633346415882),
 (('4712381', 'comet-06-05'), 9.173618165850725)]

In [26]:
low_cpi.sort(key=lambda tup: tup[1])
low_cpi[:10]

[(('4720087', 'comet-07-22'), 0.6212727016596244),
 (('4720114', 'comet-06-54'), 0.6363775359515448),
 (('4731646', 'comet-04-15'), 0.6657674632391879),
 (('4720115', 'comet-26-29'), 0.6706806833123542),
 (('4719865', 'comet-29-02'), 0.7907878122867514),
 (('4715269', 'comet-10-46'), 0.9191008109183371),
 (('4682407', 'comet-31-09'), 0.9902301792733068),
 (('4716428', 'comet-10-56'), 0.9965364710785738),
 (('4628539', 'comet-13-48'), 0.9989864050144558),
 (('4716475', 'comet-10-63'), 0.9995107282508456)]

## 