In [1]:
# File dependencies
from os import listdir

In [2]:
import pickle

In [3]:
# Data manipulation dependencies
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
from collections import OrderedDict 

In [5]:
# Directory of all pickled jobs via comet
source_dir = '../data/raw/'
save_dir_jpg = '../data/cpicore/images'

# List of date directories in source_dir
slice_files = listdir( source_dir )

In [6]:
cpiset_out = '../data/summary_stats/cpicore/cpicore_set.pkl'

In [7]:
len(slice_files)

5398

# Setup

In [8]:
def check_len ( df, num ):
    return len(df.columns.values.tolist()) > num

def purge_str ( df ):
    for row,col in df.iterrows():
        for i in range(len(col.values)):
            val = col.values[i]
            time = col.index[i]
            
            # certain numeric responses are recorded as str
            if type(val) is str:
                try:
                    df.at[row,time] = np.float64( val )
                except:
                    df.at[row,time] = np.float64(0)
                else:
                    df.at[row,time] = np.float64(0)
                    
    return df
    
def get_host_id ( file_name ):
    host,jobid = file_name.split('_')
    return host[:11] , jobid[:7]

def get_dfs ( file_list, min_jobs, min_cycles=0 ):
    job_dfs = {}
    count = 0
    
    for i in range( len(file_list) ):
        
        if count < min_jobs:
            job_file = file_list[i]
            df = purge_str( pd.read_csv( source_dir+job_file, index_col=[0,1,2], low_memory=False ) )
            host,jobid = get_host_id( job_file )
        
            if check_len( df, min_cycles ):
                job_dfs[jobid] = {}
                job_dfs[jobid][host] = df
            else:
                next
            
            count += 1
            
    return job_dfs
        
def sort_hosts ( file_list ):
    hosts = {}
    
    for job_file in file_list:
        host,jobid = job_file.split('_')
        
        if host in hosts:
            hosts[host].append( jobid[:7] )
        else:
            hosts[host] = [ jobid[:7] ]
            
    return hosts
            
def sort_jobs ( file_list, job_dfs ):
    jobs = {}
    
    for job_file in file_list:
        host,jobid = get_id( job_file )
        
        if jobid in job_dfs.keys():
            if jobid in jobs:
                jobs[jobid].append( host )
            else:
                jobs[jobid] = [ host ]
            
    return jobs, multiple_hosts(jobs)

def multiple_hosts ( jobs_dict ):
    return any( len(host) > 1 for job,host in jobs_dict.items() )

In [9]:
def cpicore_simple ( job_df, monitor=False ):
    data = job_df.loc['intel_hsw']
    times = job_df.columns.tolist()
    cpicore_list = []
    
    for i in range(1, len(times)):
        chunk = data[times[:i+1]]
        devices = { row : np.mean(col.values) for row,col in chunk.iterrows() }
        avg_c = { key[0]:0 for key,val in devices.items() }
        sum_avgs = 0
        
        for key,val in avg_c.items():
            avg_c[ key ] = devices[ (key, 'CLOCKS_UNHALTED_CORE') ] / devices[ (key, 'INSTRUCTIONS_RETIRED') ]
    
        for key,val in avg_c.items():
            sum_avgs += val
            
        cpicore_list.append(sum_avgs/24)
    
    if monitor:
        return cpicore_list
    
    return sum_avgs/24

def cpicore ( job_df, monitor=False ):
    data = job_df.loc['intel_hsw']
    times = job_df.columns.tolist()
    cpicore_dict = OrderedDict( )
    
    for t in times:
        cpicore_dict[t] = 0
    
    for i in range(1, len(times)):
        chunk = data[times[:i+1]]
        devices = { row : np.mean(col.values) for row,col in chunk.iterrows() }
        avg_c = { key[0]:0 for key,val in devices.items() }
        sum_avgs = 0
        current = times[i]
        
        for key,val in avg_c.items():
            avg_c[ key ] = devices[ (key, 'CLOCKS_UNHALTED_CORE') ] / devices[ (key, 'INSTRUCTIONS_RETIRED') ]
    
        for key,val in avg_c.items():
            sum_avgs += val
            
        cpicore_dict[current] = sum_avgs/24
    
    if monitor:
        return cpicore_dict
    
    return sum_avgs/24

def cpiref ( devices_dict ):
    avg_d = { key[0]:0 for key,val in devices_dict.items() }
    sum_avgs = 0
    
    for key,val in avg_d.items():
        avg_d[ key ] = devices_dict[ (key, 'CLOCKS_UNHALTED_REF') ] / devices_dict[ (key, 'INSTRUCTIONS_RETIRED') ]
        
    for key,val in avg_d.items():
        sum_avgs += val
        
    return sum_avgs/24

In [10]:
#def find_notable( cpi_set ):
#    notable = []
#    
#    for jobid,data_dict in cpi_set.items():
#        vals = [ val for val in data_dict.values()[1:] ]
#        
#        if not all():
#            notable.append( jobid )

def get_stats( cpi_set, outliers=False ):
    data = []
    outliers = []
    
    for jobid,cpi_dict in cpicore_set.items():
        for val in cpi_dict.values():
            if (val > 0):
                data.append(np.float64(val))
            else:
                outliers.append(val)   
                
    stats = {
        'Max' : max(data),
        'Min' : min(data),
        'Mean' : np.mean(data),
        'Std. Dev' : np.std(data),
        'Count' : len(data),
        'Excluded' : len(outliers)
    }
    
    if outliers:
        return stats,outliers
    
    return stats

# Pull Data

In [11]:
job_dfs = get_dfs( slice_files, len(slice_files), 6 )

In [12]:
jobids = job_dfs.keys()
len(jobids)

607

# Compiling CPICORE

In [13]:
cpicore_set = {}

for jobid in job_dfs.keys():
    for host in job_dfs[jobid].keys():
        cpicore_set[jobid] = {}
        cpicore_set[jobid][host] = cpicore_simple( job_dfs[jobid][host], monitor=True )

In [14]:
out_file = open(cpiset_out, 'wb')
pickle.dump(cpicore_set, out_file)
check_cpicore_set = pickle.load(open(cpiset_out, 'rb'))

check = check_cpicore_set.keys()
orig = cpicore_set.keys()
for key in check:
    if key not in orig:
        print key

EOFError: 

In [None]:
multiples = []

for job_id in cpicore_set.items():
    if len(job_id[1].keys()) > 1:
        multiples.append()
        print(job_id[0])
        print(job_id[1].keys())
    else:
        next

## Can cpicore be accurately predicted at t_1?

In [37]:
overview = {}
total = 0
count = 0

for job_tup in cpicore_set.items():
    for host_name,cpi_list in job_tup[1].items():
        final = cpi_list[-1]
        t1 = cpi_list[0]                                          # Note: t1 is intentionally placed at i=0 here
        label = (job_tup[0], host_name)
        std_dev = np.std(cpi_list)
        error = ( final / t1 )
        total += 1
        
        if error > 0.99:
            count += 1
        
        overview[label] = {
            "Standard Deviation" : std_dev,
            "t1 Accuracy" : error,
            "Cycles" : len(cpi_list),
            "Final" : final,
            "Data" : cpi_list
        }

        print "---------------------------------------------------------------------------------------------------------------------"
        print "Job:\t\t", label
        print "---------------------------------------------------------------------------------------------------------------------"
        print "Final cpicore:\t\t{:.3}".format(final)
        print "Length of Job:\t\t", len(cpi_list)
        print "Standard Deviation:\t{:.3}".format(std_dev)
        print "t1 Accuracy:\t\t{:.1%}".format(error)
        print "Data:"
        print
        print cpi_list
        print

---------------------------------------------------------------------------------------------------------------------
Job:		('4715660', 'comet-02-24')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		1.89
Length of Job:		2
Standard Deviation:	0.00496
t1 Accuracy:		99.5%
Data:

[1.9011233513040293, 1.8912020352105332]

---------------------------------------------------------------------------------------------------------------------
Job:		('4704908', 'comet-05-36')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		2.43
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[2.4273341440582885]

---------------------------------------------------------------------------------------------------------------------
Job:		('4705003', 'comet-07-11')
--------------------------------------------------------------

Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[1.7597684424518196]

---------------------------------------------------------------------------------------------------------------------
Job:		('4730838', 'comet-12-02')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		1.78
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[1.7828591206598496]

---------------------------------------------------------------------------------------------------------------------
Job:		('4730839', 'comet-12-02')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		1.77
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[1.7689822853011474]

---------------------------------------------------------------------------------------------------------------------
Job:		('4706025', 'comet-0

Job:		('4718401', 'comet-07-47')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		2.46
Length of Job:		16
Standard Deviation:	0.00532
t1 Accuracy:		100.3%
Data:

[2.4551419272484276, 2.454656693785853, 2.4520850573638895, 2.4547064786498036, 2.4587509080742618, 2.46271819336272, 2.465223368555653, 2.467140474666377, 2.468461641975252, 2.4683705454560565, 2.467238767838672, 2.4663840282883345, 2.465545522532099, 2.4649807014027196, 2.4639936229667994, 2.4632941964785613]

---------------------------------------------------------------------------------------------------------------------
Job:		('4723680', 'comet-03-43')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		2.75
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[2.752450297044529]

----------------------------------------------------------

---------------------------------------------------------------------------------------------------------------------
Final cpicore:		1.54
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[1.5433724937692848]

---------------------------------------------------------------------------------------------------------------------
Job:		('4708237', 'comet-05-68')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		3.26
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[3.2589544970912683]

---------------------------------------------------------------------------------------------------------------------
Job:		('4719408', 'comet-06-37')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		2.7
Length of Job:		5
Standard Deviation:	0.0226
t1 Accuracy:		97.6%
Data:

[2.769493745948481, 2.7408

---------------------------------------------------------------------------------------------------------------------
Final cpicore:		1.59
Length of Job:		2
Standard Deviation:	0.000342
t1 Accuracy:		100.0%
Data:

[1.5914097252011556, 1.5907266397314161]

---------------------------------------------------------------------------------------------------------------------
Job:		('4712270', 'comet-05-01')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		1.44
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[1.4384796198103114]

---------------------------------------------------------------------------------------------------------------------
Job:		('4707740', 'comet-03-20')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		1.8
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[1.

Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[1.8744471819290247]

---------------------------------------------------------------------------------------------------------------------
Job:		('4723125', 'comet-03-02')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		2.28
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[2.2793513660451405]

---------------------------------------------------------------------------------------------------------------------
Job:		('4710005', 'comet-04-23')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		1.22
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[1.2230826517219333]

---------------------------------------------------------------------------------------------------------------------
Job:		('4709865', 'comet-0

Job:		('4695388', 'comet-04-11')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		2.01
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[2.0136204642717197]

---------------------------------------------------------------------------------------------------------------------
Job:		('4718564', 'comet-27-06')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		3.49
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[3.493670396519759]

---------------------------------------------------------------------------------------------------------------------
Job:		('4722652', 'comet-10-10')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		1.89
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Da

Data:

[2.882364260169504, 2.8356915506699973, 2.7957433268222833, 2.761984019228738, 2.7329242855343145, 2.7078761497209904, 2.6858836497770358, 2.6613496261161784]

---------------------------------------------------------------------------------------------------------------------
Job:		('4710311', 'comet-04-11')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		3.06
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[3.0552513397254324]

---------------------------------------------------------------------------------------------------------------------
Job:		('4704148', 'comet-05-68')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		1.74
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[1.7430824093488173]

---------------------------------------------------------------------

[1.8223237342004506]

---------------------------------------------------------------------------------------------------------------------
Job:		('4719331', 'comet-04-15')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		2.19
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[2.1855534534542866]

---------------------------------------------------------------------------------------------------------------------
Job:		('4710944', 'comet-05-68')
---------------------------------------------------------------------------------------------------------------------
Final cpicore:		1.81
Length of Job:		1
Standard Deviation:	0.0
t1 Accuracy:		100.0%
Data:

[1.8080893605505208]

---------------------------------------------------------------------------------------------------------------------
Job:		('4709690', 'comet-07-24')
---------------------------------------------------------------

In [38]:
print "Total Jobs:\t\t", total
print "Outliers:\t\t", total - count
print "=============================="
print "Accurate t1 (%):\t{:.1%}".format(count/total)

Total Jobs:		2297
Outliers:		297
Accurate t1 (%):	0.0%


## What is the typical behavior of cpicore?

In [39]:
all_cpi = []
all_cpi_data = []
high_cpi = []
med_cpi = []
low_cpi = []

for job_tup in overview.items():
    label = job_tup[0]
    info = job_tup[1]
    
    all_cpi.append( info['Final'] )
    all_cpi_data.append( (label,info['Final']) )
    
    if info['Final'] < 3:
        low_cpi.append( (label,info['Final']) )
    elif info['Final'] < 10:
        med_cpi.append( (label,info['Final']) )
    else:
        high_cpi.append( (label,info['Final']) )

In [40]:
first_q = np.quantile(all_cpi, 0.25)
second_q = np.quantile(all_cpi, 0.5)
third_q = np.quantile(all_cpi, 0.75)
fourth_q = np.quantile(all_cpi, 0.99)


print "----------------------"
print "\tQuartiles"
print "----------------------" 
print "First:\t\t", np.round(first_q, 4)
print "Second:\t\t", np.round(second_q, 4)
print "Third:\t\t", np.round(third_q, 4)
print "Fourth:\t\t", np.round(fourth_q, 4)

----------------------
	Quartiles
----------------------
First:		1.7146
Second:		1.9624
Third:		2.4735
Fourth:		5.0981


In [41]:
print "Total Jobs:\t\t", total
print "High CPI:\t\t", len(high_cpi)
print "Med. CPI:\t\t", len(med_cpi)
print "Low CPI:\t\t", len(low_cpi)
print "=============================="
print "Unusual Jobs (%):\t{:.1%}".format((len(high_cpi))/total)

Total Jobs:		2297
High CPI:		6
Med. CPI:		247
Low CPI:		2044
Unusual Jobs (%):	0.0%


In [42]:
high_cpi

[(('4711272', 'comet-07-53'), 13.400145829553407),
 (('4718522', 'comet-21-70'), 30.57388955872469),
 (('4702365', 'comet-07-22'), 25030.569254107602),
 (('4717632', 'comet-10-69'), 978.174678773496),
 (('4720026', 'comet-07-22'), 13.236789002823942),
 (('4714085', 'comet-26-21'), 21.716839497218484)]

In [43]:
med_cpi.sort(key=lambda tup: tup[1])

In [44]:
med_cpi[-2:]

[(('4719398', 'comet-04-08'), 7.991633346415882),
 (('4712381', 'comet-06-05'), 9.173618165850725)]

In [45]:
low_cpi.sort(key=lambda tup: tup[1])

In [46]:
low_cpi[:5]

[(('4720087', 'comet-07-22'), 0.6212727016596244),
 (('4720114', 'comet-06-54'), 0.6363775359515448),
 (('4731646', 'comet-04-15'), 0.6657674632391879),
 (('4720115', 'comet-26-29'), 0.6706806833123542),
 (('4719865', 'comet-29-02'), 0.7907878122867514)]

# Displaying Data

In [None]:
jobids = cpicore_set.keys()

In [88]:
#for jobid,data_dict in cpicore_set.items():
focus = jobids[0]
data_dict = cpicore_set[ focus ]
x = [ i for i in range( len(data_dict.values()[0]) ) ]
y = [ val for val in data_dict.values()[0] ]
plt.scatter( x, y )
#plt.plot(x,y)
#plt.title('CPI Core Over Runtime')
#plt.xlabel('Time Interval')
#plt.ylabel('CPI Core')
plt.savefig("./src/data/summary_stats/cpicore/images/{}.png".format(jobid))
#plt.clf()

AttributeError: 'module' object has no attribute 'to_rgba'