# Preparing

## Import

In [1]:
import os
import pdb

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from joblib import Parallel, delayed
from mlxtend.frequent_patterns import apriori, fpmax, fpgrowth
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

from utils import *
# Widen pandas' display limits so long itemset strings and wide frames are not cut off.
for _option, _value in (('display.max_colwidth', 1000), ('display.max_columns', 100)):
    pd.set_option(_option, _value)

# Bigger fonts and thicker lines for every matplotlib figure drawn below.
matplotlib.rcParams["font.size"] = 16
matplotlib.rcParams['lines.linewidth'] = 2.5
# To go back to matplotlib's stock settings: matplotlib.rcdefaults()

figures saved to /tmp/figures


In [2]:
# Directory holding the trace CSVs; the trailing slash matters because file
# names are appended to it by plain string concatenation.
DATA_DIR = '../trace-data/'

# One row per task: scheduling metadata plus GPU/CPU/memory utilisation summaries.
df = pd.read_csv(filepath_or_buffer=DATA_DIR + 'preprocessed.csv')

In [3]:
# Preview the task table; pandas elides the middle rows in the rendered output.
df

Unnamed: 0.1,Unnamed: 0,vc,jobid,num_attemps,user,status,num_gpus,runtime,queue_time,gpu_util_mean,gpu_util_min,gpu_util_max,cpu_util,mem_util,machine_gpu_mem,machine_gpu_num
0,0,11cb48,application_1506638472019_17235,1,066e99,Pass,1,1.100000,0.166667,25.591667,0.0,51.183333,2.108333,95.686876,24GB,8
1,1,6214e9,application_1506638472019_17145,1,2c46d5,Pass,1,49.183333,0.833333,0.000000,0.0,0.000000,48.659673,99.553043,12GB,2
2,2,6c71a0,application_1506638472019_0262,1,a04bf9,Pass,1,14344.366667,0.133333,90.033048,0.0,96.000000,6.554313,98.141959,24GB,8
3,3,6214e9,application_1506638472019_17148,1,2c46d5,Pass,1,45.983333,3.250000,0.000000,0.0,0.000000,18.046454,79.824283,12GB,2
4,4,6214e9,application_1506638472019_17159,1,2c46d5,Pass,1,46.100000,0.550000,0.000000,0.0,0.000000,21.289007,85.083449,12GB,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111606,111606,11cb48,application_1508364558011_30316,1,066e99,Pass,1,0.950000,2.333333,0.000000,0.0,0.000000,0.562500,23.500736,24GB,8
111607,111607,ee9e8c,application_1508364558011_30284,5,686d6e,Failed,8,67.616667,0.266667,0.000000,0.0,0.000000,0.694722,61.297971,12GB,2
111608,111608,6c71a0,application_1508364558011_30313,1,450add,Killed,1,2.750000,1.133333,0.000000,0.0,0.000000,0.572500,22.961763,24GB,8
111609,111609,6c71a0,application_1508364558011_30301,1,450add,Pass,1,31.600000,2.250000,46.726768,0.0,99.316667,7.037121,92.978473,24GB,8


In [4]:
# Number of tasks per value of machine_gpu_num, most common value first.
df['machine_gpu_num'].value_counts()

2    73455
8    38156
Name: machine_gpu_num, dtype: int64

In [5]:
# 15th percentile of memory utilisation, computed over the non-missing samples only.
np.percentile(df['mem_util'].dropna(), 15)

67.10107492859574

In [6]:
# --- Categorical cut-offs -------------------------------------------------
# Virtual cluster that receives the 'top_vc' tag.
top_vc = '6214e9'
# A task with at least this many attempts is tagged 'multi-attempts'.
multi_attempt = 2
# Terminal statuses that are emitted as tags themselves.
non_pass = ['Failed', 'Killed']
# A task with at least this many GPUs is tagged 'multigpu'.
multigpu = 2

# --- User activity ----------------------------------------------------------
# Tasks submitted per user, most active user first.
user_counts = df['user'].value_counts()
freq_usr_thr = np.percentile(user_counts.to_list(), q=85)
new_usr_thr = np.percentile(user_counts.to_list(), q=15)
# top_user = user_counts.index[0]
# Users at or above the 85th percentile of task count are "frequent"; everybody
# else is treated as "new".
# NOTE(review): new_usr_thr is computed but not used for the split below - the
# "new" group is simply the complement of the frequent group. Confirm this is intended.
freq_user = [user for user, n_tasks in user_counts.items() if n_tasks >= freq_usr_thr]
new_user = [user for user, n_tasks in user_counts.items() if n_tasks < freq_usr_thr]

# --- Runtime / queueing -----------------------------------------------------
# Top and bottom 15% of runtimes mark long and short tasks.
runtime_high = np.percentile(df['runtime'], q=85)
runtime_low = np.percentile(df['runtime'], q=15)
# Long queueing is the top 15%; short queueing uses a fixed cut-off instead of a percentile.
queue_high = np.percentile(df['queue_time'], q=85)
queue_low = 1

In [7]:
# Utilisation cut-offs: the top / bottom 15% of the non-missing samples of each
# metric mark "high" / "low" tasks. Missing samples (NaN) are dropped first so
# they cannot poison the percentile.
mean_util_high = np.percentile(df['gpu_util_mean'].dropna(), 85)
mean_util_low = np.percentile(df['gpu_util_mean'].dropna(), 15)
# A minimum GPU utilisation at or below this value counts as "low".
low_min = 0
high_max = np.percentile(df['gpu_util_max'].dropna(), 85)
cpu_util_high = np.percentile(df['cpu_util'].dropna(), 85)
cpu_util_low = np.percentile(df['cpu_util'].dropna(), 15)
mem_util_high = np.percentile(df['mem_util'].dropna(), 85)
mem_util_low = np.percentile(df['mem_util'].dropna(), 15)

# Label of the 24GB GPU machines. Spelled out instead of being taken from
# value_counts().index[1]: the positional lookup silently selects a different
# label as soon as 24GB is no longer exactly the second most common value.
gpu_24gb = '24GB'
assert (df['machine_gpu_mem'] == gpu_24gb).any(), \
    "no task ran on a machine whose machine_gpu_mem is '24GB'"

In [8]:
def inner_loop(df):
    """Turn a chunk of task records into a list of tag "transactions".

    Every row of ``df`` is mapped to a list of string tags describing the task
    (virtual cluster, attempts, user group, final status, GPU count, runtime and
    queueing bands, GPU/CPU/memory utilisation bands, GPU memory class). The
    cut-offs are read from the notebook-level globals (``top_vc``,
    ``multi_attempt``, ``new_user``, ``freq_user``, ``non_pass``, ``multigpu``,
    ``runtime_high``/``runtime_low``, ``queue_high``/``queue_low``,
    ``mean_util_high``/``mean_util_low``, ``low_min``, ``high_max``,
    ``cpu_util_high``/``cpu_util_low``, ``mem_util_high``/``mem_util_low``,
    ``gpu_24gb``).

    Parameters
    ----------
    df : pandas.DataFrame
        Chunk with the columns ``vc``, ``num_attemps``, ``user``, ``status``,
        ``num_gpus``, ``runtime``, ``queue_time``, ``gpu_util_mean``,
        ``gpu_util_min``, ``gpu_util_max``, ``cpu_util``, ``mem_util`` and
        ``machine_gpu_mem``.

    Returns
    -------
    list[list[str]]
        One tag list per row, in row order. A row that matches nothing yields
        an empty list.
    """
    # The user groups are plain lists; turn them into sets once per chunk so the
    # per-row membership test is a hash lookup instead of a linear scan.
    new_users = set(new_user)
    freq_users = set(freq_user)

    def add_band(tags, value, high, high_tag, low, low_tag):
        """Append high_tag if value >= high, otherwise low_tag if value <= low."""
        if value >= high:
            tags.append(high_tag)
        elif value <= low:
            tags.append(low_tag)

    results = []
    for _, row in df.iterrows():
        trans = []  # tags collected for this task

        if row['vc'] == top_vc:
            trans.append('top_vc')
        if row['num_attemps'] >= multi_attempt:
            trans.append('multi-attempts')

        user = row['user']
        if user in new_users:
            trans.append('new_user')
        elif user in freq_users:
            trans.append('freq_user')

        # Unsuccessful tasks are tagged with their concrete status.
        if row['status'] in non_pass:
            trans.append(row['status'])
        if row['num_gpus'] >= multigpu:
            trans.append('multigpu')

        add_band(trans, row['runtime'], runtime_high, 'long_runtime',
                 runtime_low, 'short_runtime')
        add_band(trans, row['queue_time'], queue_high, 'long_queue',
                 queue_low, 'short_queue')

        # Utilisation metrics may be missing (NaN); such metrics produce no tag.
        mean_gpu_util = row['gpu_util_mean']
        if not np.isnan(mean_gpu_util):
            add_band(trans, mean_gpu_util, mean_util_high, 'high_gpu_util',
                     mean_util_low, 'low_gpu_util')

        min_gpu_util = row['gpu_util_min']
        if not np.isnan(min_gpu_util) and min_gpu_util <= low_min:
            trans.append('low_min_gpu_util')

        max_gpu_util = row['gpu_util_max']
        if not np.isnan(max_gpu_util) and max_gpu_util >= high_max:
            trans.append('high_max_gpu_util')

        cpu_util = row['cpu_util']
        if not np.isnan(cpu_util):
            add_band(trans, cpu_util, cpu_util_high, 'high_cpu_util',
                     cpu_util_low, 'low_cpu_util')

        mem_util = row['mem_util']
        if not np.isnan(mem_util):
            add_band(trans, mem_util, mem_util_high, 'high_mem_util',
                     mem_util_low, 'low_mem_util')

        if row['machine_gpu_mem'] == gpu_24gb:
            trans.append('24GB_GPU')

        results.append(trans)
    return results

In [9]:
# Re-open the same CSV as a lazy reader that yields DataFrames of up to 5000 rows.
# The reader is a one-shot iterator: re-run this cell before iterating over it again.
df_chunk = pd.read_csv(DATA_DIR + 'preprocessed.csv', chunksize=5000)

In [10]:
# CPU ids this process is allowed to run on. os.sched_getaffinity is only
# available on some Unix platforms, so fall back to the machine's CPU count
# elsewhere instead of failing with AttributeError.
try:
    usable_cores = os.sched_getaffinity(0)
except AttributeError:
    usable_cores = set(range(os.cpu_count() or 1))

# Tag every chunk in its own worker; `dataset` holds one list of transactions
# per chunk, in chunk order.
dataset = Parallel(n_jobs=len(usable_cores))(
    delayed(inner_loop)(chunk) for chunk in df_chunk
)

In [11]:
# Concatenate the per-chunk results into one flat list of transactions,
# keeping the original chunk (and therefore row) order.
transactions = []
for chunk_transactions in dataset:
    transactions.extend(chunk_transactions)

In [12]:
# One-hot encode the transactions: one boolean column per tag, one row per task.
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
# NOTE: from here on `df` is the encoded tag matrix, no longer the raw task table
# loaded at the top of the notebook.
df = pd.DataFrame(data=te_ary, columns=te.columns_)


In [23]:
# Share of tasks carrying the 'Failed' tag (the column is already boolean).
len(df[df['Failed']]) / len(df)

0.1558627733825519

In [14]:
# Mine every tag combination that occurs in at least 7% of the tasks.
# use_colnames=True reports the tag names in the itemsets rather than column indices.
frequent_itemsets = fpgrowth(df, min_support=0.07, use_colnames=True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.869135,(freq_user)
1,0.635260,(low_min_gpu_util)
2,0.417772,(short_queue)
3,0.341866,(24GB_GPU)
4,0.152610,(short_runtime)
...,...,...
94,0.110607,"(long_queue, low_min_gpu_util)"
95,0.088692,"(long_queue, top_vc)"
96,0.103225,"(long_queue, low_min_gpu_util, freq_user)"
97,0.085906,"(long_queue, top_vc, freq_user)"


In [18]:
# association_rules is imported together with the other mlxtend helpers in the
# import cell at the top of the notebook.
# Derive rules from the frequent itemsets, keeping only those whose confidence
# is at least 0.8.
res = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.8)

In [19]:
# Show the mined rules with their support / confidence / lift metrics.
res

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(low_min_gpu_util),(freq_user),0.63526,0.869135,0.550985,0.867338,0.997933,-0.001141,0.986456
1,(short_queue),(freq_user),0.417772,0.869135,0.35174,0.84194,0.968711,-0.011361,0.827949
2,"(short_queue, low_min_gpu_util)",(freq_user),0.24528,0.869135,0.203618,0.830143,0.955137,-0.009564,0.770444
3,"(short_queue, top_vc)",(freq_user),0.155827,0.869135,0.144152,0.92508,1.064369,0.008718,1.746745
4,"(short_queue, low_min_gpu_util, top_vc)",(freq_user),0.091425,0.869135,0.084571,0.925029,1.064311,0.00511,1.745554
5,(24GB_GPU),(freq_user),0.341866,0.869135,0.297157,0.869221,1.000099,3e-05,1.00066
6,"(low_min_gpu_util, 24GB_GPU)",(freq_user),0.19912,0.869135,0.173486,0.871265,1.002451,0.000424,1.01655
7,"(short_queue, 24GB_GPU)",(freq_user),0.151141,0.869135,0.129405,0.856186,0.985101,-0.001957,0.909961
8,"(24GB_GPU, top_vc)",(freq_user),0.149869,0.869135,0.137961,0.920548,1.059154,0.007705,1.64709
9,"(low_min_gpu_util, 24GB_GPU, top_vc)",(freq_user),0.098019,0.869135,0.091111,0.929525,1.069483,0.005919,1.856895


In [20]:
# Persist the mined rules in the notebook's working directory.
# pandas' default also writes the row index as the first (unnamed) column.
res.to_csv('mined_rules.csv')