Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SYSTEMML-1451][GSoC Phase 1] Single script to run perf tests
- Single entry point to run perf tests in any combination of algorithms, families, matrix shapes & densities - Reports time taken by a single perf test by parsing the output and grep-ing for the time - Detects tests that did not run and reports them in the generated log - Robust error handling and reporting, informative help message Closes #537
- Loading branch information
1 parent
31952e4
commit e7cfcad
Showing
5 changed files
with
1,583 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,252 @@ | ||
#!/usr/bin/env python3 | ||
#------------------------------------------------------------- | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
# | ||
#------------------------------------------------------------- | ||
|
||
import itertools | ||
from os.path import join | ||
from utils import split_rowcol, config_writer | ||
|
||
# Configuration settings shared by the data generation functions in this file.

# Value passed as the `fmt` entry of every generated configuration.
DATA_FORMAT = 'csv'

# Sparsity setting (kept as a string) looked up by matrix type name.
MATRIX_TYPE_DICT = {
    'dense': '0.9',
    'sparse': '0.01',
}

# Families for which config_packets_datagen ignores the requested matrix
# types and always generates a single 'dense' configuration per shape.
FAMILY_NO_MATRIX_TYPE = [
    'clustering',
    'stats1',
    'stats2',
]
|
||
|
||
def multinomial_datagen(matrix_dim, matrix_type, datagen_dir):
    """
    Write the data generation configuration for the 'multinomial' family.

    matrix_dim: String
    Matrix shape, handed to split_rowcol to obtain the row and column parts
    (presumably something like 100k_10 -- confirm against utils.split_rowcol)
    matrix_type: String
    Key into MATRIX_TYPE_DICT selecting the sparsity setting
    datagen_dir: String
    Directory under which the output location is created

    return: String
    Output location 'multinomial.<matrix_type>.<matrix_dim>' inside datagen_dir.
    The configuration itself is written next to it with a '.json' suffix.
    """

    n_rows, n_cols = split_rowcol(matrix_dim)
    target = join(datagen_dir,
                  '.'.join(('multinomial', matrix_type, str(matrix_dim))))

    # Positional arguments; the order must stay exactly as listed.
    arguments = [
        n_rows,                         # numSamples
        n_cols,                         # numFeatures
        MATRIX_TYPE_DICT[matrix_type],  # sparsity
        '150',                          # num_categories
        '0',                            # intercept
        join(target, 'X.data'),         # X
        join(target, 'Y.data'),         # Y
        DATA_FORMAT,                    # fmt
        '1',                            # trailing argument, unnamed in the original
    ]

    config_writer(target + '.json', arguments)

    return target
|
||
|
||
def binomial_datagen(matrix_dim, matrix_type, datagen_dir):
    """
    Write the data generation configuration for the 'binomial' family.

    matrix_dim: String
    Matrix shape, handed to split_rowcol to obtain the row and column parts
    (presumably something like 100k_10 -- confirm against utils.split_rowcol)
    matrix_type: String
    Key into MATRIX_TYPE_DICT selecting the sparsity setting
    datagen_dir: String
    Directory under which the output location is created

    return: String
    Output location 'binomial.<matrix_type>.<matrix_dim>' inside datagen_dir.
    The configuration itself is written next to it with a '.json' suffix.
    """

    n_rows, n_cols = split_rowcol(matrix_dim)
    target = join(datagen_dir,
                  '.'.join(('binomial', matrix_type, str(matrix_dim))))

    # Positional arguments; the order must stay exactly as listed.
    arguments = [
        n_rows,                         # numSamples
        n_cols,                         # numFeatures
        '5',                            # maxFeatureValue
        '5',                            # maxWeight
        join(target, 'weight.data'),    # loc_weights
        join(target, 'X.data'),         # loc_data
        join(target, 'Y.data'),         # loc_labels
        '1',                            # noise
        '0',                            # intercept
        MATRIX_TYPE_DICT[matrix_type],  # sparsity
        DATA_FORMAT,                    # fmt
        '1',                            # transform_labels
    ]
    config_writer(target + '.json', arguments)

    return target
|
||
|
||
def regression1_datagen(matrix_dim, matrix_type, datagen_dir):
    """
    Write the data generation configuration for the 'regression1' family.

    matrix_dim: String
    Matrix shape, handed to split_rowcol to obtain the row and column parts
    (presumably something like 100k_10 -- confirm against utils.split_rowcol)
    matrix_type: String
    Key into MATRIX_TYPE_DICT selecting the sparsity setting
    datagen_dir: String
    Directory under which the output location is created

    return: String
    Output location 'regression1.<matrix_type>.<matrix_dim>' inside datagen_dir.
    The configuration itself is written next to it with a '.json' suffix.
    """

    n_rows, n_cols = split_rowcol(matrix_dim)
    folder_name = '.'.join(('regression1', matrix_type, str(matrix_dim)))
    target = join(datagen_dir, folder_name)

    # Positional arguments; the order must stay exactly as listed.
    arguments = [
        n_rows,                         # numSamples
        n_cols,                         # numFeatures
        '5',                            # maxFeatureValue
        '5',                            # maxWeight
        join(target, 'weight.data'),    # loc_weights
        join(target, 'X.data'),         # loc_data
        join(target, 'Y.data'),         # loc_labels
        '1',                            # noise
        '0',                            # intercept
        MATRIX_TYPE_DICT[matrix_type],  # sparsity
        DATA_FORMAT,                    # fmt
        '1',                            # transform_labels
    ]
    config_writer(target + '.json', arguments)

    return target
|
||
|
||
def regression2_datagen(matrix_dim, matrix_type, datagen_dir):
    """
    Write the data generation configuration for the 'regression2' family.

    matrix_dim: String
    Matrix shape, handed to split_rowcol to obtain the row and column parts
    (presumably something like 100k_10 -- confirm against utils.split_rowcol)
    matrix_type: String
    Key into MATRIX_TYPE_DICT selecting the sparsity setting
    datagen_dir: String
    Directory under which the output location is created

    return: String
    Output location 'regression2.<matrix_type>.<matrix_dim>' inside datagen_dir.
    The configuration itself is written next to it with a '.json' suffix.
    """

    n_rows, n_cols = split_rowcol(matrix_dim)
    folder_name = '.'.join(('regression2', matrix_type, str(matrix_dim)))
    target = join(datagen_dir, folder_name)

    # Positional arguments; the order must stay exactly as listed.
    arguments = [
        n_rows,                         # numSamples
        n_cols,                         # numFeatures
        '5',                            # maxFeatureValue
        '5',                            # maxWeight
        join(target, 'weight.data'),    # loc_weights
        join(target, 'X.data'),         # loc_data
        join(target, 'Y.data'),         # loc_labels
        '1',                            # noise
        '0',                            # intercept
        MATRIX_TYPE_DICT[matrix_type],  # sparsity
        DATA_FORMAT,                    # fmt
        '1',                            # transform_labels
    ]
    config_writer(target + '.json', arguments)

    return target
|
||
|
||
def clustering_datagen(matrix_dim, matrix_type, datagen_dir):
    """
    Write the data generation configuration for the 'clustering' family.

    matrix_dim: String
    Matrix shape, handed to split_rowcol to obtain the row and column parts
    (presumably something like 100k_10 -- confirm against utils.split_rowcol)
    matrix_type: String
    Only used to build the output location name; no sparsity is looked up here
    datagen_dir: String
    Directory under which the output location is created

    return: String
    Output location 'clustering.<matrix_type>.<matrix_dim>' inside datagen_dir.
    The configuration itself is written next to it with a '.json' suffix.
    """

    n_rows, n_cols = split_rowcol(matrix_dim)
    target = join(datagen_dir,
                  '.'.join(('clustering', matrix_type, str(matrix_dim))))

    # Named arguments; key order is kept as in the original definition.
    settings = {
        'nr': n_rows,
        'nf': n_cols,
        'nc': '50',
        'dc': '10.0',
        'dr': '1.0',
        'fbf': '100.0',
        'cbf': '100.0',
        'X': join(target, 'X.data'),
        'C': join(target, 'C.data'),
        'Y': join(target, 'Y.data'),
        'YbyC': join(target, 'YbyC.data'),
        'fmt': DATA_FORMAT,
    }

    config_writer(target + '.json', settings)
    return target
|
||
|
||
def stats1_datagen(matrix_dim, matrix_type, datagen_dir):
    """
    Write the data generation configuration for the 'stats1' family.

    matrix_dim: String
    Matrix shape, handed to split_rowcol to obtain the row and column parts
    (presumably something like 100k_10 -- confirm against utils.split_rowcol)
    matrix_type: String
    Only used to build the output location name; no sparsity is looked up here
    datagen_dir: String
    Directory under which the output location is created

    return: String
    Output location 'stats1.<matrix_type>.<matrix_dim>' inside datagen_dir.
    The configuration itself is written next to it with a '.json' suffix.
    """

    n_rows, n_cols = split_rowcol(matrix_dim)
    target = join(datagen_dir,
                  '.'.join(('stats1', matrix_type, str(matrix_dim))))

    # NC must satisfy num0 < NC < C, where num0 = NC/2.
    # It used to be a fixed 10; it is now half the column count.
    half_cols = int(int(n_cols) / 2)

    # Named arguments; key order is kept as in the original definition.
    settings = {
        'R': n_rows,
        'C': n_cols,
        'NC': half_cols,
        'MAXDOMAIN': '1100',
        'DATA': join(target, 'X.data'),
        'TYPES': join(target, 'types'),
        'SETSIZE': '20',
        'LABELSETSIZE': '10',
        'TYPES1': join(target, 'set1.types'),
        'TYPES2': join(target, 'set2.types'),
        'INDEX1': join(target, 'set1.indices'),
        'INDEX2': join(target, 'set2.indices'),
        'fmt': DATA_FORMAT,
    }

    config_writer(target + '.json', settings)

    return target
|
||
|
||
def stats2_datagen(matrix_dim, matrix_type, datagen_dir):
    """
    Write the data generation configuration for the 'stats2' family.

    matrix_dim: String
    Matrix shape, handed to split_rowcol to obtain the row and column parts
    (presumably something like 100k_10 -- confirm against utils.split_rowcol)
    matrix_type: String
    Only used to build the output location name; no sparsity is looked up here
    datagen_dir: String
    Directory under which the output location is created

    return: String
    Output location 'stats2.<matrix_type>.<matrix_dim>' inside datagen_dir.
    The configuration itself is written next to it with a '.json' suffix.
    """

    n_rows, n_cols = split_rowcol(matrix_dim)
    target = join(datagen_dir,
                  '.'.join(('stats2', matrix_type, str(matrix_dim))))

    # Named arguments; key order is kept as in the original definition.
    settings = {
        'nr': n_rows,
        'nf': n_cols,
        'D': join(target, 'X.data'),
        'Xcid': join(target, 'Xcid.data'),
        'Ycid': join(target, 'Ycid.data'),
        'A': join(target, 'A.data'),
        'fmt': DATA_FORMAT,
    }

    config_writer(target + '.json', settings)
    return target
|
||
|
||
def config_packets_datagen(algo_payload, matrix_type, matrix_shape, datagen_dir):
    """
    This function has two responsibilities. Generate the configuration files for
    datagen algorithms and return a dictionary that will be used for execution.

    algo_payload : List of tuples
    The first tuple index contains algorithm name and the second index contains
    family type. Only the family type is read here.

    matrix_type: Iterable of strings
    Types of matrix to generate e.g ['dense', 'sparse']. It is iterated over,
    so a bare string would be split into single characters. Ignored for the
    families listed in FAMILY_NO_MATRIX_TYPE, which always use 'dense'.

    matrix_shape: Iterable of strings
    Shapes of matrix to generate e.g ['100k_10']. It is iterated over as well.

    datagen_dir: String
    Directory passed through unchanged to every <family>_datagen function

    return: Dictionary {string: list}
    This dictionary contains the distinct families as keys and, as values, the
    list of paths returned by the matching <family>_datagen function, one entry
    per (shape, type) combination.

    A KeyError is raised when a family has no '<family>_datagen' function
    defined at module level and at least one combination exists for it.
    """

    distinct_families = {entry[1] for entry in algo_payload}

    config_packets = {}
    for current_family in distinct_families:
        # Families without a matrix type get exactly one 'dense' run per shape,
        # e.g. clustering : [(10k_1, dense), (10k_2, dense), ...]
        if current_family in FAMILY_NO_MATRIX_TYPE:
            family_types = ['dense']
        else:
            family_types = matrix_type

        # Cross product of all shapes with the applicable matrix types
        combinations = itertools.product(matrix_shape, family_types)

        # Generator functions are resolved by naming convention from the
        # module namespace; the lookup stays inside the loop so that nothing
        # is looked up when there are no combinations to generate.
        family_func = current_family.lower() + '_datagen'
        config_packets[current_family] = [
            globals()[family_func](shape, kind, datagen_dir)
            for shape, kind in combinations
        ]

    return config_packets
Oops, something went wrong.