In [83]:
# Data manipulation
import pandas as pd
import numpy as np
# Evaluation of the model
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import roc_auc_score
# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams['font.size'] = 18
%matplotlib inline
MAX_EVALS = 100
import csv
from hyperopt import STATUS_OK
from timeit import default_timer as timer
import json
import shlex
import subprocess
import logging
from pprint import pprint
import re
import os


In [84]:
file_suffix = "temp" + str(MAX_EVALS)
def runthebenchmark(hyperparameters):
    os.chdir('/home/meghaagr/project/progress/active/../')
    storeinfile(hyperparameters)
    out=subprocess.Popen(["python3","read_config_general.py","-n 8","-c200 200 400 4 4 4 1"], shell=False, stdout=subprocess.PIPE)
    logging.basicConfig(level=logging.DEBUG)
    output=out.stdout.read().decode('utf-8')
    print("output"+output)
    if len(output.split(" ")) > 5:
        values = output.split(" ")
        value = float(float(values[6])*1024)/float(values[5]) + float(float(values[3])*1024)/float(values[2])
        value = float(value)
        print(value)
        return float((value/100)**3),output
    return 0,0

In [45]:
# Import section
import pickle
import json
from sklearn.externals import joblib
import os
os.chdir('/Users/megha/parallel/mnt/project/progress/active/')


# Loading the model and scaler
filename="final.sav"
loaded_model = pickle.load(open(filename, 'rb'))
scaler_filename = "scaler.save"
scaler = joblib.load(scaler_filename) 


In [85]:
def runfromsavedmodel(hyperparameters):
   # Active learning model choosees and saves the values of parameters
    storeinfile(hyperparameters)
    with open('/Users/megha/parallel/mnt/project/progress/confex.json') as f:
        data = json.load(f)

    cb_buffer_size = data["mpi"]["cb_buffer_size"]
    stripe_size = data["lfs"]["setstripe"]["size"]
    stripe_count = data["lfs"]["setstripe"]["count"]
    col_names =  ['cb1', 'cb2', 'cb3']
    cb_df  = pd.DataFrame(columns = col_names)
    cb_df.loc[len(cb_df)] = [stripe_size, stripe_count, cb_buffer_size]

    #normalize the test data
    norm_df = scaler.transform( cb_df )
    temp = pd.DataFrame(norm_df)

    col_names =  ['0','1', '2','3','4','5']
    my_df = pd.DataFrame(columns=col_names)
    
    ### INITIALIZE THIS MANUALLY ####
    my_df.loc[len(my_df)] = [50,50,50,2,2,2]
    x = pd.concat([my_df,temp], axis=1)
    print(x)
    out = loaded_model.predict(x)
    return float(1/float(out[0])),0
    

In [86]:
def storeinfile(hyperparameters):
    data={"mpi": {"romio_ds_read": "enable", "romio_ds_write": "disable"}, "lfs": {"setstripe": {"size": "16777216", "count": 4}}}
    data["lfs"]["setstripe"]["size"] = int(hyperparameters["setstripe-size"])
    data["lfs"]["setstripe"]["count"] = int(hyperparameters["setstripe-count"])

    data["mpi"]["romio_ds_read"] = hyperparameters["romio_ds_read"]
    data["mpi"]["romio_ds_write"] = hyperparameters["romio_ds_write"]
    data["mpi"]["romio_cb_read"] = hyperparameters["romio_cb_read"]
    data["mpi"]["romio_cb_write"] = hyperparameters["romio_cb_write"]
    data["mpi"]["cb_buffer_size"] = str(int(hyperparameters["cb_buffer_size"]))
    print(os.getcwd())
    with open("/Users/megha/parallel/mnt/project/progress/confex.json","w") as fp:
        json.dump(data,fp)
    print(data)


In [87]:
def objective(hyperparameters):
    global ITERATION
    ITERATION += 1
    start = timer()
    result=0
    while(result == 0):
        print("rerun")
        result,output = runfromsavedmodel(hyperparameters)
        print(result)
    run_time = timer() - start
    print({'loss': result, 'hyperparameters': hyperparameters, 'iteration': ITERATION, 'iteration_time': run_time, 'status': STATUS_OK})
    # Write to the csv file ('a' means append)
    of_connection = open(out_file, 'a')
    writer = csv.writer(of_connection)
    writer.writerow([float(result), hyperparameters, ITERATION, run_time, output])
    return {'loss': float(result), 'hyperparameters': hyperparameters, 'iteration': ITERATION, 'iteration_time': run_time, 'status': STATUS_OK}

In [88]:
from hyperopt import hp
from hyperopt.pyll.stochastic import sample

space = {
    'romio_ds_read' : hp.choice('romio_ds_read',['enable','disable']),
    'romio_ds_write' : hp.choice('romio_ds_write',['enable','disable']),
    'romio_cb_read' : hp.choice('romio_cb_read',['enable','disable']),
    'romio_cb_write' : hp.choice('romio_cb_write',['enable','disable']),
    'cb_buffer_size' : 1048576*hp.quniform('cb_buffer_size',1,512,1),
    'setstripe-size' : 65536*(hp.quniform('setstripe-size',0,512,1)),
    'setstripe-count' : hp.qloguniform('setstripe-count',0,5,1)
    
}

In [89]:
x = sample(space)
params = x
print(x)

cb_buffer_size_dist = []
for _ in range(1000):
    cb_buffer_size_dist.append(sample(space)['cb_buffer_size'])
    
setstripe_size_dist = []
for _ in range(1000):
    setstripe_size_dist.append(sample(space)['setstripe-size'])
    
setstripe_count_dist = []
for _ in range(1000):
    setstripe_count_dist.append(sample(space)['setstripe-count']) 
    
romio_ds_read_dist = []
for _ in range(20):
    romio_ds_read_dist.append(sample(space)['romio_ds_read'])
    
romio_ds_write_dist = []
for _ in range(20):
    romio_ds_write_dist.append(sample(space)['romio_ds_write'])
    
romio_cb_read_dist = []
for _ in range(20):
    romio_cb_read_dist.append(sample(space)['romio_cb_read']) 
    
romio_cb_write_dist = []
for _ in range(20):
    romio_cb_write_dist.append(sample(space)['romio_cb_write']) 
    

{'cb_buffer_size': 276824064.0, 'romio_cb_read': 'disable', 'romio_cb_write': 'enable', 'romio_ds_read': 'disable', 'romio_ds_write': 'enable', 'setstripe-count': 20.0, 'setstripe-size': 29491200.0}


In [90]:
# File to save first results
print(os.getcwd())
out_file = '/Users/megha/parallel/mnt/project/progress/active/result/gbm_trials-'+file_suffix+'.csv'

of_connection = open(out_file, 'w')
writer = csv.writer(of_connection)
writer.writerow(['loss', 'params', 'iteration', 'train_time', 'output'])
of_connection.close()

/Users/megha/parallel/mnt/project/progress/active


In [None]:
from hyperopt import tpe

# Create the algorithm
tpe_algorithm = tpe.suggest


from hyperopt import Trials

# Record results
bayes_trials = Trials()

from hyperopt import fmin


ITERATION = 1

best = fmin(fn = objective, space = space, algo = tpe.suggest, trials = bayes_trials, max_evals = MAX_EVALS)

rerun                                                
/Users/megha/parallel/mnt/project/progress/active    
{'mpi': {'romio_ds_read': 'enable', 'romio_ds_write': 'enable', 'romio_cb_read': 'enable', 'romio_cb_write': 'disable', 'cb_buffer_size': '468713472'}, 'lfs': {'setstripe': {'size': 33554432, 'count': 1}}}
    0   1   2  3  4  5         0         1         2 
0  50  50  50  2  2  2  0.028023  0.002062  0.437684
2.3936615841252362e-05                               
{'loss': 2.3936615841252362e-05, 'hyperparameters': {'cb_buffer_size': 468713472.0, 'romio_cb_read': 'enable', 'romio_cb_write': 'disable', 'romio_ds_read': 'enable', 'romio_ds_write': 'enable', 'setstripe-count': 1.0, 'setstripe-size': 33554432.0}, 'iteration': 2, 'iteration_time': 0.13723705799998243, 'status': 'ok'}
rerun                                                                             
/Users/megha/parallel/mnt/project/progress/active                                 
{'mpi': {'romio_ds_read': 'enable', 'r

/Users/megha/parallel/mnt/project/progress/active                                 
{'mpi': {'romio_ds_read': 'enable', 'romio_ds_write': 'disable', 'romio_cb_read': 'disable', 'romio_cb_write': 'disable', 'cb_buffer_size': '252706816'}, 'lfs': {'setstripe': {'size': 8650752, 'count': 2}}}
    0   1   2  3  4  5         0         1         2                              
0  50  50  50  2  2  2  0.007225  0.004124  0.235525
2.3936615841252362e-05                                                            
{'loss': 2.3936615841252362e-05, 'hyperparameters': {'cb_buffer_size': 252706816.0, 'romio_cb_read': 'disable', 'romio_cb_write': 'disable', 'romio_ds_read': 'enable', 'romio_ds_write': 'disable', 'setstripe-count': 2.0, 'setstripe-size': 8650752.0}, 'iteration': 11, 'iteration_time': 0.08894711900006769, 'status': 'ok'}
rerun                                                                              
/Users/megha/parallel/mnt/project/progress/active                                  


rerun                                                                              
/Users/megha/parallel/mnt/project/progress/active                                  
{'mpi': {'romio_ds_read': 'disable', 'romio_ds_write': 'enable', 'romio_cb_read': 'disable', 'romio_cb_write': 'disable', 'cb_buffer_size': '358612992'}, 'lfs': {'setstripe': {'size': 4587520, 'count': 1}}}
    0   1   2  3  4  5         0         1         2                               
0  50  50  50  2  2  2  0.003831  0.002062  0.334642
2.3936615841252362e-05                                                             
{'loss': 2.3936615841252362e-05, 'hyperparameters': {'cb_buffer_size': 358612992.0, 'romio_cb_read': 'disable', 'romio_cb_write': 'disable', 'romio_ds_read': 'disable', 'romio_ds_write': 'enable', 'setstripe-count': 1.0, 'setstripe-size': 4587520.0}, 'iteration': 20, 'iteration_time': 0.08891950299994278, 'status': 'ok'}
rerun                                                                            

{'loss': 2.3936615841252362e-05, 'hyperparameters': {'cb_buffer_size': 535822336.0, 'romio_cb_read': 'enable', 'romio_cb_write': 'disable', 'romio_ds_read': 'enable', 'romio_ds_write': 'disable', 'setstripe-count': 2.0, 'setstripe-size': 30343168.0}, 'iteration': 28, 'iteration_time': 0.0857971660000203, 'status': 'ok'}
rerun                                                                              
/Users/megha/parallel/mnt/project/progress/active                                  
{'mpi': {'romio_ds_read': 'enable', 'romio_ds_write': 'enable', 'romio_cb_read': 'enable', 'romio_cb_write': 'disable', 'cb_buffer_size': '475004928'}, 'lfs': {'setstripe': {'size': 30146560, 'count': 1}}}
    0   1   2  3  4  5         0         1         2                               
0  50  50  50  2  2  2  0.025177  0.002062  0.443572
2.3936615841252362e-05                                                             
{'loss': 2.3936615841252362e-05, 'hyperparameters': {'cb_buffer_size': 475004928.0,

2.3936615841252362e-05                                                             
{'loss': 2.3936615841252362e-05, 'hyperparameters': {'cb_buffer_size': 199229440.0, 'romio_cb_read': 'enable', 'romio_cb_write': 'disable', 'romio_ds_read': 'enable', 'romio_ds_write': 'enable', 'setstripe-count': 1.0, 'setstripe-size': 21233664.0}, 'iteration': 37, 'iteration_time': 0.08351153800003885, 'status': 'ok'}
rerun                                                                              
/Users/megha/parallel/mnt/project/progress/active                                  
{'mpi': {'romio_ds_read': 'enable', 'romio_ds_write': 'disable', 'romio_cb_read': 'enable', 'romio_cb_write': 'disable', 'cb_buffer_size': '490733568'}, 'lfs': {'setstripe': {'size': 21168128, 'count': 3}}}
    0   1   2  3  4  5         0         1         2                               
0  50  50  50  2  2  2  0.017678  0.006186  0.458292
2.3936615841252362e-05                                                            

    0   1   2  3  4  5         0         1         2                               
0  50  50  50  2  2  2  0.021509  0.024742  0.148184
2.921157947010195e-05                                                              
{'loss': 2.921157947010195e-05, 'hyperparameters': {'cb_buffer_size': 159383552.0, 'romio_cb_read': 'enable', 'romio_cb_write': 'disable', 'romio_ds_read': 'enable', 'romio_ds_write': 'enable', 'setstripe-count': 12.0, 'setstripe-size': 25755648.0}, 'iteration': 46, 'iteration_time': 0.09046530599994185, 'status': 'ok'}
rerun                                                                              
/Users/megha/parallel/mnt/project/progress/active                                  
{'mpi': {'romio_ds_read': 'disable', 'romio_ds_write': 'enable', 'romio_cb_read': 'disable', 'romio_cb_write': 'enable', 'cb_buffer_size': '130023424'}, 'lfs': {'setstripe': {'size': 8847360, 'count': 6}}}
    0   1   2  3  4  5         0         1         2                               

    0   1   2  3  4  5         0         1        2                                
0  50  50  50  2  2  2  0.018171  0.002062  0.48577
2.3936615841252362e-05                                                             
{'loss': 2.3936615841252362e-05, 'hyperparameters': {'cb_buffer_size': 520093696.0, 'romio_cb_read': 'enable', 'romio_cb_write': 'disable', 'romio_ds_read': 'enable', 'romio_ds_write': 'disable', 'setstripe-count': 1.0, 'setstripe-size': 21757952.0}, 'iteration': 55, 'iteration_time': 0.09571538499994858, 'status': 'ok'}
rerun                                                                              
/Users/megha/parallel/mnt/project/progress/active                                  
{'mpi': {'romio_ds_read': 'enable', 'romio_ds_write': 'disable', 'romio_cb_read': 'enable', 'romio_cb_write': 'disable', 'cb_buffer_size': '228589568'}, 'lfs': {'setstripe': {'size': 23724032, 'count': 2}}}
    0   1   2  3  4  5         0         1         2                              

    0   1   2  3  4  5         0         1         2                               
0  50  50  50  2  2  2  0.024848  0.004124  0.500491
2.3936615841252362e-05                                                             
{'loss': 2.3936615841252362e-05, 'hyperparameters': {'cb_buffer_size': 535822336.0, 'romio_cb_read': 'enable', 'romio_cb_write': 'disable', 'romio_ds_read': 'enable', 'romio_ds_write': 'disable', 'setstripe-count': 2.0, 'setstripe-size': 29753344.0}, 'iteration': 64, 'iteration_time': 0.09678250299998581, 'status': 'ok'}
rerun                                                                              
/Users/megha/parallel/mnt/project/progress/active                                  
{'mpi': {'romio_ds_read': 'enable', 'romio_ds_write': 'disable', 'romio_cb_read': 'enable', 'romio_cb_write': 'disable', 'cb_buffer_size': '248512512'}, 'lfs': {'setstripe': {'size': 33357824, 'count': 1}}}
    0   1   2  3  4  5         0         1       2                               

    0   1   2  3  4  5        0         1         2                                
0  50  50  50  2  2  2  0.00914  0.002062  0.299313
2.3936615841252362e-05                                                             
{'loss': 2.3936615841252362e-05, 'hyperparameters': {'cb_buffer_size': 320864256.0, 'romio_cb_read': 'disable', 'romio_cb_write': 'disable', 'romio_ds_read': 'enable', 'romio_ds_write': 'enable', 'setstripe-count': 1.0, 'setstripe-size': 10944512.0}, 'iteration': 73, 'iteration_time': 0.20994480600006682, 'status': 'ok'}
rerun                                                                              
/Users/megha/parallel/mnt/project/progress/active                                  
{'mpi': {'romio_ds_read': 'enable', 'romio_ds_write': 'enable', 'romio_cb_read': 'enable', 'romio_cb_write': 'disable', 'cb_buffer_size': '446693376'}, 'lfs': {'setstripe': {'size': 28835840, 'count': 1}}}
    0   1   2  3  4  5         0         1         2                               

    0   1   2  3  4  5         0         1        2                                
0  50  50  50  2  2  2  0.014832  0.008247  0.33366
2.3936615841252362e-05                                                             
{'loss': 2.3936615841252362e-05, 'hyperparameters': {'cb_buffer_size': 357564416.0, 'romio_cb_read': 'enable', 'romio_cb_write': 'enable', 'romio_ds_read': 'enable', 'romio_ds_write': 'disable', 'setstripe-count': 4.0, 'setstripe-size': 17760256.0}, 'iteration': 82, 'iteration_time': 0.08683012899996356, 'status': 'ok'}
rerun                                                                              
/Users/megha/parallel/mnt/project/progress/active                                  
{'mpi': {'romio_ds_read': 'enable', 'romio_ds_write': 'disable', 'romio_cb_read': 'enable', 'romio_cb_write': 'disable', 'cb_buffer_size': '349175808'}, 'lfs': {'setstripe': {'size': 9961472, 'count': 7}}}
    0   1   2  3  4  5         0         1        2                                


    0   1   2  3  4  5         0         1         2                               
0  50  50  50  2  2  2  0.023206  0.004124  0.037291
2.3936615841252362e-05                                                             
{'loss': 2.3936615841252362e-05, 'hyperparameters': {'cb_buffer_size': 40894464.0, 'romio_cb_read': 'enable', 'romio_cb_write': 'disable', 'romio_ds_read': 'enable', 'romio_ds_write': 'enable', 'setstripe-count': 2.0, 'setstripe-size': 27787264.0}, 'iteration': 91, 'iteration_time': 0.0907384730001013, 'status': 'ok'}
rerun                                                                              
/Users/megha/parallel/mnt/project/progress/active                                  
{'mpi': {'romio_ds_read': 'disable', 'romio_ds_write': 'enable', 'romio_cb_read': 'disable', 'romio_cb_write': 'disable', 'cb_buffer_size': '535822336'}, 'lfs': {'setstripe': {'size': 2162688, 'count': 2}}}
    0   1   2  3  4  5         0         1         2                               


    0   1   2  3  4  5        0         1         2                                
0  50  50  50  2  2  2  0.01226  0.002062  0.480864
2.3936615841252362e-05                                                             
{'loss': 2.3936615841252362e-05, 'hyperparameters': {'cb_buffer_size': 514850816.0, 'romio_cb_read': 'disable', 'romio_cb_write': 'disable', 'romio_ds_read': 'disable', 'romio_ds_write': 'disable', 'setstripe-count': 1.0, 'setstripe-size': 14680064.0}, 'iteration': 100, 'iteration_time': 0.08425130800014813, 'status': 'ok'}
rerun                                                                              
/Users/megha/parallel/mnt/project/progress/active                                  
{'mpi': {'romio_ds_read': 'disable', 'romio_ds_write': 'disable', 'romio_cb_read': 'disable', 'romio_cb_write': 'disable', 'cb_buffer_size': '506462208'}, 'lfs': {'setstripe': {'size': 19202048, 'count': 2}}}
    0   1   2  3  4  5         0         1         2                         

In [None]:
print(best)
d=  best
#print(bayes_trials.results)
bayes_trials_results = sorted(bayes_trials.results, key = lambda x: x['loss'])
#bayes_trials_results[:1]

In [None]:
results = pd.read_csv(out_file)

# Sort with best scores on top and reset index for slicing
results.sort_values('train_time', ascending = True, inplace = True)
results.reset_index(inplace = True, drop = True)
results.head()


In [None]:
import ast

# Convert from a string to a dictionary
ast.literal_eval(results.loc[0, 'params'])
best_bayes_params = ast.literal_eval(results.loc[0, 'params']).copy()
print(best_bayes_params)
log = open('best.txt','a')
print(results.loc[0,'output'])
print(results.loc[0,'output'],file=log)
log.close()

In [None]:
bayes_params = pd.DataFrame(columns = list(ast.literal_eval(results.loc[0, 'params']).keys()),
                            index = list(range(len(results))))

# Add the results with each parameter a different column
for i, params in enumerate(results['params']):
    bayes_params.loc[i, :] = list(ast.literal_eval(params).values())
bayes_params['train_time'] = results['train_time']    
bayes_params['loss'] = results['loss']
bayes_params['iteration'] = results['iteration']
bayes_params['output']=results['output']

bayes_params.head()

In [None]:
#os.chdir('./plots')
plt.figure(figsize = (20, 8))
plt.rcParams['font.size'] = 18

# Density plots of the learning rate distributions 
sns.kdeplot(bayes_params['loss'], label = 'Loss Variation', linewidth = 2)
plt.legend()
plt.xlabel('Loss'); plt.ylabel('Density'); plt.title('Loss Distribution');
#plt.savefig(file_suffix+'_loss.png')

In [None]:
plt.figure(figsize = (20, 8))
plt.rcParams['font.size'] = 18


sns.kdeplot(bayes_params['cb_buffer_size'], label = 'bayes cb_buffer_size', linewidth = 2)
sns.kdeplot(cb_buffer_size_dist,color="red",linewidth=2, label = 'initial cb_buffer_size')

plt.legend()
plt.xlabel('cb_buffer_size'); plt.ylabel('Density'); plt.title('cb_buffer_size Distribution');
#plt.savefig(file_suffix+'_cb_buffer_size.png')

In [None]:
plt.figure(figsize = (20, 8))
plt.rcParams['font.size'] = 18

sns.kdeplot(bayes_params['setstripe-size'], label = 'bayes setstripe-size', linewidth = 2)
sns.kdeplot(setstripe_size_dist,color="red",linewidth=2, label = 'initial setstripe-size')

plt.legend()
plt.xlabel('setstripe-size'); plt.ylabel('Density'); plt.title('setstripe-size Distribution');
#plt.savefig(file_suffix+'_setstripe-size.png')

In [None]:
plt.figure(figsize = (20, 8))
plt.rcParams['font.size'] = 18

sns.kdeplot(bayes_params['setstripe-count'], label = 'bayes setstripe-count', linewidth = 2)
sns.kdeplot(setstripe_count_dist,color="red",linewidth=2, label = 'initial setstripe-count')

plt.legend()
plt.xlabel('setstripe-count'); plt.ylabel('Density'); plt.title('setstripe-count Distribution');
#plt.savefig(file_suffix+'_setstripe-count.png')

In [None]:
plt.figure(figsize = (20, 8))
from scipy.stats import itemfreq
plt.rcParams['font.size'] = 18
bayes_params['romio_ds_read'].value_counts().plot.bar()#plt.hist(romio_ds_read_dist,color="red",linewidth=2, label = 'initial romio_ds_read')

plt.legend()
plt.xlabel('romio_ds_read'); plt.ylabel('Count'); plt.title('romio_ds_read Distribution');
#plt.savefig(file_suffix+'_romio_ds_read.png')

In [None]:
plt.figure(figsize = (20, 8))
plt.rcParams['font.size'] = 18

#plt.hist(bayes_params['romio_ds_write'], label = 'bayes romio_ds_write', linewidth = 2)
#plt.hist(romio_ds_write_dist,color="red",linewidth=2, label = 'initial romio_ds_write')
bayes_params['romio_ds_write'].value_counts().plot.bar()
plt.legend()
plt.xlabel('romio_ds_write'); plt.ylabel('Count'); plt.title('romio_ds_write Distribution');
#plt.savefig(file_suffix+'_romio_ds_write.png')

In [None]:
plt.figure(figsize = (20, 8))
plt.rcParams['font.size'] = 18

bayes_params['romio_cb_write'].value_counts().plot.bar()#plt.hist(romio_ds_write_dist,color="red",linewidth=2, label = 'initial romio_cb_write')

plt.legend()
plt.xlabel('romio_cb_write'); plt.ylabel('Count'); plt.title('romio_cb_write Distribution');
#plt.savefig(file_suffix+'_romio_cb_write.png')

In [None]:
plt.figure(figsize = (20, 8))
plt.rcParams['font.size'] = 18

bayes_params['romio_cb_read'].value_counts().plot.bar()#plt.hist(romio_cb_read_dist,color="red",linewidth=2, label = 'initial romio_cb_read')

plt.legend()
plt.xlabel('romio_cb_read'); plt.ylabel('Count'); plt.title('romio_cb_read Distribution');
#plt.savefig(file_suffix+'_romio_cb_read.png')

In [None]:
os.chdir('../')