In [1]:
import GPyOpt
import numpy as np
import pandas as pd
import re
import json

In [2]:
# Event count handed to every objective job via `--nEvents`; it is also the
# divisor that turns `reco_passed_no_clones` into the optimisation target.
N_EVENTS = 500

# Optimization space

In [3]:
# Geometry scales from which the variable bounds and the constraint
# expressions are derived.
min_dist = 3.6
radius = 2

# (name, (lower, upper)) for each optimisation variable.  The position in this
# table is the column index used by the constraint expressions below
# (x[:,0] is pitch, x[:,1] is yoffset_layer, and so on).
# pitch has identical lower and upper bounds, i.e. it is pinned to min_dist.
_variable_bounds = [
    ('pitch', (min_dist, min_dist)),
    ('yoffset_layer', (min_dist/2, min_dist)),
    ('yoffset_plane', (min_dist*0.25, min_dist*1.25)),
    ('zshift_layer', (1, 12)),
    ('zshift_plane', (1, 12)),
    ('zshift_view', (10, 12)),
    ('alpha', (5, 15)),
]
space = [
    {'name': var_name, 'type': 'continuous', 'domain': bounds}
    for var_name, bounds in _variable_bounds
]

# Constraint expressions over the 2-D sample array x, with `radius` substituted
# for {r}.  Per the GPyOpt documentation a point is feasible when the
# expression evaluates to <= 0.
# NOTE(review): recent GPyOpt releases document the dictionary key as
# 'constraint'; confirm the installed version reads 'constrain'.
_constraint_templates = [
    ('constr_1', '-(x[:,0]-x[:,1])**2-x[:,3]**2+{r}**2'),
    ('constr_2', '-(x[:,1]-x[:,2])**2-(x[:,3]-x[:,4])**2+{r}**2'),
    ('constr_3', 'x[:,3]+x[:,4]+{r}-x[:,5]'),
    ('constr_4', 'x[:,3]+{r}-x[:,4]'),
]
constraints = [
    {'name': label, 'constrain': template.format(r=radius)}
    for label, template in _constraint_templates
]

In [4]:
# Bundle the variable definitions and constraints into a GPyOpt design space;
# the initial-design sampler further down draws its points from it.
feasible_region = GPyOpt.Design_space(space=space, constraints=constraints)

# Seed NumPy's global RNG before the initial design is drawn
# (presumably so the random design is reproducible — GPyOpt's use of the
# global RNG is not visible here).
np.random.seed(42)

# Import skygrid client

In [5]:
import time
import json

from disneylandClient import (
    new_client,
    Job,
    RequestWithId,
)

In [6]:
# Job states in which a job has not reached a terminal state yet.
STATUS_IN_PROCESS = {Job.PENDING, Job.PULLED, Job.RUNNING}

# Terminal job states; the polling loops count a job as finished once its
# status is one of these.
STATUS_FINAL = {Job.COMPLETED, Job.FAILED}

In [7]:
def return_descriptor(point):
    """Build the job descriptor that evaluates the objective at ``point``.

    ``point`` must unpack into exactly seven values, in the order
    pitch, yoffset_layer, yoffset_plane, zshift_layer, zshift_plane,
    zshift_view, alpha (a different length raises ``ValueError``).

    Each value is rendered with ``str`` into the command line of
    ``/opt/objective.py``; ``alpha`` is converted with ``int`` first, and the
    module-level ``N_EVENTS`` is passed as ``--nEvents``.

    NOTE(review): the observation table elsewhere in the notebook stores the
    un-truncated alpha — confirm that the truncation here is intended.

    Returns a dict with the keys ``input``, ``container`` and
    ``required_outputs``; the command line is stored under
    ``container['cmd']``.
    """
    pitch, yoffset_layer, yoffset_plane, zshift_layer, zshift_plane, zshift_view, alpha = point

    # Flag/value pairs in the exact order they appear on the command line.
    cli_options = [
        ("--pitch", pitch),
        ("--yoffset_layer", yoffset_layer),
        ("--yoffset_plane", yoffset_plane),
        ("--zshift_layer", zshift_layer),
        ("--zshift_plane", zshift_plane),
        ("--zshift_view", zshift_view),
        ("--alpha", int(alpha)),
        ("--nEvents", N_EVENTS),
    ]
    rendered_options = [flag + " " + str(value) for flag, value in cli_options]
    cmd = " ".join(
        ["/opt/disney-run.sh python /opt/objective.py"]
        + rendered_options
        + ["--method FH"]
    )

    # Container image and resource limits for the job.
    container = {
        "workdir": "",
        "name": "oleg94/ship_metric:03.27",
        "cpu_needed": 1,
        "max_memoryMB": 4096,
        "min_memoryMB": 1024,
        "cmd": cmd,
    }

    # The content of output.txt is requested back as the variable "out".
    required_outputs = {
        "output_uri": "none:",
        "file_contents": [
            {"file": "output.txt", "to_variable": "out"}
        ]
    }

    return {
        "input": [],
        "container": container,
        "required_outputs": required_outputs,
    }

# Initial design

In [8]:
# Number of jobs submitted and awaited together in one epoch of the
# initial-design loop.
n_estimators = 40
# Number of points requested from the initial-design sampler.
n_initial_design = 40

In [9]:
# Draw n_initial_design points with GPyOpt's 'random' design from the
# constrained design space; the rows are later used as the parameter columns
# of the observation table.
initial_design = GPyOpt.experiment_design.initial_design('random', feasible_region, n_initial_design)

In [10]:
# Zero-filled array with one slot per initial-design point.
# NOTE(review): not read anywhere in the visible part of the notebook —
# possibly a leftover; confirm before removing.
initial_objective = np.zeros(n_initial_design)

In [None]:
# Client handle used below to create jobs and to poll their status.
stub = new_client()
# Accumulates the finished Job objects of the initial design, in submission order.
jobs = []

In [None]:
# Evaluate the whole initial design on the cluster, at most n_estimators jobs
# at a time, and collect the finished Job objects in `jobs`.  `jobs` must stay
# aligned 1:1 (same order) with the rows of `initial_design`, because the
# results table built afterwards pairs them by position.

# Start from an empty list so that re-running this cell cannot leave
# duplicated jobs behind.
jobs = []
# Index of the first initial-design row that has not been submitted yet.
init_d_i = 0

# Ceiling division: a trailing batch smaller than n_estimators is still
# submitted instead of being silently dropped (floor division would leave those
# points without a job and break the job/row alignment).
n_init_epochs = -(-n_initial_design // n_estimators)

for epoch in range(n_init_epochs):

    print("EPOCH #"+str(epoch)+" started.")

    # Submit one job per design point of this batch.
    batch_end = min(init_d_i + n_estimators, n_initial_design)
    epoch_jobs = []
    for k, point in enumerate(initial_design[init_d_i:batch_end]):
        new_job = Job(input=json.dumps(return_descriptor(point)), kind="docker")
        epoch_jobs.append(stub.CreateJob(new_job))
        print(k, " pushed")
    init_d_i = batch_end

    # Last reported (finished, running, pending) counts; progress is printed
    # only when one of them changes.
    prev_counts = (0, 0, 0)

    while True:
        # Refresh the state of every job of this batch from the server.
        epoch_jobs = [stub.GetJob(RequestWithId(id=job.id)) for job in epoch_jobs]
        statuses = [job.status for job in epoch_jobs]
        counts = (
            sum(1 for status in statuses if status in STATUS_FINAL),
            sum(1 for status in statuses if status == Job.RUNNING),
            sum(1 for status in statuses if status == Job.PENDING),
        )

        if counts != prev_counts:
            print("Finished jobs: "+str(counts[0])+\
                  " Running jobs: "+str(counts[1])+\
                  " Pending jobs: "+str(counts[2]))
            prev_counts = counts

        # Compare against the number of jobs actually submitted, which can be
        # smaller than n_estimators for the last batch.
        if counts[0] == len(epoch_jobs):
            break
        time.sleep(10)

    jobs += epoch_jobs

EPOCH #0 started.
0  pushed
1  pushed
2  pushed
3  pushed
4  pushed
5  pushed
6  pushed
7  pushed
8  pushed
9  pushed
10  pushed
11  pushed
12  pushed
13  pushed
14  pushed
15  pushed
16  pushed
17  pushed
18  pushed
19  pushed
20  pushed
21  pushed
22  pushed
23  pushed
24  pushed
25  pushed
26  pushed
27  pushed
28  pushed
29  pushed
30  pushed
31  pushed
32  pushed
33  pushed
34  pushed
35  pushed
36  pushed
37  pushed
38  pushed
39  pushed
Finished jobs: 0 Running jobs: 0 Pending jobs: 40
Finished jobs: 0 Running jobs: 40 Pending jobs: 0
Finished jobs: 1 Running jobs: 39 Pending jobs: 0
Finished jobs: 2 Running jobs: 38 Pending jobs: 0
Finished jobs: 3 Running jobs: 37 Pending jobs: 0
Finished jobs: 4 Running jobs: 36 Pending jobs: 0
Finished jobs: 5 Running jobs: 35 Pending jobs: 0
Finished jobs: 6 Running jobs: 34 Pending jobs: 0
Finished jobs: 7 Running jobs: 33 Pending jobs: 0
Finished jobs: 8 Running jobs: 32 Pending jobs: 0


In [14]:
# One row per initial-design point, one column per optimisation variable.
df_init_design = pd.DataFrame(
    initial_design,
    columns=['pitch', 'yoffset_layer', 'yoffset_plane', 'zshift_layer',
             'zshift_plane', 'zshift_view', 'alpha'],
)

# For every job: cut 15 leading and 2 trailing characters off the raw output
# and remove all backslashes.  What remains is parsed as JSON below; an empty
# remainder is recorded as NaN.
# (Presumably the slice strips a fixed wrapper around the "out" variable —
# verify against the actual output format.)
job_payloads = [re.sub(r"\\", '', job.output[15:-2]) for job in jobs]

df_init_design['reconstructible'] = [
    float(json.loads(payload)['reconstructible']) if payload else np.nan
    for payload in job_payloads
]
df_init_design['reco_passed_no_clones'] = [
    float(json.loads(payload)['reco_passed_no_clones']) if payload else np.nan
    for payload in job_payloads
]

In [17]:
# Preview the first rows of the observation table.
df_init_design.head()

Unnamed: 0,pitch,yoffset_layer,yoffset_plane,zshift_layer,zshift_plane,zshift_view,alpha,reconstructible,reco_passed_no_clones
0,3.6,3.436777,1.12881,2.210571,6.893522,11.797108,8.701587,157.0,119.0
1,3.6,3.491098,3.638826,2.318519,6.633024,11.303923,13.511367,135.0,60.0
2,3.6,1.934191,1.723673,1.566266,6.826156,11.784093,8.594912,155.0,145.0
3,3.6,3.372442,2.453974,2.263205,4.947135,11.594591,13.698964,135.0,112.0
4,3.6,3.251162,3.225302,2.69799,5.830484,11.440073,5.547616,147.0,74.0


In [16]:
# Persist the initial observations (without the index column); the optimisation
# section reads this file back.  The `observations/` directory must already
# exist — `to_csv` does not create missing directories.
df_init_design.to_csv('observations/observations.csv', index=False)

# Main part of optimization

In [18]:
# Reload the observations saved so far, so this section can start from the
# CSV file rather than from in-memory state.
df_init_design = pd.read_csv('observations/observations.csv')
# Batch size of the optimisation loop; replaces the value used for the initial design.
n_estimators = 20
# Number of optimisation epochs to run.
n_epochs = 50
# Client handle used to create and poll the jobs of the optimisation loop.
stub = new_client()

In [None]:
# Batch Bayesian optimisation: in every epoch fit GPyOpt on all observations
# collected so far, ask for a batch of new points, evaluate them as remote
# jobs, and append the results to the observation table and its CSV file.

# Names of the optimisation variables, in the order of `space`.  Selecting the
# columns by name keeps the metric columns ('reconstructible',
# 'reco_passed_no_clones') out of the model inputs; slicing with
# columns[:-1] would feed 'reconstructible' in as an eighth input.
param_names = [variable['name'] for variable in space]

for epoch in range(n_epochs):

    print("EPOCH #"+str(epoch)+" started.")

    all_X = df_init_design[param_names].values
    # The objective is negated because we want to maximise the fraction
    # reco_passed_no_clones / N_EVENTS.
    all_Y = (-df_init_design['reco_passed_no_clones'] / N_EVENTS).values.reshape(-1, 1)

    # Rows without a result (NaN) are kept out of the training data and
    # handed to the optimiser as ignored_X instead.
    missing = np.isnan(all_Y.ravel())
    ignored_X = all_X[missing]
    step_X = all_X[~missing]
    step_Y = all_Y[~missing]

    bo = GPyOpt.methods.BayesianOptimization(f=None, domain=space, constraints=constraints, X=step_X,
                                             Y=step_Y, initial_design_numdata=20,
                                             evaluator_type='local_penalization', batch_size=n_estimators)

    pending_X = list(bo.suggest_next_locations(ignored_X=ignored_X))

    # Submit one job per suggested point; epoch_jobs[k] belongs to pending_X[k].
    epoch_jobs = []
    for new_point in pending_X:
        new_job = Job(input=json.dumps(return_descriptor(new_point)), kind="docker")
        epoch_jobs.append(stub.CreateJob(new_job))

    # Last reported (finished, running, pending) counts; progress is printed
    # only when one of them changes.
    prev_counts = (0, 0, 0)

    while True:
        # Refresh the state of every submitted job.  The loop runs over the
        # jobs actually submitted, not over n_estimators, so a shorter batch
        # neither raises IndexError nor waits forever.
        epoch_jobs = [stub.GetJob(RequestWithId(id=job.id)) for job in epoch_jobs]
        statuses = [job.status for job in epoch_jobs]
        counts = (
            sum(1 for status in statuses if status in STATUS_FINAL),
            sum(1 for status in statuses if status == Job.RUNNING),
            sum(1 for status in statuses if status == Job.PENDING),
        )

        if counts != prev_counts:
            print("Finished jobs: "+str(counts[0])+\
                  " Running jobs: "+str(counts[1])+\
                  " Pending jobs: "+str(counts[2]))
            prev_counts = counts

        if counts[0] == len(epoch_jobs):
            break
        time.sleep(10)

    for point, job in zip(pending_X, epoch_jobs):

        # Cut 15 leading and 2 trailing characters off the raw output and drop
        # all backslashes; the remainder is parsed as JSON once.  An empty
        # remainder is recorded as NaN for both metrics.
        payload = re.sub(r"\\", '', job.output[15:-2])
        if payload:
            metrics = json.loads(payload)
            reconstructible = float(metrics['reconstructible'])
            reco_passed_no_clones = float(metrics['reco_passed_no_clones'])
        else:
            reconstructible = reco_passed_no_clones = np.nan

        # The row layout must match the table: the variables followed by the
        # two metric columns.
        df_init_design.loc[len(df_init_design)] = list(point)+[reconstructible, reco_passed_no_clones]
        # Written after every row so partial progress survives an interruption.
        df_init_design.to_csv('observations/observations.csv', index=False)

EPOCH #0 started.
