In [1]:
import json
import boto3
import random
import os
from concurrent.futures import ThreadPoolExecutor
import threading
# Flag read by the collection loop at the bottom of the notebook: worker threads
# are only launched while it is True.
RUNNING = True
# Caps the number of tasks in flight at 6. The launcher acquires a slot before
# starting each worker thread; run_and_save_results releases it when done.
threadLimiter = threading.BoundedSemaphore(6)
# Held by save_result around its read-modify-write of the results JSON file.
resultLock = threading.Lock()

# Shared boto3 S3 resource used by copy_file_from_to_bucket and the copy cell.
s3 = boto3.resource('s3')

# RAM settings swept for every task: 128, 192, ..., 3008 in steps of 64
# (range() excludes the 3072 upper bound).
# NOTE(review): presumably Lambda memory sizes in MB - confirm.
RAM_LIST = list(range(128, 3072 ,64))

In [10]:
def get_task_type_list(workflow_file_path):
    """Map every process name in a workflow file to its distinct input signal names.

    Args:
        workflow_file_path: Path to a workflow JSON file with a ``processes``
            collection (each entry has ``name`` and ``ins``) and a ``signals``
            collection indexed by the values found in ``ins``.

    Returns:
        dict: ``{process_name: [signal_name, ...]}``. Names appear in first-seen
        order without duplicates; processes sharing a name are merged, and a
        process without inputs still gets an (empty) entry.
    """
    with open(workflow_file_path) as workflow_fh:
        workflow = json.load(workflow_fh)

    inputs_by_task = {}
    for proc in workflow['processes']:
        # setdefault registers the task type on first sight and otherwise
        # returns the list already being built for it.
        known_inputs = inputs_by_task.setdefault(proc['name'], [])
        for signal_ref in proc['ins']:
            signal_name = workflow['signals'][signal_ref]['name']
            if signal_name in known_inputs:
                continue
            known_inputs.append(signal_name)

    return inputs_by_task

In [11]:
def copy_file_from_to_bucket(copy_source, copy_destination):
    """Copy one S3 object into the ``cegielskir`` bucket.

    Args:
        copy_source: Source descriptor handed straight to ``Bucket.copy``; the
            copy cell in this notebook passes a ``{'Bucket': ..., 'Key': ...}`` dict.
        copy_destination: Destination key inside the ``cegielskir`` bucket.
    """
    # Uses the notebook-wide `s3` resource; the target bucket is fixed.
    s3.Bucket('cegielskir').copy(copy_source, copy_destination)

In [12]:
def execute_workflow_task(config, ins, outs, ram, s3_path):
    """Run one workflow task through ``awsLambdaCommand.js`` and return its output.

    The inputs, outputs, config and S3 options are JSON-encoded and passed to
    ``node awsLambdaCommand.js`` as four positional arguments, in that order.

    Args:
        config: JSON-serialisable task configuration.
        ins: JSON-serialisable list of input signals.
        outs: JSON-serialisable list of output signals.
        ram: RAM setting for this run; handed to the child process through the
            ``FUNCTION_TYPE`` environment variable.
        s3_path: Key prefix inside the ``cegielskir`` bucket, sent as the
            ``prefix`` option.

    Returns:
        str: The command's output lines (stdout and stderr combined) joined
        with single spaces.
    """
    # Local import keeps this cell self-contained; the notebook's import cell
    # does not bring in subprocess.
    import subprocess

    options = {
        "bucket": "cegielskir",
        "prefix": s3_path
    }
    # An argument list (no shell) delivers each JSON document verbatim, so
    # payloads containing quotes or other shell metacharacters cannot break
    # the command line.
    command = [
        'node',
        'awsLambdaCommand.js',
        json.dumps(ins),
        json.dumps(outs),
        json.dumps(config),
        json.dumps(options),
    ]
    # This function runs on several threads at once. Writing FUNCTION_TYPE into
    # the process-wide os.environ let one thread overwrite another's value
    # before its child was spawned, so each call gets a private environment.
    child_env = dict(os.environ, FUNCTION_TYPE=str(ram))
    completed = subprocess.run(
        command,
        env=child_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # keep error text in the recorded result
        universal_newlines=True,   # decode output to str
    )
    return ' '.join(completed.stdout.splitlines())

In [13]:
def run_single_task(filename, foldername, ram=None):
    """Run the first process of a single-task workflow file and record the result.

    Args:
        filename: Workflow file name. The text before the first ``_`` is used
            as the task core, and the full name is recorded as the task id.
        foldername: Directory containing ``filename``.
        ram: RAM setting to run with. When omitted, a notebook-level ``ram``
            variable is used if one exists (the previous implicit behaviour).

    Raises:
        NameError: If ``ram`` is not given and no notebook-level ``ram`` exists.
    """
    if ram is None:
        # Backward compatibility: the function used to read a global `ram`
        # that only existed after the collection loop had run.
        if 'ram' not in globals():
            raise NameError(
                "run_single_task needs a RAM value: pass ram=... explicitly"
            )
        ram = globals()['ram']

    with open(os.path.join(foldername, filename)) as single_workflow:
        data = json.load(single_workflow)
    process = data['processes'][0]
    config = process['config']
    ins = [data['signals'][i] for i in process['ins']]
    outs = [data['signals'][i] for i in process['outs']]

    # run_and_save_results releases one threadLimiter slot when it finishes, so
    # take one first. Releasing without a matching acquire over-releases the
    # bounded semaphore.
    threadLimiter.acquire()
    run_and_save_results(config, ins, outs, ram, filename.split('_')[0], filename)


In [14]:
def run_and_save_results(config, ins, outs, ram, task_core,task_name):
    """Execute one task, append its output to the raw results file, free a slot.

    The caller must have acquired ``threadLimiter`` beforehand; this function
    releases it exactly once, whether the task succeeds or fails.

    Args:
        config: Task configuration forwarded to ``execute_workflow_task``.
        ins: Input signals forwarded to ``execute_workflow_task``.
        outs: Output signals forwarded to ``execute_workflow_task``.
        ram: RAM setting for the run; also the key the result is stored under.
        task_core: Task type name, appended to the S3 prefix
            ``data-collection/montage-0_15/``.
        task_name: Identifier stored with the result as ``task_id``.
    """
    try:
        print("Starting " + str(task_name) + " with ram " + str(ram) + "   core - " + str(task_core))
        result = execute_workflow_task(config, ins, outs, ram, 'data-collection/montage-0_15/' + task_core)
        save_result(result, './montage-0.15-raw-results.json', str(ram), task_name)
    finally:
        # Always return the slot. Without this, every failed task permanently
        # consumed one of the semaphore's slots and the launcher eventually
        # blocked forever in acquire().
        threadLimiter.release()


In [15]:
def save_result(result, filename,ram, task_id):
    """Append one task result to the JSON results file, under its RAM key.

    The file holds ``{ram: [{"task_id": ..., "result": ...}, ...]}``. The whole
    read-modify-write cycle runs under ``resultLock`` so concurrent worker
    threads cannot overwrite each other's updates.

    Args:
        result: Task output; stored as ``str(result)``.
        filename: Path of the JSON results file. A missing file is treated as
            an empty result set and created.
        ram: RAM setting the result belongs to; used as the (string) key.
        task_id: Identifier stored alongside the result.
    """
    # JSON object keys are strings, so normalise the key. An int key would never
    # match the reloaded data and would be written out as a duplicate entry.
    ram_key = str(ram)

    # `with` guarantees the lock is released even if reading or writing fails;
    # the manual acquire/release pair left it held forever on any exception.
    with resultLock:
        try:
            with open(filename) as result_json:
                data = json.load(result_json)
        except FileNotFoundError:
            # First result of a fresh run: start from an empty mapping.
            data = {}

        data.setdefault(ram_key, []).append({
            'task_id': task_id,
            'result': str(result),
        })

        with open(filename, 'w') as result_json:
            json.dump(data, result_json, indent=4)

In [17]:
####################### TO BE CHANGED ###########################
# Per-run settings: edit these four values before running the cells below.
# NOTE: run_and_save_results hard-codes 'data-collection/montage-0_15/' and
# './montage-0.15-raw-results.json'; it does not read these variables.

# S3 key prefix (bucket 'cegielskir') whose objects are copied into every
# task-type folder by the copy cell.
exec_folder = 'montageV2_6-compiles'
# S3 key prefix of the workflow input files; currently only referenced by the
# commented-out input-copy code.
source_path = 'dc-start-montage-0_15/'
# Local folder listed for single-task workflow files; also used as the
# sub-prefix under 'data-collection/' for the S3 copies.
folder_with_workflow_parts = 'montage-0_15/'
# Full workflow JSON passed to get_task_type_list.
workflow_file = './dc-workflow-0.15.json'

###################### END TO BE CHANGED #######################

In [36]:
# Copy every executable found under `exec_folder` into the S3 working folder of
# each task type: data-collection/<folder_with_workflow_parts><task_type>/<name>
task_in_file_map = get_task_type_list(workflow_file)

# The listing does not depend on the task type, so query S3 once instead of
# once per task type.
exec_objects = list(s3.Bucket('cegielskir').objects.filter(Prefix=exec_folder))

for task_type in task_in_file_map:
    for exe_file in exec_objects:
        key_parts = exe_file.key.split('/')
        # Skip keys that have no second path component. A key without '/' used
        # to raise IndexError, and a folder placeholder ("<exec_folder>/") used
        # to be copied to a destination key ending in '/'.
        if len(key_parts) < 2 or not key_parts[1]:
            continue

        copy_exec_source = {
            'Bucket': 'cegielskir',
            'Key': exe_file.key
        }

        copy_file_from_to_bucket(copy_exec_source, 'data-collection/' + folder_with_workflow_parts + task_type + '/' + key_parts[1])

    # Disabled: per-task copy of the workflow input files from `source_path`.
#     for in_file in task_in_file_map[task_type]:
#         copy_source = {
#             'Bucket': 'cegielskir',
#             'Key': source_path + in_file
#         }

#         copy_file_from_to_bucket(copy_source, 'data-collection/' + folder_with_workflow_parts +task_type + '/' + in_file)
    

In [None]:
# Data-collection driver: repeatedly run every single-task workflow file at
# every RAM setting, with at most as many tasks in flight as threadLimiter allows.
files = os.listdir('./' + folder_with_workflow_parts)
#files.reverse()
it = 1
# Loop on the flag itself. The previous `while True: if RUNNING:` form never
# exited and busy-spun printing the banner once RUNNING was set to False.
while RUNNING:
    print("================================== NEW LOOP ====================================")
    for file in files:
        task_core = file.split('_')[0]
        # Parse the workflow once per file; it is identical for every RAM value.
        with open('./' + folder_with_workflow_parts + file) as single_workflow:
            data = json.load(single_workflow)
        task_name = data['processes'][0]['name']
        process = data['processes'][0]
        config = process['config']
        ins = [data['signals'][i] for i in process['ins']]
        outs = [data['signals'][i] for i in process['outs']]
        for ram in RAM_LIST:
            # Blocks until one of the worker slots is free;
            # run_and_save_results releases the slot when the task finishes.
            threadLimiter.acquire()
            print("Iteration: " + str(it))
            threading.Thread(target=run_and_save_results, args=(config, ins, outs, ram, task_core,file)).start()
            it += 1

Iteration: 1
Starting mDiffFit_8.json with ram 128   core - mDiffFit
Iteration: 2
Starting mDiffFit_8.json with ram 192   core - mDiffFit
Iteration: 3
Starting mDiffFit_8.json with ram 256   core - mDiffFit
Iteration: 4
Starting mDiffFit_8.json with ram 320   core - mDiffFit
Iteration: 5
Starting mDiffFit_8.json with ram 384   core - mDiffFit
Iteration: 6
Starting mDiffFit_8.json with ram 448   core - mDiffFit
Iteration: 7
Starting mDiffFit_8.json with ram 512   core - mDiffFit
Iteration: 8
Starting mDiffFit_8.json with ram 576   core - mDiffFit
Iteration: 9
Starting mDiffFit_8.json with ram 640   core - mDiffFit
Iteration: 10
Starting mDiffFit_8.json with ram 704   core - mDiffFit
Iteration: 11
Starting mDiffFit_8.json with ram 768   core - mDiffFit
Iteration: 12
Starting mDiffFit_8.json with ram 832   core - mDiffFit
Iteration: 13
Starting mDiffFit_8.json with ram 896   core - mDiffFit
Iteration: 14
Starting mDiffFit_8.json with ram 960   core - mDiffFit
Iteration: 15
Starting mDiffF

Iteration: 114
Starting mBackground_20.json with ram 1472   core - mBackground
Iteration: 115
Starting mBackground_20.json with ram 1536   core - mBackground
Iteration: 116
Starting mBackground_20.json with ram 1600   core - mBackground
Iteration: 117
Starting mBackground_20.json with ram 1664   core - mBackground
Iteration: 118
Starting mBackground_20.json with ram 1728   core - mBackground
Iteration: 119
Starting mBackground_20.json with ram 1792   core - mBackground
Iteration: 120
Starting mBackground_20.json with ram 1856   core - mBackground
Iteration: 121
Starting mBackground_20.json with ram 1920   core - mBackground
Iteration: 122
Starting mBackground_20.json with ram 1984   core - mBackground
Iteration: 123
Starting mBackground_20.json with ram 2048   core - mBackground
Iteration: 124
Starting mBackground_20.json with ram 2112   core - mBackground
Iteration: 125
Starting mBackground_20.json with ram 2176   core - mBackground
Iteration: 126
Starting mBackground_20.json with ram

Iteration: 222
Starting mBackground_15.json with ram 2496   core - mBackground
Iteration: 223
Starting mBackground_15.json with ram 2560   core - mBackground
Iteration: 224
Starting mBackground_15.json with ram 2624   core - mBackground
Iteration: 225
Starting mBackground_15.json with ram 2688   core - mBackground
Iteration: 226
Starting mBackground_15.json with ram 2752   core - mBackground
Iteration: 227
Starting mBackground_15.json with ram 2816   core - mBackground
Iteration: 228
Starting mBackground_15.json with ram 2880   core - mBackground
Iteration: 229
Starting mBackground_15.json with ram 2944   core - mBackground
Iteration: 230
Starting mBackground_15.json with ram 3008   core - mBackground
Iteration: 231
Starting mDiffFit_126.json with ram 128   core - mDiffFit
Iteration: 232
Starting mDiffFit_126.json with ram 192   core - mDiffFit
Iteration: 233
Starting mDiffFit_126.json with ram 256   core - mDiffFit
Iteration: 234
Starting mDiffFit_126.json with ram 320   core - mDiffF

Iteration: 334
Starting mProject_31.json with ram 832   core - mProject
Iteration: 335
Starting mProject_31.json with ram 896   core - mProject
Iteration: 336
Starting mProject_31.json with ram 960   core - mProject
Iteration: 337
Starting mProject_31.json with ram 1024   core - mProject
Iteration: 338
Starting mProject_31.json with ram 1088   core - mProject
Iteration: 339
Starting mProject_31.json with ram 1152   core - mProject
Iteration: 340
Starting mProject_31.json with ram 1216   core - mProject
Iteration: 341
Starting mProject_31.json with ram 1280   core - mProject
Iteration: 342
Starting mProject_31.json with ram 1344   core - mProject
Iteration: 343
Starting mProject_31.json with ram 1408   core - mProject
Iteration: 344
Starting mProject_31.json with ram 1472   core - mProject
Iteration: 345
Starting mProject_31.json with ram 1536   core - mProject
Iteration: 346
Starting mProject_31.json with ram 1600   core - mProject
