In [1]:
from __future__ import print_function

In [1]:
from glob import glob
import numpy as np
import sys, os, h5py, time, errno
import GPUtil


from utils import start_rabbit, start_worker, start_flower_monitor, read_h5py_file, cm_to_cvae, job_on_gpu
from utils import omm_job, cvae_job 

Using TensorFlow backend.


In [3]:
# Collect the id of every GPU that GPUtil reports on this machine.
GPU_ids = []
for gpu in GPUtil.getGPUs():
    GPU_ids.append(gpu.id)
print('Available GPUs', GPU_ids)

Available GPUs [0, 1]


In [4]:
# Absolute paths of the topology (.top.gz) and structure (.pdb.gz) inputs
# that are handed to every omm_job.
top_file, pdb_file = (
    os.path.abspath(relative_path)
    for relative_path in (
        '../P27-all/C1B48/C1B48.top.gz',
        '../P27-all/C1B48/C1B48.pdb.gz',
    )
)

In [5]:
# Number of CVAE jobs to launch. Used further down to size hyper_dims and as
# the iteration count of the loop that creates the cvae_job instances.
n_cvae = 1 

# Logs for scheduler

In [10]:
# All scheduler-related logs live in <current directory>/scheduler_logs.
work_dir = os.path.abspath('./')
log_dir = os.path.join(work_dir, 'scheduler_logs')
rabbitmq_log = os.path.join(log_dir, 'rabbit_server_log.txt')
celery_worker_log = os.path.join(log_dir, 'celery_worker_log.txt')

try:
    os.mkdir(log_dir)
except OSError as mkdir_error:
    # The directory already existing is expected on re-runs; any other
    # failure is propagated unchanged.
    if mkdir_error.errno == errno.EEXIST:
        pass
    else:
        raise

# start_rabbit goes first, followed by a 5 s pause before the worker and the
# monitor are started (presumably so the message broker is up -- TODO confirm).
start_rabbit(rabbitmq_log)
time.sleep(5)

start_worker(celery_worker_log)
start_flower_monitor()
print('Waiting 10 seconds for the server to stablize.')
time.sleep(10)

Waiting 10 seconds for the server to stablize.


## Jobs
* Assign job_id according to the available GPUs on the board. 
* Start the simulation according to the job_labels containing job_id and gpu_id for individual jobs 

In [7]:
# Launch one omm_job per available GPU and keep the handles in `jobs`.
jobs = []
for gpu_id in GPU_ids:
    # The current Unix time (whole seconds) serves as the job id; the 2 s
    # pause at the end of each pass presumably keeps consecutive ids distinct.
    job = omm_job(
        job_id=int(time.time()),
        gpu_id=gpu_id,
        top_file=top_file,
        pdb_file=pdb_file,
    )
    job.start()
    jobs.append(job)
    time.sleep(2)

In [8]:
# Display the list of job objects started so far.
jobs

[<utils.omm_job at 0x7f8daf21a810>, <utils.omm_job at 0x7f8daf13a710>]

# Read the output h5 files

In [9]:
# Block for five minutes before looking for the contact-map .h5 output.
print('Waiting 5 mins for omm to write contact map .h5 files. ')
time.sleep(5 * 60)

Waiting 5 mins for omm to write contact map .h5 files. 


KeyboardInterrupt: 

In [2]:
# Find every contact-map file one directory below the working directory and
# open each with read_h5py_file; cm_data_lists keeps the same order as cm_files.
cm_files = glob('*/*_cm.h5')

cm_data_lists = []
for cm_file in cm_files:
    cm_data_lists.append(read_h5py_file(cm_file))

# Get updates from h5 file
Once every minute

In [11]:
def frame_number(lists):
    """Return the sum of ``shape[1]`` over every entry in ``lists``.

    Each entry only needs a ``shape`` attribute whose second element is the
    count to add; an empty ``lists`` yields 0.
    """
    total = 0
    for cm in lists:
        total += cm.shape[1]
    return total
# Show the combined shape[1] total (frame count) of the loaded contact-map data.
frame_number(cm_data_lists)

597

In [12]:
# Poll the contact-map data until at least 1000 frames are available,
# printing progress whenever the frame count passes the next report mark.
frame_marker = 0
while frame_number(cm_data_lists) < 1000:
    for cm in cm_data_lists:
        cm.refresh()
    # Count once per pass so the comparison and the printed value agree.
    n_frames = frame_number(cm_data_lists)
    if n_frames > frame_marker:
        print('Current number of frames from OpenMM:', n_frames)
        # Move the mark to the next multiple of 10000 strictly above the
        # current count, so reports advance in even 10000-frame steps.
        frame_marker = (n_frames // 10000 + 1) * 10000
        print('    Next report at frame', frame_marker)
    # Check once a minute rather than spinning on refresh() continuously.
    time.sleep(60)

Current number of frames from OpenMM: 597
    Next report at frame 10000


KeyboardInterrupt: 

# All contact maps to h5

In [4]:
# Pass all loaded contact-map data through cm_to_cvae (presumably producing
# the array the CVAE trains on -- TODO confirm against utils).
# NOTE(review): the identical call is repeated in the cell that writes
# cvae_input.h5, so one of the two is likely redundant.
cvae_input = cm_to_cvae(cm_data_lists)

In [26]:
# Record, for every contact-map file, how many frames (shape[1]) it held when
# the CVAE input was assembled.
train_data_length = [cm_data.shape[1] for cm_data in cm_data_lists]

omm_log = os.path.join(log_dir, 'openmm_log.txt')

# cm_data_lists was built from cm_files in order, so the two sequences pair up
# one-to-one; each line is "<file path> <frame count>". The with-block closes
# the log even if a write fails.
with open(omm_log, 'w') as log:
    for cm_file, n_frame in zip(cm_files, train_data_length):
        log.write("{} {}\n".format(cm_file, n_frame))

In [3]:
# Convert the contact-map data with cm_to_cvae and store the result as the
# 'contact_maps' dataset of <work_dir>/cvae_input/cvae_input.h5.
# NOTE: work_dir is assigned in the scheduler-log setup cell; that cell must
# have been run in this kernel before this one.
cvae_input = cm_to_cvae(cm_data_lists)

cvae_input_dir = os.path.join(work_dir, 'cvae_input')
try:
    os.mkdir(cvae_input_dir)
except OSError as exc:
    # An already-existing directory is fine on re-runs; re-raise anything else.
    if exc.errno != errno.EEXIST:
        raise

cvae_input_file = os.path.join(cvae_input_dir, 'cvae_input.h5')
# The context manager closes the HDF5 file even if create_dataset raises.
with h5py.File(cvae_input_file, 'w') as cvae_input_save:
    cvae_input_save.create_dataset('contact_maps', data=cvae_input)

NameError: name 'work_dir' is not defined

# CVAE

In [16]:
# One value per CVAE job, counting up from 3 (3, 4, 5, ...).
hyper_dims = np.array([job_index + 3 for job_index in range(n_cvae)])

In [17]:
for i in range(n_cvae):
    # Give each CVAE job its own entry from hyper_dims instead of a fixed 3,
    # so several jobs explore different values. int() passes a plain Python
    # int rather than a NumPy scalar.
    # NOTE(review): the first argument is a float timestamp here, whereas
    # omm_job is given int(time.time()) -- confirm which form cvae_job expects.
    cvae_j = cvae_job(time.time(), i, cvae_input_file, hyper_dim=int(hyper_dims[i]))
    # Stop and drop whichever tracked job job_on_gpu returns for index i, then
    # start the CVAE job and track it in the same list.
    stop_jobs = job_on_gpu(i, jobs)
    stop_jobs.stop()
    jobs.remove(stop_jobs)
    cvae_j.start()
    jobs.append(cvae_j)

[<utils.omm_job at 0x7f8daf13a710>, <utils.cvae_job at 0x7f8daf05c950>]