# Algorithm Explained

In [None]:
import sys
sys.path.append("./hybridjobs/utility")

from hybridjobs.utility.ProteinParser import ProteinData
from hybridjobs.utility.ProteinModel import ProteinModel
from hybridjobs.utility.ProteinStructurePrediction import ProteinStructurePrediction
# from utility.ResultProcess import ResultParser
import time

timestamp = time.strftime("%Y%m%d-%H")

Using TensorFlow backend.


#### Step 1: Prepare Data

In this part, we load the folder with the raw rna data for experimentation. To evaluate the quantum solution and the actual solution include both fasta and ct files. To only generate a quantum solution, upload a fasta file.

In [2]:
# input: aminoacids
# output: energy files

protein_name = 'glycylglycine'
aminoacids = 'GG'
number_bits_to_discretize_protein_angles = 4
protein_id = 0

input_filename = "inputRotations"
output_filename = "outputRotations"
basis = "6-31g"
energy_method = "mp2"



In [3]:
data_path='protein-folding-data'

#### Step 2: Build Model

In this part, we will show how to build model for qfold

In [4]:
# initial the ProteinFold object
init_param = {}
# method: qfold-cc stands for the classical metropolis method in QFold
# method: qfold-qc stands for the quantum metropolis method in QFold
method = ['qfold-cc', 'qfold-qc']

for mt in method:
    if mt == 'qfold-cc':
        init_param[mt] = {}
        init_param[mt]['params'] = ["initialization"]
    elif mt == 'qfold-qc':
        init_param[mt] = {}
        init_param[mt]['params'] = ["initialization"]

config_path = "ProteinFoldingHybridJobs/config/config.json"
protein_model = ProteinModel(data_path, method, config_path, **init_param)

INFO:root:Initial parameters for protein glycylglycine_3_GG using qfold-cc
INFO:root:Initial parameters for protein glycylglycine_3_GG using qfold-qc
INFO:root:Initial parameters for protein glycylglycine_4_GG using qfold-cc
INFO:root:Initial parameters for protein glycylglycine_4_GG using qfold-qc


In [5]:
# set the parameters for model
model_param = {}

method = 'qfold-cc'
model_param[method] = {}

# parameters
model_param[method]['initialization'] = ["minifold", "random"]

method = 'qfold-qc'
model_param[method] = {}

# parameters
model_param[method]['initialization'] = ["minifold", "random"]

protein_model.build_models(**model_param)


deltas_dict length for glycylglycine_3_GG: 256
deltas_dict length for glycylglycine_3_GG: 256
deltas_dict length for glycylglycine_4_GG: 1024
deltas_dict length for glycylglycine_4_GG: 1024
deltas_dict length for glycylglycine_3_GG: 256
deltas_dict length for glycylglycine_3_GG: 256
deltas_dict length for glycylglycine_4_GG: 1024
deltas_dict length for glycylglycine_4_GG: 1024


0

In [6]:
# save the model
model_path = protein_model.save("latest")

print(f"You have built the protein folding models and saved them as protein_folding_latest.pickle")

INFO:root:finish save protein_folding_latest.pickle


You have built the protein folding models and saved them as protein_folding_latest.pickle


#### Step 3: Predict Protein Structure

In this part, we will show how to run models for predicting protein structure

In [7]:
protein_models = ProteinModel.load(model_path)

In [8]:
model_info = protein_models.describe_models()

INFO:root:debug describe
INFO:root:model name: glycylglycine_3_GG, method: qfold-cc
INFO:root:param: initialization, value {'minifold', 'random'}
INFO:root:model name: glycylglycine_3_GG, method: qfold-qc
INFO:root:param: initialization, value {'minifold', 'random'}
INFO:root:model name: glycylglycine_4_GG, method: qfold-cc
INFO:root:param: initialization, value {'minifold', 'random'}
INFO:root:model name: glycylglycine_4_GG, method: qfold-qc
INFO:root:param: initialization, value {'minifold', 'random'}


In [9]:
# get the model you want to optimize
protein_name = 'glycylglycine_3_GG'
initialization = 'random'
method = 'qfold-cc'

model_name = "{}+{}".format(protein_name, initialization)

protein_model = protein_models.get_model(protein_name, method, model_name)



In [10]:
data_path = 'data'
# psp_param stands for the parameters for predicting protein structure
psp_param = {}
psp_param["data_path"] = data_path
psp_param["mode"] = 'local-simulator'
psp_param["model_name"] = model_name
psp_param["model_path"] = model_path

psp = ProteinStructurePrediction(protein_model, method, config_path, **psp_param)

psp.run()



INFO:root:initial protein structure prediction using qfold-cc in QFold
INFO:root:CLASSICAL METROPOLIS: Time for 2 steps: 0.3307058811187744 seconds
INFO:root:CLASSICAL METROPOLIS: Time for 3 steps: 0.46265339851379395 seconds
INFO:root:CLASSICAL METROPOLIS: Time for 4 steps: 0.6084005832672119 seconds
INFO:root:CLASSICAL METROPOLIS: Time for 5 steps: 0.7418649196624756 seconds
INFO:root:CLASSICAL METROPOLIS: Time for 6 steps: 0.8906919956207275 seconds
INFO:root:CLASSICAL METROPOLIS: Time for 7 steps: 1.020871877670288 seconds
INFO:root:CLASSICAL METROPOLIS: Time for 8 steps: 1.1487469673156738 seconds
INFO:root:CLASSICAL METROPOLIS: Time for 9 steps: 1.298386812210083 seconds
INFO:root:finish save tts_results_glycylglycine_3_GG+random_1000_qfold-cc.json


In [11]:
initialization = 'random'
method = 'qfold-qc'

model_name = "{}+{}".format(protein_name, initialization)

protein_model = protein_models.get_model(protein_name, method, model_name)



In [12]:
psp = ProteinStructurePrediction(protein_model, method, config_path, **psp_param)

psp.run()



INFO:root:initial protein structure prediction using qfold-qc in QFold
INFO:qiskit.compiler.assembler:Total Assembly Time - 0.10872 (ms)
INFO:qiskit.compiler.assembler:Total Assembly Time - 0.05507 (ms)
INFO:qiskit.compiler.assembler:Total Assembly Time - 0.04888 (ms)
INFO:qiskit.transpiler.runningpassmanager:Pass: UnitarySynthesis - 0.00954 (ms)
INFO:qiskit.transpiler.runningpassmanager:Pass: UnrollCustomDefinitions - 28044.69824 (ms)
INFO:qiskit.transpiler.passes.basis.basis_translator:Begin BasisTranslator from source basis {('mcx', 4), ('mcu1', 9), ('mcx', 5), ('snapshot', 16), ('cx', 2), ('ccx', 3), ('cu3', 2), ('x', 1), ('h', 1)} to target basis {'cy', 'mcz', 'mcry', 'set_statevector', 'u2', 'ry', 'sdg', 'quantum_channel', 'kraus', 'x', 'cu', 'initialize', 'rz', 'mcx', 'mcy', 'save_state', 'mcphase', 'roerror', 'rzz', 'cx', 'cz', 'ryy', 'r', 'tdg', 'mcu3', 'mcrz', 'unitary', 'save_probs_ket', 'save_statevector', 'h', 'save_probs', 'u3', 'pauli', 'save_expval', 'cu2', 'csx', 'sx',

#### Step 4: Post Process

In this part, we will show how to visualize the results

# Hybrid Job Experiment

In [None]:
from braket.aws import AwsDevice
from braket.aws import AwsQuantumJob, AwsSession
from braket.jobs.config import InstanceConfig

import boto3

import os
import json
import time
import altair as alt
import pandas as pd
import numpy as np

# Step 1: Prepare parameters for batch evaluation

In this part, we set the parameters for batch evaluation

In [None]:
# parameters for experiments
experiment_name = "QfoldHybridJobs"
data_path = "protein-folding-data"
suffix_check = ["json"]
experiments_params =  {
    "version": "1",
    "params": [
        {"method": ["qfold-cc", "qfold-qc"]},
        {"initialization": ["minifold", "random"]},
        {"shots": [10000]},
        {"mode": ["local-simulator"]},
        {"device": [{"qc": "null", "cc": "ml.m5.large"},{"qc": "null", "cc": "ml.m5.4xlarge"}]}
    ]
}

hp = {}
hybrid_job_params = []
parse_params(experiments_params['params'], hp, hybrid_job_params)

print(f"parameters for experiments: \n {hybrid_job_params}")

parameters for experiments: 
 [{'method': 'qfold-cc', 'initialization': 'minifold', 'shots': 10000, 'mode': 'local-simulator', 'device': {'qc': 'null', 'cc': 'ml.m5.large'}}, {'method': 'qfold-cc', 'initialization': 'minifold', 'shots': 10000, 'mode': 'local-simulator', 'device': {'qc': 'null', 'cc': 'ml.m5.4xlarge'}}, {'method': 'qfold-cc', 'initialization': 'random', 'shots': 10000, 'mode': 'local-simulator', 'device': {'qc': 'null', 'cc': 'ml.m5.large'}}, {'method': 'qfold-cc', 'initialization': 'random', 'shots': 10000, 'mode': 'local-simulator', 'device': {'qc': 'null', 'cc': 'ml.m5.4xlarge'}}, {'method': 'qfold-qc', 'initialization': 'minifold', 'shots': 10000, 'mode': 'local-simulator', 'device': {'qc': 'null', 'cc': 'ml.m5.large'}}, {'method': 'qfold-qc', 'initialization': 'minifold', 'shots': 10000, 'mode': 'local-simulator', 'device': {'qc': 'null', 'cc': 'ml.m5.4xlarge'}}, {'method': 'qfold-qc', 'initialization': 'random', 'shots': 10000, 'mode': 'local-simulator', 'device':

In [None]:
# Upload dataset to S3
s3_path = upload_data(data_path)
print(f"upload data to s3 path: {s3_path}")

upload data to s3 path: s3://amazon-braket-us-east-1-002224604296/protein-folding-data


# Step 2: Prepare image for experiment

In this part, we use the following code to prepare the image for experiment. For the first run, 
please run build_and_push.sh to create the image. For future experiments, avoid running
build_and_push.sh unless you want to rebuild the image

In [None]:
!cp /home/ubuntu/psi4conda/bin/psi4 QfoldHybridJobs/psi4

In [None]:
account_id = boto3.client("sts").get_caller_identity()["Account"]
region = boto3.client('s3').meta.region_name
image_name = f"amazon-braket-{experiment_name.lower()}-jobs"
image_uri = f"{account_id}.dkr.ecr.{region}.amazonaws.com/{image_name}:latest"

print(f"the hybrid job image for {account_id} in region {region}: {image_uri}")

# For the first run, please use the following code to create the image for this application. For future experiments, comment
# the following code unless you want to rebuild the image
# !sh build_and_push.sh {image_name}

the hybrid job image for 002224604296 in region us-east-1: 002224604296.dkr.ecr.us-east-1.amazonaws.com/amazon-braket-qfoldhybridjobs-jobs:latest


In [None]:
hybrid_jobs_json = f"{experiment_name}-hybrid-jobs.json"
print(f"job info will be saved in {hybrid_jobs_json}")

job info will be saved in QfoldHybridJobs-hybrid-jobs.json


# Step 3: Launch Amazon Braket Hybrid Jobs for experiment

In this part, we use the following code to launch the same number of hybrid jobs as the sets of parameters for this experiments.
When the number of jobs exceeds 5 RPS, this thread will wait. The default setting of this experiment will take around **7 hours** to 
finish.

In [None]:
# Long runnning cell due to Burst rate of CreateJob requests < 5 RPS
# sudo apt-get install python-prctl at first
# https://stackoverflow.com/questions/34361035/python-thread-name-doesnt-show-up-on-ps-or-htop
from threading import Thread
import threading
import setproctitle

def launch_hybrid_jobs(hybrid_job_params=hybrid_job_params, hybrid_jobs_json=hybrid_jobs_json):
    setproctitle.setproctitle(threading.current_thread().name)
    # parse evaluation parameters and trigger hybrid jobs:
    jobs = []
    names = []

    job_name = experiment_name.lower()
    device_param_list = ["shots", "device"]

    for job_param in hybrid_job_params:
        
        algorithm_param_name = ""
        for k,v in job_param.items():
            if k not in device_param_list:
                algorithm_param_name = algorithm_param_name+f"-{v[0]}"
        algorithm_param_name=algorithm_param_name[1:]
        quantum_device = get_quantum_device(job_param['device']['qc'])
        classical_device = job_param['device']['cc']

        device_name = classical_device.replace(".","-")
        device_name = device_name.replace("x","")
        
        name = f"{algorithm_param_name}-{device_name}-" + str(int(time.time()))
        name = name.lower()
        # name = f"{experiment_name}-"+ str(int(time.time()))
        print(f"name is {name}")

        tmp_job = AwsQuantumJob.create(
            device=quantum_device,
            source_module=f"{experiment_name}",
            entry_point=f"{experiment_name}.{job_name}:main",
            job_name=name,
            hyperparameters=job_param,
            input_data=s3_path,
            instance_config=InstanceConfig(instanceType=classical_device),
            image_uri=image_uri,
            wait_until_complete=False,
        )
        
#         from braket.jobs.local import LocalQuantumJob
        
#         tmp_job = LocalQuantumJob.create(
#             device=quantum_device,
#             source_module=f"{experiment_name}",
#             entry_point=f"{experiment_name}.{job_name}:main",
#             hyperparameters=job_param,
#             input_data=s3_path,
#             image_uri=image_uri,
#         )   
        
        print(f"Finish create {experiment_name} with {name}")

        jobs.append(tmp_job)
        names.append(name)


        while not queue_check(jobs):
            time.sleep(5)
    jobs_arn = []

    for job in jobs:
        jobs_arn.append(job.arn)

    jobs_states = {
        "experiment_name": experiment_name,
        "hybrid-jobs-arn": jobs_arn,
        "names": names
    }
    
    
    # save hybrid job arn for further analysis
    json_object = json.dumps(jobs_states, indent=4)

    with open(hybrid_jobs_json, "w") as outfile:
        outfile.write(json_object)
        
    print(f"Finish launch all the hybrid jobs and save all the files")

# remove existing hybrid_jobs_json file
!rm {hybrid_jobs_json}

t = Thread(target=launch_hybrid_jobs, name="launch-hybrid-job", daemon=True).start()

# launch_hybrid_jobs()

rm: cannot remove 'QfoldHybridJobs-hybrid-jobs.json': No such file or directory
fail to get null: list index out of range, use sv1 instead
name is q-m-l-ml-m5-large-1679454022
Finish create QfoldHybridJobs with q-m-l-ml-m5-large-1679454022
There are 1 jobs in RUNNING or QUEUED status
fail to get null: list index out of range, use sv1 instead
name is q-m-l-ml-m5-4large-1679454037
Finish create QfoldHybridJobs with q-m-l-ml-m5-4large-1679454037
There are 2 jobs in RUNNING or QUEUED status
fail to get null: list index out of range, use sv1 instead
name is q-r-l-ml-m5-large-1679454044
Finish create QfoldHybridJobs with q-r-l-ml-m5-large-1679454044
There are 3 jobs in RUNNING or QUEUED status
fail to get null: list index out of range, use sv1 instead
name is q-r-l-ml-m5-4large-1679454050
Finish create QfoldHybridJobs with q-r-l-ml-m5-4large-1679454050
There are 4 jobs in RUNNING or QUEUED status
There are 4 jobs in RUNNING or QUEUED status
There are 4 jobs in RUNNING or QUEUED status
There 

In [None]:
# run the following scripts to check the created threads
!ps -aux | grep launch-hybrid-job

ubuntu     52285  0.7  1.0 2466724 343908 ?      Ssl  02:42   0:16 launch-hybrid-job
ubuntu     53803  0.0  0.0   8756  3432 pts/21   Ss+  03:20   0:00 /bin/bash -c ps -aux | grep launch-hybrid-job
ubuntu     53808  0.0  0.0   8176   728 pts/21   S+   03:20   0:00 grep launch-hybrid-job


# Step 4: Jobs finish and visualize results

Please use the following code to check the status of hybrid jobs. The status of hybrid jobs can also be checked in the Amazon Braket console. Optionally, if the email if input when deploying the solution, emails will be sent at the same number of hybrid jobs once 
the status of jobs changes.

In [None]:
# run the following code to test whether all the jobs finish
results = []
if os.path.exists(hybrid_jobs_json):
    # recover hybrid jobs and show result
    jobs_states_load = None
    with open(hybrid_jobs_json, "r") as outfile:
        jobs_states_load = json.load(outfile)

    completed_jobs_arn = set()

    for job_name, job_arn in zip(jobs_states_load["names"], jobs_states_load["hybrid-jobs-arn"]):
        current_job = AwsQuantumJob(job_arn)
        print(f"the state of job {job_name} is : {current_job.state()}")
        if current_job.state() == 'COMPLETED':
            completed_jobs_arn.update({job_arn})

    whole_jobs_num = len(jobs_states_load["names"])

    if len(completed_jobs_arn) == whole_jobs_num:
        print(f"all jobs completed")
        for job_arn in completed_jobs_arn:
            current_job = AwsQuantumJob(job_arn)
            results.append(current_job.result())
        # display results
        results = display_results(results, experiments_params)
else:
    print(f"JSON file for job arns not generated! please wait for the thread(launch-hybrid-job) to finish")

the state of job q-m-l-ml-m5-large-1679454022 is : COMPLETED
the state of job q-m-l-ml-m5-4large-1679454037 is : COMPLETED
the state of job q-r-l-ml-m5-large-1679454044 is : COMPLETED
the state of job q-r-l-ml-m5-4large-1679454050 is : COMPLETED
the state of job q-m-l-ml-m5-large-1679454236 is : COMPLETED
the state of job q-m-l-ml-m5-4large-1679454268 is : COMPLETED
the state of job q-r-l-ml-m5-large-1679454314 is : COMPLETED
the state of job q-r-l-ml-m5-4large-1679454322 is : COMPLETED
all jobs completed


KeyError: 'hypermeter'

In [None]:
rename_result = {}
device_list = []
x_list = []
y_list = []
for k,vs in results.items():
    k = k.replace("\'","\"")
    dict_k = json.loads(k)
    device_name = None
    if dict_k['qc'] == 'null':
        device_name = dict_k['cc']
    else:
        device_name = dict_k['qc']
    for v in vs:
        device_list.append(device_name)
        x_list.append(v[0])
        y_list.append(v[1])
source = pd.DataFrame({
    "Sequence Length": np.array(x_list),
    "Time to Solution": np.array(y_list),
    "Device": np.array(device_list),
})

alt.Chart(source).mark_line(point = True).encode(
    x='Sequence Length',
    y='Time to Solution',
    color='Device',
).properties(
    title = f"{experiment_name} experiments",
    width = 700,
    height = 600,
).interactive()

AttributeError: 'list' object has no attribute 'items'

fail to get null: list index out of range, use sv1 instead
name is qfold-cc-minifold-local-simulator-ml-m5-large-1679452955


Exception in thread launch-hybrid-job:
Traceback (most recent call last):
  File "/opt/conda/envs/qfold-python3.7/lib/python3.7/threading.py", line 917, in _bootstrap_inner
    self.run()
  File "/opt/conda/envs/qfold-python3.7/lib/python3.7/threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-8-13347a113ad7>", line 44, in launch_hybrid_jobs
    wait_until_complete=False,
  File "/opt/conda/envs/qfold-python3.7/lib/python3.7/site-packages/braket/aws/aws_quantum_job.py", line 198, in create
    job_arn = aws_session.create_job(**create_job_kwargs)
  File "/opt/conda/envs/qfold-python3.7/lib/python3.7/site-packages/braket/aws/aws_session.py", line 244, in create_job
    response = self.braket_client.create_job(**boto3_kwargs)
  File "/opt/conda/envs/qfold-python3.7/lib/python3.7/site-packages/botocore/client.py", line 508, in _api_call
    return self._make_api_call(operation_name, kwargs)
  File "/opt/conda/envs/qfold-python3.7/lib/python3