In [1]:
!pip install -r requirements.txt
!rm ~/.keras/keras.json

Collecting absl-py==1.4.0 (from -r requirements.txt (line 1))
  Using cached absl_py-1.4.0-py3-none-any.whl (126 kB)
Collecting altair==4.2.2 (from -r requirements.txt (line 2))
  Using cached altair-4.2.2-py3-none-any.whl (813 kB)
Collecting amazon-braket-default-simulator==1.11.5.post0 (from -r requirements.txt (line 3))
  Using cached amazon_braket_default_simulator-1.11.5.post0-py3-none-any.whl (206 kB)
Collecting amazon-braket-schemas==1.14.1.post0 (from -r requirements.txt (line 4))
  Using cached amazon_braket_schemas-1.14.1.post0-py3-none-any.whl (109 kB)
Collecting amazon-braket-sdk==1.35.5 (from -r requirements.txt (line 5))
  Using cached amazon_braket_sdk-1.35.5-py3-none-any.whl (245 kB)
Collecting antlr4-python3-runtime==4.9.2 (from -r requirements.txt (line 6))
  Using cached antlr4-python3-runtime-4.9.2.tar.gz (117 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting anyio==3.6.2 (from -r requirements.txt (line 7))
  Using cached anyio-3.6.2-py3-none-any.w

# Algorithm Explained

In [5]:
import sys
sys.path.append("./hybridjobs/utility")

from hybridjobs.utility.ProteinParser import ProteinData
from hybridjobs.utility.ProteinModel import ProteinModel
from hybridjobs.utility.ProteinStructurePrediction import ProteinStructurePrediction
import time

timestamp = time.strftime("%Y%m%d-%H")

#### Step 1: Prepare Data

In this part, we have prepared the precalculated energies files in advance for doing protein folding experiments

In [6]:
# input: aminoacids
# output: energy files

protein_name = 'glycylglycine'
aminoacids = 'GG'
number_bits_to_discretize_protein_angles = 4
protein_id = 0

data_path='protein-folding-data'

#### Step 2: Build Model

In this part, we will show how to build model for qfold

In [7]:
# initial the ProteinFold object
init_param = {}
# method: qfold-cc stands for the classical metropolis method in QFold
# method: qfold-qc stands for the quantum metropolis method in QFold
method = ['qfold-cc', 'qfold-qc']

for mt in method:
    if mt == 'qfold-cc':
        init_param[mt] = {}
        init_param[mt]['params'] = ["initialization"]
    elif mt == 'qfold-qc':
        init_param[mt] = {}
        init_param[mt]['params'] = ["initialization"]

config_path = "hybridjobs/config/config.json"
protein_model = ProteinModel(data_path, method, config_path, **init_param)

INFO:root:Initial parameters for protein glycylglycine_3_GG using qfold-cc
INFO:root:Initial parameters for protein glycylglycine_3_GG using qfold-qc
INFO:root:Initial parameters for protein glycylglycine_4_GG using qfold-cc
INFO:root:Initial parameters for protein glycylglycine_4_GG using qfold-qc


In [8]:
# set the parameters for model
model_param = {}

method = 'qfold-cc'
model_param[method] = {}

# parameters
model_param[method]['initialization'] = ["minifold", "random"]

method = 'qfold-qc'
model_param[method] = {}

# parameters
model_param[method]['initialization'] = ["minifold", "random"]

protein_model.build_models(**model_param)


deltas_dict length for glycylglycine_3_GG: 256
deltas_dict length for glycylglycine_3_GG: 256
deltas_dict length for glycylglycine_4_GG: 1024
deltas_dict length for glycylglycine_4_GG: 1024
deltas_dict length for glycylglycine_3_GG: 256
deltas_dict length for glycylglycine_3_GG: 256
deltas_dict length for glycylglycine_4_GG: 1024
deltas_dict length for glycylglycine_4_GG: 1024


0

In [9]:
# save the model
model_path = protein_model.save("latest")

print(f"You have built the protein folding models and saved them as protein_folding_latest.pickle")

INFO:root:finish save protein_folding_latest.pickle


You have built the protein folding models and saved them as protein_folding_latest.pickle


#### Step 3: Predict Protein Structure

In this part, we will show how to run models for predicting protein structure

In [10]:
protein_models = ProteinModel.load(model_path)

In [11]:
model_info = protein_models.describe_models()

INFO:root:debug describe
INFO:root:model name: glycylglycine_3_GG, method: qfold-cc
INFO:root:param: initialization, value {'random', 'minifold'}
INFO:root:model name: glycylglycine_3_GG, method: qfold-qc
INFO:root:param: initialization, value {'random', 'minifold'}
INFO:root:model name: glycylglycine_4_GG, method: qfold-cc
INFO:root:param: initialization, value {'random', 'minifold'}
INFO:root:model name: glycylglycine_4_GG, method: qfold-qc
INFO:root:param: initialization, value {'random', 'minifold'}


In [12]:
# get the model you want to optimize
protein_name = 'glycylglycine_3_GG'
initialization = 'random'
method = 'qfold-cc'

model_name = "{}+{}".format(protein_name, initialization)

protein_model = protein_models.get_model(protein_name, method, model_name)



In [13]:
data_path = 'data'
# psp_param stands for the parameters for predicting protein structure
psp_param = {}
psp_param["data_path"] = data_path
psp_param["mode"] = 'local-simulator'
psp_param["model_name"] = model_name
psp_param["model_path"] = model_path

psp = ProteinStructurePrediction(protein_model, method, config_path, **psp_param)

psp.run()



INFO:root:initial protein structure prediction using qfold-cc in QFold
INFO:root:CLASSICAL METROPOLIS: Time for 2 steps: 0.6116912364959717 seconds
INFO:root:CLASSICAL METROPOLIS: Time for 3 steps: 0.6174771785736084 seconds
INFO:root:CLASSICAL METROPOLIS: Time for 4 steps: 0.9570331573486328 seconds
INFO:root:CLASSICAL METROPOLIS: Time for 5 steps: 1.0897619724273682 seconds
INFO:root:CLASSICAL METROPOLIS: Time for 6 steps: 1.3576455116271973 seconds
INFO:root:CLASSICAL METROPOLIS: Time for 7 steps: 1.714564323425293 seconds
INFO:root:CLASSICAL METROPOLIS: Time for 8 steps: 1.8095722198486328 seconds
INFO:root:CLASSICAL METROPOLIS: Time for 9 steps: 1.7752666473388672 seconds
INFO:root:finish save tts_results_glycylglycine_3_GG+random_1000_qfold-cc.json


In [14]:
initialization = 'random'
method = 'qfold-qc'

model_name = "{}+{}".format(protein_name, initialization)

protein_model = protein_models.get_model(protein_name, method, model_name)



In [15]:
psp = ProteinStructurePrediction(protein_model, method, config_path, **psp_param)

psp.run()

INFO:root:initial protein structure prediction using qfold-qc in QFold
INFO:qiskit.compiler.assembler:Total Assembly Time - 0.13137 (ms)
INFO:qiskit.compiler.assembler:Total Assembly Time - 0.10180 (ms)
INFO:qiskit.compiler.assembler:Total Assembly Time - 0.10800 (ms)
INFO:qiskit.transpiler.runningpassmanager:Pass: UnitarySynthesis - 0.01025 (ms)
INFO:qiskit.transpiler.runningpassmanager:Pass: UnrollCustomDefinitions - 28142.65633 (ms)
INFO:qiskit.transpiler.passes.basis.basis_translator:Begin BasisTranslator from source basis {('cu3', 2), ('cx', 2), ('mcx', 4), ('mcx', 5), ('snapshot', 16), ('h', 1), ('ccx', 3), ('mcu1', 9), ('x', 1)} to target basis {'mcx', 'cu1', 'sx', 'h', 'mcsx', 'rzx', 'save_statevector', 'mcr', 'mcrz', 'reset', 'cp', 'sdg', 'u', 'initialize', 'cu', 'qerror_loc', 'p', 'swap', 'x', 'mcu', 'y', 'pauli', 'rz', 'kraus', 'id', 'quantum_channel', 'cy', 't', 'csx', 'rxx', 'cu3', 'rx', 'save_probs_ket', 'ccx', 'save_expval', 'save_amplitudes', 'mcswap', 'unitary', 'tdg',

In [16]:
# The time for final steps can be compared among these two methods
import json

with open("tts_results_glycylglycine_3_GG+random_1000_qfold-cc.json") as f:
    qfold_cc_results = json.load(f)

with open("tts_results_glycylglycine_3_GG+random_1000_qfold-qc.json") as f:
    qfold_qc_results = json.load(f)

qfold_cc_min_tts = qfold_cc_results['final_stats']['min_tts']['value']
qfold_qc_min_tts = qfold_qc_results['final_stats']['min_tts']['value']
print(f"The min tts for classical method is {qfold_cc_min_tts}, for quantum method is {qfold_qc_min_tts}")

The min tts for classical method is 58.98483476110978, for quantum method is 125.83909728469


# Hybrid Job Experiment

In [1]:
from braket.aws import AwsDevice
from braket.aws import AwsQuantumJob, AwsSession
from braket.jobs.config import InstanceConfig
from hybridjobs.utility.HybridJobHelpers import *

import boto3

import os
import json
import time
import altair as alt
import pandas as pd
import numpy as np

#### Step 1: Prepare parameters for batch evaluation

In this part, we set the parameters for batch evaluation

In [2]:
# parameters for experiments
experiment_name = "protein-folding-qrw"
data_path = 'protein-folding-data/precalculated_energies'
suffix_check = ["json"]
experiments_params =  {
    "version": "1",
    "params": [
        {"method": ["qfold-cc", "qfold-qc"]},
        {"initialization": ["minifold", "random"]},
        {"shots": [10000]},
        {"mode": ["local-simulator"]},
        {"device": [{"qc": "null", "cc": "ml.m5.large"},{"qc": "null", "cc": "ml.m5.4xlarge"}]}
    ]
}

hp = {}
hybrid_job_params = []
parse_params(experiments_params['params'], hp, hybrid_job_params)

print(f"parameters for experiments: \n {hybrid_job_params}")

parameters for experiments: 
 [{'method': 'qfold-cc', 'initialization': 'minifold', 'shots': 10000, 'mode': 'local-simulator', 'device': {'qc': 'null', 'cc': 'ml.m5.large'}}, {'method': 'qfold-cc', 'initialization': 'minifold', 'shots': 10000, 'mode': 'local-simulator', 'device': {'qc': 'null', 'cc': 'ml.m5.4xlarge'}}, {'method': 'qfold-cc', 'initialization': 'random', 'shots': 10000, 'mode': 'local-simulator', 'device': {'qc': 'null', 'cc': 'ml.m5.large'}}, {'method': 'qfold-cc', 'initialization': 'random', 'shots': 10000, 'mode': 'local-simulator', 'device': {'qc': 'null', 'cc': 'ml.m5.4xlarge'}}, {'method': 'qfold-qc', 'initialization': 'minifold', 'shots': 10000, 'mode': 'local-simulator', 'device': {'qc': 'null', 'cc': 'ml.m5.large'}}, {'method': 'qfold-qc', 'initialization': 'minifold', 'shots': 10000, 'mode': 'local-simulator', 'device': {'qc': 'null', 'cc': 'ml.m5.4xlarge'}}, {'method': 'qfold-qc', 'initialization': 'random', 'shots': 10000, 'mode': 'local-simulator', 'device':

In [4]:
# Upload dataset to S3
s3_path = upload_data(data_path,suffix_check)
print(f"upload data to s3 path: {s3_path}")


upload data to s3 path: s3://amazon-braket-qc-53d2cb00/protein-folding-data


#### Step 2: Prepare image for experiment

In this part, we use the following code to prepare the image for experiment. For the first run, 
please run build_and_push.sh to create the image. For future experiments, avoid running
build_and_push.sh unless you want to rebuild the image

In [5]:
account_id = boto3.client("sts").get_caller_identity()["Account"]
region = boto3.client('s3').meta.region_name
image_name = f"amazon-braket-{experiment_name.lower()}-jobs"
image_uri = f"{account_id}.dkr.ecr.{region}.amazonaws.com/{image_name}:latest"

print(f"the hybrid job image for {account_id} in region {region}: {image_uri}")

# For the first run, please use the following code to create the image for this application. For future experiments, comment
# the following code unless you want to rebuild the image
!sh build_and_push.sh {image_name}

the hybrid job image for 685723555941 in region us-west-2: 685723555941.dkr.ecr.us-west-2.amazonaws.com/amazon-braket-protein-folding-qrw-jobs:latest
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Sending build context to Docker daemon  102.7MB
Step 1/4 : FROM 292282985366.dkr.ecr.us-west-2.amazonaws.com/amazon-braket-base-jobs:1.0-cpu-py37-ubuntu18.04
 ---> c2e3f19ae3b6
Step 2/4 : RUN python3 -m pip install --upgrade pip
 ---> Using cache
 ---> d0a4b1afada6
Step 3/4 : RUN python3 -m pip install numpy==1.22     scipy==1.5.4     tensorflow==2.13.0     Keras==2.13.1     qiskit==0.34.2     qiskit-aer==0.10.3     qiskit-aqua==0.9.5     qiskit-ibmq-provider==0.18.3     qiskit-ignis==0.7.0     qiskit-terra==0.19.2     matplotlib==3.3.4     bokeh==2.3.3
 ---> Using cache
 ---> d1455da79ec0
Step 4/4 : COPY hybridjobs/psi4 /home/ubuntu/psi4conda/bin/psi4

In [6]:
hybrid_jobs_json = f"{experiment_name}-hybrid-jobs.json"
print(f"job info will be saved in {hybrid_jobs_json}")

job info will be saved in protein-folding-qrw-hybrid-jobs.json


#### Step 3: Launch Amazon Braket Hybrid Jobs for experiment

In this part, we use the following code to launch the same number of hybrid jobs as the sets of parameters for this experiments.
When the number of jobs exceeds 5 RPS, this thread will wait. The default setting of this experiment will take around **7 hours** to 
finish.

In [7]:
# Long runnning cell due to Burst rate of CreateJob requests < 5 RPS
# sudo apt-get install python-prctl at first
# https://stackoverflow.com/questions/34361035/python-thread-name-doesnt-show-up-on-ps-or-htop
from threading import Thread
import threading
import setproctitle

def launch_hybrid_jobs(hybrid_job_params=hybrid_job_params, hybrid_jobs_json=hybrid_jobs_json):
    setproctitle.setproctitle(threading.current_thread().name)
    # parse evaluation parameters and trigger hybrid jobs:
    jobs = []
    names = []

    job_name = f"{experiment_name}-job"
    device_param_list = ["shots", "device"]

    for job_param in hybrid_job_params:
        
        algorithm_param_name = ""
        for k,v in job_param.items():
            if k not in device_param_list:
                algorithm_param_name = algorithm_param_name+f"-{v[0]}"
        algorithm_param_name=algorithm_param_name[1:]
        quantum_device = get_quantum_device(job_param['device']['qc'])
        classical_device = job_param['device']['cc']

        device_name = classical_device.replace(".","-")
        device_name = device_name.replace("x","")
        
        name = f"{algorithm_param_name}-{device_name}-" + str(int(time.time()))
        name = name.lower()
        # name = f"{experiment_name}-"+ str(int(time.time()))
        print(f"name is {name}")

        tmp_job = AwsQuantumJob.create(
            device=quantum_device,
            source_module="hybridjobs",
            entry_point=f"hybridjobs.{job_name}:main",
            job_name=name,
            hyperparameters=job_param,
            input_data=s3_path,
            instance_config=InstanceConfig(instanceType=classical_device),
            image_uri=image_uri,
            wait_until_complete=False,
        )
        
#         from braket.jobs.local import LocalQuantumJob
        
#         tmp_job = LocalQuantumJob.create(
#             device=quantum_device,
#             source_module=f"{experiment_name}",
#             entry_point=f"{experiment_name}.{job_name}:main",
#             hyperparameters=job_param,
#             input_data=s3_path,
#             image_uri=image_uri,
#         )   
        
        print(f"Finish create {experiment_name} with {name}")

        jobs.append(tmp_job)
        names.append(name)


        while not queue_check(jobs):
            time.sleep(5)
    jobs_arn = []

    for job in jobs:
        jobs_arn.append(job.arn)

    jobs_states = {
        "experiment_name": experiment_name,
        "hybrid-jobs-arn": jobs_arn,
        "names": names
    }
    
    
    # save hybrid job arn for further analysis
    json_object = json.dumps(jobs_states, indent=4)

    with open(hybrid_jobs_json, "w") as outfile:
        outfile.write(json_object)
        
    print(f"Finish launch all the hybrid jobs and save all the files")


In [8]:

# remove existing hybrid_jobs_json file
!rm {hybrid_jobs_json}

t = Thread(target=launch_hybrid_jobs, name="launch-hybrid-job", daemon=True).start()

# launch_hybrid_jobs()

In [9]:
# run the following scripts to check the created threads
!ps -aux | grep launch-hybrid-job

ec2-user  2452  0.0  0.0 119860  2728 pts/0    Ss+  05:49   0:00 /bin/bash -c ps -aux | grep launch-hybrid-job
ec2-user  2454  0.0  0.0 119420   968 pts/0    S+   05:49   0:00 grep launch-hybrid-job
ec2-user 32267  3.2  1.3 1701344 214332 ?      Ssl  05:46   0:04 launch-hybrid-job
fail to get null: list index out of range, use sv1 instead
name is q-m-l-ml-m5-large-1697176146
Finish create protein-folding-qrw with q-m-l-ml-m5-large-1697176146
There are 1 jobs in RUNNING or QUEUED status
fail to get null: list index out of range, use sv1 instead
name is q-m-l-ml-m5-4large-1697176163
Finish create protein-folding-qrw with q-m-l-ml-m5-4large-1697176163
There are 2 jobs in RUNNING or QUEUED status
fail to get null: list index out of range, use sv1 instead
name is q-r-l-ml-m5-large-1697176171
Finish create protein-folding-qrw with q-r-l-ml-m5-large-1697176171
There are 3 jobs in RUNNING or QUEUED status
fail to get null: list index out of range, use sv1 instead
name is q-r-l-ml-m5-4large-169

#### Step 4: Jobs finish and visualize results

Please use the following code to check the status of hybrid jobs. The status of hybrid jobs can also be checked in the Amazon Braket console. Optionally, if the email if input when deploying the solution, emails will be sent at the same number of hybrid jobs once 
the status of jobs changes.

In [13]:
# run the following code to test whether all the jobs finish
results = []
if os.path.exists(hybrid_jobs_json):
    # recover hybrid jobs and show result
    jobs_states_load = None
    with open(hybrid_jobs_json, "r") as outfile:
        jobs_states_load = json.load(outfile)

    completed_jobs_arn = set()

    for job_name, job_arn in zip(jobs_states_load["names"], jobs_states_load["hybrid-jobs-arn"]):
        current_job = AwsQuantumJob(job_arn)
        print(f"the state of job {job_name} is : {current_job.state()}")
        if current_job.state() == 'COMPLETED':
            completed_jobs_arn.update({job_arn})

    whole_jobs_num = len(jobs_states_load["names"])

    if len(completed_jobs_arn) == whole_jobs_num:
        print(f"all jobs completed")
        for job_arn in completed_jobs_arn:
            current_job = AwsQuantumJob(job_arn)
            results.append(current_job.result())
            print(current_job.result())
        # display results
        results = display_results(results, experiments_params)
else:
    print(f"JSON file for job arns not generated! please wait for the thread(launch-hybrid-job) to finish")

the state of job q-m-l-ml-m5-large-1696571643 is : COMPLETED
the state of job q-m-l-ml-m5-4large-1696571658 is : COMPLETED
the state of job q-r-l-ml-m5-large-1696571667 is : COMPLETED
the state of job q-r-l-ml-m5-4large-1696571676 is : COMPLETED
the state of job q-m-l-ml-m5-large-1696571866 is : COMPLETED
the state of job q-m-l-ml-m5-4large-1696571875 is : COMPLETED
the state of job q-r-l-ml-m5-large-1696572011 is : COMPLETED
the state of job q-r-l-ml-m5-4large-1696572021 is : COMPLETED
all jobs completed
{'precalculated_energies': {'hypermeter': {'device': "{'qc': 'null', 'cc': 'ml.m5.large'}", 'initialization': 'random', 'method': 'qfold-cc', 'mode': 'local-simulator', 'shots': '10000'}, 'result': {'initial_step': 2, 'final_step': 10, 'tts': [80.2793512683781, 82.84507059622045, 79.80480389291786, 73.90645184334613, 68.35430135379889, 61.21228579850771, 65.008484602282, 61.70899525614872], 'initialization_stats': {'phis_precision': [80.29186537993999], 'psis_precision': [12.524701960

In [15]:
rename_result = {}
device_list = []
x_list = []
y_list = []
for k,vs in results.items():
    device_name = k
    for index, v in enumerate(vs):
        device_list.append(device_name)
        x_list.append(index)
        y_list.append(v)
source = pd.DataFrame({
    "Step": np.array(x_list),
    "Time to Solution": np.array(y_list),
    "Device": np.array(device_list),
})

alt.Chart(source).mark_line(point = True).encode(
    x='Step',
    y='Time to Solution',
    color='Device',
).properties(
    title = f"{experiment_name} experiments",
    width = 700,
    height = 600,
).interactive()

CONTAINER ID   IMAGE     COMMAND   CREATED   STATUS    PORTS     NAMES
