In [None]:
!pip install biopandas
!pip install py3Dmol
!pip install ipywidgets

In [1]:
# Remove qmu* files left in the working directory by earlier runs
# (the save() cells below write ./qmu_*.pickle files here).
!rm qmu*

In [2]:
import networkx as nx
from utility.MoleculeParser import MoleculeData
from utility.QMUQUBO import QMUQUBO
from utility.AnnealerOptimizer import Annealer
from utility.ResultProcess import ResultParser
import time

# Date-and-hour tag (YYYYMMDD-HH); it is passed to save_mol_file() further down.
timestamp = time.strftime("%Y%m%d-%H")
%matplotlib inline

2022-03-21 03:43:15,781 dwave.cloud INFO MainThread Log level for 'dwave.cloud' namespace set to 0


In [3]:
# initial parameters for experiment data
# (plain string literals: the former f-prefix had no placeholders to format)
s3_bucket = "amazon-braket-1a222675c751"  # the name of the bucket
prefix = "annealer-experiment"  # the name of the folder in the bucket

raw_path = './molecule-data/Aspirin.mol2'  # the mol2 file for this experiment

# Build the molecule data from the mol2 file and save it;
# save() returns the path of the file it wrote.
mol_data = MoleculeData(raw_path, 'qmu')

data_path = mol_data.save("latest")

num_rotation_bond = mol_data.bond_graph.rb_num
# Adjacent literals are concatenated, which keeps the message text unchanged
# without a backslash continuation inside the string.
print(
    f"You have loaded the raw molecule data and saved as {data_path}. \n"
    f"This molecule has {num_rotation_bond} rotable bond"
)

INFO:root:parse mol2 file!
INFO:root:finish save qmu_Aspirin_data_latest.pickle


You have loaded the raw molecule data and saved as ./qmu_Aspirin_data_latest.pickle. 
This molecule has 4 rotable bond


In [4]:
# Collect the per-method initialisation arguments, then create the QMUQUBO object.
method = ['pre-calc']
init_param = {}

for mt in method:
    if mt != 'pre-calc':
        continue
    # names of the parameters that identify a 'pre-calc' model
    init_param[mt] = {'param': ['M', 'D', 'A', 'hubo_qubo_val']}

qmu_qubo = QMUQUBO(mol_data, method, **init_param)

INFO:root:initial pre-calculate for constructing molecule QUBO


In [52]:
# Parameter grid handed to build_model(), keyed by the build method.
num_rotation_bond = mol_data.bond_graph.rb_num

method = 'pre-calc'
model_param = {
    method: {
        # 'M': range(1, num_rotation_bond+1),
        'M': [2],
        'D': [8],
        'A': [300, 600, 900],
        'hubo_qubo_val': [200],
    }
}

qmu_qubo.build_model(**model_param)

INFO:root:Construct model for M:2,D:2,A:300,hubo_qubo_val:200 0.0001537044843037923 min
INFO:root:Construct model for M:2,D:2,A:600,hubo_qubo_val:200 0.00011076529820760091 min
INFO:root:Construct model for M:2,D:2,A:900,hubo_qubo_val:200 0.0001319567362467448 min
INFO:root:Construct model for M:2,D:4,A:300,hubo_qubo_val:200 0.0010620554288228352 min
INFO:root:Construct model for M:2,D:4,A:600,hubo_qubo_val:200 0.0005726734797159831 min
INFO:root:Construct model for M:2,D:4,A:900,hubo_qubo_val:200 0.0005127747853597005 min
INFO:root:Construct model for M:2,D:8,A:300,hubo_qubo_val:200 0.0013926148414611817 min
INFO:root:Construct model for M:2,D:8,A:600,hubo_qubo_val:200 0.0013730486234029135 min
INFO:root:Construct model for M:2,D:8,A:900,hubo_qubo_val:200 0.00160750945409139 min
INFO:root:Construct model for M:3,D:2,A:300,hubo_qubo_val:200 0.000499876340230306 min
INFO:root:Construct model for M:3,D:2,A:600,hubo_qubo_val:200 0.00040566126505533855 min
INFO:root:Construct model for M:3

0

In [53]:
# Parameter values recorded so far for the models of the current `method`.
qmu_qubo.model_info[method]

{'M': {2, 3}, 'D': {2, 4, 8}, 'A': {300, 600, 900}, 'hubo_qubo_val': {200}}

In [54]:
# save the model; save() returns the path of the pickle file it wrote
model_path = qmu_qubo.save("latest")

print(f"You have built the QUBO model and saved it as {model_path}")

INFO:root:finish save qmu_Raloxifene_model_latest.pickle


You have built the QUBO model and saved it as ./qmu_Raloxifene_model_latest.pickle


In [55]:
# Reload the models from the file written by save() in the previous cell.
qmu_qubo_optimize = QMUQUBO.load(model_path)

In [56]:
# get the model you want to optimize
# The values chosen here must be among those passed to build_model(). The build
# cell of this section only creates models with M = 2, so the previous M = 3
# named a model that does not exist after Restart & Run All.
M = 2
D = 8
A = 900
hubo_qubo_val = 200
# model names follow the pattern {M}_{D}_{A}_{hubo_qubo_val}
model_name = f"{M}_{D}_{A}_{hubo_qubo_val}"
method = "pre-calc"

qubo_model = qmu_qubo_optimize.get_model(method, model_name)

In [57]:
# Log the available parameter values per method and display them as a dict.
qmu_qubo_optimize.describe_model()

INFO:root:method: pre-calc
INFO:root:The model_name should be {M}_{D}_{A}_{hubo_qubo_val}
INFO:root:param: M, value {2, 3}
INFO:root:param: D, value {8, 2, 4}
INFO:root:param: A, value {600, 900, 300}
INFO:root:param: hubo_qubo_val, value {200}


{'pre-calc': {'M': {2, 3},
  'D': {2, 4, 8},
  'A': {300, 600, 900},
  'hubo_qubo_val': {200}}}

In [58]:
method = 'dwave-qa'

# Settings for the quantum annealer run, written as a single literal.
optimizer_param = {
    'shots': 10000,
    'bucket': s3_bucket,  # the name of the bucket
    'prefix': prefix,  # the name of the folder in the bucket
    'device': "arn:aws:braket:::device/qpu/d-wave/Advantage_system4",
    "embed_method": "default",
}

qa_optimizer = Annealer(qubo_model, method, **optimizer_param)

INFO:root:use quantum annealer arn:aws:braket:::device/qpu/d-wave/Advantage_system4 


In [59]:
# Compute the embedding only; no annealing task is created by this call.
qa_optimizer.embed()
# Create the annealing task and keep what fit() returns.
qa_optimize_result = qa_optimizer.fit()

INFO:root:fit() ...
INFO:root:finish save /tmp/qa_result.pickle
INFO:root:_upload_result_json, bucket=amazon-braket-1a222675c751, key=annealer-experiment/641d37df-08d8-437f-875f-9baafb530b6c/qa_result.pickle
INFO:root:dwave-qa save to s3 - 641d37df-08d8-437f-875f-9baafb530b6c: None


In [60]:
# Keep the task id: the post-processing cells pass it to ResultParser as "task_id".
qa_task_id = qa_optimizer.get_task_id()
print(f"task id is {qa_task_id}")

task id is 641d37df-08d8-437f-875f-9baafb530b6c


# develop post-process

In [80]:
import networkx as nx
from utility.MoleculeParser import MoleculeData
from utility.QMUQUBO import QMUQUBO
from utility.AnnealerOptimizer import Annealer
from utility.ResultProcess import ResultParser
from utility.MolGeoCalc import update_pts_distance
import time
import numpy as np

timestamp = time.strftime("%Y%m%d-%H")
%matplotlib inline

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [89]:
# Inputs for the post-processing cells below.
# (plain string literal: the former f-prefix had no placeholders to format)
s3_bucket = "amazon-braket-1a222675c751"  # the name of the bucket
prefix = "annealer-experiment"  # the name of the folder in the bucket
raw_path = './molecule-data/Focalin.mol2'  # the mol2 file for this experiment
data_path = './qmu_Focalin_data_latest.pickle'  # processed molecule data written by MoleculeData.save()
qa_task_id = 'db9281da-d774-402f-9df4-8c12313493a4'  # id of an already submitted dwave-qa task

In [91]:
method = "dwave-sa"
# Paths of the raw mol2 file and of the processed molecule data.
sa_param = {
    "raw_path": raw_path,
    "data_path": data_path,
}

sa_process_result = ResultParser(method, **sa_param)
# print(f"{method} result is {sa_process_result.get_all_result()}")

# get_time() yields four values; only the first one is used for dwave-sa
local_time, _, _, _ = sa_process_result.get_time()

print(
    f"time for {method}: \n "
    f"    local time is {local_time}"
)

sa_atom_pos_data = sa_process_result.generate_optimize_pts()
# save unfold file for visualization and parameters for experiment: 1. volume value 2. relative improvement
sa_process_result.save_mol_file(timestamp)

INFO:root:_load_raw_result
INFO:root:load simulated annealer raw result
INFO:root:MoleculeData.load()
INFO:root:init mol data for final position
INFO:root:init mol data for raw position
INFO:root:_parse_model_info
INFO:root:_init_parameters
INFO:root:parse simulated annealer result
INFO:root:sa only has local_time!
INFO:root:generate_optimize_pts()
INFO:root:var_dict_raw {'5': ['1']} var_dict_list [{'5': '1'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_1'}
INFO:root:tor list {'X_5_1'}
INFO:root:optimize_gain 1.0
INFO:root:var_dict_raw {'5': ['2']} var_dict_list [{'5': '2'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_2'}
INFO:root:tor list {'X_5_2'}
INFO:root:optimize_gain 0.9982652574327464
INFO:root:var_dict_raw {'5': ['8']} var_dict_list [{'5': '8'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_8'}
INFO:root:tor list {'X_5_8'}
INFO:root:optimize_gain 0.9737821701261852
INFO:root:var_dict_raw {'5': ['3']} var_dict_list [{'5': '3'}]
INFO:root:_init_parame

time for dwave-sa: 
     local time is 34.348660707473755


['./molecule-data/Focalin_dwave-sa_20220322-07.mol2',
 './molecule-data/Focalin_dwave-sa_20220322-07.json']

In [92]:
# Inspect the SA result summary (volume values, gain, unfolding/annealing results).
sa_process_result.parameters

{'volume': {'optimize': 4.377073266484346,
  'initial': 4.377073266484346,
  'gain': 1.0,
  'unfolding_results': ['X_5_1'],
  'annealing_results': ['X_5_1'],
  'optimize_info': {'optimize_state': False, 'result_rank': 5}}}

In [84]:
# Aggregate the raw SA response into a DataFrame and keep the 100 lowest-energy rows.
pddf_sample_result = (
    sa_process_result.raw_result["response"]
    .aggregate()
    .to_pandas_dataframe()
)
pddf_head_sample = pddf_sample_result.sort_values(by='energy').head(100)
pddf_head_sample

Unnamed: 0,x_5_1,x_5_2,x_5_3,x_5_4,x_5_5,x_5_6,x_5_7,x_5_8,energy,num_occurrences
1,1,0,0,0,0,0,0,0,-4.377073,324
7,0,1,0,0,0,0,0,0,-4.36948,3
2,0,0,0,0,0,0,0,1,-4.262316,350
6,0,0,1,0,0,0,0,0,-4.243502,9
0,0,0,0,0,0,0,1,0,-4.086457,173
3,0,0,0,1,0,0,0,0,-4.066816,22
5,0,0,0,0,0,1,0,0,-3.951459,70
4,0,0,0,0,1,0,0,0,-3.943051,49


In [85]:
# Aggregate the raw QA response into a DataFrame and keep the 100 lowest-energy rows.
# NOTE(review): qa_process_result is only created in a later cell of this notebook,
# so this cell raises NameError when the notebook is run top to bottom.
pddf_sample_result = (
    qa_process_result.raw_result["response"]
    .aggregate()
    .to_pandas_dataframe()
)
pddf_head_sample = pddf_sample_result.sort_values(by='energy').head(100)
pddf_head_sample

Unnamed: 0,x_5_1,x_5_2,x_5_3,x_5_4,x_5_5,x_5_6,x_5_7,x_5_8,chain_break_fraction,energy,num_occurrences
0,1,0,0,0,0,0,0,0,0.0,-4.377073,83
1,0,1,0,0,0,0,0,0,0.0,-4.36948,112
2,0,0,0,0,0,0,0,1,0.0,-4.262316,107
3,0,0,1,0,0,0,0,0,0.0,-4.243502,86
4,0,0,0,0,0,0,1,0,0.0,-4.086457,111
5,0,0,0,1,0,0,0,0,0.0,-4.066816,115
6,0,0,0,0,0,1,0,0,0.0,-3.951459,127
7,0,0,0,0,1,0,0,0,0.0,-3.943051,102
8,0,0,0,0,0,0,0,0,0.0,0.0,82
9,1,1,0,0,0,0,0,0,0.0,591.253447,1


In [93]:
method = "dwave-qa"
# Where the quantum-annealing result is stored, plus the molecule file paths.
qa_param = {
    "bucket": s3_bucket,
    "prefix": prefix,
    "task_id": qa_task_id,
    "raw_path": raw_path,
    "data_path": data_path,
}

qa_process_result = ResultParser(method, **qa_param)
# print(f"{method} result is {qa_process_result.get_all_result()}")

local_time, task_time, total_time, access_time = qa_process_result.get_time()

print(
    f"time for {method}: \n "
    f"    local time is {local_time},\n "
    f"    task time is {task_time}, \n "
    f"    qpu total time is {total_time}, \n "
    f"    qpu access time is {access_time}"
)

INFO:root:_load_raw_result
INFO:root:load quantum annealer raw result
INFO:root:_read_result_obj
INFO:root:_read_result_obj: annealer-experiment/db9281da-d774-402f-9df4-8c12313493a4/qa_result.pickle
INFO:root:MoleculeData.load()
INFO:root:init mol data for final position
INFO:root:init mol data for raw position
INFO:root:_parse_model_info
INFO:root:_init_parameters
INFO:root:parse quantum annealer result
INFO:root:_read_result_obj
INFO:root:_read_result_obj: annealer-experiment/db9281da-d774-402f-9df4-8c12313493a4/results.json


time for dwave-qa: 
     local time is 5.065170526504517,
     task time is 2.277, 
     qpu total time is 0.143333, 
     qpu access time is 0.11606699999999999


In [94]:
# Generate the optimized atom positions from the QA result.
qa_atom_pos_data = qa_process_result.generate_optimize_pts()
# save unfold file for visualization and parameters for experiment: 1. volume value 2. relative improvement
qa_process_result.save_mol_file(timestamp)

INFO:root:generate_optimize_pts()
INFO:root:var_dict_raw {'5': ['1']} var_dict_list [{'5': '1'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_1'}
INFO:root:tor list {'X_5_1'}
INFO:root:optimize_gain 1.0
INFO:root:var_dict_raw {'5': ['2']} var_dict_list [{'5': '2'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_2'}
INFO:root:tor list {'X_5_2'}
INFO:root:optimize_gain 0.9982652574327464
INFO:root:var_dict_raw {'5': ['8']} var_dict_list [{'5': '8'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_8'}
INFO:root:tor list {'X_5_8'}
INFO:root:optimize_gain 0.9737821701261852
INFO:root:var_dict_raw {'5': ['3']} var_dict_list [{'5': '3'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_3'}
INFO:root:tor list {'X_5_3'}
INFO:root:optimize_gain 0.9694839539451261
INFO:root:var_dict_raw {'5': ['7']} var_dict_list [{'5': '7'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_7'}
INFO:root:tor list {'X_5_7'}
INFO:root:optimize_gain 0.9336049166760635
INFO:root:var_di

INFO:root:pass current duplicate var
INFO:root:var_dict_raw {'5': ['3', '4']} var_dict_list [{'5': '3'}, {'5': '4'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_3', 'x_5_4'}
INFO:root:tor list {'X_5_3'}
INFO:root:pass current duplicate var
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_3', 'x_5_4'}
INFO:root:tor list {'X_5_4'}
INFO:root:pass current duplicate var
INFO:root:var_dict_raw {'5': ['6', '8']} var_dict_list [{'5': '6'}, {'5': '8'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_6', 'x_5_8'}
INFO:root:tor list {'X_5_6'}
INFO:root:pass current duplicate var
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_6', 'x_5_8'}
INFO:root:tor list {'X_5_8'}
INFO:root:pass current duplicate var
INFO:root:var_dict_raw {'5': ['3', '6']} var_dict_list [{'5': '3'}, {'5': '6'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_3', 'x_5_6'}
INFO:root:tor list {'X_5_3'}
INFO:root:pass current duplicate var
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_3', 

['./molecule-data/Focalin_dwave-qa_20220322-07.mol2',
 './molecule-data/Focalin_dwave-qa_20220322-07.json']

In [95]:
# Inspect the QA result summary (volume values, gain, unfolding/annealing results).
qa_process_result.parameters

{'volume': {'optimize': 0,
  'initial': 0,
  'gain': 1.0,
  'unfolding_results': ['X_5_1'],
  'annealing_results': ['X_5_1'],
  'optimize_info': {'optimize_state': False, 'result_rank': 35}}}

In [84]:
# 100 lowest-energy rows of the aggregated QA response; only the first rows are displayed.
temp_pddf_head_sample = (
    qa_process_result.raw_result["response"]
    .aggregate()
    .to_pandas_dataframe()
    .sort_values(by=['energy'])
    .head(100)
)
temp_pddf_head_sample.head()

Unnamed: 0,x_11_1,x_11_2,x_11_3,x_11_4,x_11_5,x_11_6,x_11_7,x_11_8,x_1_1,x_1_1*x_2_1,...,x_2_8,x_2_8*x_1_2,x_2_8*x_1_3,x_2_8*x_1_4,x_2_8*x_1_5,x_2_8*x_1_7,x_2_8*x_1_8,chain_break_fraction,energy,num_occurrences
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.147727,-3.040442,8
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.147727,0.0,2
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.147727,191.775623,2
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0.136364,191.777901,8
12,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.147727,191.79956,2


In [138]:
# List the keys stored under entry '9' of the bond graph's sort_ris_data.
qa_process_result.mol_data.bond_graph.sort_ris_data['9'].keys()

dict_keys(['2+3', '2+20', '24+25', '23+24', '25+26', '26+27', '4+5', '15+16', '8+9', '2+3,2+20', '2+20,23+24', '24+25,23+24', '24+25,25+26', '25+26,26+27', '2+3,4+5', '2+3,15+16', '4+5,8+9', '4+5,15+16', '2+3,2+20,23+24', '2+20,24+25,23+24', '24+25,23+24,25+26', '24+25,25+26,26+27', '2+3,2+20,4+5', '2+3,2+20,15+16', '2+3,4+5,8+9', '4+5,8+9,15+16', '2+3,2+20,24+25,23+24', '2+20,24+25,23+24,25+26', '24+25,23+24,25+26,26+27', '2+3,2+20,23+24,4+5', '2+3,2+20,23+24,15+16', '2+3,2+20,4+5,8+9', '2+3,2+20,24+25,23+24,25+26', '2+20,24+25,23+24,25+26,26+27', '2+3,2+20,24+25,23+24,4+5', '2+3,2+20,24+25,23+24,15+16', '2+3,2+20,23+24,4+5,8+9', '2+3,2+20,24+25,23+24,25+26,26+27', '2+3,2+20,24+25,23+24,25+26,4+5', '2+3,2+20,24+25,23+24,25+26,15+16', '2+3,2+20,24+25,23+24,4+5,8+9', '2+3,2+20,24+25,23+24,25+26,26+27,4+5', '2+3,2+20,24+25,23+24,25+26,26+27,15+16', '2+3,2+20,24+25,23+24,25+26,4+5,8+9', '2+3,2+20,24+25,23+24,25+26,26+27,4+5,8+9'])

In [120]:
# qa_process_result.mol_data.bond_graph.non_ar_bonds
import math

# Keep the bonds from non_ar_bonds for which neither end atom has a `bc`
# value that is close to zero.
temp_rb_list = [
    rot
    for rot in qa_process_result.mol_data.bond_graph.non_ar_bonds
    if not (
        math.isclose(qa_process_result.mol_data.bond_graph.bc[rot[0]], 0)
        or math.isclose(qa_process_result.mol_data.bond_graph.bc[rot[1]], 0)
    )
]

temp_rb_list

[('2', '3'),
 ('2', '20'),
 ('3', '19'),
 ('3', '4'),
 ('4', '5'),
 ('4', '12'),
 ('8', '9'),
 ('12', '13'),
 ('15', '16'),
 ('23', '24'),
 ('24', '25'),
 ('25', '26'),
 ('26', '27'),
 ('27', '33'),
 ('27', '29'),
 ('29', '30'),
 ('30', '31'),
 ('31', '32'),
 ('32', '33')]

In [121]:
# rb_list stored on the bond graph, to compare against temp_rb_list computed above.
qa_process_result.mol_data.bond_graph.rb_list

[('2', '3'),
 ('2', '20'),
 ('4', '5'),
 ('8', '9'),
 ('15', '16'),
 ('23', '24'),
 ('24', '25'),
 ('25', '26'),
 ('26', '27')]

In [126]:
# Print every row of df_bonds whose 'atom2' entry is '19'.
bond_rows = qa_process_result.mol_data.bond_graph.df_bonds
for index, row in bond_rows[bond_rows['atom2'] == '19'].iterrows():
    print(row)

atom1         3
atom2        19
bond_type     1
Name: 4, dtype: object
atom1        13
atom2        19
bond_type    ar
Name: 21, dtype: object
atom1        18
atom2        19
bond_type    ar
Name: 30, dtype: object


In [33]:
# Aggregate the raw QA response into a DataFrame and keep the 100 lowest-energy rows.
pddf_sample_result = (
    qa_process_result.raw_result["response"]
    .aggregate()
    .to_pandas_dataframe()
)

pddf_head_sample = pddf_sample_result.sort_values(by='energy').head(100)

pddf_head_sample.head()

Unnamed: 0,x_1_1,x_1_1*x_3_1,x_1_2,x_1_2*x_3_1,x_1_3,x_1_3*x_3_1,x_1_4,x_1_4*x_3_1,x_1_5,x_1_5*x_3_1,...,x_3_7*x_1_4,x_3_8,x_3_8*x_1_1,x_3_8*x_1_2,x_3_8*x_1_3,x_3_8*x_1_4,x_3_8*x_1_8,chain_break_fraction,energy,num_occurrences
1240,0,0,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0.136364,4325.203086,1
5169,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0.170455,4384.972415,1
105,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.125,4596.364205,1
280,0,1,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.193182,4791.513194,1
5030,0,0,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0.170455,4974.964977,1


In [35]:
# For each sampled row, print the set of valid variables whose value is 1.
for index, row in pddf_head_sample.iterrows():
    best_config = row.filter(items=qa_process_result.valid_var_name)
    selected = best_config.loc[best_config.eq(1)]
    chosen_var = set(selected.index)
    print(chosen_var)

{'x_2_2', 'x_1_8', 'x_3_7'}
{'x_3_6', 'x_1_3'}
{'x_3_5'}
set()
{'x_3_4', 'x_1_3'}
{'x_2_8', 'x_3_3'}
{'x_1_7', 'x_3_2'}
{'x_1_3', 'x_3_2'}
{'x_1_3', 'x_3_2'}
{'x_3_5', 'x_1_3'}
{'x_3_7'}
{'x_1_7', 'x_2_8', 'x_3_2'}
{'x_3_3', 'x_1_4', 'x_2_7'}
{'x_3_8', 'x_2_7'}
{'x_2_8'}
{'x_3_7'}
{'x_3_8', 'x_2_2', 'x_1_3'}
{'x_1_7', 'x_3_6', 'x_2_7'}
{'x_1_7', 'x_3_6', 'x_2_4'}
{'x_1_3'}
{'x_3_8'}
{'x_3_3', 'x_1_3'}
{'x_1_7', 'x_3_8'}
{'x_3_4'}
{'x_1_5', 'x_3_8', 'x_2_2', 'x_1_8'}
{'x_1_6', 'x_2_8', 'x_3_1'}
{'x_1_8', 'x_3_5', 'x_2_4', 'x_1_3'}
{'x_3_5', 'x_2_2', 'x_1_3'}
{'x_1_7', 'x_2_2', 'x_3_2'}
{'x_2_7', 'x_3_7', 'x_1_7'}
{'x_3_5', 'x_2_5', 'x_1_8'}
{'x_1_6', 'x_3_5', 'x_2_6'}
{'x_1_7', 'x_3_2'}
{'x_1_8', 'x_3_4'}
{'x_3_5', 'x_3_8', 'x_1_3'}
{'x_1_3'}
{'x_1_7', 'x_2_2', 'x_3_1'}
{'x_2_2', 'x_3_1'}
{'x_2_5', 'x_3_3', 'x_1_3'}
{'x_3_8', 'x_2_8', 'x_3_3'}
{'x_3_8', 'x_2_1'}
{'x_2_5', 'x_1_3'}
{'x_3_5', 'x_2_2'}
{'x_2_5', 'x_3_1'}
{'x_2_4'}
{'x_1_7', 'x_3_7'}
{'x_1_7', 'x_3_2'}
{'x_1_3', 'x_3_1'}
{'

In [1]:
from utility.MoleculeParser import MoleculeData
from utility.QMUQUBO import QMUQUBO
from utility.AnnealerOptimizer import Annealer
from utility.ResultProcess import ResultParser
import time

# Date-and-hour tag (YYYYMMDD-HH); it is passed to save_mol_file() in Step 4.
timestamp = time.strftime("%Y%m%d-%H")

2022-03-22 06:57:59,181 dwave.cloud INFO MainThread Log level for 'dwave.cloud' namespace set to 0


# Step 1: Prepare Data

In this part, we load the raw molecule data for experiment.
The [117 ligand](http://www.rcsb.org/ligand/117) was 
put in the repository. We assign the relative 
path to **raw_path**.
The **s3_bucket** and **prefix** are used to store the 
optimization results. We can use the one created with the 
cloudformation for convenience.

In [43]:
# initial parameters for experiment data
# (plain string literal: the former f-prefix had no placeholders to format)
s3_bucket = "amazon-braket-1a222675c751"  # the name of the bucket
prefix = "annealer-experiment"  # the name of the folder in the bucket

raw_path = './molecule-data/Focalin.mol2'  # the mol2 file for this experiment

In [44]:
# Build the molecule data from the mol2 file and save it;
# save() returns the path of the file it wrote.
mol_data = MoleculeData(raw_path, 'qmu')

data_path = mol_data.save("latest")

num_rotation_bond = mol_data.bond_graph.rb_num
print(
    f"You have loaded the raw molecule data and saved as {data_path}. \n"
    f"This molecule has {num_rotation_bond} rotable bond"
)

INFO:root:parse mol2 file!
INFO:root:finish save qmu_Focalin_data_latest.pickle


You have loaded the raw molecule data and saved as ./qmu_Focalin_data_latest.pickle. 
This molecule has 11 rotable bond


After running this block, the processed data 
will be saved as **qmu_Focalin_data_latest.pickle**
and **data_path** will be updated. We can see that this 
molecule has 11 rotatable bonds.

# Step 2: Build Model

In this part, we build the Quadratic Unconstrained 
Binary Optimization (QUBO) model for molecular unfolding.

First, we set the following parameters and 
initialize the QMUQUBO object. 

<center>

| Parameter | Description | Value |
|--- |--- |--- |
|A | penalty scalar |300|
|hubo_qubo_val | energy penalty of make_quadratic() |200|
|M | number of torsions for molecular unfolding| [1, max number of rotatable bonds] |
|D| angle precision of rotation| 8|
|method| the method of building model| 'pre-calc': calculate the score in advance|

 </center>

We use the 'pre-calc' method 
to build the model. This molecule has 11 rotatable bonds and 
we only test up to 3 of them, so we set **M** to [1, 2, 3]. We want 
the angle step to be $45^\circ$, so we set **D** to 8 
(i.e., $8=360^\circ/45^\circ$). The values of **A** and **hubo_qubo_val** 
were chosen experimentally. 

In [45]:
# Collect the per-method initialisation arguments, then create the QMUQUBO object.
method = ['pre-calc']
init_param = {}

for mt in method:
    if mt != 'pre-calc':
        continue
    # names of the parameters that identify a 'pre-calc' model
    init_param[mt] = {'param': ['M', 'D', 'A', 'hubo_qubo_val']}

qmu_qubo = QMUQUBO(mol_data, method, **init_param)

INFO:root:initial pre-calculate for constructing molecule QUBO


In [46]:
# Parameter grid handed to build_model(), keyed by the build method.
num_rotation_bond = mol_data.bond_graph.rb_num

method = 'pre-calc'
model_param = {
    method: {
        # 'M': range(1, num_rotation_bond+1),
        'M': [1, 2, 3],
        'D': [8],
        'A': [300],
        'hubo_qubo_val': [200],
    }
}

qmu_qubo.build_model(**model_param)

INFO:root:Construct model for M:1,D:8,A:300,hubo_qubo_val:200 7.691383361816407e-05 min
INFO:root:Construct model for M:2,D:8,A:300,hubo_qubo_val:200 0.0010442256927490235 min
INFO:root:Construct model for M:3,D:8,A:300,hubo_qubo_val:200 0.0028060038884480795 min


0

We can use the following method to check the properties of 
model. This way, we can build many models conveniently. 
After that, we save the model and update the value of 
**model_path**.

In [47]:
# describe the model parameters: the values are logged per method and the
# returned description is kept in model_info
model_info = qmu_qubo.describe_model()

INFO:root:method: pre-calc
INFO:root:The model_name should be {M}_{D}_{A}_{hubo_qubo_val}
INFO:root:param: M, value {1, 2, 3}
INFO:root:param: D, value {8}
INFO:root:param: A, value {300}
INFO:root:param: hubo_qubo_val, value {200}


In [48]:
# save the model; save() returns the path of the pickle file it wrote
model_path = qmu_qubo.save("latest")

print(f"You have built the QUBO model and saved it as {model_path}")

INFO:root:finish save qmu_Focalin_model_latest.pickle


You have built the QUBO model and saved it as ./qmu_Focalin_model_latest.pickle


# Step 3: Optimize Configuration

In this part, we use SA and QA to find the optimized configuration of molecular unfolding.
At first, we load the model file using **QMUQUBO** object

In [49]:
# Reload the models from the file written by save() in Step 2.
qmu_qubo_optimize = QMUQUBO.load(model_path)

In [50]:
# Log the parameter values available in the loaded file and keep the description.
model_info = qmu_qubo_optimize.describe_model()

INFO:root:method: pre-calc
INFO:root:The model_name should be {M}_{D}_{A}_{hubo_qubo_val}
INFO:root:param: M, value {1, 2, 3}
INFO:root:param: D, value {8}
INFO:root:param: A, value {300}
INFO:root:param: hubo_qubo_val, value {200}


We can see the parameters stored in this model file: M takes the values 1, 2 and 3, 
D equals 8, A equals 300 and hubo_qubo_val equals 200. 

A single file can contain multiple models: we only need to 
give multiple values for a parameter when creating the models.
We then need to use **model_name** to select the model for the experiments.

In [51]:
# Pick one of the built models by name.
M = 1
D = 8
A = 300
hubo_qubo_val = 200
# model names follow the pattern {M}_{D}_{A}_{hubo_qubo_val}
model_name = f"{M}_{D}_{A}_{hubo_qubo_val}"
method = "pre-calc"

qubo_model = qmu_qubo_optimize.get_model(method, model_name)

Here we select the QUBO model with M equal to 1 for the experiment.
 After that, we set the parameters for optimization.

| Parameter | Description | Value |
|--- |--- |--- |
|method | annealing method for QUBO problem |'dwave-sa': use the simulated annealer in ocean toolkit<br> 'dwave-qa': use the quantum annealer|
|shots| number of reads, refer to [dwave-sa](https://docs.ocean.dwavesys.com/projects/neal/en/latest/reference/generated/neal.sampler.SimulatedAnnealingSampler.sample.html#neal.sampler.SimulatedAnnealingSampler.sample) and [dwave-qa](https://amazon-braket-ocean-plugin-python.readthedocs.io/en/latest/_apidoc/braket.ocean_plugin.braket_sampler.html) for details |1 to 10,000|
|bucket | the s3 bucket to store your results | - |
|prefix | the name of the folder in your s3 bucket | - |
|device | the arn name to run your quantum annealing| 'arn:aws:braket:::device/qpu/d-wave/Advantage_system4' <br> 'arn:aws:braket:::device/qpu/d-wave/DW_2000Q_6'|

Then, we can run the SA for this problem:

In [52]:
method = 'dwave-sa'

# The simulated annealer only needs the number of shots.
optimizer_param = {'shots': 1000}

sa_optimizer = Annealer(qubo_model, method, **optimizer_param)

INFO:root:use simulated annealer from dimod


In [53]:
# Run the simulated annealer; the 'time' entry of the result is printed further down.
sa_optimize_result = sa_optimizer.fit()

INFO:root:fit() ...
INFO:root:dwave-sa save to local
INFO:root:finish save sa_result.pickle


We can tell that we set the number of shots for SA to 1000. 
The result is saved as the local file **./sa_result.pickle.**
Alternatively, we can use QA to solve this problem:

In [54]:
method = 'dwave-qa'

# Settings for the quantum annealer run, written as a single literal.
optimizer_param = {
    'shots': 1000,
    'bucket': s3_bucket,  # the name of the bucket
    'prefix': prefix,  # the name of the folder in the bucket
    'device': "arn:aws:braket:::device/qpu/d-wave/Advantage_system4",
    "embed_method": "default",
}

qa_optimizer = Annealer(qubo_model, method, **optimizer_param)

INFO:root:use quantum annealer arn:aws:braket:::device/qpu/d-wave/Advantage_system4 


In this QA run, we set the number of shots to 1000 and 
choose the 
[Advantage_System4.1](https://docs.dwavesys.com/docs/latest/doc_physical_properties.html)
as the QPU. In addition, the results are saved to your bucket automatically and you 
can get the task id for later processing. 

In [55]:
# Compute the embedding only; no annealing task is created by this call.
qa_optimizer.embed()
# Create the annealing task; the 'time' entry of the result is printed further down.
qa_optimize_result = qa_optimizer.fit()

INFO:root:fit() ...
INFO:root:finish save /tmp/qa_result.pickle
INFO:root:_upload_result_json, bucket=amazon-braket-1a222675c751, key=annealer-experiment/db9281da-d774-402f-9df4-8c12313493a4/qa_result.pickle
INFO:root:dwave-qa save to s3 - db9281da-d774-402f-9df4-8c12313493a4: None


In [56]:
# Keep the task id: Step 4 passes it to ResultParser as "task_id".
qa_task_id = qa_optimizer.get_task_id()
print(f"task id is {qa_task_id}")

task id is db9281da-d774-402f-9df4-8c12313493a4


Finally, we can compare the execution time between SA and QA :

In [57]:
# Print the 'time' entry recorded in each optimizer's result for comparison.
print(f"dwave-sa run time {sa_optimize_result['time']}")
print(f"dwave-qa run time {qa_optimize_result['time']}")

dwave-sa run time 34.348660707473755
dwave-qa run time 5.065170526504517


The output above shows that, in this run, SA needs about 34.3 seconds 
and QA needs about 5.1 seconds to find 
a solution.

We sometimes get the best result that occurs only once.

![OneTimeQA](../../../docs/en/images/one-time-qa.png)

This does not always indicate an error. It is a characteristic of the problem, or of how the problem 
is formulated: the linear and quadratic terms vary by many orders of magnitude. If we 
change the value of A to a smaller number, like 10 or 100, more occurrences of the best answer will be observed. 
However, these answers usually break the constraints. For more information about this phenomenon, please refer to this 
[Link](https://support.dwavesys.com/hc/en-us/community/posts/1500000698522-Number-of-occurrences-?input_string=number%20occurance).

# Step 4: PostProcess Result

In this part, we post process the optimizing results for evaluation and visualization.
At first, we prepare the following parameters:

| Parameter | Description | Value |
|--- |--- |--- |
|method | annealing method for QUBO problem |'dwave-sa': use the simulated annealer in ocean toolkit<br> 'dwave-qa': use the quantum annealer|
|raw_path| the path for the original molecule file| './molecule-data/Focalin.mol2' in this example |
|data_path| the path for the processed molecule file| './qmu_Focalin_data_latest.pickle' in this example |
|bucket | the s3 bucket to store your results | - |
|prefix | the name of the folder in your s3 bucket | - |
|task_id | the id for your quantum annealing task| 'db9281da-d774-402f-9df4-8c12313493a4' in this example |

Then we can run the post-process using **ResultParser** object for SA:

In [58]:
method = "dwave-sa"
# Paths of the raw mol2 file and of the processed molecule data.
sa_param = {
    "raw_path": raw_path,
    "data_path": data_path,
}

sa_process_result = ResultParser(method, **sa_param)
# print(f"{method} result is {sa_process_result.get_all_result()}")

# get_time() yields four values; only the first one is used for dwave-sa
local_time, _, _, _ = sa_process_result.get_time()

print(
    f"time for {method}: \n "
    f"    local time is {local_time}"
)

INFO:root:_load_raw_result
INFO:root:load simulated annealer raw result
INFO:root:MoleculeData.load()
INFO:root:init mol data for final position
INFO:root:init mol data for raw position
INFO:root:_parse_model_info
INFO:root:_init_parameters
INFO:root:parse simulated annealer result
INFO:root:sa only has local_time!


time for dwave-sa: 
     local time is 34.348660707473755


In [59]:
# Generate the optimized atom positions from the SA result.
sa_atom_pos_data = sa_process_result.generate_optimize_pts()
# save unfold file for visualization and parameters for experiment: 1. volume value 2. relative improvement
sa_process_result.save_mol_file(timestamp)


INFO:root:generate_optimize_pts()
INFO:root:var_dict_raw {'5': ['1']} var_dict_list [{'5': '1'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_1'}
INFO:root:tor list {'X_5_1'}
INFO:root:optimize_gain 1.0
INFO:root:var_dict_raw {'5': ['2']} var_dict_list [{'5': '2'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_2'}
INFO:root:tor list {'X_5_2'}
INFO:root:optimize_gain 0.9982652574327464
INFO:root:var_dict_raw {'5': ['8']} var_dict_list [{'5': '8'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_8'}
INFO:root:tor list {'X_5_8'}
INFO:root:optimize_gain 0.9737821701261852
INFO:root:var_dict_raw {'5': ['3']} var_dict_list [{'5': '3'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_3'}
INFO:root:tor list {'X_5_3'}
INFO:root:optimize_gain 0.9694839539451261
INFO:root:var_dict_raw {'5': ['7']} var_dict_list [{'5': '7'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_7'}
INFO:root:tor list {'X_5_7'}
INFO:root:optimize_gain 0.9336049166760635
INFO:root:var_di

['./molecule-data/Focalin_dwave-sa_20220322-06.mol2',
 './molecule-data/Focalin_dwave-sa_20220322-06.json']

In [64]:
# Inspect the SA result summary (volume values, gain, unfolding/annealing results).
sa_process_result.parameters

{'volume': {'optimize': 4.377073266484346,
  'initial': 4.377073266484346,
  'gain': 1.0,
  'unfolding_results': ['X_1_1'],
  'annealing_results': ['X_1_1'],
  'optimize_info': {'optimize_state': False, 'result_rank': 5}}}

In the first block, we can see that the **local time**
for SA is around 34 seconds. 
With the **generate_optimize_pts()** and **save_mol_file()** methods, the final 3D 
points after unfolding are generated and saved as json and mol2 files. The last 
block shows the optimizing results, which are also stored in the json file. 
The **gain** value is the ratio of the optimized volume to the initial volume; 
in this run it is 1.0, i.e. the volume is unchanged. Each entry of **unfolding_results** 
has the form `X_<bond>_<angle index>`: here `X_1_1` means that rotatable bond 1 rotates 
$0^\circ$ ($360/8*(1-1)$), while, for example, `X_15_7` would indicate 
that the rotatable bond 15 should rotate $270^\circ$ ($360/8*(7-1)$) and `X_14_8` 
that the rotatable bond 14 should rotate $315^\circ$ ($360/8*(8-1)$).
At the same time, you can run the post-process for QA:

In [61]:
method = "dwave-qa"
# Where the quantum-annealing result is stored, plus the molecule file paths.
qa_param = {
    "bucket": s3_bucket,
    "prefix": prefix,
    "task_id": qa_task_id,
    "raw_path": raw_path,
    "data_path": data_path,
}

qa_process_result = ResultParser(method, **qa_param)
# print(f"{method} result is {qa_process_result.get_all_result()}")

local_time, task_time, total_time, access_time = qa_process_result.get_time()

print(
    f"time for {method}: \n "
    f"    local time is {local_time},\n "
    f"    task time is {task_time}, \n "
    f"    qpu total time is {total_time}, \n "
    f"    qpu access time is {access_time}"
)

INFO:root:_load_raw_result
INFO:root:load quantum annealer raw result
INFO:root:_read_result_obj
INFO:root:_read_result_obj: annealer-experiment/db9281da-d774-402f-9df4-8c12313493a4/qa_result.pickle
INFO:root:MoleculeData.load()
INFO:root:init mol data for final position
INFO:root:init mol data for raw position
INFO:root:_parse_model_info
INFO:root:_init_parameters
INFO:root:parse quantum annealer result
INFO:root:_read_result_obj
INFO:root:_read_result_obj: annealer-experiment/db9281da-d774-402f-9df4-8c12313493a4/results.json


time for dwave-qa: 
     local time is 5.065170526504517,
     task time is 2.277, 
     qpu total time is 0.143333, 
     qpu access time is 0.11606699999999999


We can see that there are several types of time metrics for running QA.
This task has a **local time** of about 5.1 s, which is the time between calling the API and 
getting the annealing result. The **task time** is the metric read from the json file in the 
bucket. We can also see the **qpu total time** and **qpu access time**, which represent the 
actual time spent running on the QPU. Please refer to [Operation and Timing](https://docs.dwavesys.com/docs/latest/c_qpu_timing.html)
for details.

In [62]:
# Generate the 3D atom positions after unfolding from the QA result.
qa_atom_pos_data = qa_process_result.generate_optimize_pts()
# Save the unfolded molecule for visualization together with the experiment
# parameters: 1. volume value 2. relative improvement.
# `timestamp` is already a str (built with time.strftime at the top of the
# notebook), so it is passed directly instead of being wrapped in an f-string.
# The returned list of written file paths is shown as the cell output.
qa_process_result.save_mol_file(timestamp)

INFO:root:generate_optimize_pts()
INFO:root:var_dict_raw {'5': ['1']} var_dict_list [{'5': '1'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_1'}
INFO:root:tor list {'X_5_1'}
INFO:root:optimize_gain 1.0
INFO:root:var_dict_raw {'5': ['2']} var_dict_list [{'5': '2'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_2'}
INFO:root:tor list {'X_5_2'}
INFO:root:optimize_gain 0.9982652574327464
INFO:root:var_dict_raw {'5': ['8']} var_dict_list [{'5': '8'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_8'}
INFO:root:tor list {'X_5_8'}
INFO:root:optimize_gain 0.9737821701261852
INFO:root:var_dict_raw {'5': ['3']} var_dict_list [{'5': '3'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_3'}
INFO:root:tor list {'X_5_3'}
INFO:root:optimize_gain 0.9694839539451261
INFO:root:var_dict_raw {'5': ['7']} var_dict_list [{'5': '7'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_7'}
INFO:root:tor list {'X_5_7'}
INFO:root:optimize_gain 0.9336049166760635
INFO:root:var_di

INFO:root:chosen var {'x_5_2', 'x_5_5'}
INFO:root:tor list {'X_5_2'}
INFO:root:optimize_gain 0.9982652574327464
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_2', 'x_5_5'}
INFO:root:tor list {'X_5_5'}
INFO:root:optimize_gain 0.9027613067043506
INFO:root:var_dict_raw {'5': ['3', '4']} var_dict_list [{'5': '3'}, {'5': '4'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_3', 'x_5_4'}
INFO:root:tor list {'X_5_3'}
INFO:root:optimize_gain 0.9694839539451261
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_3', 'x_5_4'}
INFO:root:tor list {'X_5_4'}
INFO:root:optimize_gain 0.90276372382556
INFO:root:var_dict_raw {'5': ['6', '8']} var_dict_list [{'5': '6'}, {'5': '8'}]
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_6', 'x_5_8'}
INFO:root:tor list {'X_5_6'}
INFO:root:optimize_gain 0.902762788142846
INFO:root:_init_parameters
INFO:root:chosen var {'x_5_6', 'x_5_8'}
INFO:root:tor list {'X_5_8'}
INFO:root:optimize_gain 0.900841577829964
INFO:root:var_dict_raw {'5': ['3', '6

['./molecule-data/Focalin_dwave-qa_20220322-06.mol2',
 './molecule-data/Focalin_dwave-qa_20220322-06.json']

In [63]:
# Display the parameters recorded on the QA result object as this cell's output
# (a dict keyed by 'volume': optimized/initial volume, gain, selected torsions).
qa_process_result.parameters

{'volume': {'optimize': 4.377089532479611,
  'initial': 4.377073266484346,
  'gain': 1.000003716180716,
  'unfolding_results': ['X_5_4'],
  'annealing_results': ['x_5_6', 'x_5_4'],
  'optimize_info': {'optimize_state': True, 'result_rank': 33}}}

In the same way, the optimized results are translated into 3D points and saved 
as local json and mol2 files. The result above indicates that QA gains a 
1.000004x increase in 
volume, i.e. the volume is essentially unchanged in this run.

Finally, we can open the folder containing the optimized results:

![optimize-results](../../../docs/en/images/optimize-results.png)

 <center>Optimize Results</center>

We can see that the json results and mol2 files of SA and QA are 
stored in this folder. If we carry out more 
experiments, more time-stamped results are 
stored incrementally. 
For visualization, 
we can upload a 
result file such as **117_ideal_dwave-qa_20220216-05.mol2** 
into an 
[online viewer tool](https://www.rcsb.org/3d-view) 
to see the result:

![visual](../../../docs/en/images/visualization.png)

 <center>Visualization</center>