In [1]:
import os
import functools
from itertools import groupby
import operator
from collections import defaultdict

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from dwave.system.samplers import DWaveSampler
from dwave.system.composites import EmbeddingComposite

from codon_optimization import *

---
# Amino_acid_to_Codon

Input: Amino acid sequence

In [2]:
#spike_sars2_seq = 'MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT'
A0A2U1LIM9 = 'MQSTTSVKLSPFDLMTALLNGKVSFDTSNTSDTNIPLAVFMENRELLMILTTSVAVLIGCVVVLVWRRSSSAAKKAAESPVIVVPKKVTEDEVDDGRKKVTVFFGTQTGTAEGFAKALVEEAKARYEKAVFKVIDLDDYAAEDDEYEEKLKKESLAFFFLATYGDGEPTDNAARFYKWFTEGEEKGEWLEKLQYAVFGLGNRQYEHFNKIAKVVDEKLVEQGAKRLVPVGMGDDDQCIEDDFTAWKELVWPELDQLLRDEDDTSVATPYTAAVAEYRVVFHDKPETYDQDQLTNGHAVHDAQHPCRSNVAVKKELHSPLSDRSCTHLEFDISNTGLSYETGDHVGVYVENLSEVVDEAEKLIGLPPHTYFSVHTDNEDGTPLGGASLPPPFPPCTLRKALASYADVLSSPKKSALLALAAHATDSTEADRLKFLASPAGKDEYAQWIVASHRSLLEVMEAFPSAKPPLGVFFASVAPRLQPRYYSISSSPKFAPNRIHVTCALVYEQTPSGRVHKGVCSTWMKNAVPMTESQDCSWAPIYVRTSNFRLPSDPKVPVIMIGPGTGLAPFRGFLQERLAQKEAGTELGTAILFFGCRNRKVDFIYEDELNNFVETGALSELVTAFSREGATKEYVQHKMTQKASDIWNLLSEGAYLYVCGDAKGMAKDVHRTLHTIVQEQGSLDSSKAELYVKNLQMAGRYLRDVW'

test_amino_seq = fragmenting_amino_acid_seq(A0A2U1LIM9, 3, 1) # Check this function in codon_hamiltonian.py
test_amino_seq

'TTS'

Enumerating all possible codons of an input

In [3]:
codon_seq = Amino_acid_to_Codon(test_amino_seq)
codon_seq()

[['ACA', 'ACC', 'ACG', 'ACU'],
 ['ACA', 'ACC', 'ACG', 'ACU'],
 ['AGC', 'AGU', 'UCA', 'UCC', 'UCG', 'UCU']]

Codon sequence translation from RNA bases to DNA bases

In [4]:
codon_seq_in_dna_base = codon_seq.in_dna_base()
print(codon_seq())
print(codon_seq_in_dna_base)

[['ACA', 'ACC', 'ACG', 'ACU'], ['ACA', 'ACC', 'ACG', 'ACU'], ['AGC', 'AGU', 'UCA', 'UCC', 'UCG', 'UCU']]
[['ACA', 'ACC', 'ACG', 'ACT'], ['ACA', 'ACC', 'ACG', 'ACT'], ['AGC', 'AGT', 'TCA', 'TCC', 'TCG', 'TCT']]


---
# Codon_Hamiltonian

parameters

In [5]:
weight_params = {'c_f': 0.1, 'c_GC': 1, 'c_R': 0.1, 'c_L': 1, 'epsilon_f': 0.001, 'rho_T': 0.5, 'epsilon': 1, 'infty': 10}
H_codon = Codon_Hamiltonian(test_amino_seq, weight_params)

## 1. Codon Usage Bias

$$
\mathcal{H}_{cub} = c_f\sum^N_i-\log\left[C_i + \varepsilon_f\right]q_i
$$

codon frequency table

In [6]:
import python_codon_tables as pct

# List the names of every codon-usage table available in python_codon_tables.
print('Available tables:', pct.available_codon_tables_names)

# Codon-usage table of the chosen host; indexing it with an amino-acid letter
# gives a {codon: frequency} dict (see the displayed rows).
table = pct.get_codons_table('e_coli_316407')

# One row per residue of the test fragment: the residue itself and the
# frequency dict of all codons that encode it.
codon_frequency_rows = [[residue, table[residue]] for residue in test_amino_seq]
my_data = pd.DataFrame(
    codon_frequency_rows,
    columns=['Amino acid sequence', 'Frequences of all possible codons'],
)
# Leave the frame as the cell's last expression so the notebook renders it.
my_data

Available tables: ['b_subtilis_1423', 'd_melanogaster_7227', 'm_musculus_domesticus_10092', 'm_musculus_10090', 'e_coli_316407', 'g_gallus_9031', 'c_elegans_6239', 's_cerevisiae_4932', 'h_sapiens_9606']
  Amino acid sequence                  Frequences of all possible codons
0                   T  {'ACA': 0.13, 'ACC': 0.44, 'ACG': 0.27, 'ACT':...
1                   T  {'ACA': 0.13, 'ACC': 0.44, 'ACG': 0.27, 'ACT':...
2                   S  {'AGC': 0.28, 'AGT': 0.15, 'TCA': 0.12, 'TCC':...


$\vec{\zeta}$, where $\zeta_i = \log(C_i+\varepsilon_f)$

In [7]:
H_codon.vec_zeta(epsilon_f=0)

array([-2.04022083, -0.82098055, -1.30933332, -1.83258146, -2.04022083,
       -0.82098055, -1.30933332, -1.83258146, -1.27296568, -1.89711998,
       -2.12026354, -1.89711998, -1.89711998, -1.89711998])

$\mathcal{H}_f$

In [8]:
H_codon.H_f

AttributeError: 'Codon_Hamiltonian' object has no attribute 'H_f'

## 2. Codon Pair Usage Bias

$$
CPS(a_k, c_i, a_l, c_j) = \ln\left(\frac{f^c_{ij}}{f^a_{kl}}\right) - \ln\left(\frac{f^c_{i}}{f^a_{k}}\right) - \ln\left(\frac{f^c_{j}}{f^a_{l}}\right) 
$$

In [9]:
host = 'e_coli' #'human'
pcpt = pd.read_csv('codon_table/CPS_'+host+'.csv')
pcpt

Unnamed: 0,AminoPair,CodonPair,Observed,Expected,Ob/Ex,CPS
0,AA,GCTGCA,1078,849.0720,1.269621,0.238719
1,AA,GCCGCG,1751,2456.2440,0.712877,-0.338446
2,AA,GCCGCA,1725,1432.8090,1.203929,0.185590
3,AA,GCCGCC,2238,1842.1830,1.214863,0.194631
4,AA,GCCGCT,1314,1091.6640,1.203667,0.185373
...,...,...,...,...,...,...
3716,YW,TACTGG,996,493.2100,2.019424,0.702812
3717,YY,TATTAT,547,833.0436,0.656628,-0.420637
3718,YY,TATTAC,906,628.4364,1.441673,0.365804
3719,YY,TACTAT,428,628.4364,0.681055,-0.384112


In [10]:
@functools.lru_cache(maxsize=None)
def _load_cps_table(host):
    """Read the codon-pair-score table of ``host`` once and memoise it.

    The table is read from ``./codon_table/CPS_<host>.csv`` relative to the
    current working directory. The returned DataFrame is shared between
    calls, so callers must treat it as read-only.
    """
    return pd.read_csv('./codon_table/CPS_' + host + '.csv')


def _cps(codon_pair, host):
    """Return the codon pair score (CPS) of ``codon_pair`` for ``host``.

    Parameters
    ----------
    codon_pair : str
        Value to look up in the ``CodonPair`` column (e.g. ``'GCTGCA'``).
    host : str
        Host name used to select the file ``./codon_table/CPS_<host>.csv``.

    Returns
    -------
    The ``CPS`` value of the first row whose ``CodonPair`` equals
    ``codon_pair``.

    Raises
    ------
    IndexError
        If no row of the table matches ``codon_pair``.
    """
    # The CSV is parsed once per host instead of on every lookup.
    cps_table = _load_cps_table(host)
    scores = cps_table.loc[cps_table['CodonPair'] == codon_pair, 'CPS'].values
    if scores.size == 0:
        raise IndexError(
            'codon pair {!r} not found in the CPS table of host {!r}'.format(codon_pair, host)
        )
    return scores[0]

In [11]:
_cps('GCTGCA', host)

0.2387187630996811

$$
\mathcal{H}_{cpub}=\sum^N
$$

In [12]:
H_codon.matrix_CPS()

array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.71786492,
        -0.18460032,  0.30106066,  0.09549254,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        , -0.01671945,
         0.27365785,  0.15863344,  0.30225068,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        , -0.07387786,
        -0.75444849, -0.27298438, -0.46148861,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        , -0.43793291,
        -0.09452744, -0.26209914, -0.23062843,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        , -1.10592181, -0.55076037,
         0.82129621, -0.01

## 3. Optimize target GC concentration

$$
\rho_{GC} = \frac{1}{L}\sum^N_{i} s_i q_i,
$$
where $s_i$ is the number of G's and C's in the $i$-th codon and $L$ is the length of the codon sequence.

$\vec{s}$

In [13]:
H_codon.vec_s()

array([1, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 2, 1])

$\vec{s}\otimes\vec{s}$

In [14]:
upper_trianglular_part, diagonal_part = H_codon.matrix_ss()

$$
\mathcal{H}_{GC} = \frac{2c_{GC}}{L^2}\sum^N_i\sum^N_{j<i} s_is_jq_iq_j + \sum^N_i \left(\frac{c_{GC}}{L^2}s^2_i - \frac{2\rho_Tc_{GC}}{L}s_i\right)q_i + c_{GC}\rho^2_T
$$

In [None]:
quadratic_coeff, linear_coeff, const = H_codon.H_GC

In [13]:
np.max(quadratic_coeff)

-0.09876543209876543

## 4. Minimize sequentially repeated nucleotides

$$
\mathcal{H}_R = c_R\sum^N_i\sum^{N}_{j<i} R_{ij}q_iq_j,
$$
$$
R_{ij} = r(C_i, C_j) \kappa_{i,j}
$$

$r(C_i, C_j)$

In [16]:
# r(C_i, C_j)
def repeated_sequential_nucleotides(Ci, Cj):
    """Score the longest run of identical characters in ``Ci + Cj``.

    The two codons are concatenated and split into maximal runs of equal
    consecutive characters; with ``m`` the length of the longest run, the
    score is ``m**2 - 1`` (so a sequence without any repeat scores 0).

    Parameters
    ----------
    Ci, Cj : str
        Codons, concatenated in this order.

    Returns
    -------
    NumPy integer
        ``m**2 - 1``; 0 when both codons are empty.
    """
    joined = Ci + Cj
    # Length of every maximal run of identical consecutive characters.
    run_lengths = [sum(1 for _ in run) for _, run in groupby(joined)]
    if not run_lengths:
        # Nothing to repeat in an empty sequence: same score as "no repeats".
        return np.int_(0)
    longest_run = np.max(run_lengths)
    return longest_run ** 2 - 1

In [17]:
H_codon._repeated_sequential_nucleotides('AUG', 'CAA')

3

$R_{ij}$

In [18]:
H_codon.matrix_R()

array([[0., 0., 0., 0., 3., 3., 3., 3., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 3., 3., 3., 3., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 3., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 3., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 3., 3., 0., 3., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 3., 3., 3., 3., 3., 3.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 3., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 3., 3., 3., 3.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [19]:
np.max(H_codon.matrix_R())

3.0

## 5. Additional constraints

$$
\mathcal{H}_p = -\sum^N_i \epsilon q_i + \sum^N_i\sum^N_{j<i} \tau_{ij}q_iq_j
$$

In [18]:
H_codon.matrix_tau()

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0., 50.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0., 50., 50., 50., 50., 50.],
       [ 0.,  0.,  0.,  0.,  0., 50., 50., 50., 50.],
       [ 0.,  0.,  0.,  0.,  0.,  0., 50., 50., 50.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0., 50., 50.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., 50.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])

## 6. Codon Hamiltonian

$$
\mathcal{H} = \sum^N_i Q_{ii} q_i + \sum^N_i\sum^N_{j<i}Q_{ij}q_iq_j + c_{GC}\rho^2_T.
$$

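$$
Q_{ii} = c_f\zeta_i - \frac{2\rho_Tc_{GC}}{L}s_i + \frac{c_{GC}}{L^2}s^2_i - \varepsilon + c_LL_{i}
$$
$$
Q_{ij} = \frac{2c_{GC}}{L^2}\sigma_{ij} + c_R R_{ij} + \tau_{ij} + c_L L_{ij}
$$
where $\sigma_{ij} = s_i s_j$ and $L$ (without subscript) is the length of the codon sequence, as in $\mathcal{H}_{GC}$.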

In [29]:
np.max(H_codon.Q_ii), np.min(H_codon.Q_ii)

(-1.4431245143157814, -1.6854704980510382)

In [30]:
np.max(H_codon.Q_ij), np.min(H_codon.Q_ij)

(10.320987654320987, 0.0)

---
# Simulating a Toy Model 

test input

In [2]:
#spike_sars2_seq = 'MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT'
A0A2U1LIM9 = 'MQSTTSVKLSPFDLMTALLNGKVSFDTSNTSDTNIPLAVFMENRELLMILTTSVAVLIGCVVVLVWRRSSSAAKKAAESPVIVVPKKVTEDEVDDGRKKVTVFFGTQTGTAEGFAKALVEEAKARYEKAVFKVIDLDDYAAEDDEYEEKLKKESLAFFFLATYGDGEPTDNAARFYKWFTEGEEKGEWLEKLQYAVFGLGNRQYEHFNKIAKVVDEKLVEQGAKRLVPVGMGDDDQCIEDDFTAWKELVWPELDQLLRDEDDTSVATPYTAAVAEYRVVFHDKPETYDQDQLTNGHAVHDAQHPCRSNVAVKKELHSPLSDRSCTHLEFDISNTGLSYETGDHVGVYVENLSEVVDEAEKLIGLPPHTYFSVHTDNEDGTPLGGASLPPPFPPCTLRKALASYADVLSSPKKSALLALAAHATDSTEADRLKFLASPAGKDEYAQWIVASHRSLLEVMEAFPSAKPPLGVFFASVAPRLQPRYYSISSSPKFAPNRIHVTCALVYEQTPSGRVHKGVCSTWMKNAVPMTESQDCSWAPIYVRTSNFRLPSDPKVPVIMIGPGTGLAPFRGFLQERLAQKEAGTELGTAILFFGCRNRKVDFIYEDELNNFVETGALSELVTAFSREGATKEYVQHKMTQKASDIWNLLSEGAYLYVCGDAKGMAKDVHRTLHTIVQEQGSLDSSKAELYVKNLQMAGRYLRDVW'
HPDF_amino = 'EGPALRRSYWRHLRRLVLGPPEPPFSHVCQVGDPVLRGVAAPVERAQLGGPELQRLTQRLVQVMRRRRCVGLSAPQLGVPRQVLALELPEALCRECPPRQRALRQMEPFPLRVFVNPSLRVLDSRLVTFPEGCESVAGFLACVPRFQAVQISGLDPNGEQVVWQASGWAARIIQHEMDHLQGCLFIDKMDSRTFTNVYWMKVND'

In [3]:
amino_seq = fragmenting_amino_acid_seq(HPDF_amino, 3, 0)
codon_seq = Amino_acid_to_Codon(amino_seq)
print('Amino acid seq:', amino_seq)
print('All possible codons:', codon_seq())

Amino acid seq: EGP
All possible codons: [['GAA', 'GAG'], ['GGA', 'GGC', 'GGG', 'GGU'], ['CCA', 'CCC', 'CCG', 'CCU']]


Preparing H_codon

In [4]:
#weight_params = {'c_f': -0.1, 'c_GC': 1, 'c_R': 0.1, 'c_L': 1, 'epsilon_f': 0.001, 'rho_T': 0.5, 'epsilon': 1, 'infty': 10}
#weight_params = {'c_f': 0, 'c_GC': 1, 'c_R': 0, 'c_L': 0, 'epsilon_f': 0.00, 'rho_T': 0.5, 'epsilon': 0, 'infty': 0}
weight_params = {'c_f': 0, 'c_GC': 1, 'c_R': 0, 'c_L': 0, 'epsilon_f': 0.000, 'rho_T': 0.5, 'epsilon': 2, 'infty': 4}
H_codon = Codon_Hamiltonian(amino_seq, weight_params)

## 1. Exact Diagonalization

Constructing Ising model from QUBO & Running 

In [5]:
J, h, const = H_codon.Q_to_Jh()

In [6]:
tests = Quantum_Ising(J=J, h=h, shift=const)
tests.hamiltonian

array([[-3.5308642 ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        , -5.7037037 ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        , -5.75308642, ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [ 0.        ,  0.        ,  0.        , ..., 21.18518519,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
        21.58024691,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        , 32.44444444]])

In [10]:
tests.run_ExactDiag()
tests.get_outcome()

{'|1000011000>': 1.0,
 '|1010001000>': 1.0,
 '|1000010001>': 1.0,
 '|1010000001>': 1.0}

## 2. DWaveSampler

Constructing $Q_{ij}$ as a dict

In [53]:
Q = H_codon.get_Q_dict()

### Simulation

In [54]:
from dwave.system.samplers import DWaveSampler
from dwave.system.composites import EmbeddingComposite

#import matplotlib
#matplotlib.use("agg")
#from matplotlib import pyplot as plt

In [55]:
# ------- Run our QUBO on the QPU -------
# Set up QPU parameters
# We will probably have to tune these ourselves?
# NOTE(review): the sample_qubo call in this notebook has its chain_strength
# argument commented out, so chainstrength is currently not passed there.
chainstrength = 15 #min: 12
# Passed as num_reads to sample_qubo.
numruns = 10000

In [56]:
# Run the QUBO on the solver from your config file
# EmbeddingComposite wraps the DWaveSampler; the QUBO dict Q is submitted
# through it with numruns reads and a problem label.
sampler = EmbeddingComposite(DWaveSampler())
response = sampler.sample_qubo(Q,
                               #chain_strength=chainstrength,
                               num_reads=numruns,
                               #num_spin_reversal_transforms=50,
                               label='Codon_sequence')

In [57]:
def get_min_res(response):
    """Return the distinct lowest-energy samples of ``response`` and their energy.

    Parameters
    ----------
    response
        Sampler result exposing ``data(fields=[...])``, which yields one
        record per sample holding the requested fields in the given order.
        Each sample must offer ``items()`` yielding ``(variable, value)``
        pairs.

    Returns
    -------
    (min_samples, min_E)
        ``min_samples`` lists, for every distinct sample that reaches the
        minimum energy, the variables whose value is 1 (in order of first
        appearance, duplicates removed); ``min_E`` is that minimum energy.

    Raises
    ------
    ValueError
        If ``response`` contains no samples.
    """
    records = list(response.data(fields=['sample', 'energy']))
    if not records:
        raise ValueError('response contains no samples')

    # Compare energy values, not whole records: a record tuple only equals a
    # scalar by accident (NumPy's reflected comparison), never for plain floats.
    min_E = min(energy for _, energy in records)

    min_samples = []
    for sample, energy in records:
        if energy != min_E:
            continue
        a_min_sample = [k for k, v in sample.items() if v == 1]
        if a_min_sample not in min_samples:  # remove duplicates of min_sample
            min_samples.append(a_min_sample)

    return min_samples, min_E

In [97]:
#get_min_res(response)

In [59]:
def get_min_res1(response): #more effective for memory usage
    """Streaming variant: distinct lowest-energy samples and their energy.

    The response is traversed twice through ``response.data`` (once for the
    minimum energy, once for the matching samples) without materialising all
    records in memory.

    Parameters
    ----------
    response
        Sampler result exposing ``data(fields=[...])``, which yields one
        record per sample holding the requested fields in the given order.
        Each sample must offer ``items()`` yielding ``(variable, value)``
        pairs.

    Returns
    -------
    (min_samples, min_E)
        ``min_samples`` lists, for every distinct sample that reaches the
        minimum energy, the variables whose value is 1 (in order of first
        appearance, duplicates removed); ``min_E`` is that minimum energy.

    Raises
    ------
    ValueError
        If ``response`` contains no samples.
    """
    # First pass: minimum over the energy values themselves (record[0]), so
    # the later comparison is scalar-vs-scalar rather than tuple-vs-scalar.
    min_E = min((record[0] for record in response.data(fields=['energy'])), default=None)
    if min_E is None:
        raise ValueError('response contains no samples')

    # Second pass: keep only the samples that reach the minimum energy.
    min_samples = []
    for sample, energy in response.data(fields=['sample', 'energy']):
        if energy != min_E:
            continue
        a_min_sample = [k for k, v in sample.items() if v == 1]
        if a_min_sample not in min_samples:  # remove duplicates of min_sample
            min_samples.append(a_min_sample)

    return min_samples, min_E

In [98]:
#get_min_res1(response)[0]

In [67]:
# Print one row per record of the response: the variables set to 1, the
# record's energy, and int(-energy) under the existing 'Cut Size' heading.
# Header and rows share one format string so every value sits under its own
# heading; the columns are wide enough for the index lists and full-precision
# energies.
row_format = '{:>32s}{:^26s}{:^12s}'
rule = '-' * 70

print(rule)
print(row_format.format('Set 1', 'Energy', 'Cut Size'))
print(rule)
for sample, E in response.data(fields=['sample','energy']):
    S1 = [k for k,v in sample.items() if v == 1]
    print(row_format.format(str(S1), str(E), str(int(-1*E))))

------------------------------------------------------------
          Set 0          Set 1    Energy        Cut Size    
------------------------------------------------------------
[0, 3, 6, 10, 15]-10.250000000000004      10       
[0, 4, 9, 10, 16]-10.250000000000004      10       
[1, 5, 6, 13, 16]-10.250000000000004      10       
[0, 4, 6, 13, 16]-10.250000000000004      10       
[1, 2, 6, 13, 15]-10.250000000000004      10       
[1, 2, 9, 13, 15]-10.250000000000004      10       
[1, 5, 6, 10, 15]-10.250000000000004      10       
[0, 4, 6, 10, 15]-10.250000000000004      10       
[0, 3, 6, 10, 15]-10.250000000000004      10       
[0, 3, 6, 13, 16]-10.250000000000004      10       
[1, 2, 6, 10, 15]-10.250000000000004      10       
[0, 4, 6, 10, 15]-10.250000000000004      10       
[1, 2, 9, 10, 15]-10.250000000000004      10       
[1, 2, 6, 10, 15]-10.250000000000004      10       
[0, 3, 6, 13, 15]-10.250000000000004      10       
[0, 4, 9, 13, 15]-10.250000000000004 

new $\mathcal{H}_L$

[0, 2, 3, 11, 16, 17, 25]-7.537854785825701 <br>
[0, 2, 4, 11, 15, 17, 25]-7.527818748163343  <br>

[0, 2, 8, 12, 15, 17, 25, 28]-8.02910625876468

[0, 2, 3, 11, 15, 17, 25]-6.592669715640199        <br>

[0, 2, 3, 11, 15, 17, 25, 28]-7.150419551458638  <br>
[0, 2, 3, 11, 15, 17, 24, 28]-7.0986156153496704       <br>
[0, 2, 4, 11, 15, 17, 25, 28]-7.085933946922571  <br>
[0, 2, 4, 11, 15, 17, 24, 28]-7.034130010813604 

In [11]:
H_codon.list_all_possible_codons

[['AUG'],
 ['CAA', 'CAG'],
 ['AGC', 'AGU', 'UCA', 'UCC', 'UCG', 'UCU'],
 ['ACA', 'ACC', 'ACG', 'ACU']]

In [12]:
H_codon.run_Dwave()

([0, 2, 3, 11], -4.948192389640039)

In [14]:
H_codon.opt_codon_seq(base='DNA')

['ATG', 'CAG', 'AGC', 'ACG']