In [1]:
import functools
from itertools import groupby
import operator
import numpy as np
import pandas as pd

from codon_hamiltonian import *

---
# Amino_acid_to_Codon

Input: Amino acid sequence

In [41]:
#spike_sars2_seq = 'MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT'
A0A2U1LIM9 = 'MQSTTSVKLSPFDLMTALLNGKVSFDTSNTSDTNIPLAVFMENRELLMILTTSVAVLIGCVVVLVWRRSSSAAKKAAESPVIVVPKKVTEDEVDDGRKKVTVFFGTQTGTAEGFAKALVEEAKARYEKAVFKVIDLDDYAAEDDEYEEKLKKESLAFFFLATYGDGEPTDNAARFYKWFTEGEEKGEWLEKLQYAVFGLGNRQYEHFNKIAKVVDEKLVEQGAKRLVPVGMGDDDQCIEDDFTAWKELVWPELDQLLRDEDDTSVATPYTAAVAEYRVVFHDKPETYDQDQLTNGHAVHDAQHPCRSNVAVKKELHSPLSDRSCTHLEFDISNTGLSYETGDHVGVYVENLSEVVDEAEKLIGLPPHTYFSVHTDNEDGTPLGGASLPPPFPPCTLRKALASYADVLSSPKKSALLALAAHATDSTEADRLKFLASPAGKDEYAQWIVASHRSLLEVMEAFPSAKPPLGVFFASVAPRLQPRYYSISSSPKFAPNRIHVTCALVYEQTPSGRVHKGVCSTWMKNAVPMTESQDCSWAPIYVRTSNFRLPSDPKVPVIMIGPGTGLAPFRGFLQERLAQKEAGTELGTAILFFGCRNRKVDFIYEDELNNFVETGALSELVTAFSREGATKEYVQHKMTQKASDIWNLLSEGAYLYVCGDAKGMAKDVHRTLHTIVQEQGSLDSSKAELYVKNLQMAGRYLRDVW'
# Select the ith (1-indexed) non-overlapping window of `window_size` residues
# from the protein sequence as the test peptide.
window_size = 3
ith = 6
test_amino_seq = A0A2U1LIM9[window_size * (ith - 1):window_size * ith]
test_amino_seq

'TAL'

Enumerating all possible codons for each amino acid of the input

In [3]:
# Wrap the selected peptide; calling the instance returns one list per residue
# containing every codon (RNA alphabet) listed for that amino acid.
codon_seq = Amino_acid_to_Codon(test_amino_seq)
codon_seq()

[['GAC', 'GAU'], ['CUA', 'CUC', 'CUG', 'CUU', 'UUA', 'UUG'], ['AUG']]

Codon sequence converted from RNA bases (A, C, G, U) to DNA bases (A, C, G, T)

In [4]:
# Same nested codon lists, rewritten in the DNA alphabet (U -> T in the printed output).
codon_seq_in_dna_base = codon_seq.in_dna_base()
# Print both versions so they can be compared line by line.
print(codon_seq())
print(codon_seq_in_dna_base)

[['GAC', 'GAU'], ['CUA', 'CUC', 'CUG', 'CUU', 'UUA', 'UUG'], ['AUG']]
[['GAC', 'GAT'], ['CTA', 'CTC', 'CTG', 'CTT', 'TTA', 'TTG'], ['ATG']]


---
# Codon_Hamiltonian

parameters

In [5]:
# Weights and constants of the Hamiltonian terms defined in the sections below.
weight_params = {
    'c_f': 0.1,          # c_f: weight of the codon-usage term H_f
    'c_GC': 1,           # c_GC: weight of the GC-content term H_GC
    'c_R': 0.1,          # c_R: weight of the repeated-nucleotide term H_R
    'epsilon_f': 0.001,  # epsilon_f: offset inside the log of H_f
    'rho_T': 0.5,        # rho_T: target GC concentration
    'epsilon': 1,        # epsilon: linear coefficient in H_p
    'infty': 10,         # presumably the penalty magnitude tau_ij in H_p -- TODO confirm
}
H_codon = Codon_Hamiltonian(test_amino_seq, weight_params)

## 1. Incorporating codon usage bias

$$
\mathcal{H}_f = -c_f\sum^N_i\log\left[C_i + \varepsilon_f\right]q_i
$$

codon frequency table

In [6]:
import python_codon_tables as pct

# Print the names of all codon-usage tables available in python_codon_tables.
print('Available tables:', pct.available_codon_tables_names)

# Codon-usage table used for this demo (one of the names printed above).
table = pct.get_codons_table('e_coli_316407')

# One row per residue: the amino-acid letter and its {codon: frequency} mapping.
col1 = test_amino_seq
col2 = [table[x] for x in test_amino_seq]
two_dim_list = [[amino_acid, codon_freqs] for amino_acid, codon_freqs in zip(col1, col2)]

my_data = pd.DataFrame(two_dim_list, columns = ['Amino acid sequence', 'Frequences of all possible codons'])
print(my_data)

Available tables: ['b_subtilis_1423', 'd_melanogaster_7227', 'm_musculus_domesticus_10092', 'm_musculus_10090', 'e_coli_316407', 'g_gallus_9031', 'c_elegans_6239', 's_cerevisiae_4932', 'h_sapiens_9606']
  Amino acid sequence                  Frequences of all possible codons
0                   D                         {'GAC': 0.37, 'GAT': 0.63}
1                   L  {'CTA': 0.04, 'CTC': 0.1, 'CTG': 0.5, 'CTT': 0...
2                   M                                       {'ATG': 1.0}


$\vec{C}$

In [7]:
#np.exp(H_codon.vec_zeta(epsilon_f=0))

$\vec{\zeta}$, where $\zeta_i = \log(C_i+\varepsilon_f)$

In [8]:
H_codon.vec_zeta(epsilon_f=0)

array([-0.99425227, -0.46203546, -3.21887582, -2.30258509, -0.69314718,
       -2.30258509, -2.04022083, -2.04022083,  0.        ])

$\mathcal{H}_f$

In [9]:
H_codon.H_f

array([ 9.91553216e-02,  4.60449416e-02,  3.19418321e-01,  2.29263476e-01,
        6.91149178e-02,  2.29263476e-01,  2.03255796e-01,  2.03255796e-01,
       -9.99500333e-05])

## 2. Optimize target GC concentration

$$
\rho_{GC} = \frac{1}{N}\sum^N_{i} s_i q_i,
$$
where $s_i$: the number of G's and C's in codon $i$.

$\vec{s}$

In [10]:
H_codon.vec_s()

array([2, 1, 1, 2, 2, 1, 0, 1, 1])

$\vec{s}\otimes\vec{s}$

In [11]:
upper_trianglular_part, diagonal_part = H_codon.matrix_ss()

$$
\mathcal{H}_{GC} = \frac{2c_{GC}}{N^2}\sum^N_i\sum^N_{j<i} s_is_jq_iq_j + \sum^N_i \left(\frac{c_{GC}}{N^2}s^2_i - \frac{2\rho_Tc_{GC}}{N}s_i\right)q_i + c_{GC}\rho^2_T
$$

In [12]:
quadratic_coeff, linear_coeff, const = H_codon.H_GC

In [13]:
np.max(quadratic_coeff)

0.09876543209876543

## 3. Minimize sequentially repeated nucleotides

$$
\mathcal{H}_R = c_R\sum^N_i\sum^{N}_{j<i} R_{ij}q_iq_j,
$$
$$
R_{ij} = r(C_i, C_j) \kappa_{i,j}
$$

$r(C_i, C_j)$

In [14]:
# r(C_i, C_j)
def repeated_sequential_nucleotides(Ci, Cj):
    """Penalty for the longest run of identical nucleotides in ``Ci + Cj``.

    The two codons are concatenated and scanned for runs of consecutive
    identical characters. With ``L`` the length of the longest run, the
    penalty is ``L**2 - 1``, so a pair with no repeated neighbour (L == 1)
    costs 0.

    Args:
        Ci: first codon, a string of nucleotide letters.
        Cj: second codon, placed directly after ``Ci``.

    Returns:
        int: ``L**2 - 1``, or 0 when both codons are empty (nothing to repeat).
    """
    sequence = Ci + Cj
    if not sequence:
        # No nucleotides at all: report "no repetition" instead of crashing.
        return 0
    # groupby yields one group per run of equal consecutive characters.
    longest_run = max(sum(1 for _ in run) for _, run in groupby(sequence))
    return longest_run ** 2 - 1

In [15]:
H_codon._repeated_sequential_nucleotides('AUG', 'CAA')

3

$R_{ij}$

In [16]:
H_codon.matrix_R()

array([[0., 0., 3., 3., 3., 3., 3., 3., 0.],
       [0., 0., 0., 0., 0., 3., 8., 8., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 3.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 3.],
       [0., 0., 0., 0., 0., 0., 0., 0., 3.],
       [0., 0., 0., 0., 0., 0., 0., 0., 3.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [17]:
np.max(H_codon.matrix_R())

8.0

## 4. Additional constraints

$$
\mathcal{H}_p = -\sum^N_i \epsilon q_i + \sum^N_i\sum^N_{j<i} \tau_{ij}q_iq_j
$$

In [18]:
H_codon.matrix_tau()

array([[ 0., 50.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0., 50., 50., 50., 50., 50.,  0.],
       [ 0.,  0.,  0.,  0., 50., 50., 50., 50.,  0.],
       [ 0.,  0.,  0.,  0.,  0., 50., 50., 50.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0., 50., 50.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0., 50.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])

## 5. Codon Hamiltonian

$$
\mathcal{H} = \sum^N_i Q_{ii} q_i + \sum^N_i\sum^N_{j<i}Q_{ij}q_iq_j + c_{GC}\rho^2_T.
$$

$$
Q_{ii} = -c_f\zeta_i - \frac{2\rho_Tc_{GC}}{N}s_i + \frac{c_{GC}}{N^2}s^2_i - \varepsilon
$$
$$
Q_{ij} = \frac{2c_{GC}}{N^2}s_is_j + c_R R_{ij} + \tau_{ij}
$$

In [19]:
np.max(H_codon.Q_ii), np.min(H_codon.Q_ii)

(-0.7793471108709824, -1.1037245883831122)

In [20]:
np.max(H_codon.Q_ij), np.min(H_codon.Q_ij)

(10.098765432098766, 0.0)

---
# Simulation

input

In [2]:
#spike_sars2_seq = 'MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT'
A0A2U1LIM9 = 'MQSTTSVKLSPFDLMTALLNGKVSFDTSNTSDTNIPLAVFMENRELLMILTTSVAVLIGCVVVLVWRRSSSAAKKAAESPVIVVPKKVTEDEVDDGRKKVTVFFGTQTGTAEGFAKALVEEAKARYEKAVFKVIDLDDYAAEDDEYEEKLKKESLAFFFLATYGDGEPTDNAARFYKWFTEGEEKGEWLEKLQYAVFGLGNRQYEHFNKIAKVVDEKLVEQGAKRLVPVGMGDDDQCIEDDFTAWKELVWPELDQLLRDEDDTSVATPYTAAVAEYRVVFHDKPETYDQDQLTNGHAVHDAQHPCRSNVAVKKELHSPLSDRSCTHLEFDISNTGLSYETGDHVGVYVENLSEVVDEAEKLIGLPPHTYFSVHTDNEDGTPLGGASLPPPFPPCTLRKALASYADVLSSPKKSALLALAAHATDSTEADRLKFLASPAGKDEYAQWIVASHRSLLEVMEAFPSAKPPLGVFFASVAPRLQPRYYSISSSPKFAPNRIHVTCALVYEQTPSGRVHKGVCSTWMKNAVPMTESQDCSWAPIYVRTSNFRLPSDPKVPVIMIGPGTGLAPFRGFLQERLAQKEAGTELGTAILFFGCRNRKVDFIYEDELNNFVETGALSELVTAFSREGATKEYVQHKMTQKASDIWNLLSEGAYLYVCGDAKGMAKDVHRTLHTIVQEQGSLDSSKAELYVKNLQMAGRYLRDVW'
# Select the ith (1-indexed) non-overlapping window of `window_size` residues
# from the protein sequence as the test peptide.
window_size = 3
ith = 6
test_amino_seq = A0A2U1LIM9[window_size * (ith - 1):window_size * ith]

In [3]:
# Enumerate the candidate codons for the selected peptide and show both the
# peptide and the per-residue codon lists.
codon_seq = Amino_acid_to_Codon(test_amino_seq)
print('Amino acid seq:', test_amino_seq)
print('All possible codons:', codon_seq())

Amino acid seq: TAL
All possible codons: [['ACA', 'ACC', 'ACG', 'ACU'], ['GCA', 'GCC', 'GCG', 'GCU'], ['CUA', 'CUC', 'CUG', 'CUU', 'UUA', 'UUG']]


## 1. D-wave

In [4]:
from dwave.system.samplers import DWaveSampler
from dwave.system.composites import EmbeddingComposite
import matplotlib
matplotlib.use("agg")
from matplotlib import pyplot as plt

input

In [5]:
#spike_sars2_seq = 'MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT'
A0A2U1LIM9 = 'MQSTTSVKLSPFDLMTALLNGKVSFDTSNTSDTNIPLAVFMENRELLMILTTSVAVLIGCVVVLVWRRSSSAAKKAAESPVIVVPKKVTEDEVDDGRKKVTVFFGTQTGTAEGFAKALVEEAKARYEKAVFKVIDLDDYAAEDDEYEEKLKKESLAFFFLATYGDGEPTDNAARFYKWFTEGEEKGEWLEKLQYAVFGLGNRQYEHFNKIAKVVDEKLVEQGAKRLVPVGMGDDDQCIEDDFTAWKELVWPELDQLLRDEDDTSVATPYTAAVAEYRVVFHDKPETYDQDQLTNGHAVHDAQHPCRSNVAVKKELHSPLSDRSCTHLEFDISNTGLSYETGDHVGVYVENLSEVVDEAEKLIGLPPHTYFSVHTDNEDGTPLGGASLPPPFPPCTLRKALASYADVLSSPKKSALLALAAHATDSTEADRLKFLASPAGKDEYAQWIVASHRSLLEVMEAFPSAKPPLGVFFASVAPRLQPRYYSISSSPKFAPNRIHVTCALVYEQTPSGRVHKGVCSTWMKNAVPMTESQDCSWAPIYVRTSNFRLPSDPKVPVIMIGPGTGLAPFRGFLQERLAQKEAGTELGTAILFFGCRNRKVDFIYEDELNNFVETGALSELVTAFSREGATKEYVQHKMTQKASDIWNLLSEGAYLYVCGDAKGMAKDVHRTLHTIVQEQGSLDSSKAELYVKNLQMAGRYLRDVW'
# Select the ith (1-indexed) non-overlapping window of `window_size` residues
# from the protein sequence as the test peptide.
window_size = 3
ith = 6
test_amino_seq = A0A2U1LIM9[window_size * (ith - 1):window_size * ith]

In [6]:
# Enumerate the candidate codons for the selected peptide and show both the
# peptide and the per-residue codon lists.
codon_seq = Amino_acid_to_Codon(test_amino_seq)
print('Amino acid seq:', test_amino_seq)
print('All possible codons:', codon_seq())

Amino acid seq: TAL
All possible codons: [['ACA', 'ACC', 'ACG', 'ACU'], ['GCA', 'GCC', 'GCG', 'GCU'], ['CUA', 'CUC', 'CUG', 'CUU', 'UUA', 'UUG']]


Hamiltonian

In [7]:
# Weights and constants of the Hamiltonian terms (same values as in the
# Codon_Hamiltonian section).
weight_params = {
    'c_f': 0.1,          # c_f: weight of the codon-usage term H_f
    'c_GC': 1,           # c_GC: weight of the GC-content term H_GC
    'c_R': 0.1,          # c_R: weight of the repeated-nucleotide term H_R
    'epsilon_f': 0.001,  # epsilon_f: offset inside the log of H_f
    'rho_T': 0.5,        # rho_T: target GC concentration
    'epsilon': 1,        # epsilon: linear coefficient in H_p
    'infty': 10,         # presumably the penalty magnitude tau_ij in H_p -- TODO confirm
}
H_codon = Codon_Hamiltonian(test_amino_seq, weight_params)

In [22]:
#spike_sars2_seq = 'MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT'
#A0A2U1LIM9 = 'MQSTTSVKLSPFDLMTALLNGKVSFDTSNTSDTNIPLAVFMENRELLMILTTSVAVLIGCVVVLVWRRSSSAAKKAAESPVIVVPKKVTEDEVDDGRKKVTVFFGTQTGTAEGFAKALVEEAKARYEKAVFKVIDLDDYAAEDDEYEEKLKKESLAFFFLATYGDGEPTDNAARFYKWFTEGEEKGEWLEKLQYAVFGLGNRQYEHFNKIAKVVDEKLVEQGAKRLVPVGMGDDDQCIEDDFTAWKELVWPELDQLLRDEDDTSVATPYTAAVAEYRVVFHDKPETYDQDQLTNGHAVHDAQHPCRSNVAVKKELHSPLSDRSCTHLEFDISNTGLSYETGDHVGVYVENLSEVVDEAEKLIGLPPHTYFSVHTDNEDGTPLGGASLPPPFPPCTLRKALASYADVLSSPKKSALLALAAHATDSTEADRLKFLASPAGKDEYAQWIVASHRSLLEVMEAFPSAKPPLGVFFASVAPRLQPRYYSISSSPKFAPNRIHVTCALVYEQTPSGRVHKGVCSTWMKNAVPMTESQDCSWAPIYVRTSNFRLPSDPKVPVIMIGPGTGLAPFRGFLQERLAQKEAGTELGTAILFFGCRNRKVDFIYEDELNNFVETGALSELVTAFSREGATKEYVQHKMTQKASDIWNLLSEGAYLYVCGDAKGMAKDVHRTLHTIVQEQGSLDSSKAELYVKNLQMAGRYLRDVW'

### type(Q) = dict

In [8]:
# Assemble the QUBO as {(i, j): coefficient} with i <= j: diagonal entries
# come from Q_ii, strictly-upper-triangular entries from Q_ij.
n_vars = len(H_codon.Q_ii)
Q = {}
for row in range(n_vars):
    Q[(row, row)] = H_codon.Q_ii[row]
    for col in range(row + 1, n_vars):
        Q[(row, col)] = H_codon.Q_ij[row, col]


### Simulation

In [9]:
from dwave.system import DWaveSampler, EmbeddingComposite

In [10]:
# ------- Run our QUBO on the QPU -------
# Set up QPU parameters
# TODO: we probably still need to tune these ourselves.
chainstrength = 15 #min: 12
numruns = 10000

In [11]:
# Run the QUBO on the solver from your config file
sampler = EmbeddingComposite(DWaveSampler())
response = sampler.sample_qubo(Q,
                               chain_strength=chainstrength,
                               num_reads=numruns,
                               #num_spin_reversal_tramsforms=50,
                               label='Codon_sequence')

In [12]:
# Tabulate the returned samples: which variables are set to 1, the sample
# energy, and the truncated negated energy. The header now has one label per
# printed column (it previously listed four labels for three values).
# NOTE(review): the int(-Energy) column looks like a leftover from a max-cut
# template ("Cut Size"); consider dropping it.
row_format = '{:>20s}{:^24s}{:^16s}'
print('-' * 60)
print(row_format.format('Set 1', 'Energy', 'int(-Energy)'))
print('-' * 60)
for sample, E in response.data(fields=['sample','energy']):
    # Indices of the binary variables selected (value 1) in this sample.
    S1 = [k for k,v in sample.items() if v == 1]
    print(row_format.format(str(S1), str(E), str(int(-1*E))))

------------------------------------------------------------
          Set 0          Set 1    Energy        Cut Size    
------------------------------------------------------------
     [3, 6, 10]-2.8912602179292537       2       
     [3, 6, 10]-2.8912602179292537       2       
     [0, 6, 10]-2.8706395137509224       2       
     [3, 4, 10]-2.822252112994592       2       
     [0, 4, 10]-2.801631408816261       2       
     [3, 7, 10]-2.795206736145432       2       
     [0, 7, 10]-2.7745860319671007       2       
      [3, 6, 9]-2.731111659504893       2       
      [0, 6, 9]-2.710490955326562       2       
     [1, 6, 10]-2.697126309791818       2       
      [3, 4, 9]-2.6621035545702316       2       
      [3, 4, 9]-2.6621035545702316       2       
     [2, 6, 10]-2.648433704335104       2       
      [0, 4, 9]-2.6414828503919003       2       
      [0, 4, 9]-2.6414828503919003       2       
     [1, 4, 10]-2.6383222864898097       2       
      [3, 7, 9]-2.635058

[0, 2, 3, 11, 15, 17, 25]-6.592669715640199       6  <br>
[0, 2, 3, 11, 15, 17, 24, 28]-7.0986156153496704       7 

[0, 2, 3, 11, 13, 20, 24, 28]-8.333211366064825       8 <br>
[0, 2, 8, 12, 13, 19]-7.040679745020278       7 <br>
[0, 2, 4, 9, 13, 19, 25, 28, 30, 39]-10.834208550073477      10 <br>
[0, 2, 7, 12, 14, 17, 24, 28, 31, 39]-7.867976766148219       7  

In [13]:
H_codon.list_all_possible_codons

[['ACA', 'ACC', 'ACG', 'ACU'],
 ['GCA', 'GCC', 'GCG', 'GCU'],
 ['CUA', 'CUC', 'CUG', 'CUU', 'UUA', 'UUG']]

## 2. Exact Diagonalization

### Constructing Ising model from QUBO

In [57]:
J, h, const = H_codon.Q_to_Jh()

In [58]:
# Build the Ising model from the couplings J, fields h and constant shift
# returned by H_codon.Q_to_Jh(), then access its Hamiltonian.
tests = Quantum_Ising(J=J, h=h, shift=const)
tests.hamiltonian

In [32]:
# Run the exact diagonalization (GS is presumably the ground-state vector --
# TODO confirm) and display it as a {basis ket: value} dict.
GS = tests.ExactDiag()
vec_to_braket(GS)

{'|010010001>': 1.0}

In [33]:
tests.GE

-3.1318536708463895