# Identify missing residues

Given a PDB entry ID, the goal of this notebook is to generate a $(13\times L\times 3)$ coordinate tensor, together with a $(13\times L)$ mask identifying which atoms are missing.

In [75]:
import sys
sys.path.append('/home/jok120/protein-transformer/protein')
import Sidechains
import numpy as np

# Playground

## Occupancies

In [2]:
from prody import *

In [7]:
# Parse PDB entry 1a9u into `b`; later cells inspect its labels, occupancies and names.
b = parsePDB('1a9u')
b

In [13]:
b.getDataLabels()

['altloc',
 'beta',
 'chain',
 'element',
 'icode',
 'mass',
 'name',
 'occupancy',
 'resname',
 'resnum',
 'segment',
 'serial']

In [15]:
b.getOccupancies()[:30]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [24]:
# Keep only protein atoms whose occupancy is non-zero, then list their residue names.
bp = b.select('protein and occupancy > 0')
bp.getResnames()

array(['ARG', 'ARG', 'ARG', ..., 'ASP', 'ASP', 'ASP'], dtype='<U6')

In [23]:
b.getResnames()

array(['GLU', 'GLU', 'GLU', ..., 'HOH', 'HOH', 'HOH'], dtype='<U6')

In [18]:
b.getNames()

array(['N', 'CA', 'C', ..., 'O', 'O', 'O'], dtype='<U6')

In [25]:
b.select('protein and occupancy <= 0').getResnums()

array([  4,   4,   4,   4,   4,   4,   4,   4,   4, 172, 172, 172, 172,
       172, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 174,
       174, 174, 174, 174, 174, 174, 174, 174, 174, 175, 175, 175, 175,
       175, 175, 175, 176, 176, 176, 176, 176, 176, 176, 176])

In [26]:
b.select('protein and occupancy <= 0').getNames()

array(['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'OE2', 'N', 'CA',
       'C', 'O', 'CB', 'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'NE', 'CZ',
       'NH1', 'NH2', 'N', 'CA', 'C', 'O', 'CB', 'CG', 'ND1', 'CD2', 'CE1',
       'NE2', 'N', 'CA', 'C', 'O', 'CB', 'OG1', 'CG2', 'N', 'CA', 'C',
       'O', 'CB', 'CG', 'OD1', 'OD2'], dtype='<U6')

## From SEQRES

In [37]:
def get_coordinate_mask(pdbid, chid):
    """Parse the header of a PDB entry and return it (work in progress).

    Despite its name, this function does not build a coordinate mask yet: it
    only calls ``parsePDB`` with ``model=0`` and ``header=True`` and hands back
    the result (a header dictionary in the recorded run of this notebook).

    Args:
        pdbid: PDB identifier passed straight to ``parsePDB``.
        chid: Chain identifier, passed to ``parsePDB`` and used as a key into
            the parsed header.

    Returns:
        The object returned by ``parsePDB`` for the given entry.
    """
    header = parsePDB(pdbid, header=True, chain=chid, model=0)
    # The per-chain entry is looked up but not used yet; the lookup is kept so
    # a chain that is absent from the header still fails here.
    chain_entry = header[chid]
    return header

In [38]:
get_coordinate_mask('1a9u', 'A')

{'helix': {('A', 62, ''): (1, 1, '1'),
  ('A', 63, ''): (1, 1, '1'),
  ('A', 64, ''): (1, 1, '1'),
  ('A', 65, ''): (1, 1, '1'),
  ('A', 66, ''): (1, 1, '1'),
  ('A', 67, ''): (1, 1, '1'),
  ('A', 68, ''): (1, 1, '1'),
  ('A', 69, ''): (1, 1, '1'),
  ('A', 70, ''): (1, 1, '1'),
  ('A', 71, ''): (1, 1, '1'),
  ('A', 72, ''): (1, 1, '1'),
  ('A', 73, ''): (1, 1, '1'),
  ('A', 74, ''): (1, 1, '1'),
  ('A', 75, ''): (1, 1, '1'),
  ('A', 76, ''): (1, 1, '1'),
  ('A', 77, ''): (1, 1, '1'),
  ('A', 113, ''): (1, 2, '2'),
  ('A', 114, ''): (1, 2, '2'),
  ('A', 115, ''): (1, 2, '2'),
  ('A', 116, ''): (1, 2, '2'),
  ('A', 117, ''): (1, 2, '2'),
  ('A', 118, ''): (1, 2, '2'),
  ('A', 119, ''): (1, 2, '2'),
  ('A', 124, ''): (1, 3, '3'),
  ('A', 125, ''): (1, 3, '3'),
  ('A', 126, ''): (1, 3, '3'),
  ('A', 127, ''): (1, 3, '3'),
  ('A', 128, ''): (1, 3, '3'),
  ('A', 129, ''): (1, 3, '3'),
  ('A', 130, ''): (1, 3, '3'),
  ('A', 131, ''): (1, 3, '3'),
  ('A', 132, ''): (1, 3, '3'),
  ('A', 133, ''

In [44]:
# Parse chain A of 1a9u with its header: `p` holds the parsed atoms, `h` the header.
p, h = parsePDB('1a9u',chain="A", header=True)
# The header is indexed by chain ID; the chain entry exposes the full sequence
# (presumably from SEQRES, per the section title — verify), which can contain
# residues that have no coordinates.
poly = h["A"]
seq = poly.sequence

In [51]:
# Three-letter residue names mapped to one-letter codes for the 20 standard
# amino acids; the two sequences below are paired position by position.
d = dict(zip(
    "CYS ASP SER GLN LYS ILE PRO THR PHE ASN GLY HIS LEU ARG TRP ALA VAL GLU TYR MET".split(),
    "CDSQKIPTFNGHLRWAVEYM",
))

In [43]:
seq

'GSSHHHHHHSSGLVPRGSHMSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQSIIHAKRTYRELRLLKHMKHENVIGLLDVFTPARSLEEFNDVYLVTHLMGADLNNIVKCQKLTDDHVQFLIYQILRGLKYIHSADIIHRDLKPSNLAVNEDCELKILDFGLARHTDDEMTGYVATRWYRAPEIMLNWMHYNQTVDIWSVGCIMAELLTGRTLFPGTDHIDQLKLILRLVGTPGAELLKKISSESARNYIQSLTQMPKMNFANVFIGANPLAVDLLEKMLVLDSDKRITAAQALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES'

In [90]:
# Walk the protein residues of the parsed structure `p` and collect, for each
# residue, the coordinates of its expected atoms (13 rows per residue), while
# recording the observed one-letter sequence and residue numbers.
observed_seq = ""
observed_seqnums = []
pp = p.select('protein')
hv = pp.getHierView()
# Row used to pad every residue up to 13 atom slots.
padchar = np.array([np.nan]*3)

for chain in hv:
    all_atoms = []
    for residue in chain:
        res_atom_coords = []
        resname = residue.getResname()
        resnum = residue.getResnum()
        expected_atoms = Sidechains.SC_DATA[resname]["all"]

        # Record the residue once (this check previously ran once per atom).
        # NOTE(review): consecutive residues sharing a residue number would be
        # recorded only once here while still adding 13 coordinate rows below.
        if len(observed_seqnums) == 0 or resnum != observed_seqnums[-1]:
            observed_seq += d[resname]
            observed_seqnums += [resnum]

        for ea in expected_atoms:
            # Compare by value: `is` on a string literal only works through
            # interpreter string interning and triggers a SyntaxWarning.
            if ea == "O": continue
            a = residue.select(f"name {ea}")
            if a:
                print(resname, a.getNames()[0], a.getCoords()[0])
                res_atom_coords.append(a.getCoords()[0])
            else:
                # Expected atom not found: stored as zeros, which keeps it
                # distinct from the NaN rows used for padding.
                print(resname, ea, np.array([0,0,0]))
                res_atom_coords.append(np.array([0,0,0]))
        # Pad the residue to exactly 13 rows with NaN triples.
        all_atoms.extend(res_atom_coords + (13 - len(res_atom_coords))*[padchar])
    # Only the first chain of the hierarchical view is processed.
    break

GLU N [24.643  4.56  26.403]
GLU CA [23.481  4.689 25.477]
GLU C [23.878  4.469 23.998]
GLU CB [22.828  6.079 25.652]
GLU CG [21.488  6.067 26.406]
GLU CD [21.123  7.413 27.028]
GLU OE1 [21.901  7.908 27.869]
GLU OE2 [20.055  7.972 26.681]
ARG N [23.381  5.351 23.135]
ARG CA [23.633  5.335 21.693]
ARG C [23.22   4.114 20.891]
ARG CB [25.097  5.646 21.376]
ARG CG [25.263  6.353 20.024]
ARG CD [24.576  7.722 20.027]
ARG NE [23.91   8.029 18.765]
ARG CZ [23.094  9.062 18.585]
ARG NH1 [22.838  9.897 19.583]
ARG NH2 [22.536  9.266 17.399]
PRO N [21.902  3.931 20.708]
PRO CA [21.39   2.798 19.946]
PRO C [21.171  3.307 18.53 ]
PRO CB [20.076  2.48  20.638]
PRO CG [19.603  3.813 21.177]
PRO CD [20.796  4.751 21.24 ]
THR N [20.488  2.518 17.709]
THR CA [20.221  2.955 16.357]
THR C [18.905  3.697 16.37 ]
THR CB [20.139  1.779 15.372]
THR OG1 [21.426  1.166 15.265]
THR CG2 [19.713  2.263 13.985]
PHE N [18.938  4.896 15.815]
PHE CA [17.763  5.742 15.735]
PHE C [17.195  5.609 14.336]
PHE CB [18.146

ARG NH1 [19.356 20.164 39.347]
ARG NH2 [18.569 18.662 40.907]
LEU N [13.213 17.38  37.457]
LEU CA [12.064 16.915 38.237]
LEU C [11.706 15.456 37.927]
LEU CB [10.861 17.812 37.958]
LEU CG [10.78  19.086 38.789]
LEU CD1 [ 9.695 19.982 38.202]
LEU CD2 [10.49  18.738 40.252]
LEU N [11.462 15.173 36.647]
LEU CA [11.112 13.83  36.187]
LEU C [12.238 12.819 36.474]
LEU CB [10.786 13.865 34.68 ]
LEU CG [ 9.319 13.797 34.199]
LEU CD1 [ 8.383 14.393 35.235]
LEU CD2 [ 9.184 14.52  32.868]
LYS N [13.482 13.265 36.347]
LYS CA [14.603 12.392 36.622]
LYS C [14.638 12.044 38.113]
LYS CB [15.915 13.053 36.193]
LYS CG [16.349 12.69  34.774]
LYS CD [17.736 13.209 34.417]
LYS CE [18.798 12.411 35.124]
LYS NZ [20.172 12.624 34.606]
HIS N [14.15  12.935 38.968]
HIS CA [14.162 12.675 40.401]
HIS C [13.01  11.815 40.955]
HIS CB [14.205 14.002 41.154]
HIS CG [14.653 13.875 42.578]
HIS ND1 [13.768 13.762 43.632]
HIS CD2 [15.894 13.828 43.122]
HIS CE1 [14.445 13.651 44.763]
HIS NE2 [15.736 13.688 44.48 ]
MET N [1

GLN NE2 [-5.114  4.322 44.681]
ILE N [-3.021  9.561 46.549]
ILE CA [-2.668 10.936 46.261]
ILE C [-1.798 11.448 47.409]
ILE CB [-3.917 11.833 46.141]
ILE CG1 [-4.861 11.299 45.057]
ILE CG2 [-3.488 13.249 45.801]
ILE CD1 [-6.046 12.222 44.78 ]
LEU N [-2.205 11.143 48.649]
LEU CA [-1.469 11.569 49.85 ]
LEU C [-0.104 10.885 50.012]
LEU CB [-2.314 11.317 51.102]
LEU CG [-3.533 12.218 51.341]
LEU CD1 [-4.35  11.631 52.474]
LEU CD2 [-3.101 13.647 51.674]
ARG N [-3.0000e-02  9.6130e+00  4.9629e+01]
ARG CA [ 1.215  8.852 49.689]
ARG C [ 2.221  9.478 48.713]
ARG CB [ 0.932  7.414 49.283]
ARG CG [ 2.088  6.466 49.443]
ARG CD [ 1.644  5.068 49.059]
ARG NE [ 2.741  4.109 49.118]
ARG CZ [ 3.182  3.405 48.076]
ARG NH1 [ 2.619  3.545 46.879]
ARG NH2 [ 4.192  2.554 48.229]
GLY N [ 1.772  9.739 47.494]
GLY CA [ 2.644 10.347 46.509]
GLY C [ 3.023 11.755 46.913]
LEU N [ 2.087 12.462 47.543]
LEU CA [ 2.307 13.836 47.992]
LEU C [ 3.281 13.932 49.163]
LEU CB [ 0.977 14.457 48.38 ]
LEU CG [ 0.78  15.936 48.04

PRO CB [-8.229 31.236 49.624]
PRO CG [-8.058 30.053 50.541]
PRO CD [-7.731 28.894 49.663]
GLU N [-5.144 31.53  49.304]
GLU CA [-3.913 32.305 49.429]
GLU C [-2.999 32.265 48.204]
GLU CB [-3.137 31.886 50.701]
GLU CG [-2.337 30.577 50.611]
GLU CD [-3.143 29.347 51.032]
GLU OE1 [-4.357 29.488 51.31 ]
GLU OE2 [-2.559 28.241 51.08 ]
ILE N [-2.987 31.168 47.453]
ILE CA [-2.155 31.126 46.256]
ILE C [-2.997 31.722 45.119]
ILE CB [-1.682 29.671 45.924]
ILE CG1 [-0.912 29.641 44.605]
ILE CG2 [-2.851 28.734 45.831]
ILE CD1 [-0.216 28.303 44.333]
MET N [-4.314 31.551 45.207]
MET CA [-5.239 32.071 44.199]
MET C [-5.325 33.599 44.142]
MET CB [-6.639 31.496 44.44 ]
MET CG [-7.716 32.083 43.551]
MET SD [-9.308 31.267 43.762]
MET CE [-10.062  32.35   44.982]
LEU N [-5.331 34.238 45.305]
LEU CA [-5.407 35.691 45.388]
LEU C [-4.058 36.275 45.79 ]
LEU CB [-6.453 36.105 46.419]
LEU CG [-7.815 36.633 45.969]
LEU CD1 [-7.907 36.797 44.446]
LEU CD2 [-8.85  35.665 46.475]
ASN N [-3.026 35.442 45.732]
ASN CA [-

SER CA [-1.003 42.847 50.725]
SER C [-2.038 43.636 51.515]
SER CB [-1.618 42.39  49.401]
SER OG [-2.723 43.197 49.044]
GLU N [-1.8   44.922 51.729]
GLU CA [-2.755 45.717 52.487]
GLU C [-4.146 45.842 51.846]
GLU CB [-2.194 47.104 52.769]
GLU CG [-3.044 47.894 53.751]
GLU CD [-2.724 49.378 53.736]
GLU OE1 [-1.546 49.726 53.482]
GLU OE2 [-3.648 50.193 53.976]
SER N [-4.234 45.816 50.513]
SER CA [-5.55  45.901 49.853]
SER C [-6.262 44.564 50.049]
SER CB [-5.433 46.193 48.342]
SER OG [-4.092 46.14  47.894]
ALA N [-5.491 43.477 50.028]
ALA CA [-6.056 42.154 50.241]
ALA C [-6.619 42.107 51.667]
ALA CB [-4.989 41.083 50.049]
ARG N [-5.859 42.597 52.639]
ARG CA [-6.352 42.587 54.003]
ARG C [-7.584 43.486 54.083]
ARG CB [-5.269 43.065 54.966]
ARG CG [-4.759 41.976 55.891]
ARG CD [-3.3   42.177 56.231]
ARG NE [-3.001 43.574 56.525]
ARG CZ [-1.986 44.247 55.992]
ARG NH1 [-1.179 43.651 55.124]
ARG NH2 [-1.783 45.519 56.314]
ASN N [-7.509 44.654 53.44 ]
ASN CA [-8.613 45.625 53.4  ]
ASN C [-9.853 45

HIS CD2 [-0.083  5.156 57.09 ]
HIS CE1 [ 0.218  5.202 59.262]
HIS NE2 [ 0.739  5.573 58.108]
ASP N [ 0.295  1.974 56.171]
ASP CA [ 1.363  1.088 56.605]
ASP C [ 2.546  2.034 56.521]
ASP CB [ 1.512 -0.073 55.623]
ASP CG [ 2.796 -0.869 55.839]
ASP OD1 [ 3.547 -0.551 56.801]
ASP OD2 [ 3.045 -1.808 55.04 ]
PRO N [ 2.994  2.564 57.669]
PRO CA [ 4.116  3.507 57.691]
PRO C [ 5.37   3.008 57.025]
PRO CB [ 4.333  3.807 59.177]
PRO CG [ 3.605  2.735 59.915]
PRO CD [ 2.507  2.241 59.021]
ASP N [ 5.491  1.692 56.893]
ASP CA [ 6.684  1.121 56.283]
ASP C [ 6.595  1.112 54.774]
ASP CB [ 6.927 -0.287 56.808]
ASP CG [ 7.259 -0.295 58.276]
ASP OD1 [ 7.935  0.657 58.73 ]
ASP OD2 [ 6.843 -1.244 58.976]
ASP N [ 5.414  1.409 54.249]
ASP CA [ 5.251  1.432 52.812]
ASP C [ 4.893  2.808 52.278]
ASP CB [ 4.191  0.427 52.371]
ASP CG [ 4.3910e+00 -1.7000e-02  5.0934e+01]
ASP OD1 [ 5.39   0.417 50.303]
ASP OD2 [ 3.551 -0.797 50.44 ]
GLU N [ 5.328  3.849 52.976]
GLU CA [ 5.059  5.203 52.525]
GLU C [ 6.397  5.926 52.5

In [72]:
observed_seq

'ERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQSIIHAKRTYRELRLLKHMKHENVIGLLDVFTPARSLEEFNDVYLVTHLMGADLNNIVKCQKLTDDHVQFLIYQILRGLKYIHSADIIHRDLKPSNLAVNEDCELKILDFGLARHTDDEMTGYVATRWYRAPEIMLNWMHYNQTVDIWSVGCIMAELLTGRTLFPGTDHIDQLKLILRLVGTPGAELLKKISSESARNYIQSLTQMPKMNFANVFIGANPLAVDLLEKMLVLDSDKRITAAQALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLD'

In [73]:
observed_seqnums

[4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,
 185,
 186,
 187

In [91]:
np.asarray(all_atoms)[:27]

array([[24.643,  4.56 , 26.403],
       [23.481,  4.689, 25.477],
       [23.878,  4.469, 23.998],
       [22.828,  6.079, 25.652],
       [21.488,  6.067, 26.406],
       [21.123,  7.413, 27.028],
       [21.901,  7.908, 27.869],
       [20.055,  7.972, 26.681],
       [   nan,    nan,    nan],
       [   nan,    nan,    nan],
       [   nan,    nan,    nan],
       [   nan,    nan,    nan],
       [   nan,    nan,    nan],
       [23.381,  5.351, 23.135],
       [23.633,  5.335, 21.693],
       [23.22 ,  4.114, 20.891],
       [25.097,  5.646, 21.376],
       [25.263,  6.353, 20.024],
       [24.576,  7.722, 20.027],
       [23.91 ,  8.029, 18.765],
       [23.094,  9.062, 18.585],
       [22.838,  9.897, 19.583],
       [22.536,  9.266, 17.399],
       [   nan,    nan,    nan],
       [   nan,    nan,    nan],
       [   nan,    nan,    nan],
       [21.902,  3.931, 20.708]])

## Alignment

In [217]:
from Bio import Align

In [218]:
s1 = seq
s2 = observed_seq

In [243]:
# Configure the aligner used to place the observed sequence (query) onto the
# full sequence (target); the printed parameters below show it runs in global mode.
a = Align.PairwiseAligner()
# Heavily penalise gaps in the target (sets every target open/extend gap score).
a.target_gap_score = -999
# Gaps in the query get a positive score: they stand for residues of the full
# sequence that were not observed in the structure.
a.query_gap_score = 1
a.query_extend_gap_score = 1
# Reward matches and make mismatches prohibitively expensive.
# NOTE(review): newer Biopython releases appear to name these attributes
# `match_score` / `mismatch_score` — confirm against the installed version.
a.match = 3
a.mismatch = -9999

In [244]:
print(a)

Pairwise sequence aligner with parameters
  match score: 3.000000
  mismatch score: -9999.000000
  target open gap score: -999.000000
  target extend gap score: -999.000000
  target left open gap score: -999.000000
  target left extend gap score: -999.000000
  target right open gap score: -999.000000
  target right extend gap score: -999.000000
  query open gap score: 1.000000
  query extend gap score: 1.000000
  query left open gap score: 1.000000
  query left extend gap score: 1.000000
  query right open gap score: 1.000000
  query right extend gap score: 1.000000
  mode: global



In [245]:
print(a.align(seq, observed_seq)[0])

GSSHHHHHHSSGLVPRGSHMSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQSIIHAKRTYRELRLLKHMKHENVIGLLDVFTPARSLEEFNDVYLVTHLMGADLNNIVKCQKLTDDHVQFLIYQILRGLKYIHSADIIHRDLKPSNLAVNEDCELKILDFGLARHTDDEMTGYVATRWYRAPEIMLNWMHYNQTVDIWSVGCIMAELLTGRTLFPGTDHIDQLKLILRLVGTPGAELLKKISSESARNYIQSLTQMPKMNFANVFIGANPLAVDLLEKMLVLDSDKRITAAQALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
----------------------|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||------
----------------------ERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQSIIHAKRTYRELRLLKHMKHENVIGLLDVFTPARSLEEFNDVYLVTHLMGADLNNIVKCQKLTDDHVQFLIYQILRGLKYIHSADIIHRDLKPSNLAVNEDCELKILDFGLARHTDDEMTGYVATRWYRAPEIMLNWMHYNQTVDIWSVGCIMAELLTGRT

In [248]:
# Toy check of the scoring scheme: the query is two fragments of the target,
# so the best alignment should contain matches and query gaps only.
a2 = a.align("ALPHABETSOUPCOUPTROUPREEEE", "ETSOUPTRO")
print(a2[0])
print(a2[0].score)

ALPHABETSOUPCOUPTROUPREEEE
------||||||----|||-------
------ETSOUP----TRO-------

44.0


In [236]:
print(a2)

<Bio.Align.PairwiseAlignments object at 0x7f20241af9b0>


TypeError: 'NoneType' object is not subscriptable

In [229]:
# Simulate internal missing stretches: two residues are removed from the full
# sequence and three segments are removed from the observed sequence before
# aligning; the first (best-ranked) alignment is kept.
# a1 = a.align(s1[:50]+s1[62:68]+s1[80:], s2)[0]
a1 = a.align(seq[:100]+seq[102:], observed_seq[:50]+observed_seq[62:68]+observed_seq[80:100]+observed_seq[109:])[0]

In [230]:
print(a1)

GSSHHHHHHSSGLVPRGSHMSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQSIIHAKRTYRELRLLKHMKHEIGLLDVFTPARSLEEFNDVYLVTHLMGADLNNIVKCQKLTDDHVQFLIYQILRGLKYIHSADIIHRDLKPSNLAVNEDCELKILDFGLARHTDDEMTGYVATRWYRAPEIMLNWMHYNQTVDIWSVGCIMAELLTGRTLFPGTDHIDQLKLILRLVGTPGAELLKKISSESARNYIQSLTQMPKMNFANVFIGANPLAVDLLEKMLVLDSDKRITAAQALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
----------------------||||||||||||||||||||||||||||||||||||||||||||||||||------------|||||----------|||||||||||||||||||||---------||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||------
----------------------ERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVK------------KRTYR----------EIGLLDVFTPARSLEEFNDVY---------LNNIVKCQKLTDDHVQFLIYQILRGLKYIHSADIIHRDLKPSNLAVNEDCELKILDFGLARHTDDEMTGYVATRWYRAPEIMLNWMHYNQTVDIWSVGCIMAELLTGRTLFPGTD

In [231]:
a1.score

639.0

In [196]:
a1.target

'GSSHHHHHHSSGLVPRGSHMSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQSIIHAKRTYRELRLLKHMKHENVIGLLDVFTPARSLEEFNDVYLVTHLMGADLNNIVKCQKLTDDHVQFLIYQILRGLKYIHSADIIHRDLKPSNLAVNEDCELKILDFGLARHTDDEMTGYVATRWYRAPEIMLNWMHYNQTVDIWSVGCIMAELLTGRTLFPGTDHIDQLKLILRLVGTPGAELLKKISSESARNYIQSLTQMPKMNFANVFIGANPLAVDLLEKMLVLDSDKRITAAQALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES'

In [197]:
a1.query

'ERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKRTYREIGLLDVFTPARSLEEFNDVYLNNIVKCQKLTDDHVQFLIYQILRGLKYIHSADIIHRDLKPSNLAVNEDCELKILDFGLARHTDDEMTGYVATRWYRAPEIMLNWMHYNQTVDIWSVGCIMAELLTGRTLFPGTDHIDQLKLILRLVGTPGAELLKKISSESARNYIQSLTQMPKMNFANVFIGANPLAVDLLEKMLVLDSDKRITAAQALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLD'

In [193]:
a1.path

((0, 0),
 (22, 0),
 (72, 50),
 (84, 50),
 (90, 56),
 (102, 56),
 (373, 327),
 (379, 327))

In [None]:
def trim_ends(complete, observed):
    