Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implements Py3-compatible rich comparison for all Bio.PDB objects #1360

Merged
merged 10 commits into from Aug 15, 2017
59 changes: 59 additions & 0 deletions Bio/PDB/Atom.py
Expand Up @@ -70,6 +70,61 @@ def __init__(self, name, coord, bfactor, occupancy, altloc, fullname, serial_num
self.element = self._assign_element(element)
self.mass = self._assign_atom_mass()

# For atom sorting (protein backbone atoms first)
self._sorting_keys = {'N': 0, 'CA': 1, 'C': 2, 'O': 3}

# Sorting Methods
# standard across different objects and allows direct comparison
def __eq__(self, other):
    """Return True when both atoms share the same id and altloc.

    Objects that do not expose ``id`` and ``altloc`` are not comparable
    with an atom; ``NotImplemented`` is returned for them so Python can
    fall back to its default handling instead of raising AttributeError
    (e.g. for ``atom == None``).
    """
    try:
        other_key = (other.id, other.altloc)
    except AttributeError:
        return NotImplemented
    return (self.id, self.altloc) == other_key

def __ne__(self, other):
    """Return True when the atoms differ in id or altloc.

    Objects that do not expose ``id`` and ``altloc`` yield
    ``NotImplemented`` so Python can fall back to its default handling
    instead of raising AttributeError (e.g. for ``atom != None``).
    """
    try:
        other_key = (other.id, other.altloc)
    except AttributeError:
        return NotImplemented
    return (self.id, self.altloc) != other_key

def __gt__(self, other):
    """Return True if this atom sorts after ``other``.

    Atoms are ranked through ``self._sorting_keys`` first (names missing
    from the table get rank 4), then by name, then by altloc.
    """
    rank_of = self._sorting_keys.get
    mine = rank_of(self.name, 4)
    theirs = rank_of(other.name, 4)
    if mine != theirs:
        return mine > theirs
    if self.name != other.name:
        return self.name > other.name
    # Same name: the alternative location is the tie-breaker.
    return self.altloc > other.altloc

def __ge__(self, other):
    """Return True if this atom sorts after, or level with, ``other``.

    Atoms are ranked through ``self._sorting_keys`` first (names missing
    from the table get rank 4), then by name, then by altloc.
    """
    rank_of = self._sorting_keys.get
    mine = rank_of(self.name, 4)
    theirs = rank_of(other.name, 4)
    if mine != theirs:
        return mine >= theirs
    if self.name != other.name:
        return self.name >= other.name
    # Same name: the alternative location is the tie-breaker.
    return self.altloc >= other.altloc

def __lt__(self, other):
    """Return True if this atom sorts before ``other``.

    Atoms are ranked through ``self._sorting_keys`` first (names missing
    from the table get rank 4), then by name, then by altloc.
    """
    rank_of = self._sorting_keys.get
    mine = rank_of(self.name, 4)
    theirs = rank_of(other.name, 4)
    if mine != theirs:
        return mine < theirs
    if self.name != other.name:
        return self.name < other.name
    # Same name: the alternative location is the tie-breaker.
    return self.altloc < other.altloc

def __le__(self, other):
    """Return True if this atom sorts before, or level with, ``other``.

    Atoms are ranked through ``self._sorting_keys`` first (names missing
    from the table get rank 4), then by name, then by altloc.
    """
    rank_of = self._sorting_keys.get
    mine = rank_of(self.name, 4)
    theirs = rank_of(other.name, 4)
    if mine != theirs:
        return mine <= theirs
    if self.name != other.name:
        return self.name <= other.name
    # Same name: the alternative location is the tie-breaker.
    return self.altloc <= other.altloc

# Hash method to allow uniqueness (set)
def __hash__(self):
    """Return the hash of the value produced by ``get_full_id()``.

    NOTE(review): ``__eq__`` compares only ``(id, altloc)``, so two atoms
    that compare equal may still hash differently - confirm intended.
    """
    full_id = self.get_full_id()
    return hash(full_id)

def _assign_element(self, element):
"""Tries to guess element from atom name if not recognised."""
if not element or element.capitalize() not in IUPACData.atom_weights:
Expand Down Expand Up @@ -315,6 +370,10 @@ def __init__(self, id):
DisorderedEntityWrapper.__init__(self, id)

# Special methods
# Override parent class __iter__ method
def __iter__(self):
    """Yield each item returned by ``disordered_get_list()`` in order."""
    for atom in self.disordered_get_list():
        yield atom

def __repr__(self):
    """Return a short printable tag built from ``get_id()``."""
    atom_id = self.get_id()
    return "<Disordered Atom %s>" % atom_id
Expand Down
33 changes: 33 additions & 0 deletions Bio/PDB/Chain.py
Expand Up @@ -13,6 +13,39 @@ def __init__(self, id):
self.level = "C"
Entity.__init__(self, id)

# Sorting methods: empty chain IDs come last.
def __gt__(self, other):
    """Return True if this chain sorts after ``other``.

    Chains with a blank id (' ') come last, so a blank chain is greater
    than any non-blank chain and never the other way round. Otherwise
    the ids are compared directly.
    """
    self_blank = self.id == ' '
    other_blank = other.id == ' '
    if self_blank != other_blank:
        # Exactly one id is blank: the blank one is the greater chain.
        return self_blank
    return self.id > other.id

def __ge__(self, other):
    """Return True if this chain sorts after, or level with, ``other``.

    Chains with a blank id (' ') come last, so a blank chain is greater
    than any non-blank chain and never the other way round. Otherwise
    the ids are compared directly.
    """
    self_blank = self.id == ' '
    other_blank = other.id == ' '
    if self_blank != other_blank:
        # Exactly one id is blank: the blank one is the greater chain.
        return self_blank
    return self.id >= other.id

def __lt__(self, other):
    """Return a truthy value if this chain sorts before ``other``.

    When exactly one of the two ids is blank (' '), the non-blank chain
    is the smaller one (1 is returned for it, 0 for the blank chain).
    Otherwise the ids are compared directly.
    """
    mine, theirs = self.id, other.id
    self_blank = mine == ' '
    other_blank = theirs == ' '
    if self_blank != other_blank:
        return 1 if other_blank else 0
    return mine < theirs

def __le__(self, other):
    """Return a truthy value if this chain sorts before or level with ``other``.

    When exactly one of the two ids is blank (' '), the non-blank chain
    is the smaller one (1 is returned for it, 0 for the blank chain).
    Otherwise the ids are compared directly.
    """
    mine, theirs = self.id, other.id
    self_blank = mine == ' '
    other_blank = theirs == ' '
    if self_blank != other_blank:
        return 1 if other_blank else 0
    return mine <= theirs

def _translate_id(self, id):
"""Translate sequence identifer to tuple form (PRIVATE).

Expand Down
38 changes: 38 additions & 0 deletions Bio/PDB/Entity.py
Expand Up @@ -52,6 +52,30 @@ def __iter__(self):
for child in self.child_list:
yield child

# Generic id-based comparison methods
# Works for Structures and Models (id is numeric)
def __eq__(self, other):
    """Return True when both objects carry the same ``id``.

    Objects without an ``id`` attribute are not comparable with an
    entity; ``NotImplemented`` is returned for them so Python can fall
    back to its default handling instead of raising AttributeError
    (e.g. for ``entity == None``).
    """
    try:
        other_id = other.id
    except AttributeError:
        return NotImplemented
    return self.id == other_id

def __ne__(self, other):
    """Return True when the two objects carry different ``id`` values.

    Objects without an ``id`` attribute yield ``NotImplemented`` so
    Python can fall back to its default handling instead of raising
    AttributeError (e.g. for ``entity != None``).
    """
    try:
        other_id = other.id
    except AttributeError:
        return NotImplemented
    return self.id != other_id

def __gt__(self, other):
    """Return True if this object's ``id`` is greater than ``other.id``."""
    mine, theirs = self.id, other.id
    return mine > theirs

def __ge__(self, other):
    """Return True if this object's ``id`` is at least ``other.id``."""
    mine, theirs = self.id, other.id
    return mine >= theirs

def __lt__(self, other):
    """Return True if this object's ``id`` is smaller than ``other.id``."""
    mine, theirs = self.id, other.id
    return mine < theirs

def __le__(self, other):
    """Return True if this object's ``id`` is at most ``other.id``."""
    mine, theirs = self.id, other.id
    return mine <= theirs

# Hash method to allow uniqueness (set)
def __hash__(self):
    """Return the hash of this object's ``id``."""
    identifier = self.id
    return hash(identifier)

# Private methods

def _reset_full_id(self):
Expand Down Expand Up @@ -283,6 +307,20 @@ def __sub__(self, other):
"""Subtraction with another object."""
return self.selected_child - other

# Sorting
# Directly compare the selected child
def __gt__(self, other):
    """Return the result of ``selected_child > other``."""
    child = self.selected_child
    return child > other

def __ge__(self, other):
    """Return the result of ``selected_child >= other``."""
    child = self.selected_child
    return child >= other

def __lt__(self, other):
    """Return the result of ``selected_child < other``."""
    child = self.selected_child
    return child < other

def __le__(self, other):
    """Return the result of ``selected_child <= other``."""
    child = self.selected_child
    return child <= other

# Public methods

def get_id(self):
Expand Down
57 changes: 44 additions & 13 deletions Bio/PDB/Residue.py
Expand Up @@ -35,6 +35,48 @@ def __repr__(self):
full_id = (resname, hetflag, resseq, icode)
return "<Residue %s het=%s resseq=%s icode=%s>" % full_id

# Residue-specific sorting methods
# Sort first by HETATM flag, then by resseq, finally by insertion code
def __gt__(self, other):
    """Return True if this residue sorts after ``other``.

    The three fields unpacked from ``id`` are compared in order: the
    first field that differs decides the result.
    """
    het_a, seq_a, ins_a = self.id
    het_b, seq_b, ins_b = other.id
    if het_b != het_a:
        return het_a > het_b
    if seq_b != seq_a:
        return seq_a > seq_b
    return ins_a > ins_b

def __ge__(self, other):
    """Return True if this residue sorts after, or level with, ``other``.

    The three fields unpacked from ``id`` are compared in order: the
    first field that differs decides the result.
    """
    het_a, seq_a, ins_a = self.id
    het_b, seq_b, ins_b = other.id
    if het_b != het_a:
        return het_a >= het_b
    if seq_b != seq_a:
        return seq_a >= seq_b
    return ins_a >= ins_b

def __lt__(self, other):
    """Return True if this residue sorts before ``other``.

    The three fields unpacked from ``id`` are compared in order: the
    first field that differs decides the result.
    """
    het_a, seq_a, ins_a = self.id
    het_b, seq_b, ins_b = other.id
    if het_b != het_a:
        return het_a < het_b
    if seq_b != seq_a:
        return seq_a < seq_b
    return ins_a < ins_b

def __le__(self, other):
    """Return True if this residue sorts before, or level with, ``other``.

    The three fields unpacked from ``id`` are compared in order: the
    first field that differs decides the result. Residues whose ids are
    identical compare as less-than-or-equal to each other.
    """
    hetflag_s, resseq_s, icode_s = self.id
    hetflag_o, resseq_o, icode_o = other.id
    if hetflag_o != hetflag_s:
        return hetflag_s <= hetflag_o
    elif resseq_o != resseq_s:
        return resseq_s <= resseq_o
    else:
        # Must be non-strict here, otherwise ``r <= r`` would be False.
        return icode_s <= icode_o

def add(self, atom):
"""Add an Atom object.

Expand All @@ -54,19 +96,8 @@ def sort(self):
by name, with any alternative location specifier for disordered
atoms (altloc) as a tie-breaker.
"""
# Defining sort key function within the sort method's scope:
def sort_index(atom):
"""Build tuple of (int, name, alt-loc) for sorting.

The first integer is 0, 1, 2, 3, 4 for atoms N, CA, C, O, other.
"""
try:
i = ["N", "CA", "C", "O"].index(atom.name)
except ValueError:
i = 4
return (i, atom.name, atom.altloc)

self.child_list.sort(key=sort_index)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You'll need to remove this blank line to make flake8 happy:

$ flake8 Bio/
Bio/PDB/Residue.py:92:1: D202 No blank lines allowed after function docstring

self.child_list.sort()

def flag_disordered(self):
"""Set the disordered flag."""
Expand Down
65 changes: 33 additions & 32 deletions Tests/PDB/a_structure.pdb
Expand Up @@ -573,15 +573,15 @@ ATOM 558 N GLY A 73 17.694 -3.324 28.982 1.00 25.36 N
ATOM 559 CA GLY A 73 18.291 -4.645 28.942 1.00 26.44 C
ATOM 560 C GLY A 73 17.283 -5.775 28.973 1.00 27.24 C
ATOM 561 O GLY A 73 16.086 -5.595 29.157 1.00 26.15 O
ATOM 562 N ASN A 74 17.798 -7.004 28.869 1.00 27.53 N
ATOM 563 CA ASN A 74 17.013 -8.206 28.788 1.00 27.66 C
ATOM 562 CA ASN A 74 17.013 -8.206 28.788 1.00 27.66 C
ATOM 563 N ASN A 74 17.798 -7.004 28.869 1.00 27.53 N
ATOM 564 C ASN A 74 16.001 -8.395 29.892 1.00 25.55 C
ATOM 565 O ASN A 74 14.915 -8.955 29.662 1.00 26.18 O
ATOM 566 CB ASN A 74 17.981 -9.421 28.763 1.00 32.27 C
ATOM 567 CG AASN A 74 17.279 -10.749 28.825 0.50 33.40 C
ATOM 568 CG BASN A 74 18.917 -9.356 27.574 0.50 34.22 C
ATOM 569 OD1AASN A 74 17.230 -11.395 29.878 0.50 35.56 O
ATOM 570 OD1BASN A 74 18.479 -9.218 26.432 0.50 36.46 O
ATOM 569 OD1BASN A 74 18.479 -9.218 26.432 0.50 36.46 O
ATOM 570 OD1AASN A 74 17.230 -11.395 29.878 0.50 35.56 O
ATOM 571 ND2AASN A 74 16.736 -11.203 27.699 0.50 34.86 N
ATOM 572 ND2BASN A 74 20.215 -9.465 27.831 0.50 36.27 N
ATOM 573 N ASP A 75 16.355 -8.036 31.129 1.00 25.87 N
Expand Down Expand Up @@ -696,6 +696,7 @@ ATOM 681 O CYS A 86 9.846 -3.464 22.105 1.00 25.66 O
ATOM 682 CB CYS A 86 8.644 -5.399 24.430 1.00 21.37 C
ATOM 683 SG CYS A 86 7.527 -5.528 25.832 1.00 19.98 S
TER 684 CYS A 86
HETATM 685 O HOH B 0 6.997 5.537 34.149 1.00 15.09 O
HETATM 685 C1 NAG B 1 2.845 5.730 8.410 1.00 13.00 C
HETATM 686 C2 NAG B 1 2.980 4.600 7.346 1.00 14.42 C
HETATM 687 C3 NAG B 1 2.175 3.411 7.793 1.00 14.65 C
Expand Down Expand Up @@ -724,34 +725,34 @@ HETATM 709 O4 NAG B 2 3.695 6.769 8.064 1.00 13.33 O
HETATM 710 O5 NAG B 2 3.294 10.396 8.215 1.00 16.15 O
HETATM 711 O6 NAG B 2 2.067 9.786 5.669 1.00 21.57 O
HETATM 712 O7 NAG B 2 4.383 10.784 12.792 1.00 13.02 O
HETATM 713 C1 NAG B 3 4.558 15.870 8.029 1.00 22.68 C
HETATM 714 C2 NAG B 3 3.781 14.939 7.134 1.00 23.40 C
HETATM 715 C3 NAG B 3 3.385 13.670 7.891 1.00 21.15 C
HETATM 716 C4 NAG B 3 4.638 13.082 8.560 1.00 19.08 C
HETATM 717 C5 NAG B 3 5.387 14.135 9.404 1.00 20.09 C
HETATM 718 C6 NAG B 3 6.662 13.594 9.979 1.00 20.74 C
HETATM 719 C7 NAG B 3 2.333 15.941 5.366 1.00 30.04 C
HETATM 720 C8 NAG B 3 0.935 16.346 5.056 1.00 30.86 C
HETATM 721 N2 NAG B 3 2.536 15.585 6.659 1.00 26.09 N
HETATM 722 O3 NAG B 3 2.831 12.779 6.934 1.00 22.73 O
HETATM 723 O4 NAG B 3 4.253 12.032 9.493 1.00 16.48 O
HETATM 724 O5 NAG B 3 5.732 15.223 8.531 1.00 21.12 O
HETATM 725 O6 NAG B 3 7.565 13.140 9.015 1.00 23.92 O
HETATM 726 O7 NAG B 3 3.172 15.935 4.593 1.00 31.92 O
HETATM 727 C1 NAG B 4 6.424 20.761 7.837 1.00 35.33 C
HETATM 728 C2 NAG B 4 7.333 19.637 8.339 1.00 35.66 C
HETATM 729 C3 NAG B 4 6.907 18.331 7.686 1.00 35.20 C
HETATM 730 C4 NAG B 4 5.422 18.072 8.014 1.00 33.37 C
HETATM 731 C5 NAG B 4 4.527 19.299 7.696 1.00 33.27 C
HETATM 732 C6 NAG B 4 3.173 19.102 8.406 1.00 32.57 C
HETATM 733 C7 NAG B 4 9.448 20.235 9.622 0.00 40.22 C
HETATM 734 C8 NAG B 4 10.921 20.272 9.570 0.00 40.07 C
HETATM 735 N2 NAG B 4 8.744 19.920 8.013 1.00 37.41 N
HETATM 736 O3 NAG B 4 7.711 17.283 8.235 1.00 36.78 O
HETATM 737 O4 NAG B 4 4.923 16.987 7.202 1.00 29.35 O
HETATM 738 O5 NAG B 4 5.078 20.464 8.245 1.00 34.32 O
HETATM 739 O6 NAG B 4 3.314 19.212 9.787 1.00 31.85 O
HETATM 740 O7 NAG B 4 8.849 20.404 10.603 0.00 43.75 O
HETATM 713 C1 NAG B 4 4.558 15.870 8.029 1.00 22.68 C
HETATM 714 C2 NAG B 4 3.781 14.939 7.134 1.00 23.40 C
HETATM 715 C3 NAG B 4 3.385 13.670 7.891 1.00 21.15 C
HETATM 716 C4 NAG B 4 4.638 13.082 8.560 1.00 19.08 C
HETATM 717 C5 NAG B 4 5.387 14.135 9.404 1.00 20.09 C
HETATM 718 C6 NAG B 4 6.662 13.594 9.979 1.00 20.74 C
HETATM 719 C7 NAG B 4 2.333 15.941 5.366 1.00 30.04 C
HETATM 720 C8 NAG B 4 0.935 16.346 5.056 1.00 30.86 C
HETATM 721 N2 NAG B 4 2.536 15.585 6.659 1.00 26.09 N
HETATM 722 O3 NAG B 4 2.831 12.779 6.934 1.00 22.73 O
HETATM 723 O4 NAG B 4 4.253 12.032 9.493 1.00 16.48 O
HETATM 724 O5 NAG B 4 5.732 15.223 8.531 1.00 21.12 O
HETATM 725 O6 NAG B 4 7.565 13.140 9.015 1.00 23.92 O
HETATM 726 O7 NAG B 4 3.172 15.935 4.593 1.00 31.92 O
HETATM 727 C1 NAG B 3 6.424 20.761 7.837 1.00 35.33 C
HETATM 728 C2 NAG B 3 7.333 19.637 8.339 1.00 35.66 C
HETATM 729 C3 NAG B 3 6.907 18.331 7.686 1.00 35.20 C
HETATM 730 C4 NAG B 3 5.422 18.072 8.014 1.00 33.37 C
HETATM 731 C5 NAG B 3 4.527 19.299 7.696 1.00 33.27 C
HETATM 732 C6 NAG B 3 3.173 19.102 8.406 1.00 32.57 C
HETATM 733 C7 NAG B 3 9.448 20.235 9.622 0.00 40.22 C
HETATM 734 C8 NAG B 3 10.921 20.272 9.570 0.00 40.07 C
HETATM 735 N2 NAG B 3 8.744 19.920 8.013 1.00 37.41 N
HETATM 736 O3 NAG B 3 7.711 17.283 8.235 1.00 36.78 O
HETATM 737 O4 NAG B 3 4.923 16.987 7.202 1.00 29.35 O
HETATM 738 O5 NAG B 3 5.078 20.464 8.245 1.00 34.32 O
HETATM 739 O6 NAG B 3 3.314 19.212 9.787 1.00 31.85 O
HETATM 740 O7 NAG B 3 8.849 20.404 10.603 0.00 43.75 O
HETATM 741 O HOH 1 6.997 5.537 34.149 1.00 15.09 O
HETATM 742 O HOH 2 10.295 3.302 26.095 1.00 16.76 O
HETATM 743 O HOH 3 10.019 12.949 25.133 1.00 20.11 O
Expand Down