Skip to content

Commit

Permalink
Merge branch 'master' into alignio-maf
Browse files Browse the repository at this point in the history
  • Loading branch information
blaiseli committed Apr 3, 2015
2 parents 39cf674 + b31c789 commit 02120d1
Show file tree
Hide file tree
Showing 21 changed files with 6,417 additions and 127 deletions.
2 changes: 1 addition & 1 deletion Bio/PDB/Chain.py
Expand Up @@ -129,6 +129,6 @@ def get_residues(self):
yield r

def get_atoms(self):
for r in self:
for r in self.get_residues():
for a in r:
yield a
4 changes: 4 additions & 0 deletions Bio/PDB/MMCIF2Dict.py
Expand Up @@ -21,6 +21,8 @@ def __init__(self, filename):
tokens = self._tokenize(handle)
token = next(tokens)
self[token[0:5]] = token[5:]
i = 0
n = 0
for token in tokens:
if token == "loop_":
loop_flag = True
Expand All @@ -47,6 +49,8 @@ def __init__(self, filename):
self[key] = token
key = None

# Private methods

def _tokenize(self, handle):
for line in handle:
if line.startswith("#"):
Expand Down
76 changes: 53 additions & 23 deletions Bio/PDB/MMCIFParser.py
Expand Up @@ -10,21 +10,53 @@
from string import ascii_letters

import numpy
import warnings

from Bio._py3k import range

from Bio.PDB.MMCIF2Dict import MMCIF2Dict
from Bio.PDB.StructureBuilder import StructureBuilder
from Bio.PDB.PDBExceptions import PDBConstructionException
from Bio.PDB.PDBExceptions import PDBConstructionWarning


class MMCIFParser(object):
"""Parse an mmCIF file and return a Structure object."""

def __init__(self, structure_builder=None, QUIET=False):
"""Create an MMCIFParser object.
The mmCIF parser calls a number of standard methods in an aggregated
StructureBuilder object. Normally this object is instantiated by the
MMCIFParser object itself, but if the user provides his/her own
StructureBuilder object, the latter is used instead.
Arguments:
- structure_builder - an optional user implemented StructureBuilder class.
- QUIET - Evaluated as a Boolean. If true, warnings issued in constructing
the SMCRA data will be suppressed. If false (DEFAULT), they will be shown.
These warnings might be indicative of problems in the mmCIF file!
"""
if structure_builder is not None:
self._structure_builder = structure_builder
else:
self._structure_builder = StructureBuilder()
# self.header = None
# self.trailer = None
self.line_counter = 0
self.build_structure = None
self.QUIET = bool(QUIET)

# Public methods

def get_structure(self, structure_id, filename):
with warnings.catch_warnings():
if self.QUIET:
warnings.filterwarnings("ignore", category=PDBConstructionWarning)
self._mmcif_dict = MMCIF2Dict(filename)
self._structure_builder = StructureBuilder()
self._build_structure(structure_id)
return self._structure_builder.get_structure()

# Private methods

def _build_structure(self, structure_id):
mmcif_dict = self._mmcif_dict
atom_id_list = mmcif_dict["_atom_site.label_atom_id"]
Expand All @@ -39,6 +71,7 @@ def _build_structure(self, structure_id):
y_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_y"]]
z_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_z"]]
alt_list = mmcif_dict["_atom_site.label_alt_id"]
icode_list = mmcif_dict["_atom_site.pdbx_PDB_ins_code"]
b_factor_list = mmcif_dict["_atom_site.B_iso_or_equiv"]
occupancy_list = mmcif_dict["_atom_site.occupancy"]
fieldname_list = mmcif_dict["_atom_site.group_PDB"]
Expand Down Expand Up @@ -75,9 +108,15 @@ def _build_structure(self, structure_id):
structure_builder.init_seg(" ")
# Historically, Biopython PDB parser uses model_id to mean array index
# so serial_id means the Model ID specified in the file
current_model_id = 0
current_model_id = -1
current_serial_id = 0
for i in range(0, len(atom_id_list)):

# Set the line_counter for 'ATOM' lines only, rather than as the
# global line counter used in PDBParser(); this number should
# match the '_atom_site.id' index in the mmCIF file.
structure_builder.set_line_counter(i)

x = x_list[i]
y = y_list[i]
z = z_list[i]
Expand All @@ -87,6 +126,9 @@ def _build_structure(self, structure_id):
if altloc == ".":
altloc = " "
resseq = seq_id_list[i]
icode = icode_list[i]
if icode == "?":
icode = " "
name = atom_id_list[i]
# occupancy & B factor
try:
Expand All @@ -108,23 +150,23 @@ def _build_structure(self, structure_id):
if current_serial_id != serial_id:
# if serial changes, update it and start new model
current_serial_id = serial_id
structure_builder.init_model(current_model_id, current_serial_id)
current_model_id += 1
structure_builder.init_model(current_model_id, current_serial_id)
current_chain_id = None
current_residue_id = None
else:
# no explicit model column; initialize single model
structure_builder.init_model(current_model_id)

if current_chain_id != chainid:
current_chain_id = chainid
structure_builder.init_chain(current_chain_id)

if current_residue_id != resseq:
current_residue_id = resseq
icode, int_resseq = self._get_icode(resseq)
structure_builder.init_residue(resname, hetatm_flag, int_resseq,
icode)
elif current_residue_id != resseq:
current_residue_id = resseq
icode, int_resseq = self._get_icode(resseq)
structure_builder.init_residue(resname, hetatm_flag, int_resseq,
icode)
int_resseq = int(resseq)
structure_builder.init_residue(resname, hetatm_flag, int_resseq, icode)

coord = numpy.array((x, y, z), 'f')
element = element_list[i] if element_list else None
structure_builder.init_atom(name, coord, tempfactor, occupancy, altloc,
Expand Down Expand Up @@ -152,18 +194,6 @@ def _build_structure(self, structure_id):
except:
pass # no cell found, so just ignore

def _get_icode(self, resseq):
"""Tries to return the icode. In MMCIF files this is just part of
resseq! In PDB files, it's a separate field."""
last_resseq_char = resseq[-1]
if last_resseq_char in ascii_letters:
icode = last_resseq_char
int_resseq = int(resseq[0:-1])
else:
icode = " "
int_resseq = int(resseq)
return icode, int_resseq


if __name__ == "__main__":
import sys
Expand Down
6 changes: 5 additions & 1 deletion Bio/PDB/Model.py
Expand Up @@ -58,8 +58,12 @@ def __repr__(self):

# Public

def get_residues(self):
def get_chains(self):
for c in self:
yield c

def get_residues(self):
for c in self.get_chains():
for r in c:
yield r

Expand Down
4 changes: 4 additions & 0 deletions Bio/PDB/Residue.py
Expand Up @@ -112,6 +112,10 @@ def get_unpacked_list(self):
def get_segid(self):
return self.segid

def get_atom(self):
for a in self:
yield a


class DisorderedResidue(DisorderedEntityWrapper):
"""
Expand Down
6 changes: 5 additions & 1 deletion Bio/PDB/Structure.py
Expand Up @@ -37,8 +37,12 @@ def _sort(self, m1, m2):

# Public

def get_chains(self):
def get_models(self):
for m in self:
yield m

def get_chains(self):
for m in self.get_models():
for c in m:
yield c

Expand Down
17 changes: 8 additions & 9 deletions Bio/PDB/StructureBuilder.py
Expand Up @@ -156,9 +156,8 @@ def init_residue(self, resname, field, resseq, icode):
disordered_residue.disordered_add(new_residue)
self.residue = disordered_residue
return
residue = Residue(res_id, resname, self.segid)
self.chain.add(residue)
self.residue = residue
self.residue = Residue(res_id, resname, self.segid)
self.chain.add(self.residue)

def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname,
serial_number=None, element=None):
Expand Down Expand Up @@ -196,15 +195,15 @@ def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname,
% (duplicate_fullname, fullname,
self.line_counter),
PDBConstructionWarning)
atom = self.atom = Atom(name, coord, b_factor, occupancy, altloc,
fullname, serial_number, element)
self.atom = Atom(name, coord, b_factor, occupancy, altloc,
fullname, serial_number, element)
if altloc != " ":
# The atom is disordered
if residue.has_id(name):
# Residue already contains this atom
duplicate_atom = residue[name]
if duplicate_atom.is_disordered() == 2:
duplicate_atom.disordered_add(atom)
duplicate_atom.disordered_add(self.atom)
else:
# This is an error in the PDB file:
# a disordered atom is found with a blank altloc
Expand All @@ -214,7 +213,7 @@ def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname,
residue.detach_child(name)
disordered_atom = DisorderedAtom(name)
residue.add(disordered_atom)
disordered_atom.disordered_add(atom)
disordered_atom.disordered_add(self.atom)
disordered_atom.disordered_add(duplicate_atom)
residue.flag_disordered()
warnings.warn("WARNING: disordered atom found "
Expand All @@ -228,11 +227,11 @@ def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname,
residue.add(disordered_atom)
# Add the real atom to the disordered atom, and the
# disordered atom to the residue
disordered_atom.disordered_add(atom)
disordered_atom.disordered_add(self.atom)
residue.flag_disordered()
else:
# The atom is not disordered
residue.add(atom)
residue.add(self.atom)

def set_anisou(self, anisou_array):
"Set anisotropic B factor of current Atom."
Expand Down
4 changes: 2 additions & 2 deletions Bio/PopGen/SimCoal/Controller.py
Expand Up @@ -171,11 +171,11 @@ def __init__(self, fastsimcoal_dir=None, cmd='fastsimcoal', **kwargs):


class FastSimCoalController(object):
def __init__(self, fastsimcoal_dir=None, bin_name="fsc251"):
def __init__(self, fastsimcoal_dir=None, bin_name="fsc252"):
"""Initializes the controller.
fastsimcoal_dir is the directory where fastsimcoal is.
By default the binary should be called fsc251.
By default the binary should be called fsc252.
bin_name specifies a different name for the binary.
The initializer checks for existence and executability of binaries
Expand Down
9 changes: 4 additions & 5 deletions Bio/SeqFeature.py
@@ -1,6 +1,6 @@
# Copyright 2000-2003 Jeff Chang.
# Copyright 2001-2008 Brad Chapman.
# Copyright 2005-2012 by Peter Cock.
# Copyright 2005-2015 by Peter Cock.
# Copyright 2006-2009 Michiel de Hoon.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
Expand Down Expand Up @@ -34,10 +34,9 @@
Specify locations of a feature on a Sequence
--------------------------------------------
This aims to handle, in Ewan's words, 'the dreaded fuzziness issue' in
much the same way as Biocorba. This has the advantages of allowing us
to handle fuzzy stuff in case anyone needs it, and also be compatible
with Biocorba.
This aims to handle, in Ewan Birney's words, 'the dreaded fuzziness issue'.
This has the advantage of allowing us to handle fuzzy stuff in case anyone
needs it, while also being compatible with BioPerl etc. and BioSQL.
classes:
Expand Down

0 comments on commit 02120d1

Please sign in to comment.