Skip to content

Commit

Permalink
Merge pull request #404 from haddocking/numbering_fix
Browse files Browse the repository at this point in the history
Bugfix for numbering reference between model and receptor
  • Loading branch information
mgiulini committed Apr 14, 2022
2 parents c80475f + af0c2fa commit 2143dfe
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 41 deletions.
91 changes: 51 additions & 40 deletions src/haddock/modules/analysis/caprieval/capri.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ def __init__(
reference = str(reference)
model = model_list[0].rel_path
align_func = get_align(aln_method, **params)
self.numbering_dic = align_func(reference, model, path)
if not self.numbering_dic:
self.model2ref_numbering = align_func(reference, model, path)
if not self.model2ref_numbering:
raise CAPRIError("Could not align reference and model")

def irmsd(self, cutoff=5.0):
Expand Down Expand Up @@ -724,18 +724,16 @@ def load_coords(self, pdb_f, filter_resdic=None, match=False):
coords = np.asarray([x, y, z])

if match:
# This will match the number of the MODEL with the
# number of the REFERENCE
try:
resnum = self.numbering_dic[chain][resnum]
resnum = self.model2ref_numbering[chain][resnum]
except KeyError:
# this residue is not matched, and so it should
# not be considered
# self.log(
# f"WARNING: {chain}.{resnum}.{atom_name}"
# " was not matched!"
# )
# this residue is not matched, it exists in
# the MODEL but not in the REFERENCE so it
# should not be considered
continue

# identifier = f"{chain}.{resnum}.{atom_name}"
identifier = (chain, resnum, atom_name)

if atom_name not in self.atoms[resname]:
Expand Down Expand Up @@ -1009,73 +1007,86 @@ def align_strct(reference, model, output_path, lovoalign_exec=None):

def align_seq(reference, model, output_path):
"""Sequence align and get the numbering relationship."""
seqdic_a = pdb2fastadic(reference)
seqdic_b = pdb2fastadic(model)
seqdic_ref = pdb2fastadic(reference)
seqdic_model = pdb2fastadic(model)

if seqdic_a.keys() != seqdic_b.keys():
if seqdic_ref.keys() != seqdic_model.keys():
# TODO: Implement chain-matching here
return False

align_dic = {}
for a, b in zip(seqdic_a, seqdic_b):
for ref_chain, model_chain in zip(seqdic_ref, seqdic_model):

align_dic[a] = {}
assert ref_chain == model_chain

seq_a = Seq("".join(seqdic_a[a].values()))
seq_b = Seq("".join(seqdic_b[b].values()))
align_dic[ref_chain] = {}

seq_ref = Seq("".join(seqdic_ref[ref_chain].values()))
seq_model = Seq("".join(seqdic_model[model_chain].values()))

aligner = Align.PairwiseAligner()
aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
alns = aligner.align(seq_a, seq_b)
alns = aligner.align(seq_ref, seq_model)
top_aln = alns[0]

aln_fname = Path(output_path, f"blosum62_{a}.aln")
aln_fname = Path(output_path, f"blosum62_{ref_chain}.aln")
log.debug(f"Writing alignment to {aln_fname.name}")
with open(aln_fname, "w") as fh:
fh.write(str(top_aln))
aligned_seg_a, aligned_seg_b = top_aln.aligned
aligned_ref_segment, aligned_model_segment = top_aln.aligned

# this should always be true
assert len(aligned_seg_a) == len(aligned_seg_b)
assert len(aligned_ref_segment) == len(aligned_model_segment)

identity = (
str(top_aln).count("|") / float(min(len(seq_a), len(seq_b)))
str(top_aln).count("|") / float(min(len(seq_ref), len(seq_model)))
) * 100

if not any(e for e in top_aln.aligned):
# No alignment!
log.warning(
f"No alignment for chain {a} is it protein/dna? "
f"No alignment for chain {ref_chain} is it protein/dna? "
"Matching sequentially"
)
if all("X" in s for s in seq_a) and all("X" in s for s in seq_b):
if all(
"X" in s for s in seq_ref) and all(
"X" in s for s in seq_model):
# this sequence contains only ligands, do it manually
if len(seq_a) != len(seq_b):
if len(seq_ref) != len(seq_model):
# we cannot handle this
raise f"Cannot align chain {b}"
for res_a, res_b in zip(seqdic_a[a], seqdic_b[b]):
align_dic[a].update({res_a: res_b})
raise f"Cannot align chain {model_chain}"
for ref_res, model_res in zip(
seqdic_ref[ref_chain],
seqdic_model[model_chain]):

align_dic[ref_chain].update({model_res: ref_res})
else:
if identity <= 40.0:
# Identity is very low
log.warning(
f"Sequence identity of chain {a} is {identity:.2f}%,"
" please check the results carefully"
)
f"Sequence identity of chain {ref_chain} is "
f"{identity:.2f}%, please check the results carefully")
log.warning(
"Please use alignment_method = \"structure\" instead")
else:
log.info(f"Sequence identity of chain {a} is {identity:.2f}%")
for seg_a, seg_b in zip(aligned_seg_a, aligned_seg_b):
start_a, end_a = seg_a
start_b, end_b = seg_b
log.info(
f"Sequence identity of chain {ref_chain} is "
f"{identity:.2f}%")
for ref_segment, model_segment in zip(
aligned_ref_segment, aligned_model_segment):

reslist_a = list(seqdic_a[a].keys())[start_a:end_a]
reslist_b = list(seqdic_b[b].keys())[start_b:end_b]
start_ref_segment, end_ref_segment = ref_segment
start_model_segment, end_model_segment = model_segment

reslist_ref = list(seqdic_ref[ref_chain].keys())[
start_ref_segment:end_ref_segment]

reslist_model = list(seqdic_model[model_chain].keys())[
start_model_segment:end_model_segment]

for _ref_res, _model_res in zip(reslist_ref, reslist_model):
align_dic[ref_chain].update({_model_res: _ref_res})

align_dic[a].update(
dict((i, j) for i, j in zip(reslist_a, reslist_b))
)
izone_fname = Path(output_path, "blosum62.izone")
log.debug(f"Saving .izone to {izone_fname.name}")
dump_as_izone(izone_fname, align_dic)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_module_caprieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -867,7 +867,7 @@ def test_align_seq():
with tempfile.TemporaryDirectory() as tmpdirname:

observed_numb_dic = align_seq(ref, mod, tmpdirname)
expected_numb_dic = {"B": {1: 101, 2: 102, 3: 110, 5: 112}}
expected_numb_dic = {"B": {101: 1, 102: 2, 110: 3, 112: 5}}

assert observed_numb_dic == expected_numb_dic

Expand Down

0 comments on commit 2143dfe

Please sign in to comment.