Skip to content

Commit

Permalink
Merge pull request #206 from CyrusBiotechnology/custom-template
Browse files Browse the repository at this point in the history
adding a script for threading a sequence onto a structure
  • Loading branch information
gahdritz committed Sep 1, 2022
2 parents 9dd9cea + 25feff5 commit 8239749
Show file tree
Hide file tree
Showing 9 changed files with 532 additions and 311 deletions.
36 changes: 36 additions & 0 deletions openfold/data/data_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import numpy as np

from openfold.data import templates, parsers, mmcif_parsing
from openfold.data.templates import get_custom_template_features
from openfold.data.tools import jackhmmer, hhblits, hhsearch
from openfold.data.tools.utils import to_date
from openfold.np import residue_constants, protein
Expand Down Expand Up @@ -259,6 +260,41 @@ def make_msa_features(
return features


def make_sequence_features_with_custom_template(
        sequence: str,
        mmcif_path: str,
        pdb_id: str,
        chain_id: str,
        kalign_binary_path: str) -> FeatureDict:
    """Assemble features for one sequence using a single custom template.

    No alignment search is run. The MSA consists of the query sequence
    alone, and the template features are taken from one chain of the
    given mmCIF file.

    Args:
        sequence: Query amino-acid sequence.
        mmcif_path: Path of the mmCIF file holding the template structure.
        pdb_id: Identifier of the template; also used as the description
            of the sequence features.
        chain_id: Chain of the mmCIF file to use as the template.
        kalign_binary_path: Path to the kalign executable, forwarded to
            the template feature extraction.

    Returns:
        A single dictionary merging the sequence, MSA and template
        features. On duplicate keys, later groups win (sequence < MSA <
        template).
    """
    seq_feats = make_sequence_features(
        sequence=sequence,
        description=pdb_id,
        num_res=len(sequence),
    )

    # One "alignment" containing only the query, with no deletions at any
    # position.
    single_sequence_msas = [[sequence]]
    zero_deletions = [[[0] * len(sequence)]]
    msa_feats = make_msa_features(single_sequence_msas, zero_deletions)

    template_result = get_custom_template_features(
        mmcif_path=mmcif_path,
        query_sequence=sequence,
        pdb_id=pdb_id,
        chain_id=chain_id,
        kalign_binary_path=kalign_binary_path,
    )

    merged = dict(seq_feats)
    merged.update(msa_feats)
    merged.update(template_result.features)
    return merged

class AlignmentRunner:
"""Runs alignment tools and saves the results"""
def __init__(
Expand Down
50 changes: 50 additions & 0 deletions openfold/data/templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,56 @@ def _process_single_hit(
return SingleHitResult(features=None, error=error, warning=None)


def get_custom_template_features(
        mmcif_path: str,
        query_sequence: str,
        pdb_id: str,
        chain_id: str,
        kalign_binary_path: str):
    """Build template features from one chain of a user-supplied mmCIF file.

    The file is parsed, the SEQRES sequence of ``chain_id`` is looked up,
    and the features are extracted with an identity index mapping (query
    index ``i`` -> template index ``i``) rather than a search-derived
    alignment.

    Args:
        mmcif_path: Path of the mmCIF file to read.
        query_sequence: Query amino-acid sequence.
        pdb_id: Identifier passed to the mmCIF parser as the file id and to
            the feature extraction as the template id.
        chain_id: Chain of the parsed structure to use as the template.
        kalign_binary_path: Path to the kalign executable, forwarded to
            ``_extract_template_features``.

    Returns:
        A ``TemplateSearchResult`` with ``errors=None``, the warnings
        reported by ``_extract_template_features``, and ``features``
        mapping every name in ``TEMPLATE_FEATURES`` to an array with a
        leading axis of length 1 (a single template), cast to the dtype
        registered for that name.

    Raises:
        ValueError: If the mmCIF file could not be parsed.
        KeyError: If ``chain_id`` is not among the parsed chains.
    """
    # Use a distinct name for the handle so the ``mmcif_path`` argument is
    # not rebound to a (soon closed) file object.
    with open(mmcif_path, "r") as cif_file:
        cif_string = cif_file.read()

    parse_result = mmcif_parsing.parse(
        file_id=pdb_id, mmcif_string=cif_string
    )
    mmcif_object = parse_result.mmcif_object
    if mmcif_object is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            f"Failed to parse mmCIF file {mmcif_path!r} for template "
            f"{pdb_id!r}: {parse_result.errors}"
        )

    template_sequence = mmcif_object.chain_to_seqres[chain_id]

    # Identity mapping: query position i is threaded onto template
    # position i.
    mapping = {i: i for i in range(len(query_sequence))}

    features, warnings = _extract_template_features(
        mmcif_object=mmcif_object,
        pdb_id=pdb_id,
        mapping=mapping,
        template_sequence=template_sequence,
        query_sequence=query_sequence,
        template_chain_id=chain_id,
        kalign_binary_path=kalign_binary_path,
        _zero_center_positions=True,
    )
    features["template_sum_probs"] = [1.0]

    # Add the leading "number of templates" axis (length 1) and cast each
    # feature to its registered dtype.
    template_features = {
        name: np.stack([features[name]], axis=0).astype(dtype)
        for name, dtype in TEMPLATE_FEATURES.items()
    }

    return TemplateSearchResult(
        features=template_features, errors=None, warnings=warnings
    )



@dataclasses.dataclass(frozen=True)
class TemplateSearchResult:
features: Mapping[str, Any]
Expand Down
16 changes: 0 additions & 16 deletions openfold/model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +0,0 @@
import os
import glob
import importlib as importlib

# Collect every ``*.py`` file that sits next to this ``__init__.py``.
_files = glob.glob(os.path.join(os.path.dirname(__file__), "*.py"))

# Public names of the package: the file name without its ".py" suffix for
# each sibling module, excluding this ``__init__.py`` itself.
__all__ = [
    os.path.basename(f)[:-3]
    for f in _files
    if os.path.isfile(f) and not f.endswith("__init__.py")
]

# Import each submodule and expose it as a package-level name. The
# comprehension keeps its loop variable out of the module namespace, so
# nothing needs deleting afterwards. The previous ``for _m in ...`` /
# ``del _m`` pair raised NameError when no sibling modules were found,
# because ``_m`` was then never bound.
globals().update(
    {m: importlib.import_module("." + m, __name__) for m in __all__}
)

# Avoid needlessly cluttering the global namespace
del _files
16 changes: 0 additions & 16 deletions openfold/np/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +0,0 @@
import os
import glob
import importlib as importlib

# Collect every ``*.py`` file that sits next to this ``__init__.py``.
_files = glob.glob(os.path.join(os.path.dirname(__file__), "*.py"))

# Public names of the package: the file name without its ".py" suffix for
# each sibling module, excluding this ``__init__.py`` itself.
__all__ = [
    os.path.basename(f)[:-3]
    for f in _files
    if os.path.isfile(f) and not f.endswith("__init__.py")
]

# Import each submodule and expose it as a package-level name. The
# comprehension keeps its loop variable out of the module namespace, so
# nothing needs deleting afterwards. The previous ``for _m in ...`` /
# ``del _m`` pair raised NameError when no sibling modules were found,
# because ``_m`` was then never bound.
globals().update(
    {m: importlib.import_module("." + m, __name__) for m in __all__}
)

# Avoid needlessly cluttering the global namespace
del _files
16 changes: 0 additions & 16 deletions openfold/np/relax/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +0,0 @@
import os
import glob
import importlib as importlib

# Collect every ``*.py`` file that sits next to this ``__init__.py``.
_files = glob.glob(os.path.join(os.path.dirname(__file__), "*.py"))

# Public names of the package: the file name without its ".py" suffix for
# each sibling module, excluding this ``__init__.py`` itself.
__all__ = [
    os.path.basename(f)[:-3]
    for f in _files
    if os.path.isfile(f) and not f.endswith("__init__.py")
]

# Import each submodule and expose it as a package-level name. The
# comprehension keeps its loop variable out of the module namespace, so
# nothing needs deleting afterwards. The previous ``for _m in ...`` /
# ``del _m`` pair raised NameError when no sibling modules were found,
# because ``_m`` was then never bound.
globals().update(
    {m: importlib.import_module("." + m, __name__) for m in __all__}
)

# Avoid needlessly cluttering the global namespace
del _files
18 changes: 0 additions & 18 deletions openfold/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +0,0 @@
import os
import glob
import importlib as importlib

from . import kernel

# Every ``*.py`` file located in this package's directory.
_py_files = glob.glob(os.path.join(os.path.dirname(__file__), "*.py"))

# Exported names: one per sibling module file (its name minus ".py"),
# leaving out this ``__init__.py``, followed by "kernel".
__all__ = [
    os.path.basename(path)[:-3]
    for path in _py_files
    if not path.endswith("__init__.py") and os.path.isfile(path)
]
__all__.append("kernel")

# Import every exported name relative to this package and bind it as a
# package-level attribute.
globals().update(
    {
        module_name: importlib.import_module("." + module_name, __name__)
        for module_name in __all__
    }
)

# Keep the helper variable out of the package namespace.
del _py_files

0 comments on commit 8239749

Please sign in to comment.