Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,25 +90,35 @@ python inference.py target.fasta data/pdb_mmcif/mmcif_files/ \
--jackhmmer_binary_path `which jackhmmer` \
--hhblits_binary_path `which hhblits` \
--hhsearch_binary_path `which hhsearch` \
--kalign_binary_path `which kalign`
--kalign_binary_path `which kalign`
```
or run the script `./inference.sh`, you can change
or run the script `./inference.sh`; you can change the parameters in the script
```shell
./inference.sh
```

#### inference with data workflow
AlphaFold's data pre-processing takes a lot of time, so we speed up the data pre-processing with a [ray](https://docs.ray.io/en/latest/workflows/concepts.html) workflow. To run the inference with the ray workflow, you should install the packages by
AlphaFold's data pre-processing takes a lot of time, so we speed up the data pre-processing with a [ray](https://docs.ray.io/en/latest/workflows/concepts.html) workflow. To run the inference with the ray workflow, you should install the packages below and add the parameter `--enable_workflow` to the command line or to the shell script `./inference.sh`
```shell
pip install ray pyarrow
```

Then you can run it with the script `./inference_with_flow.sh`

```shell
./inference_with_flow.sh
python inference.py target.fasta data/pdb_mmcif/mmcif_files/ \
--output_dir ./ \
--gpus 2 \
--uniref90_database_path data/uniref90/uniref90.fasta \
--mgnify_database_path data/mgnify/mgy_clusters_2018_12.fa \
--pdb70_database_path data/pdb70/pdb70 \
--uniclust30_database_path data/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
--bfd_database_path data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
--jackhmmer_binary_path `which jackhmmer` \
--hhblits_binary_path `which hhblits` \
--hhsearch_binary_path `which hhsearch` \
--kalign_binary_path `which kalign` \
--enable_workflow
```


## Performance Benchmark

We have included a performance benchmark script in `./benchmark`. You can benchmark the performance of Evoformer using different settings.
Expand Down
2 changes: 0 additions & 2 deletions fastfold/workflow/factory/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,4 @@
from .hhblits import HHBlitsFactory
from .hhsearch import HHSearchFactory
from .jackhmmer import JackHmmerFactory
from .alphafold import AlphaFoldFactory
from .amber_relax import AmberRelaxFactory
from .hhfilter import HHfilterFactory
75 changes: 0 additions & 75 deletions fastfold/workflow/factory/alphafold.py

This file was deleted.

36 changes: 0 additions & 36 deletions fastfold/workflow/factory/amber_relax.py

This file was deleted.

48 changes: 33 additions & 15 deletions inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from fastfold.config import model_config
from fastfold.model.fastnn import set_chunk_size
from fastfold.data import data_pipeline, feature_pipeline, templates
from fastfold.workflow.template import FastFoldDataWorkFlow
from fastfold.utils import inject_fastnn
from fastfold.utils.import_weights import import_jax_weights_
from fastfold.utils.tensor_utils import tensor_tree_map
Expand Down Expand Up @@ -73,7 +74,7 @@ def add_data_args(parser: argparse.ArgumentParser):
)
parser.add_argument('--obsolete_pdbs_path', type=str, default=None)
parser.add_argument('--release_dates_path', type=str, default=None)

parser.add_argument('--enable_workflow', default=False, action='store_true', help='run inference with ray workflow or not')

def inference_model(rank, world_size, result_q, batch, args):
os.environ['RANK'] = str(rank)
Expand Down Expand Up @@ -158,20 +159,37 @@ def main(args):
if (args.use_precomputed_alignments is None):
if not os.path.exists(local_alignment_dir):
os.makedirs(local_alignment_dir)

alignment_runner = data_pipeline.AlignmentRunner(
jackhmmer_binary_path=args.jackhmmer_binary_path,
hhblits_binary_path=args.hhblits_binary_path,
hhsearch_binary_path=args.hhsearch_binary_path,
uniref90_database_path=args.uniref90_database_path,
mgnify_database_path=args.mgnify_database_path,
bfd_database_path=args.bfd_database_path,
uniclust30_database_path=args.uniclust30_database_path,
pdb70_database_path=args.pdb70_database_path,
use_small_bfd=use_small_bfd,
no_cpus=args.cpus,
)
alignment_runner.run(fasta_path, local_alignment_dir)
if args.enable_workflow:
print("Running alignment with ray workflow...")
alignment_data_workflow_runner = FastFoldDataWorkFlow(
jackhmmer_binary_path=args.jackhmmer_binary_path,
hhblits_binary_path=args.hhblits_binary_path,
hhsearch_binary_path=args.hhsearch_binary_path,
uniref90_database_path=args.uniref90_database_path,
mgnify_database_path=args.mgnify_database_path,
bfd_database_path=args.bfd_database_path,
uniclust30_database_path=args.uniclust30_database_path,
pdb70_database_path=args.pdb70_database_path,
use_small_bfd=use_small_bfd,
no_cpus=args.cpus,
)
t = time.perf_counter()
alignment_data_workflow_runner.run(fasta_path, output_dir=output_dir_base, alignment_dir=local_alignment_dir)
print(f"Alignment data workflow time: {time.perf_counter() - t}")
else:
alignment_runner = data_pipeline.AlignmentRunner(
jackhmmer_binary_path=args.jackhmmer_binary_path,
hhblits_binary_path=args.hhblits_binary_path,
hhsearch_binary_path=args.hhsearch_binary_path,
uniref90_database_path=args.uniref90_database_path,
mgnify_database_path=args.mgnify_database_path,
bfd_database_path=args.bfd_database_path,
uniclust30_database_path=args.uniclust30_database_path,
pdb70_database_path=args.pdb70_database_path,
use_small_bfd=use_small_bfd,
no_cpus=args.cpus,
)
alignment_runner.run(fasta_path, local_alignment_dir)

feature_dict = data_processor.process_fasta(fasta_path=fasta_path,
alignment_dir=local_alignment_dir)
Expand Down
15 changes: 8 additions & 7 deletions inference.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
rm -rf alignments/
rm -rf *.pdb

python inference.py target.fasta /data/scratch/alphafold/alphafold/pdb_mmcif/mmcif_files \
--output_dir ./ \
--gpus 2 \
--uniref90_database_path /data/scratch/alphafold/alphafold/uniref90/uniref90.fasta \
--mgnify_database_path /data/scratch/alphafold/alphafold/mgnify/mgy_clusters_2018_12.fa \
--pdb70_database_path /data/scratch/alphafold/alphafold/pdb70/pdb70 \
--param_path /data/scratch/alphafold/alphafold/params/params_model_1.npz \
--uniclust30_database_path /data/scratch/alphafold/alphafold/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
--bfd_database_path /data/scratch/alphafold/alphafold/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
--uniref90_database_path data/uniref90/uniref90.fasta \
--mgnify_database_path data/mgnify/mgy_clusters_2018_12.fa \
--pdb70_database_path data/pdb70/pdb70 \
--uniclust30_database_path data/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
--bfd_database_path data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
--jackhmmer_binary_path `which jackhmmer` \
--hhblits_binary_path `which hhblits` \
--hhsearch_binary_path `which hhsearch` \
--kalign_binary_path `which kalign`
--kalign_binary_path `which kalign` \
# --enable_workflow
15 changes: 0 additions & 15 deletions inference_with_flow.sh

This file was deleted.

Loading