hpcaitech · Fazziekey · Aug 23, 2022 · Aug 22, 2022
diff --git a/README.md b/README.md
@@ -90,25 +90,35 @@ python inference.py target.fasta data/pdb_mmcif/mmcif_files/ \
     --jackhmmer_binary_path `which jackhmmer` \
     --hhblits_binary_path `which hhblits` \
     --hhsearch_binary_path `which hhsearch` \
-    --kalign_binary_path `which kalign`
+    --kalign_binary_path `which kalign`  
 ```
-or run the script `./inference.sh`, you can change
+or run the script `./inference.sh`; you can change the parameters in the script
 ```shell
 ./inference.sh
 ```
 
 #### inference with data workflow
-alphafold's data pre-processing takes a lot of time, so we speed up the data pre-process by [ray](https://docs.ray.io/en/latest/workflows/concepts.html) workflow, to run the intference with ray workflow, you should install the package by
+alphafold's data pre-processing takes a lot of time, so we speed up the data pre-processing with a [ray](https://docs.ray.io/en/latest/workflows/concepts.html) workflow. To run the inference with the ray workflow, install the packages below and add the parameter `--enable_workflow` to the command line or to the shell script `./inference.sh`
 ```shell
 pip install ray pyarrow
 ```
-
-Than you can run by the script `./inference_with_workflow.sh`
-
 ```shell
-./inference_with_flow.sh
+python inference.py target.fasta data/pdb_mmcif/mmcif_files/ \
+    --output_dir ./ \
+    --gpus 2 \
+    --uniref90_database_path data/uniref90/uniref90.fasta \
+    --mgnify_database_path data/mgnify/mgy_clusters_2018_12.fa \
+    --pdb70_database_path data/pdb70/pdb70 \
+    --uniclust30_database_path data/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
+    --bfd_database_path data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
+    --jackhmmer_binary_path `which jackhmmer` \
+    --hhblits_binary_path `which hhblits` \
+    --hhsearch_binary_path `which hhsearch` \
+    --kalign_binary_path `which kalign`  \
+    --enable_workflow 
 ```
 
+
 ## Performance Benchmark
 
 We have included a performance benchmark script in `./benchmark`. You can benchmark the performance of Evoformer using different settings.

diff --git a/fastfold/workflow/factory/__init__.py b/fastfold/workflow/factory/__init__.py
@@ -2,6 +2,4 @@
 from .hhblits import HHBlitsFactory
 from .hhsearch import HHSearchFactory
 from .jackhmmer import JackHmmerFactory
-from .alphafold import AlphaFoldFactory
-from .amber_relax import AmberRelaxFactory
 from .hhfilter import HHfilterFactory
diff --git a/fastfold/workflow/factory/alphafold.py b/fastfold/workflow/factory/alphafold.py
diff --git a/fastfold/workflow/factory/amber_relax.py b/fastfold/workflow/factory/amber_relax.py
diff --git a/inference.py b/inference.py
@@ -31,6 +31,7 @@
 from fastfold.config import model_config
 from fastfold.model.fastnn import set_chunk_size
 from fastfold.data import data_pipeline, feature_pipeline, templates
+from fastfold.workflow.template import FastFoldDataWorkFlow
 from fastfold.utils import inject_fastnn
 from fastfold.utils.import_weights import import_jax_weights_
 from fastfold.utils.tensor_utils import tensor_tree_map
@@ -73,7 +74,7 @@ def add_data_args(parser: argparse.ArgumentParser):
     )
     parser.add_argument('--obsolete_pdbs_path', type=str, default=None)
     parser.add_argument('--release_dates_path', type=str, default=None)
-
+    parser.add_argument('--enable_workflow', default=False, action='store_true', help='run inference with ray workflow or not')
 
 def inference_model(rank, world_size, result_q, batch, args):
     os.environ['RANK'] = str(rank)
@@ -158,20 +159,37 @@ def main(args):
         if (args.use_precomputed_alignments is None):
             if not os.path.exists(local_alignment_dir):
                 os.makedirs(local_alignment_dir)
-
-            alignment_runner = data_pipeline.AlignmentRunner(
-                jackhmmer_binary_path=args.jackhmmer_binary_path,
-                hhblits_binary_path=args.hhblits_binary_path,
-                hhsearch_binary_path=args.hhsearch_binary_path,
-                uniref90_database_path=args.uniref90_database_path,
-                mgnify_database_path=args.mgnify_database_path,
-                bfd_database_path=args.bfd_database_path,
-                uniclust30_database_path=args.uniclust30_database_path,
-                pdb70_database_path=args.pdb70_database_path,
-                use_small_bfd=use_small_bfd,
-                no_cpus=args.cpus,
-            )
-            alignment_runner.run(fasta_path, local_alignment_dir)
+            if args.enable_workflow:
+                print("Running alignment with ray workflow...")
+                alignment_data_workflow_runner = FastFoldDataWorkFlow(
+                    jackhmmer_binary_path=args.jackhmmer_binary_path,
+                    hhblits_binary_path=args.hhblits_binary_path,
+                    hhsearch_binary_path=args.hhsearch_binary_path,
+                    uniref90_database_path=args.uniref90_database_path,
+                    mgnify_database_path=args.mgnify_database_path,
+                    bfd_database_path=args.bfd_database_path,
+                    uniclust30_database_path=args.uniclust30_database_path,
+                    pdb70_database_path=args.pdb70_database_path,
+                    use_small_bfd=use_small_bfd,
+                    no_cpus=args.cpus,
+                    )
+                t = time.perf_counter()
+                alignment_data_workflow_runner.run(fasta_path, output_dir=output_dir_base, alignment_dir=local_alignment_dir)
+                print(f"Alignment data workflow time: {time.perf_counter() - t}")
+            else:
+                alignment_runner = data_pipeline.AlignmentRunner(
+                    jackhmmer_binary_path=args.jackhmmer_binary_path,
+                    hhblits_binary_path=args.hhblits_binary_path,
+                    hhsearch_binary_path=args.hhsearch_binary_path,
+                    uniref90_database_path=args.uniref90_database_path,
+                    mgnify_database_path=args.mgnify_database_path,
+                    bfd_database_path=args.bfd_database_path,
+                    uniclust30_database_path=args.uniclust30_database_path,
+                    pdb70_database_path=args.pdb70_database_path,
+                    use_small_bfd=use_small_bfd,
+                    no_cpus=args.cpus,
+                )
+                alignment_runner.run(fasta_path, local_alignment_dir)
 
         feature_dict = data_processor.process_fasta(fasta_path=fasta_path,
                                                     alignment_dir=local_alignment_dir)

diff --git a/inference.sh b/inference.sh
@@ -1,15 +1,16 @@
 rm -rf alignments/
 rm -rf *.pdb
+
 python inference.py target.fasta /data/scratch/alphafold/alphafold/pdb_mmcif/mmcif_files \
     --output_dir ./ \
     --gpus 2 \
-    --uniref90_database_path /data/scratch/alphafold/alphafold/uniref90/uniref90.fasta \
-    --mgnify_database_path /data/scratch/alphafold/alphafold/mgnify/mgy_clusters_2018_12.fa \
-    --pdb70_database_path /data/scratch/alphafold/alphafold/pdb70/pdb70 \
-    --param_path /data/scratch/alphafold/alphafold/params/params_model_1.npz \
-    --uniclust30_database_path /data/scratch/alphafold/alphafold/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
-    --bfd_database_path /data/scratch/alphafold/alphafold/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
+    --uniref90_database_path data/uniref90/uniref90.fasta \
+    --mgnify_database_path data/mgnify/mgy_clusters_2018_12.fa \
+    --pdb70_database_path data/pdb70/pdb70 \
+    --uniclust30_database_path data/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
+    --bfd_database_path data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
     --jackhmmer_binary_path `which jackhmmer` \
     --hhblits_binary_path `which hhblits` \
     --hhsearch_binary_path `which hhsearch` \
-    --kalign_binary_path `which kalign` 
+    --kalign_binary_path `which kalign`  \
+    # --enable_workflow 
diff --git a/inference_with_flow.sh b/inference_with_flow.sh