From addbf7d10e9e8da63b7e274a888fadaa03f7e032 Mon Sep 17 00:00:00 2001
From: Fazzie <1240419984@qq.com>
Date: Mon, 22 Aug 2022 11:55:00 +0800
Subject: [PATCH] add inference script

---
 README.md                  | 16 ++++++++++++++++
 inference.sh               | 15 +++++++++++++++
 inference_with_flow.sh     | 15 +++++++++++++++
 inference_with_workflow.py |  1 -
 4 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100755 inference.sh
 create mode 100755 inference_with_flow.sh

diff --git a/README.md b/README.md
index cc8e617b..eb44b29b 100644
--- a/README.md
+++ b/README.md
@@ -92,6 +92,22 @@ python inference.py target.fasta data/pdb_mmcif/mmcif_files/ \
     --hhsearch_binary_path `which hhsearch` \
     --kalign_binary_path `which kalign`
 ```
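+Or run the script `./inference.sh`; you can change the parameters in the script (especially the database and parameter file paths) to match your environment.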
+```shell
+./inference.sh
+```
+
+#### Inference with data workflow
+AlphaFold's data pre-processing takes a lot of time, so we speed it up with a [ray](https://docs.ray.io/en/latest/workflows/concepts.html) workflow. To run the inference with the ray workflow, first install the required packages:
+```shell
+pip install ray pyarrow
+```
+
+Then you can run the script `./inference_with_flow.sh`:
+
+```shell
+./inference_with_flow.sh
+```
 
 ## Performance Benchmark
 
diff --git a/inference.sh b/inference.sh
new file mode 100755
index 00000000..637b58b4
--- /dev/null
+++ b/inference.sh
@@ -0,0 +1,15 @@
+rm -rf alignments/
+rm -rf *.pdb
+python inference.py target.fasta /data/scratch/alphafold/alphafold/pdb_mmcif/mmcif_files \
+    --output_dir ./ \
+    --gpus 2 \
+    --uniref90_database_path /data/scratch/alphafold/alphafold/uniref90/uniref90.fasta \
+    --mgnify_database_path /data/scratch/alphafold/alphafold/mgnify/mgy_clusters_2018_12.fa \
+    --pdb70_database_path /data/scratch/alphafold/alphafold/pdb70/pdb70 \
+    --param_path /data/scratch/alphafold/alphafold/params/params_model_1.npz \
+    --uniclust30_database_path /data/scratch/alphafold/alphafold/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
+    --bfd_database_path /data/scratch/alphafold/alphafold/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
+    --jackhmmer_binary_path `which jackhmmer` \
+    --hhblits_binary_path `which hhblits` \
+    --hhsearch_binary_path `which hhsearch` \
+    --kalign_binary_path `which kalign` 
\ No newline at end of file
diff --git a/inference_with_flow.sh b/inference_with_flow.sh
new file mode 100755
index 00000000..b752b3a7
--- /dev/null
+++ b/inference_with_flow.sh
@@ -0,0 +1,15 @@
+rm -rf alignments/
+rm -rf *.pdb
+python inference_with_workflow.py target.fasta /data/scratch/alphafold/alphafold/pdb_mmcif/mmcif_files \
+    --output_dir ./ \
+    --gpus 2 \
+    --uniref90_database_path /data/scratch/alphafold/alphafold/uniref90/uniref90.fasta \
+    --mgnify_database_path /data/scratch/alphafold/alphafold/mgnify/mgy_clusters_2018_12.fa \
+    --pdb70_database_path /data/scratch/alphafold/alphafold/pdb70/pdb70 \
+    --param_path /data/scratch/alphafold/alphafold/params/params_model_1.npz \
+    --uniclust30_database_path /data/scratch/alphafold/alphafold/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \
+    --bfd_database_path /data/scratch/alphafold/alphafold/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \
+    --jackhmmer_binary_path `which jackhmmer` \
+    --hhblits_binary_path `which hhblits` \
+    --hhsearch_binary_path `which hhsearch` \
+    --kalign_binary_path `which kalign` 
\ No newline at end of file
diff --git a/inference_with_workflow.py b/inference_with_workflow.py
index a27addc7..090cdba7 100644
--- a/inference_with_workflow.py
+++ b/inference_with_workflow.py
@@ -109,7 +109,6 @@ def inference_model(rank, world_size, result_q, batch, args):
 
 
 def main(args):
-    print("--------------- inference_with_workflow.py ---------------")
     config = model_config(args.model_name)
 
     template_featurizer = templates.TemplateHitFeaturizer(