From addbf7d10e9e8da63b7e274a888fadaa03f7e032 Mon Sep 17 00:00:00 2001 From: Fazzie <1240419984@qq.com> Date: Mon, 22 Aug 2022 11:55:00 +0800 Subject: [PATCH] add inference script --- README.md | 16 ++++++++++++++++ inference.sh | 15 +++++++++++++++ inference_with_flow.sh | 15 +++++++++++++++ inference_with_workflow.py | 1 - 4 files changed, 46 insertions(+), 1 deletion(-) create mode 100755 inference.sh create mode 100755 inference_with_flow.sh diff --git a/README.md b/README.md index cc8e617b..eb44b29b 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,22 @@ python inference.py target.fasta data/pdb_mmcif/mmcif_files/ \ --hhsearch_binary_path `which hhsearch` \ --kalign_binary_path `which kalign` ``` +or run the script `./inference.sh`; you can change the database paths and other arguments in the script to match your environment +```shell +./inference.sh +``` + +#### Inference with data workflow +AlphaFold's data pre-processing takes a lot of time, so we speed it up with a [ray](https://docs.ray.io/en/latest/workflows/concepts.html) workflow. To run the inference with the ray workflow, first install the required packages: +```shell +pip install ray pyarrow +``` + +Then you can run the script `./inference_with_flow.sh`: + +```shell +./inference_with_flow.sh +``` ## Performance Benchmark diff --git a/inference.sh b/inference.sh new file mode 100755 index 00000000..637b58b4 --- /dev/null +++ b/inference.sh @@ -0,0 +1,15 @@ +rm -rf alignments/ +rm -rf *.pdb +python inference.py target.fasta /data/scratch/alphafold/alphafold/pdb_mmcif/mmcif_files \ + --output_dir ./ \ + --gpus 2 \ + --uniref90_database_path /data/scratch/alphafold/alphafold/uniref90/uniref90.fasta \ + --mgnify_database_path /data/scratch/alphafold/alphafold/mgnify/mgy_clusters_2018_12.fa \ + --pdb70_database_path /data/scratch/alphafold/alphafold/pdb70/pdb70 \ + --param_path /data/scratch/alphafold/alphafold/params/params_model_1.npz \ + --uniclust30_database_path /data/scratch/alphafold/alphafold/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \ + 
--bfd_database_path /data/scratch/alphafold/alphafold/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \ + --jackhmmer_binary_path `which jackhmmer` \ + --hhblits_binary_path `which hhblits` \ + --hhsearch_binary_path `which hhsearch` \ + --kalign_binary_path `which kalign` \ No newline at end of file diff --git a/inference_with_flow.sh b/inference_with_flow.sh new file mode 100755 index 00000000..b752b3a7 --- /dev/null +++ b/inference_with_flow.sh @@ -0,0 +1,15 @@ +rm -rf alignments/ +rm -rf *.pdb +python inference_with_workflow.py target.fasta /data/scratch/alphafold/alphafold/pdb_mmcif/mmcif_files \ + --output_dir ./ \ + --gpus 2 \ + --uniref90_database_path /data/scratch/alphafold/alphafold/uniref90/uniref90.fasta \ + --mgnify_database_path /data/scratch/alphafold/alphafold/mgnify/mgy_clusters_2018_12.fa \ + --pdb70_database_path /data/scratch/alphafold/alphafold/pdb70/pdb70 \ + --param_path /data/scratch/alphafold/alphafold/params/params_model_1.npz \ + --uniclust30_database_path /data/scratch/alphafold/alphafold/uniclust30/uniclust30_2018_08/uniclust30_2018_08 \ + --bfd_database_path /data/scratch/alphafold/alphafold/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \ + --jackhmmer_binary_path `which jackhmmer` \ + --hhblits_binary_path `which hhblits` \ + --hhsearch_binary_path `which hhsearch` \ + --kalign_binary_path `which kalign` \ No newline at end of file diff --git a/inference_with_workflow.py b/inference_with_workflow.py index a27addc7..090cdba7 100644 --- a/inference_with_workflow.py +++ b/inference_with_workflow.py @@ -109,7 +109,6 @@ def inference_model(rank, world_size, result_q, batch, args): def main(args): - print("--------------- inference_with_workflow.py ---------------") config = model_config(args.model_name) template_featurizer = templates.TemplateHitFeaturizer(