# Run workflow using API

In [1]:
from pathlib import Path

from rich import print as pprint

In [2]:
import logging

# Log level for this notebook. Choose one of:
#   logging.WARNING - see only warnings
#   logging.INFO    - see sparql queries
#   logging.DEBUG   - see raw results
log_level = logging.WARNING
logging.basicConfig(level=log_level)

## Setup session directory

This directory stores files such as PDB files, along with a DuckDB database for metadata.

In [3]:
# Name of the directory (relative to the current working directory) used for this session.
SESSION_NAME = "session1"
session_dir = Path(SESSION_NAME)
session_dir

PosixPath('session1')

## Search Uniprot for structures

In [4]:
from protein_detective.uniprot import Query
from protein_detective.workflow import search_structures_in_uniprot

In [5]:
# Gene Ontology terms used as search criteria.
nucleus_go_term = "GO:0005634"  # Cellular component - Nucleus
dna_binding_go_term = "GO:0003677"  # Molecular function - DNA binding

# Criteria handed to the Uniprot structure search below.
query = Query(
    taxon_id="9606",
    reviewed=True,
    subcellular_location_uniprot="nucleus",
    subcellular_location_go=nucleus_go_term,
    molecular_function_go=dna_binding_go_term,
)

In [6]:
# Run the search (limit=80) against the session directory, then unpack the
# three counts it returns so they can be displayed.
search_counts = search_structures_in_uniprot(query, session_dir, limit=80)
nr_uniprot, nr_pdbe, nr_af = search_counts
nr_uniprot, nr_pdbe, nr_af



(55, 13, 55)

Use [database queries](#query-session-database) to see what was found.

## Fetch structures from PDBe and Alphafold found in Uniprot


In [4]:
from protein_detective.workflow import retrieve_structures

In [5]:
# Fetch the structure files for the session, then unpack the download
# location and the two counts that come back.
retrieved = retrieve_structures(session_dir)
download_path, nr_pdbs, nr_alphafolds = retrieved
download_path, nr_pdbs, nr_alphafolds

Downloading PDBe files: 100%|██████████| 13/13 [00:00<00:00, 54635.22it/s]
Fetching Alphafold summaries: 100%|██████████| 55/55 [00:02<00:00, 25.34it/s]
Downloading AlphaFold files: 100%|██████████| 110/110 [00:00<00:00, 66672.46it/s]


(PosixPath('session1/downloads'), 13, 55)

## Filter out AlphafoldDB structures with low confidence

The PDB files are then written, without those low-confidence residues, to a new directory.

In [12]:
from protein_detective.alphafold.density import DensityFilterQuery
from protein_detective.workflow import density_filter

In [13]:
# Settings for the density filter that is run in the next cell.
# NOTE(review): the meaning and units of these parameters are not visible in this
# notebook; the inline notes are assumptions to confirm against DensityFilterQuery.
dquery = DensityFilterQuery(
    confidence=70.0,  # presumably the confidence cut-off below which a residue counts as low confidence
    min_threshold=100,  # presumably a lower bound on what is kept - TODO confirm
    max_threshold=500,  # presumably an upper bound on what is kept - TODO confirm
)

In [14]:
# Apply the density filter to the session using the settings in `dquery`,
# then pretty-print the returned result object with rich.
result = density_filter(session_dir, dquery)
pprint(result)

DensityFilterSessionResult(density_filtered_dir=PosixPath('session1/density_filtered'),
                           nr_kept=24,
                           nr_discarded=31)


## Prune PDBe files
Prune the PDB files to only keep the first chain of the found Uniprot entries and rename that chain to A.

In [1]:
from protein_detective.workflow import prune_pdbs

In [7]:
# Prune the PDBe files of the session; the call hands back the output
# directory together with a count, both of which are displayed.
single_chain_dir, nr_passed = prune_pdbs(session_dir)
single_chain_dir, nr_passed

Saving single chain PDB files from PDBe: 100%|██████████| 13/13 [00:02<00:00,  5.76it/s]


(PosixPath('session1/single_chain'), 13)

## Run powerfit

In [4]:
from protein_detective.powerfit.options import PowerfitOptions
from protein_detective.workflow import powerfit_commands

In [5]:
# Map file that powerfit uses as its target (path is relative to the current working directory).
target_map = Path("../../powerfit-tutorial/ribosome-KsgA.map")

# Options handed to powerfit_commands when the powerfit commands are generated.
options = PowerfitOptions(
    target=target_map,
    resolution=13,
    angle=20,
    laplace=True,
)

In [6]:
# Build the powerfit commands for this session with the chosen options.
# Two values come back: the commands and the id of this powerfit run.
commands, powerfit_run_id = powerfit_commands(session_dir, options)

In [7]:
# Show the id of the powerfit run.
powerfit_run_id

10

In [8]:
# Replace every occurrence of the absolute working directory in the commands
# with ".", so they read (and run) relative to where this notebook is executed.
cwd = str(Path.cwd())
rel_commands = [
    command.replace(cwd, ".")
    for command in commands
]
pprint(rel_commands)

These commands should be run on a cluster. Here we will just run three of them.

In [9]:
# Run the generated command at index 6 in a shell (IPython `!` syntax with `{}` interpolation).
!{rel_commands[6]}

Target file read from:                                                          
[35m/home/verhoes/git/protein-detective/protein-detective/docs/session1/powerfit/10/[0m
[95mribosome-KsgA.map[0m                                                               
Target resolution: [1;36m13.00[0m                                                        
Initial shape of density: [1;36m128[0m [1;36m128[0m [1;36m128[0m                                           
Shape after trimming: [1;36m60[0m [1;36m73[0m [1;36m67[0m                                                  
Shape after extending: [1;36m60[0m [1;36m75[0m [1;36m70[0m                                                 
Template file read from:                                                        
[35m/home/verhoes/git/protein-detective/protein-detective/docs/session1/single_chain[0m
[35m/[0m[95mA8MT69_pdb4e45.ent_B2A.pdb[0m                                                     
Reading in rotations.              

In [10]:
# Run the first generated command in a shell (IPython `!` syntax with `{}` interpolation).
!{rel_commands[0]}

Target file read from:                                                          
[35m/home/verhoes/git/protein-detective/protein-detective/docs/session1/powerfit/10/[0m
[95mribosome-KsgA.map[0m                                                               
Target resolution: [1;36m13.00[0m                                                        
Initial shape of density: [1;36m128[0m [1;36m128[0m [1;36m128[0m                                           
Shape after trimming: [1;36m60[0m [1;36m73[0m [1;36m67[0m                                                  
Shape after extending: [1;36m60[0m [1;36m75[0m [1;36m70[0m                                                 
Template file read from:                                                        
[35m/home/verhoes/git/protein-detective/protein-detective/docs/session1/single_chain[0m
[35m/[0m[95mA8MT69_pdb4ne5.ent_B2A.pdb[0m                                                     
Reading in rotations.              

In [12]:
# Run the last generated command in a shell (IPython `!` syntax with `{}` interpolation).
!{rel_commands[-1]}

Target file read from:                                                          
[35m/home/verhoes/git/protein-detective/protein-detective/docs/session1/powerfit/10/[0m
[95mribosome-KsgA.map[0m                                                               
Target resolution: [1;36m13.00[0m                                                        
Initial shape of density: [1;36m128[0m [1;36m128[0m [1;36m128[0m                                           
Shape after trimming: [1;36m60[0m [1;36m73[0m [1;36m67[0m                                                  
Shape after extending: [1;36m60[0m [1;36m75[0m [1;36m70[0m                                                 
Template file read from:                                                        
[35m/home/verhoes/git/protein-detective/protein-detective/docs/session1/density_filt[0m
[35mered/[0m[95mAF-A8MT65-F1-model_v4.pdb[0m                                                  
Reading in rotations.              

Once all powerfit jobs are done, the results can be read into the session database.
Then the best structure fit can be reported.

In [4]:
from protein_detective.workflow import powerfit_report

In [5]:
# Produce the powerfit report for the session; the returned solutions are inspected in the cells below.
solutions = powerfit_report(session_dir)

In [6]:
# Number of solutions in the report.
len(solutions)

5238

In [8]:
# Pretty-print only the first five solutions rather than the whole collection.
pprint(solutions[:5])