In [1]:
%load_ext lab_black
# python internal
import collections
import copy
import gc
from glob import glob
import h5py
import itertools
import os
import random
import re
import socket
import shutil
import subprocess
import sys

# conda/pip
import dask
import graphviz
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy
import seaborn as sns
from tqdm import tqdm

# special packages on the DIGS
import py3Dmol
import pymol
import pyrosetta

# notebook magic
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Record where this notebook is running: working directory first, then host name.
print(os.getcwd(), socket.gethostname(), sep="\n")

/mnt/home/cdemakis/pymatcher_test
dig120


In [3]:
import pymatcher as pm
from pyrosetta.distributed import requires_init
from pyrosetta.distributed.packed_pose.core import PackedPose


@requires_init
def wrapper(packed_pose_in: PackedPose, **kwargs):
    """Run ``pm.matcher`` from inside the task's output directory.

    Args:
        packed_pose_in: Accepted but not used by this function.
        **kwargs: Task keyword arguments. ``kwargs["-out_path"]`` must be
            present; it is printed and then made the current working
            directory. The full ``kwargs`` mapping is forwarded unchanged
            to ``pm.matcher``.

    Returns:
        None.

    Raises:
        KeyError: If ``"-out_path"`` is missing from ``kwargs``.
    """
    out_dir = kwargs["-out_path"]
    print(out_dir)
    # The working directory is changed before the matcher runs and is not
    # restored afterwards.
    os.chdir(out_dir)
    pm.matcher(**kwargs)

In [4]:
from dask.distributed import Client
from dask_jobqueue import SLURMCluster
import logging
import pwd
from pyrosetta.distributed.cluster.core import PyRosettaCluster


# Show an ssh port-forwarding command built from the current user and host
# (local port 8000 -> port 8787 on this host).
print(
    "run the following from your local terminal:",
    f"ssh -L 8000:localhost:8787 {pwd.getpwuid(os.getuid()).pw_name}@{socket.gethostname()}",
    sep="\n",
)


def create_tasks(selected, options):
    """Yield one task dictionary per entry in ``selected``.

    Args:
        selected: Iterable of name stems. Each stem ``s`` selects the
            position file ``/home/cdemakis/pymatcher_test/<s>.pos`` and the
            output directory ``/home/cdemakis/pymatcher_test/<s>``.
        options: Extra flags stored under the ``"extra_options"`` key. The
            object is passed through as-is (not copied), so every yielded
            task refers to the same ``options`` object.

    Yields:
        dict: A task with the keys ``"options"``, ``"extra_options"``,
        ``"-s"``, ``"-lig_name"`` and ``"-out_path"``. The ``"-s"`` input
        structure and the ``"-lig_name"`` value are the same for every task.
    """
    # Iterate over the argument rather than the module-level ``scafs`` so the
    # caller's selection is honoured and the function works on its own.
    for scaf in selected:
        yield {
            "options": f"-match:scaffold_active_site_residues /home/cdemakis/pymatcher_test/{scaf}.pos",
            "extra_options": options,
            "-s": "/home/hwyeh/denovo_luciferase/rifgen/BDZ_rifdock/scaffolds_5k/5kinp_0001.pdb",
            "-lig_name": "dtz",
            "-out_path": f"/home/cdemakis/pymatcher_test/{scaf}",
        }


logging.basicConfig(level=logging.INFO)

# Name stems used to build the per-task .pos file path and output directory.
scafs = ["5kinp_0001_1", "5kinp_0001_2"]

# Command-line style flags, kept as a set of strings.
options = {
    "-out:level 300",
    "-in:file:extra_res_fa /home/cdemakis/pymatcher_test/dtz.params",
    "-geometric_constraint_file /home/cdemakis/pymatcher_test/luc.cst",
    "-match::dynamic_grid_refinement true",
    "-match::enumerate_ligand_rotamers true",
    "-match::consolidate_matches true",
    "-match::output_matches_per_group 10",
    "-in:ignore_unrecognized_res",
    "-ex1",
    "-ex2",
    "-chemical:exclude_patches D_AA",
    "-match::euclid_bin_size 0.5",
    "-match::euler_bin_size 5.0",
}

# Results and scratch files both go to the directory the notebook runs in.
output_path = os.getcwd()

# NOTE(review): `client_opts` is the same set object as `options`, not a copy,
# so the seed flag added here is also present in `options` from now on.
# Confirm that sharing is intended.
client_opts = options
client_opts.add("-run:constant_seed 1")

# Initialise PyRosetta in this process with all flags joined by spaces; the
# flags come from a set, so their order in the string is not fixed.
pyrosetta.distributed.init(" ".join(client_opts))

if __name__ == "__main__":
    # Keyword arguments for the SLURM-backed Dask cluster.
    slurm_settings = dict(
        cores=1,
        processes=1,
        job_cpu=1,
        memory="8GB",
        queue="short",
        walltime="02:00:00",
        death_timeout=120,
        local_directory="$TMPDIR/dask",
        log_directory="/mnt/home/cdemakis/logs/slurm_logs",
        extra=["--lifetime", "2h", "--lifetime-stagger", "4m"],
    )

    # Adaptive scaling is currently pinned to a single worker
    # (minimum == maximum == 1).  TODO: raise `maximum` for larger runs.
    adapt_settings = dict(
        minimum=1,
        maximum=1,
        # Number of consecutive times a worker must be suggested for removal
        # before it is removed.
        wait_count=999,
        # Time between checks.
        interval="5s",
    )

    # The cluster is a context manager, so it is shut down on exit.
    with SLURMCluster(**slurm_settings) as cluster:
        # Print the generated job script for inspection.
        print(cluster.job_script())
        cluster.adapt(**adapt_settings)

        # The client talking to the cluster is also managed by a context.
        with Client(cluster) as client:
            print(client)
            PyRosettaCluster(
                tasks=create_tasks(scafs, options),
                client=client,
                scratch_dir=output_path,
                output_path=output_path,
                sha1=None,  # ignore git status for non-production runs
            ).distribute(protocols=[wrapper])

INFO:pyrosetta.rosetta:Found rosetta database at: /home/cdemakis/.conda/envs/from_phil/lib/python3.8/site-packages/pyrosetta/database; using it....
INFO:pyrosetta.rosetta:PyRosetta-4 2021 [Rosetta PyRosetta4.conda.linux.cxx11thread.serialization.CentOS.python38.Release 2021.27+release.7ce64884a77d606b7b667c363527acc846541030 2021-07-09T18:10:05] retrieved from: http://www.pyrosetta.org
(C) Copyright Rosetta Commons Member Institutions. Created in JHU by Sergey Lyskov and PyRosetta Team.


https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html
https://conda.io/activation

run the following from your local terminal:
ssh -L 8000:localhost:8787 cdemakis@dig36
#!/usr/bin/env bash

#SBATCH -J dask-worker
#SBATCH -e /mnt/home/cdemakis/logs/slurm_logs/dask-worker-%J.err
#SBATCH -o /mnt/home/cdemakis/logs/slurm_logs/dask-worker-%J.out
#SBATCH -p short
#SBATCH -n 1
#SBATCH --cpus-per-task=1
#SBATCH --mem=8G
#SBATCH -t 02:00:00

JOB_ID=${SLURM_JOB_ID%;*}

/home/cdemakis/.conda/envs/from_phil/bin/python -m distributed.cli.dask_worker tcp://172.16.131.66:37803 --nthreads 1 --memory-limit 7.45GiB --name name --nanny --death-timeout 120 --local-directory $TMPDIR/dask --lifetime 2h --lifetime-stagger 4m

<Client: 'tcp://172.16.131.66:37803' processes=0 threads=0, memory=0 B>


`conda env export --prefix /home/cdemakis/.conda/envs/from_phil > environment.yml`
to reproduce this simulation later.


In [4]:
# Scratch check: print every element of np.arange(1, 2); the recorded output
# below is the single value 1.
for a in np.arange(1, 2):
    print(a)

1
