Skip to content

Commit

Permalink
Merge version 1.3.1
Browse files Browse the repository at this point in the history
  • Loading branch information
hfswetton committed May 10, 2024
1 parent 5b30eb1 commit 226969f
Show file tree
Hide file tree
Showing 11 changed files with 1,074 additions and 164 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
# Demo output
correlations/
correlations.zip
correlations_*/
correlations_*.zip

### VisualStudioCode template
.vscode/*
Expand Down
1 change: 1 addition & 0 deletions .idea/PDBcor.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

661 changes: 661 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pdbcor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from . import clustering
from .cli import cli
from .cli import CLI
from .correlation_extraction import CorrelationExtraction

__all__ = ["CorrelationExtraction", "cli", "clustering"]
__all__ = ["CorrelationExtraction", "CLI", "clustering"]
4 changes: 2 additions & 2 deletions pdbcor/__main__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from . import cli
from . import CLI

cli()
CLI.run()
207 changes: 138 additions & 69 deletions pdbcor/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,78 +2,147 @@
import json
import os

from .console import console
from .correlation_extraction import CorrelationExtraction


def cli():
class CLI:
"""Provide a commandline interface to CorrelationExtraction for use as a standalone program."""
parser = argparse.ArgumentParser(
description="Correlation extraction from multistate protein bundles"
)
parser.add_argument("bundle", type=str, help="protein bundle file path")
parser.add_argument(
"--format",
type=str,
default="",
help="Input file format (or leave blank to determine from file extension)",
choices=["PDB", "mmCIF"],
)
parser.add_argument("--nstates", type=int, default=2, help="number of states")
parser.add_argument(
"--graphics", type=bool, default=True, help="generate graphical output"
)
parser.add_argument("--mode", type=str, default="backbone", help="correlation mode")
parser.add_argument(
"--therm_fluct",
type=float,
default=0.5,
help="Thermal fluctuation of distances in the protein bundle",
)
parser.add_argument(
"--therm_iter", type=int, default=5, help="Number of thermal simulations"
)
parser.add_argument("--loop_start", type=int, default=-1, help="Start of the loop")
parser.add_argument("--loop_end", type=int, default=-1, help="End of the loop")
args = parser.parse_args()

# create correlations folder
cor_path = os.path.join(os.path.dirname(args.bundle), "correlations")
os.makedirs(cor_path, exist_ok=True)

# write parameters of the correlation extraction
args_dict = vars(args)
args_path = os.path.join(cor_path, "args.json")
with open(args_path, "w") as outfile:
json.dump(args_dict, outfile)

# correlation mode
if args.mode == "backbone":
modes = ["backbone"]
elif args.mode == "sidechain":
modes = ["sidechain"]
elif args.mode == "combined":
modes = ["combined"]
elif args.mode == "full":
modes = ["backbone", "sidechain", "combined"]
else:
modes = []
parser.error("Mode has to be either backbone, sidechain, combined or full")

for mode in modes:
print(
"###############################################################################\n"
f"############################ {mode.upper()} CORRELATIONS ########################\n"
"###############################################################################"

def __init__(self, *args):
    """
    Build the CLI state from commandline arguments.

    The arguments are parsed, the shared console is switched to quiet mode
    if requested, one `CorrelationExtraction` is created per enabled
    correlation mode, and the parsed arguments are written as `args.json`
    into the `savePath` of the first extractor.

    If any positional `args` are given they are parsed instead of the
    process arguments (`sys.argv`).
    """
    arg_parser = self.new_arg_parser()
    # Passing None makes argparse fall back to sys.argv, exactly like
    # calling parse_args() without arguments.
    self.args = arg_parser.parse_args(args if args else None)
    opts = self.args

    console.set_quiet(quiet=opts.quiet)

    # "full" is shorthand for running all three individual modes.
    if opts.mode == "full":
        self.modes = ["backbone", "sidechain", "combined"]
    else:
        self.modes = [opts.mode]

    # Settings that are identical for every extractor.
    shared_settings = {
        # An empty --format means "not specified".
        "input_file_format": opts.format if len(opts.format) > 0 else None,
        "output_directory": opts.output,
        "nstates": opts.num_states,
        "therm_fluct": opts.therm_fluct,
        "therm_iter": opts.therm_iter,
        "loop_start": opts.loop[0],
        "loop_end": opts.loop[1],
    }
    self.extractors = [
        CorrelationExtraction(opts.bundle, mode=mode, **shared_settings)
        for mode in self.modes
    ]

    # Record the parameters of this run next to the results.
    params_file = os.path.join(self.extractors[0].savePath, "args.json")
    with open(params_file, "w") as handle:
        json.dump(vars(opts), handle)

def calculate_correlation(self):
    """
    Run the correlation extraction for each enabled mode.

    `self.extractors` holds one extractor per entry of `self.modes`, so
    iterating over the extractors alone visits every enabled mode; the mode
    name itself is not needed here.
    """
    with_graphics = self.args.graphics
    for extractor in self.extractors:
        extractor.calculate_correlation(graphics=with_graphics)

@classmethod
def run(cls):
    """Standalone entry point: instantiate the CLI without explicit arguments and run every extraction."""
    cls().calculate_correlation()

@staticmethod
def new_arg_parser():
"""
Create a new `argparse.ArgumentParser` instance for the CLI.

The parser takes one positional argument (`bundle`) and two option groups:
"input/output settings" (`--format`, `--output`, `--nographics`, `--quiet`)
and "correlation extraction settings" (`--num-states`, `--mode`,
`--therm-iter`, `--therm-fluct`, `--loop`).

Returns the configured parser; nothing is parsed here.
"""
parser = argparse.ArgumentParser(
description="Correlation extraction from multistate protein bundles"
)

# Positional argument: path of the structure bundle to analyse.
parser.add_argument("bundle", type=str, help="protein bundle file path")

io_args = parser.add_argument_group("input/output settings")
# The default is an empty string, i.e. "no explicit format given".
io_args.add_argument(
"-f",
"--format",
dest="format",
type=str,
default="",
help="input file format (default: determine from file extension)",
choices=["PDB", "mmCIF"],
)
io_args.add_argument(
"-o",
"--output",
dest="output",
type=str,
default="",
help='filename for output directory (default: "correlations_<name of structure file>")',
)
# NOTE(review): the lines from `print()` down to `loop_end=args.loop_end,`
# look like removed-side lines of the old `cli()` body carried in by the
# diff rendering -- confirm they are not part of this method.
print()
a = CorrelationExtraction(
args.bundle,
input_file_format=(args.format if len(args.format) > 0 else None),
mode=mode,
nstates=args.nstates,
therm_fluct=args.therm_fluct,
therm_iter=args.therm_iter,
loop_start=args.loop_start,
loop_end=args.loop_end,
# `store_false`: `graphics` is True unless --nographics is passed.
io_args.add_argument(
"--nographics",
dest="graphics",
action="store_false",
help="do not generate graphical output",
)
io_args.add_argument(
"-q",
"--quiet",
action="store_true",
help="quiet mode (only output errors to console)",
)

corr_args = parser.add_argument_group("correlation extraction settings")
corr_args.add_argument(
"-n",
"--num-states",
dest="num_states",
type=int,
default=2,
help="number of states (default: 2)",
)
# NOTE(review): the next line also looks like a removed-side line of the
# old `cli()` body -- confirm.
a.calculate_correlation(graphics=args.graphics)
# "full" is accepted here in addition to the three individual modes.
corr_args.add_argument(
"-m",
"--mode",
dest="mode",
type=str,
default="backbone",
help="correlation mode (default: backbone)",
choices=["backbone", "sidechain", "combined", "full"],
)
corr_args.add_argument(
"-i",
"--therm-iter",
dest="therm_iter",
type=int,
default=5,
help="number of thermal iterations to average for distance-based correlations (default: 5)",
)
corr_args.add_argument(
"--therm-fluct",
dest="therm_fluct",
type=float,
default=0.5,
help="thermal fluctuation of distances in the protein bundle "
"-> scaling factor for added random noise "
"(default: 0.5)",
)
# Exactly two integers (start, end); the default is [-1, -1] --
# presumably treated as "no loop" downstream, confirm in CorrelationExtraction.
corr_args.add_argument(
"--loop",
nargs=2,
type=int,
default=[-1, -1],
help="residue numbers of start & end of loop to exclude from analysis (default: none)",
)

return parser
9 changes: 4 additions & 5 deletions pdbcor/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from Bio.PDB import is_aa
from Bio.PDB.Structure import Structure
from Bio.PDB.Residue import Residue
from tqdm import tqdm

from .console import console


class DistanceCor:
Expand Down Expand Up @@ -146,8 +147,7 @@ def clust_cor(self, chain: str, resid: List[int]) -> Tuple[np.ndarray, List[int]
self.resid = resid
self.coord_matrix = self._residue_coords(chain)
clusters = []
print("DISTANCE CLUSTERING PROCESS:")
for i in tqdm(range(len(self.resid))):
for i in console.tqdm(range(len(self.resid)), desc="Calculating clusters"):
if self.resid[i] in self.banres:
clusters.append(self.resid[i])
clusters.extend(list(np.zeros(len(self.structure))))
Expand Down Expand Up @@ -284,8 +284,7 @@ def clust_cor(self, chain: str, resid: List[int]) -> Tuple[np.ndarray, List[int]
self.resid = resid
# collect all clusterings
clusters = []
print("ANGLE CLUSTERING PROCESS:")
for i in tqdm(range(len(self.resid))):
for i in console.tqdm(range(len(self.resid)), desc="Calculating clusters"):
clusters.append(self.resid[i])
clusters.extend(list(self._clust_aa(self.resid[i])))
return np.array(clusters).reshape(-1, self.nConf + 1), self.banres
55 changes: 55 additions & 0 deletions pdbcor/console.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import sys

from rich.console import Console as RichConsole
from rich.markdown import Markdown
from tqdm import tqdm as tqdm_base


class Console(RichConsole):
    """Rich console with a quiet switch, Markdown header helpers and a tqdm factory that honours the quiet switch."""

    def __init__(self, *args, **kwargs):
        """Initialise `quiet` to False, then forward all arguments to the rich console constructor."""
        self.quiet = False
        super().__init__(*args, **kwargs)

    def set_quiet(self, quiet=True):
        """Turn quiet mode on (the default) or off."""
        self.quiet = quiet

    def print(self, *args, **kwargs):
        """Print through rich, unless quiet mode is active (then do nothing)."""
        if self.quiet:
            return
        super().print(*args, **kwargs)

    def _h(self, text, level):
        """
        Print `text` as a Markdown header of the given level.

        `level` must be an int from 1 to 6 (`level = 1` gives an `h1`);
        anything else raises `ValueError`.
        """
        level_is_valid = isinstance(level, int) and 1 <= level <= 6
        if not level_is_valid:
            raise ValueError(f"Invalid header level: {level}")
        self.print(Markdown(f"{'#' * level} {text}"))

    def h1(self, text):
        """Print a level-1 header."""
        self._h(text, 1)

    def h2(self, text):
        """Print a level-2 header; the text is converted to upper case."""
        self._h(text.upper(), 2)

    def h3(self, text):
        """Print a level-3 header."""
        self._h(text, 3)

    def h4(self, text):
        """Print a level-4 header."""
        self._h(text, 4)

    def tqdm(self, *args, **kwargs):
        """Create a progress bar that is disabled while this console is quiet; arguments are passed on to tqdm."""
        return self._Tqdm(*args, outer=self, **kwargs)

    class _Tqdm(tqdm_base):
        """tqdm subclass applying the console's defaults and quiet mode."""

        # Defaults applied unless the caller overrides them. Note that
        # `sys.stdout` is looked up once, when this class body is executed.
        # Per the tqdm documentation, `disable=None` disables the bar on
        # non-TTY output.
        kwargs_default = {
            "file": sys.stdout,
            "disable": None,
        }

        def __init__(self, *args, outer=None, **kwargs):
            """
            Merge the defaults with the caller's keyword arguments.

            `outer` is the owning console; when it is quiet the bar is
            forcibly disabled regardless of any `disable` value passed in.
            """
            options = dict(self.kwargs_default)
            options.update(kwargs)
            if outer is not None and outer.quiet:
                options["disable"] = True

            super().__init__(*args, **options)


# Shared module-level instance; the other modules of the package import it
# via `from .console import console`.
console = Console()
Loading

0 comments on commit 226969f

Please sign in to comment.