From c24ad5511e11dfbe96975c157600e199573eff89 Mon Sep 17 00:00:00 2001 From: ahmedselim2017 Date: Tue, 9 Apr 2024 18:10:14 +0300 Subject: [PATCH 1/4] Added pdb_selb.py --- pdbtools/pdb_selb.py | 223 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 223 insertions(+) create mode 100644 pdbtools/pdb_selb.py diff --git a/pdbtools/pdb_selb.py b/pdbtools/pdb_selb.py new file mode 100644 index 0000000..fce6144 --- /dev/null +++ b/pdbtools/pdb_selb.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2024 Ahmed Selim Üzüm +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Selects atoms according to their B-factor value. + +Valid Operations: + [g] - greater than + [l] - less than + [e] - equal to + [n] - not equal to + +Usage: + python pdb_selb.py - -treshold + +Example: + python pdb_selb.py -g -10 1CTF.pdb # selects atoms with B-factor greater than 10 + python pdb_selb.py -l -10 1CTF.pdb # selects atoms with B-factor less than 10 + python pdb_selb.py -ge -10 1CTF.pdb # selects atoms with B-factor greater or equal to 10 + python pdb_selb.py -le -10 1CTF.pdb # selects atoms with B-factor less or equal to 10 + python pdb_selb.py -e -10 1CTF.pdb # selects atoms with B-factor equal to 10 + python pdb_selb.py -n -10 1CTF.pdb # selects atoms with B-factor not equal to 10 + +This program is part of the `pdb-tools` suite of utilities and should not be +distributed isolatedly. The `pdb-tools` were created to quickly manipulate PDB +files using the terminal, and can be used sequentially, with one tool streaming +data to another. They are based on old FORTRAN77 code that was taking too much +effort to maintain and compile. RIP. +""" +import os +import sys + +__author__ = "Ahmed Selim Üzüm" +__email__ = "ahmedselimuzum@gmail.com" + + +def check_input(args): + """Checks whether to read from stdin/file and validates user input/options.""" + + # Defaults + operators = {"g", "l", "e", "n"} + operator = "" + operator_treshold = None + fh = sys.stdin # file handle + + if len(args) <= 1: + # Reading from pipe with default option + if sys.stdin.isatty(): + sys.stderr.write(__doc__) + sys.exit(1) + + elif len(args) == 2: + # Two options: Options & Pipe + if args[0].startswith("-"): + operator = args[0][1:] + if args[1].startswith("-"): + operator_treshold = args[1][1:] + + if sys.stdin.isatty(): # ensure the PDB data is streamed in + emsg = "ERROR!! No data to process!\n" + sys.stderr.write(emsg) + sys.stderr.write(__doc__) + sys.exit(1) + + elif len(args) == 3: + # Three options: options & File + if not args[0].startswith("-"): + emsg = "ERROR! First argument is not an option: '{}'\n" + sys.stderr.write(emsg.format(args[0])) + sys.stderr.write(__doc__) + sys.exit(1) + elif not args[1].startswith("-"): + emsg = "ERROR! Second argument is not an option: '{}'\n" + sys.stderr.write(emsg.format(args[0])) + sys.stderr.write(__doc__) + sys.exit(1) + + if not os.path.isfile(args[2]): + emsg = "ERROR!! File not found or not readable: '{}'\n" + sys.stderr.write(emsg.format(args[1])) + sys.stderr.write(__doc__) + sys.exit(1) + + operator = args[0][1:] + operator_treshold = args[1][1:] + fh = open(args[2], "r") + + else: # Whatever ... + sys.stderr.write(__doc__) + sys.exit(1) + + # Validate option + try: + assert set(operator) <= operators + + if operator == "": + emsg = "ERROR!! You did not provided an operator." + sys.stderr.write(emsg.format()) + sys.exit(1) + elif operator_treshold == None: + emsg = "ERROR!! You did not provided an operator treshold." + sys.stderr.write(emsg.format()) + sys.exit(1) + + operator_treshold = float(operator_treshold) + except AssertionError: + emsg = "ERROR!! You provided an invalid operator: '{}'" + sys.stderr.write(emsg.format(operator)) + sys.exit(1) + except ValueError: + emsg = "ERROR!! You provided an invalid b-factor treshold value: '{}'" + sys.stderr.write(emsg.format(operator_treshold)) + sys.exit(1) + + return (fh, operator, operator_treshold) + + +def pad_line(line): + """Pad line to 80 characters in case it is shorter.""" + size_of_line = len(line) + if size_of_line < 80: + padding = 80 - size_of_line + 1 + line = line.strip("\n") + " " * padding + "\n" + return line[:81] # 80 + newline character + + +def run(fhandle, operator, operator_treshold): + """ + Set the temperature column in all ATOM/HETATM records to a given value. + + This function is a generator. + + Parameters + ---------- + fhandle : a line-by-line iterator of the original PDB file. + + bfactor : float + The desired bfactor. + + Yields + ------ + str (line-by-line) + The modified (or not) PDB line. + """ + _pad_line = pad_line + records = ("ATOM", "HETATM") + for line in fhandle: + if line.startswith(records): + line = _pad_line(line) + bfactor = float(line[60:66]) + + for o in operator: + if o == "g": + if bfactor > operator_treshold: + yield line + continue + elif o == "l": + if bfactor < operator_treshold: + yield line + continue + elif o == "e": + if bfactor == operator_treshold: + yield line + continue + elif o == "n": + if bfactor != operator_treshold: + yield line + continue + else: + yield line + + +alter_bfactor = run + + +def main(): + + # Check Input + pdbfh, operator, operator_treshold = check_input(sys.argv[1:]) + + # Do the job + new_pdb = run(pdbfh, operator, operator_treshold) + + # Output results + try: + _buffer = [] + _buffer_size = 5000 # write N lines at a time + for lineno, line in enumerate(new_pdb): + if not (lineno % _buffer_size): + sys.stdout.write("".join(_buffer)) + _buffer = [] + _buffer.append(line) + + sys.stdout.write("".join(_buffer)) + sys.stdout.flush() + except IOError: + # This is here to catch Broken Pipes + # for example to use 'head' or 'tail' without + # the error message showing up + pass + + # last line of the script + # Close file handle even if it is sys.stdin, no problem here. + pdbfh.close() + sys.exit(0) + + +if __name__ == "__main__": + main() From 64f8d4f4de749acb9c5566c6068e97a608b0b15d Mon Sep 17 00:00:00 2001 From: ahmedselim2017 Date: Tue, 9 Apr 2024 18:10:25 +0300 Subject: [PATCH 2/4] Added tests for pdb_selb.py --- tests/test_pdb_selb.py | 215 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 tests/test_pdb_selb.py diff --git a/tests/test_pdb_selb.py b/tests/test_pdb_selb.py new file mode 100644 index 0000000..2cc0c51 --- /dev/null +++ b/tests/test_pdb_selb.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2024 Ahmed Selim Üzüm +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Unit Tests for `pdb_selb`. +""" + +import os +import sys +import unittest + +from config import data_dir +from utils import OutputCapture + + +class TestTool(unittest.TestCase): + """ + Generic class for testing tools. + """ + + def setUp(self): + # Dynamically import the module + name = "pdbtools.pdb_selb" + self.module = __import__(name, fromlist=[""]) + + def exec_module(self): + """ + Execs module. + """ + + with OutputCapture() as output: + try: + self.module.main() + except SystemExit as e: + self.retcode = e.code + + self.stdout = output.stdout + self.stderr = output.stderr + + return + + def test_operation_greater_than(self): + """$ pdb_selb -g -10 data/dummy.pdb""" + + sys.argv = ["", "-g", "-10", os.path.join(data_dir, "dummy.pdb")] + + self.exec_module() + + self.assertEqual(self.retcode, 0) + self.assertEqual(len(self.stderr), 0) + + records = ("ATOM", "HETATM") + bfactors = [l[60:66] for l in self.stdout if l.startswith(records)] + bfactor_results = [float(b) > 10 for b in bfactors] + self.assertNotIn(False, bfactor_results) + + def test_operation_less_than(self): + """$ pdb_selb -l -10 data/dummy.pdb""" + + sys.argv = ["", "-l", "-10", os.path.join(data_dir, "dummy.pdb")] + + self.exec_module() + + self.assertEqual(self.retcode, 0) + self.assertEqual(len(self.stderr), 0) + + records = ("ATOM", "HETATM") + bfactors = [l[60:66] for l in self.stdout if l.startswith(records)] + bfactor_results = [float(b) < 10 for b in bfactors] + self.assertNotIn(False, bfactor_results) + + def test_operation_equal_to(self): + """$ pdb_selb -e -10 data/dummy.pdb""" + + sys.argv = ["", "-e", "-10", os.path.join(data_dir, "dummy.pdb")] + + self.exec_module() + + self.assertEqual(self.retcode, 0) + self.assertEqual(len(self.stderr), 0) + + records = ("ATOM", "HETATM") + bfactors = [l[60:66] for l in self.stdout if l.startswith(records)] + bfactor_results = [float(b) == 10 for b in bfactors] + self.assertNotIn(False, bfactor_results) + + def test_operation_not_equal_to(self): + """$ pdb_selb -n -10 data/dummy.pdb""" + + sys.argv = ["", "-n", "-10", os.path.join(data_dir, "dummy.pdb")] + + self.exec_module() + + self.assertEqual(self.retcode, 0) + self.assertEqual(len(self.stderr), 0) + + records = ("ATOM", "HETATM") + bfactors = [l[60:66] for l in self.stdout if l.startswith(records)] + bfactor_results = [float(b) != 10 for b in bfactors] + self.assertNotIn(False, bfactor_results) + + def test_operation_greater_or_equal_to(self): + """$ pdb_selb -ge -10 data/dummy.pdb""" + + sys.argv = ["", "-ge", "-10", os.path.join(data_dir, "dummy.pdb")] + + self.exec_module() + + self.assertEqual(self.retcode, 0) + self.assertEqual(len(self.stderr), 0) + + records = ("ATOM", "HETATM") + bfactors = [l[60:66] for l in self.stdout if l.startswith(records)] + bfactor_results = [float(b) >= 10 for b in bfactors] + self.assertNotIn(False, bfactor_results) + + def test_operation_less_or_equal_to(self): + """$ pdb_selb -le -10 data/dummy.pdb""" + + sys.argv = ["", "-le", "-10", os.path.join(data_dir, "dummy.pdb")] + + self.exec_module() + + self.assertEqual(self.retcode, 0) + self.assertEqual(len(self.stderr), 0) + + records = ("ATOM", "HETATM") + bfactors = [l[60:66] for l in self.stdout if l.startswith(records)] + bfactor_results = [float(b) <= 10 for b in bfactors] + self.assertNotIn(False, bfactor_results) + + def test_file_not_found(self): + """$ pdb_selb not_existing.pdb""" + + afile = os.path.join(data_dir, "not_existing.pdb") + sys.argv = ["", "-g", "-10", afile] + + self.exec_module() + + self.assertEqual(self.retcode, 1) # exit code is 1 (error) + self.assertEqual(len(self.stdout), 0) # nothing written to stdout + self.assertEqual( + self.stderr[0][:22], "ERROR!! File not found" + ) # proper error message + + @unittest.skipIf(os.getenv("SKIP_TTY_TESTS"), "skip on GHA - no TTY") + def test_file_missing(self): + """$ pdb_selb -g -10""" + + sys.argv = ["", "-g", "-10"] + + self.exec_module() + + self.assertEqual(self.retcode, 1) + self.assertEqual(len(self.stdout), 0) # no output + self.assertEqual(self.stderr[0], "ERROR!! No data to process!") + + @unittest.skipIf(os.getenv("SKIP_TTY_TESTS"), "skip on GHA - no TTY") + def test_helptext(self): + """$ pdb_selb""" + + sys.argv = [""] + + self.exec_module() + + self.assertEqual(self.retcode, 1) # ensure the program exited gracefully. + self.assertEqual(len(self.stdout), 0) # no output + self.assertEqual(self.stderr, self.module.__doc__.split("\n")[:-1]) + + def test_invalid_operator(self): + """$ pdb_selb -A data/dummy.pdb""" + + sys.argv = ["", "-A", os.path.join(data_dir, "dummy.pdb")] + + self.exec_module() + + self.assertEqual(self.retcode, 1) + self.assertEqual(len(self.stdout), 0) + self.assertEqual( + self.stderr[0][:47], "ERROR!! You provided an invalid operator: 'A'" + ) + + def test_not_an_option(self): + """$ pdb_selb 20 data/dummy.pdb""" + + sys.argv = ["", "20", os.path.join(data_dir, "dummy.pdb")] + + self.exec_module() + + self.assertEqual(self.retcode, 1) + self.assertEqual(len(self.stdout), 0) + self.assertEqual(self.stderr[0], "ERROR!! You did not provided an operator.") + + +if __name__ == "__main__": + from config import test_dir + + mpath = os.path.abspath(os.path.join(test_dir, "..")) + sys.path.insert(0, mpath) # so we load dev files before any installation + + unittest.main() From ef2e1ebf02bb428f9f98c7e4c5c576122099987e Mon Sep 17 00:00:00 2001 From: ahmedselim2017 Date: Tue, 9 Apr 2024 18:10:33 +0300 Subject: [PATCH 3/4] Added documentation for pdb_selb.py --- docs/index.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/docs/index.md b/docs/index.md index b855259..1100018 100644 --- a/docs/index.md +++ b/docs/index.md @@ -587,6 +587,29 @@ Example:
+pdb_selb

Selects atoms according to their B-factor value.

+ +Valid Operations: + [g] - greater than + [l] - less than + [e] - equal to + [n] - not equal to + +Usage: + python pdb_selb.py -<operators> -treshold <pdb file> + +Example: + python pdb_selb.py -g -10 1CTF.pdb # selects atoms with B-factor greater than 10 + python pdb_selb.py -l -10 1CTF.pdb # selects atoms with B-factor less than 10 + python pdb_selb.py -ge -10 1CTF.pdb # selects atoms with B-factor greater or equal to 10 + python pdb_selb.py -le -10 1CTF.pdb # selects atoms with B-factor less or equal to 10 + python pdb_selb.py -e -10 1CTF.pdb # selects atoms with B-factor equal to 10 + python pdb_selb.py -n -10 1CTF.pdb # selects atoms with B-factor not equal to 10 + +
+
+
+
pdb_selchain

Extracts one or more chains from a PDB file.

Usage: From c02ede0b356a95151772dd058021b96a95b82b50 Mon Sep 17 00:00:00 2001 From: ahmedselim2017 Date: Sun, 28 Apr 2024 11:48:29 +0300 Subject: [PATCH 4/4] Added documentation for the run function in pdb_selb.py --- pdbtools/pdb_selb.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/pdbtools/pdb_selb.py b/pdbtools/pdb_selb.py index fce6144..f75e346 100644 --- a/pdbtools/pdb_selb.py +++ b/pdbtools/pdb_selb.py @@ -140,7 +140,8 @@ def pad_line(line): def run(fhandle, operator, operator_treshold): """ - Set the temperature column in all ATOM/HETATM records to a given value. + Filter according to the bfactor value using the given operator and operator + treshold. This function is a generator. @@ -148,8 +149,17 @@ def run(fhandle, operator, operator_treshold): ---------- fhandle : a line-by-line iterator of the original PDB file. - bfactor : float - The desired bfactor. + operator : string + The desired operator that should be used to filter by bafctor. The + combination of multiple operator values such as "ge" can also be used. + Operator values: + "g": greater than operator + "l": less than operator + "e": equal to operator + "n": not equal to operator + + operator_treshold : float + The treshold bfactor value that should be used to filter by bfactor. Yields ------