<a href="https://colab.research.google.com/github/eoinleen/PDB-tools/blob/main/pyrosetta_H_mod_for_total_analysis_mod.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
"""
#########################################################
# PDB HYDROGEN NAMING FIX TOOL - SIMPLE INSTRUCTIONS
#########################################################
#
# WHAT THIS DOES:
# This tool fixes hydrogen atom names in PDB files
# (changes format like "1H" to "H1" to meet standard conventions)
#
# HOW TO USE (SUPER SIMPLE):
# 1. Run this notebook in Google Colab (press play button)
# 2. Allow access to your Google Drive when prompted
# 3. A text box will appear - paste the path to your PDB file
#    (or to a folder of PDB files) there
#    Example: /content/drive/MyDrive/my_protein.pdb
# 4. Choose a method, then click the "Process PDB Files" button
# 5. Each fixed file will be saved with a "mod_" prefix
#    Example: mod_my_protein.pdb
#
# NOTES FOR BEGINNERS:
# - Your Google Drive will be connected at: /content/drive
# - You can process a single file or a whole folder of PDB files
# - Fixed files are saved in the same location as originals
#
#########################################################
"""

# PDB Hydrogen Naming Fix Script for Google Colab
# This script fixes hydrogen atom naming conventions in PDB files

import os
import re
import subprocess
import tempfile
from google.colab import drive
from google.colab import files
import ipywidgets as widgets
from IPython.display import display

# Mount Google Drive
drive.mount('/content/drive')

# Install pdbtools if not already installed
!pip install -q pdbtools

def fix_hydrogen_naming_regex(input_file, output_file):
    """Rewrite the digit-first hydrogen names 1H/2H/3H as H1/H2/H3 via regex.

    The whole of ``input_file`` is read, the three substitutions are applied
    one after another to ATOM records, the result is written to
    ``output_file`` and a confirmation message is printed.
    """
    # (pattern, replacement) pairs, applied in this order. Each pattern
    # captures the "ATOM <serial> " prefix and the whitespace that follows the
    # atom name, so only the name itself is rewritten. Because the name must
    # be followed by whitespace, longer names such as "1HB" are left alone.
    substitutions = (
        (r'(ATOM\s+\d+\s+)1H(\s+)', r'\1H1\2'),
        (r'(ATOM\s+\d+\s+)2H(\s+)', r'\1H2\2'),
        (r'(ATOM\s+\d+\s+)3H(\s+)', r'\1H3\2'),
    )

    with open(input_file, 'r') as source:
        pdb_text = source.read()

    for pattern, replacement in substitutions:
        pdb_text = re.sub(pattern, replacement, pdb_text)

    with open(output_file, 'w') as target:
        target.write(pdb_text)

    print(f"PDB file converted using regex and saved as {output_file}")

def fix_hydrogen_naming_pdbtools(input_file, output_file):
    """Fix hydrogen atom naming, passing the file through pdb-tools first.

    The input is run through ``pdb_reatom`` and the hydrogen-name
    substitution from ``fix_hydrogen_naming_regex`` is then applied to that
    result, which is written to ``output_file``. If any step of that pipeline
    fails (for example because ``pdb_reatom`` is not installed), the error is
    printed and the plain regex method is run on ``input_file`` instead.

    Args:
        input_file: Path of the PDB file to read.
        output_file: Path the converted PDB file is written to.

    Returns:
        None. Progress and errors are reported with ``print``; note that the
        regex helper prints its own confirmation line as well.
    """
    temp_path = None
    try:
        # Intermediate file that receives the pdb_reatom output.
        fd, temp_path = tempfile.mkstemp(suffix='.pdb')

        # Pass the command as an argument list (no shell) so that paths
        # containing spaces or shell metacharacters are handled literally,
        # and redirect stdout straight into the temporary file.
        with os.fdopen(fd, 'w') as reatom_out:
            subprocess.run(['pdb_reatom', input_file], stdout=reatom_out, check=True)

        # pdb_reatom renumbers atom serial numbers; it does not rename atoms.
        # The hydrogen names therefore still have to be rewritten, which is
        # done here while copying the intermediate file to its destination.
        fix_hydrogen_naming_regex(temp_path, output_file)
        print(f"PDB file converted using pdbtools and saved as {output_file}")

    except Exception as e:
        print(f"Error using pdbtools: {str(e)}")
        print("Falling back to regex method...")
        fix_hydrogen_naming_regex(input_file, output_file)

    finally:
        # Always clean up the intermediate file, including on failure.
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)

def _convert_with_selected_method(input_file, output_file):
    """Convert one PDB file using the method currently chosen in ``method_widget``."""
    # The radio-button values are 'pdbtools (recommended)' and 'regex only',
    # so an equality test against the bare word "pdbtools" can never succeed;
    # match on the prefix instead.
    if str(method_widget.value).startswith("pdbtools"):
        fix_hydrogen_naming_pdbtools(input_file, output_file)
    else:
        fix_hydrogen_naming_regex(input_file, output_file)


def process_files():
    """Convert the PDB file, or every PDB file in the folder, named in ``path_widget``.

    The path is read from ``path_widget`` and the conversion method from
    ``method_widget``. Each converted file is written next to its original
    as ``mod_<original name>``. In folder mode, files whose names already
    start with ``mod_`` are skipped so that re-running the tool does not
    convert its own earlier output into ``mod_mod_...`` files.

    Returns:
        None. Progress and path problems are reported with ``print``.
    """
    output_prefix = "mod_"

    # Pasted paths frequently carry stray leading/trailing whitespace.
    file_path = (path_widget.value or "").strip()

    if not file_path:
        print("Please enter a valid file or directory path.")
        return

    file_path = os.path.expanduser(file_path)

    if os.path.isdir(file_path):
        # Folder mode: every *.pdb file that is not already an output file,
        # in a stable (sorted) order.
        pdb_files = sorted(
            name for name in os.listdir(file_path)
            if name.endswith('.pdb') and not name.startswith(output_prefix)
        )

        if not pdb_files:
            print(f"No PDB files found in {file_path}")
            return

        print(f"Found {len(pdb_files)} PDB files in {file_path}")

        for idx, file_name in enumerate(pdb_files, 1):
            input_file = os.path.join(file_path, file_name)
            output_file = os.path.join(file_path, f"{output_prefix}{file_name}")
            print(f"Processing file {idx}/{len(pdb_files)}: {file_name}")
            _convert_with_selected_method(input_file, output_file)

    elif os.path.isfile(file_path) and file_path.endswith('.pdb'):
        # Single-file mode: the output goes in the same folder as the input.
        dir_path = os.path.dirname(file_path)
        file_name = os.path.basename(file_path)
        output_file = os.path.join(dir_path, f"{output_prefix}{file_name}")

        print(f"Processing file: {file_name}")
        _convert_with_selected_method(file_path, output_file)

    else:
        print("The specified path is not a valid PDB file or directory containing PDB files.")

# Build the input controls. path_widget and method_widget are read by
# process_files() when the button is clicked.
path_widget = widgets.Text(
    description='Path:',
    value='',
    placeholder='Enter the path to a PDB file or directory in Google Drive (e.g., /content/drive/MyDrive/pdb_files)',
    layout={'width': '800px'},
    style={'description_width': 'initial'},
)

method_widget = widgets.RadioButtons(
    description='Method:',
    options=['pdbtools (recommended)', 'regex only'],
    value='pdbtools (recommended)',
    style={'description_width': 'initial'},
)

process_button = widgets.Button(
    description='Process PDB Files',
    tooltip='Click to process the PDB files',
    button_style='primary',
)


def _on_process_clicked(button):
    """Click callback: the button argument is ignored; just run the conversion."""
    process_files()


process_button.on_click(_on_process_clicked)

# Show the banner, then the three controls in order: path, method, button.
print(
    "PDB Hydrogen Naming Fix Tool",
    "----------------------------",
    "This tool will fix hydrogen atom naming in PDB files to make them compatible with interface analysis scripts.",
    "The fixed files will be saved in the same directory with 'mod_' prefix.",
    sep="\n",
)
display(path_widget, method_widget, process_button)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[31mERROR: Could not find a version that satisfies the requirement pdbtools (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for pdbtools[0m[31m
[0mPDB Hydrogen Naming Fix Tool
----------------------------
This tool will fix hydrogen atom naming in PDB files to make them compatible with interface analysis scripts.
The fixed files will be saved in the same directory with 'mod_' prefix.


Text(value='', description='Path:', layout=Layout(width='800px'), placeholder='Enter the path to a PDB file or…

RadioButtons(description='Method:', options=('pdbtools (recommended)', 'regex only'), style=DescriptionStyle(d…

Button(button_style='primary', description='Process PDB Files', style=ButtonStyle(), tooltip='Click to process…

Processing file: 1_bind_0_dldesign_0_af2pred.pdb
PDB file converted using regex and saved as /content/drive/MyDrive/PDB-files/20250303-pdb-mod/mod_1_bind_0_dldesign_0_af2pred.pdb


In [3]:
# Mount Google Drive at /content/drive.
# NOTE(review): the first cell already imports `drive` and performs this same
# mount, so when the cells are run in order this call only reports
# "Drive already mounted" (as the saved output of this cell shows).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
