<a href="https://colab.research.google.com/github/eoinleen/PDB-tools/blob/main/ANISO_remover_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
"""
PDB ANISOU Remover (Chain renaming optional)

What does this do?
----------------
1. Removes all ANISOU records from PDB files
2. Optionally changes chain designations (if chain_map provided)
3. Saves modified file with 'mod_' prefix in same directory

Usage
-----
1. Choose a chain mapping (pick one):
   - chain_map = {} (or None) to only remove ANISOU (no chain renaming)
   - chain_map = {'E': 'A', 'F': 'B'} to also rename chains
2. Update input_pdb_file path to your file location
3. Run script
"""

from google.colab import drive
import os

def clean_pdb_name(filename):
    """Return a sanitized '.pdb' file name derived from filename.

    The last extension of filename is dropped, every character of the
    remaining stem that is not alphanumeric is replaced by '_', and
    '.pdb' is appended to the result.
    """
    stem, _extension = os.path.splitext(filename)
    sanitized = [ch if ch.isalnum() else '_' for ch in stem]
    return "".join(sanitized) + ".pdb"

def _rename_atom_chain(line, new_chain):
    """Return an ATOM/HETATM record rewritten to carry new_chain.

    Column 22 (index 21) is replaced by new_chain and columns 73-76
    (indices 72-75) are overwritten with new_chain left-justified to four
    characters. Records whose content is shorter than 76 columns are
    space-padded to 80 columns first. The record's original line terminator
    is always kept, so consecutive records are never merged onto one line.
    """
    body = line.rstrip('\r\n')
    terminator = line[len(body):]
    if len(body) < 76:
        # Pad short records so the slices below address real columns.
        body = body.rstrip().ljust(80)
    return (body[:21] + new_chain + body[22:72] +
            new_chain.ljust(4) + body[76:] + terminator)


def process_pdb_lines(input_file, chain_map=None):
    """Read a PDB file, drop ANISOU records and optionally rename chains.

    Args:
        input_file: Path of the PDB file to read.
        chain_map: Mapping of old chain ID -> new chain ID
            (e.g. {'E': 'A', 'F': 'B'}). None or an empty mapping disables
            renaming, so only ANISOU records are removed.

    Returns:
        The list of output lines (each keeping its original line ending),
        or None if an exception occurred while reading or processing.

    A short summary of what was done is printed to stdout.
    """
    modified_lines = []
    line_count = 0
    modified_count = 0
    anisou_count = 0

    # None and {} both mean "do not rename anything".
    rename_chains = bool(chain_map)

    try:
        with open(input_file, 'r') as file:
            for line in file:
                line_count += 1

                # ANISOU records are always discarded.
                if line.startswith("ANISOU"):
                    anisou_count += 1
                    continue

                # Only records long enough to hold a chain ID in column 22
                # are candidates for renaming; everything else passes through.
                if rename_chains and len(line) >= 22 and line[21] in chain_map:
                    new_chain = chain_map[line[21]]
                    if line.startswith(("ATOM", "HETATM")):
                        line = _rename_atom_chain(line, new_chain)
                        modified_count += 1
                    elif line.startswith("TER"):
                        line = line[:21] + new_chain + line[22:]
                        modified_count += 1

                modified_lines.append(line)

        print(f"Processed {line_count} lines")
        print(f"Removed {anisou_count} ANISOU records")
        if rename_chains:
            print(f"Modified {modified_count} ATOM/HETATM/TER records for chain renaming")
        else:
            print("No chain renaming performed")
        return modified_lines

    except Exception as e:
        # Callers rely on None (not an exception) to detect failure.
        print(f"Error reading file: {e}")
        print(f"Error occurred at line {line_count}")
        return None

def process_pdb_file(input_file, chain_map=None):
    """
    Process PDB file to remove ANISOU and optionally rename chains

    The result is written next to the input file as 'mod_<name>.pdb', where
    <name> is the input file name sanitized by clean_pdb_name. Progress and
    a preview of the first five output lines are printed to stdout.

    Args:
        input_file: Path to input PDB file
        chain_map: Dictionary for chain renaming (e.g., {'E': 'A', 'F': 'B'})
                  Set to {} or None to only remove ANISOU records

    Returns:
        True if the output file was written, False if the input file does
        not exist, could not be processed, or the output could not be saved.
    """
    if not os.path.exists(input_file):
        print(f"Error: Input file not found: {input_file}")
        return False

    directory = os.path.dirname(input_file)
    filename = os.path.basename(input_file)
    clean_name = clean_pdb_name(filename)
    output_file = os.path.join(directory, f"mod_{clean_name}")

    print(f"Processing: {filename}")
    print(f"Input path: {input_file}")
    print(f"Output path: {output_file}")

    if chain_map:
        print(f"Chain mapping: {chain_map}")
    else:
        print("Chain mapping: None (only removing ANISOU records)")

    modified_content = process_pdb_lines(input_file, chain_map)

    # process_pdb_lines returns None on failure. An empty list is a valid
    # result (e.g. a file holding only ANISOU records) and must still be saved.
    if modified_content is None:
        return False

    try:
        with open(output_file, 'w') as file:
            file.writelines(modified_content)
        print(f"\nSuccessfully saved modified PDB to: {output_file}")

        print("\nFirst few lines of modified file:")
        with open(output_file, 'r') as f:
            for i, line in enumerate(f):
                if i >= 5:
                    # Preview only; no need to read the rest of the file.
                    break
                print(line.rstrip())
        return True
    except Exception as e:
        print(f"Error saving file: {e}")
        return False

# --- Script entry point: mount Google Drive, then clean one configured file ---
drive.mount('/content/drive')

# CONFIGURATION - edit the two values below before running.

# Path of the PDB file to process; the output is written beside it.
input_pdb_file = "/content/drive/MyDrive/PDB-files/20250606_mysm1/mysm1_J8.pdb"

# Chain renaming table. Pick one of:
#   {}                     -> only remove ANISOU records (current setting)
#   {'E': 'A', 'F': 'B'}   -> remove ANISOU and rename chains E->A, F->B
#   None                   -> no chain renaming at all (same effect as {})
chain_map = {}

success = process_pdb_file(input_pdb_file, chain_map)

# Report the overall outcome of the run.
print("\nPDB processing completed successfully!" if success
      else "\nFailed to process PDB file.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Processing: mysm1_J8.pdb
Input path: /content/drive/MyDrive/PDB-files/20250606_mysm1/mysm1_J8.pdb
Output path: /content/drive/MyDrive/PDB-files/20250606_mysm1/mod_mysm1_J8.pdb
Chain mapping: None (only removing ANISOU records)
Processed 6394 lines
Removed 3193 ANISOU records
No chain renaming performed

Successfully saved modified PDB to: /content/drive/MyDrive/PDB-files/20250606_mysm1/mod_mysm1_J8.pdb

First few lines of modified file:
ATOM      1  N   ASP A 557      23.277  -8.273 -50.603  1.00 68.00           N
ATOM      2  CA  ASP A 557      23.141  -6.811 -50.848  1.00 68.95           C
ATOM      3  C   ASP A 557      23.360  -6.090 -49.520  1.00 68.72           C
ATOM      4  O   ASP A 557      22.722  -6.400 -48.516  1.00 68.58           O
ATOM      5  CB  ASP A 557      21.906  -6.476 -51.683  1.00 70.53           C

PDB processing completed successfu