<a href="https://colab.research.google.com/github/eoinleen/PDB-tools/blob/main/chain_num_mod_aniso_rewmove.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
"""
PDB Chain Renamer & ANISOU Remover

What does this do?
----------------
1. Changes chain designations in PDB files
2. Removes all ANISOU records
3. Saves modified file with 'mod_' prefix in same directory

Requirements
-----------
1. PDB file in Google Drive
2. Google Colab environment

Usage
-----
1. Change chain_map to specify chain renaming (e.g., chain_map={'E': 'A', 'F': 'B'})
2. Update input_pdb_file path to your file location
3. Run script

Example path: /content/drive/MyDrive/your_folder/your_file.pdb

Output
------
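- New file with 'mod_' prefix; non-alphanumeric characters in the original name are replaced by '_'
- Same location as input file
- All ANISOU records removed
- Chain IDs changed as specified (ATOM/HETATM/TER records); for ATOM/HETATM records the new ID is also written to columns 73-76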
"""

from google.colab import drive
import os

def clean_pdb_name(filename):
    """Return a sanitised ``.pdb`` file name derived from *filename*.

    The extension (as split off by ``os.path.splitext``) is discarded, every
    character of the remaining stem that is not alphanumeric is replaced by
    an underscore, and ``.pdb`` is appended.
    """
    stem, _extension = os.path.splitext(filename)
    sanitised = [ch if ch.isalnum() else '_' for ch in stem]
    return ''.join(sanitised) + ".pdb"

def _relabel_atom_record(line, new_chain):
    """Return ATOM/HETATM *line* with its chain ID replaced by *new_chain*.

    The new ID is written to column 22 (index 21) and, left-justified in a
    four-character field, to columns 73-76. The line terminator is split off
    first and re-attached afterwards so that padding a short record can never
    swallow the newline and merge two records into one output line.
    """
    body = line.rstrip('\r\n')
    ending = line[len(body):]
    if len(body) < 76:
        # Too short to hold columns 73-76: trim trailing blanks and pad to
        # the full 80-column record width before slicing.
        body = body.rstrip().ljust(80)
    return (body[:21] + new_chain + body[22:72] +
            new_chain.ljust(4) + body[76:] + ending)


def rename_chains_in_pdb(input_file, chain_map):
    """Read a PDB file, drop ANISOU records and rename chain IDs.

    Args:
        input_file: Path of the PDB file to read.
        chain_map: Mapping from old chain ID to new chain ID. Each line is
            rewritten at most once, so swaps and chained renames such as
            ``{'B': 'A', 'C': 'B'}`` do not cascade.

    Returns:
        A list of output lines (line terminators preserved), or ``None`` if
        an error occurred while reading or processing the file.

    Progress counts and any error are reported with ``print``.
    """
    modified_lines = []
    line_count = 0
    modified_count = 0
    anisou_count = 0

    try:
        with open(input_file, 'r') as file:
            for line in file:
                line_count += 1

                # Skip ANISOU lines
                if line.startswith("ANISOU"):
                    anisou_count += 1
                    continue

                # Only lines long enough to reach column 22 can carry a
                # chain ID; blank and short lines fall through unchanged.
                if len(line) >= 22:
                    chain_id = line[21]
                    if line.startswith(("ATOM", "HETATM")):
                        if chain_id in chain_map:
                            modified_count += 1
                            modified_lines.append(
                                _relabel_atom_record(line, chain_map[chain_id]))
                            continue
                    elif line.startswith("TER"):
                        if chain_id in chain_map:
                            modified_count += 1
                            modified_lines.append(
                                line[:21] + chain_map[chain_id] + line[22:])
                            continue

                modified_lines.append(line)

        print(f"Processed {line_count} lines")
        print(f"Removed {anisou_count} ANISOU records")
        print(f"Modified {modified_count} ATOM/HETATM/TER records")
        return modified_lines

    # Deliberately broad: callers rely on a None return (not an exception)
    # to detect any failure.
    except Exception as e:
        print(f"Error reading file: {e}")
        print(f"Error occurred at line {line_count}")
        return None

def process_pdb_file(input_file, chain_map=None):
    """Rename chains / strip ANISOU records in a PDB file and save the result.

    The output is written next to the input as ``mod_<sanitised name>.pdb``,
    where the sanitised name comes from ``clean_pdb_name``. Progress and
    errors are reported with ``print``.

    Args:
        input_file: Path of the PDB file to process.
        chain_map: Mapping from old chain ID to new chain ID. When omitted
            (``None``) the mapping ``{'B': 'A', 'C': 'B'}`` is used.

    Returns:
        ``True`` if the modified file was written, ``False`` if the input
        does not exist, processing produced no content, or saving failed.
    """
    # A None sentinel avoids sharing one mutable dict across all calls.
    if chain_map is None:
        chain_map = {'B': 'A', 'C': 'B'}

    if not os.path.exists(input_file):
        print(f"Error: Input file not found: {input_file}")
        return False

    directory = os.path.dirname(input_file)
    filename = os.path.basename(input_file)
    clean_name = clean_pdb_name(filename)
    output_file = os.path.join(directory, f"mod_{clean_name}")

    print(f"Processing: {filename}")
    print(f"Input path: {input_file}")
    print(f"Output path: {output_file}")

    modified_content = rename_chains_in_pdb(input_file, chain_map)

    # None (processing error) and an empty list are both treated as failure.
    if not modified_content:
        return False

    try:
        with open(output_file, 'w') as file:
            file.writelines(modified_content)
        print(f"\nSuccessfully saved modified PDB to: {output_file}")

        # Read the preview back from disk to show what was actually saved,
        # stopping after five lines instead of scanning the whole file.
        print("\nFirst few lines of modified file:")
        with open(output_file, 'r') as f:
            for i, line in enumerate(f):
                if i >= 5:
                    break
                print(line.rstrip())
        return True
    except Exception as e:
        print(f"Error saving file: {e}")
        return False

# Make Google Drive visible to the Colab runtime, then process the PDB file
drive.mount('/content/drive')
input_pdb_file = "/content/drive/MyDrive/PDB-files/PDB-files-for-mod/RA-MF-2_test/Copy of RA-MF_Ub2-K11-str.pdb"  # Change this path
success = process_pdb_file(input_pdb_file)

# Report the overall outcome of the run
print("\nChain renaming completed successfully!" if success
      else "\nFailed to process PDB file.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Processing: Copy of RA-MF_Ub2-K11-str.pdb
Input path: /content/drive/MyDrive/PDB-files/PDB-files-for-mod/RA-MF-2_test/Copy of RA-MF_Ub2-K11-str.pdb
Output path: /content/drive/MyDrive/PDB-files/PDB-files-for-mod/RA-MF-2_test/mod_Copy_of_RA_MF_Ub2_K11_str.pdb
Processed 2340 lines
Removed 1168 ANISOU records
Modified 1168 ATOM/HETATM/TER records

Successfully saved modified PDB to: /content/drive/MyDrive/PDB-files/PDB-files-for-mod/RA-MF-2_test/mod_Copy_of_RA_MF_Ub2_K11_str.pdb

First few lines of modified file:
CRYST1  157.232  157.232  157.232  90.00  90.00  90.00 P 43 3 2      1
ATOM      1  N   MET A   1       4.453  15.829 -59.881  1.00 59.64      A    N
ATOM      2  CA  MET A   1       3.866  14.881 -58.897  1.00 60.38      A    C
ATOM      3  C   MET A   1       4.960  13.929 -58.428  1.00 59.90      A    C
ATOM      4  O   MET A   1       5.925  13.679 