In [136]:
import subprocess
import matplotlib.pyplot as plt
from Bio import PDB
import re
import os

In [125]:
# def run_chimerax_matchmaker(reference_pdb, target_cif, output_file):
#     # ChimeraX command script
#     chimerax_script = f"""
#     open {reference_pdb}
#     open {target_cif}
#     matchmaker #{1} to #{2}
#     exit
#     """

#     # Write the script to a temporary file with a .cxc extension
#     script_filename = 'chimerax_script.cxc'
#     with open(script_filename, 'w') as file:
#         file.write(chimerax_script)
    
#     # Path to ChimeraX executable
#     chimerax_executable = '/Applications/ChimeraX-1.5.app/Contents/MacOS/ChimeraX'

#     # # Run ChimeraX with the script
#     # subprocess.run([chimerax_executable, '--nogui', script_filename])

#     # Run ChimeraX with the script and capture output
#     with open(output_file, 'w') as file:
#         subprocess.run([chimerax_executable, '--nogui', script_filename], stdout=file, stderr=subprocess.STDOUT)

In [126]:
# reference_pdb = "af3_predictions/fold_t1124/7ux8.pdb" 
# target_cif = "af3_predictions/fold_t1124/fold_t1124_model_0.cif"
# output_file = "rmsd_values.txt"
# run_chimerax_matchmaker(reference_pdb, target_cif, output_file)

In [130]:
# def count_residues(pdb_file):
#     # Initialize the PDB parser
#     pdb_parser = PDB.PDBParser(QUIET=True)
    
#     # Load the structure from the PDB file
#     structure = pdb_parser.get_structure('structure_id', pdb_file)
    
#     # Initialize residue count
#     residue_count = 0
    
#     # Iterate over models, chains, and residues to count them
#     for model in structure:
#         for chain in model:
#             residue_count += len(list(chain.get_residues()))
    
#     return residue_count
# print(count_residues("af3_predictions/fold_t1124/7ux8.pdb" ))

1543


In [137]:
def extract_rmsd_value_once(output_file):
    """Return the first matchmaker RMSD found in a captured ChimeraX log.

    The file is read line by line looking for text of the form
    ``RMSD between <N> pruned atom pairs is <value> angstroms``. Reading
    stops at the first matching line; later matches are ignored.

    Parameters
    ----------
    output_file : str or path-like
        Path to the text file to scan.

    Returns
    -------
    list of float
        ``[value]`` taken from the first matching line, or ``[]`` when no
        line matches. The result is a list (not a bare float) so that it can
        be passed straight to ``list.extend``.

    Raises
    ------
    OSError
        If ``output_file`` cannot be opened.
    ValueError
        If the captured text is not a valid float (the pattern accepts any
        run of digits and dots).
    """
    # Only the "pruned atom pairs" figure is captured; any other number on
    # the same line is ignored.
    rmsd_pattern = re.compile(r'RMSD between \d+ pruned atom pairs is ([\d.]+) angstroms')

    with open(output_file, 'r') as file:
        for line in file:
            match = rmsd_pattern.search(line)
            if match:
                # First hit wins: return immediately instead of reading the
                # remainder of the log.
                return [float(match.group(1))]

    return []

In [138]:
def run_chimerax_matchmaker_and_rmsd(reference_pdb, target_cif, window_size=10, step_size=3,
                                     nres=739,
                                     chimerax_executable='/Applications/ChimeraX-1.5.app/Contents/MacOS/ChimeraX',
                                     keep_failed_windows=False):
    """Run ChimeraX ``matchmaker`` on sliding residue windows and collect RMSDs.

    For every window ``start..end`` (inclusive residue numbers, ``start``
    beginning at 1) a small ChimeraX command script is written, ChimeraX is
    launched with ``--nogui``, its combined stdout/stderr is captured to a
    file, and the first RMSD reported in that output is extracted with
    ``extract_rmsd_value_once``. One ChimeraX process is started per window.

    Parameters
    ----------
    reference_pdb : str
        Path of the first structure opened by the script.
    target_cif : str
        Path of the second structure opened by the script.
    window_size : int, optional
        Number of residues per window (default 10).
    step_size : int, optional
        Offset between consecutive window starts (default 3).
    nres : int, optional
        Highest residue number to cover; the last window ends at or before
        this residue (default 739, the value previously fixed in the body).
    chimerax_executable : str, optional
        ChimeraX binary to launch (defaults to the macOS ChimeraX 1.5 path).
    keep_failed_windows : bool, optional
        When False (default) a window whose output contains no RMSD line is
        skipped, so the result can be shorter than the number of windows and
        its positions no longer map onto window starts. When True a
        ``float('nan')`` placeholder is stored instead, so entry ``i``
        always belongs to the window starting at ``1 + i * step_size``.

    Returns
    -------
    list of float
        RMSD values in window order.

    Notes
    -----
    The same residue range is used for both models.
    NOTE(review): this assumes both structures share residue numbering -
    confirm for the inputs used.
    Files are written to the current working directory
    (``chimerax_script.cxc`` and ``rmsd_values_<start>_<end>.txt``) and
    removed again after each window, even when a step raises.
    """
    rmsd_list = []
    script_filename = 'chimerax_script.cxc'

    # Iterate over residues with the given window size and step size
    for start in range(1, nres - window_size + 2, step_size):
        end = start + window_size - 1

        # Create a unique output filename for each window
        output_file = f'rmsd_values_{start}_{end}.txt'

        # "#1" / "#2" are ChimeraX model specifiers; in a fresh session models
        # are numbered in the order they are opened, so #1 is reference_pdb and
        # #2 is target_cif.
        chimerax_script = f"""
        open {reference_pdb}
        open {target_cif}
        matchmaker #1:{start}-{end} to #2:{start}-{end}
        exit
        """

        try:
            # Write the script to a file
            with open(script_filename, 'w') as file:
                file.write(chimerax_script)

            # Run ChimeraX with the script and capture output. The return code
            # is intentionally not checked: a failed run simply yields no RMSD.
            with open(output_file, 'w') as file:
                subprocess.run([chimerax_executable, '--nogui', script_filename],
                               stdout=file, stderr=subprocess.STDOUT)

            # Extract RMSD value from the generated output file
            rmsd_values = extract_rmsd_value_once(output_file)
        finally:
            # Remove the per-window files to avoid clutter, including when
            # launching ChimeraX or parsing its output raised.
            for leftover in (script_filename, output_file):
                if os.path.exists(leftover):
                    os.remove(leftover)

        if rmsd_values:
            rmsd_list.extend(rmsd_values)
        elif keep_failed_windows:
            # Placeholder keeps list positions aligned with window starts.
            rmsd_list.append(float('nan'))

    return rmsd_list


In [139]:
# Example usage: run the sliding-window comparison with the function's default
# window_size=10 and step_size=3. One ChimeraX process is launched per window,
# so this cell needs the ChimeraX executable configured in
# run_chimerax_matchmaker_and_rmsd to be present on this machine.
reference_pdb = "af3_predictions/fold_t1124/7ux8.pdb"  # first structure opened (PDB file)
target_cif = "af3_predictions/fold_t1124/fold_t1124_model_0.cif"  # second structure opened (mmCIF file)
rmsd_results = run_chimerax_matchmaker_and_rmsd(reference_pdb, target_cif)
# Prints the whole list of per-window RMSD values.
print (rmsd_results)

[0.847, 0.947, 0.373, 0.511, 0.586, 0.565, 0.146, 0.091, 0.068, 0.062, 0.152, 0.104, 0.14, 0.113, 0.104, 0.158, 0.17, 0.246, 0.327, 0.326, 0.146, 0.121, 0.098, 0.092, 0.108, 0.195, 0.13, 0.104, 0.68, 0.958, 0.868, 0.989, 1.202, 0.279, 0.15, 0.173, 0.225, 0.19, 0.108, 0.103, 0.153, 0.269, 0.289, 0.318, 0.149, 0.161, 0.99, 0.331, 0.93, 1.245, 1.13, 1.03, 0.73, 1.16, 0.887, 0.252, 0.28, 0.134, 0.103, 0.082, 0.178, 0.507, 0.603, 0.405, 0.191, 0.099, 0.158, 0.121, 0.115, 0.218, 0.16, 0.116, 0.183, 0.153, 0.136, 0.152, 0.106, 0.084, 0.079, 0.124, 0.187, 0.194, 0.437, 0.836, 0.619, 0.484, 0.296, 0.158, 0.169, 0.168, 0.197, 0.171, 0.113, 0.115, 0.175, 0.192, 0.205, 0.139, 0.524, 0.849, 0.671, 0.572, 0.174, 0.117, 0.176, 0.176, 0.297, 0.342, 0.516, 0.514, 0.11, 0.093, 0.101, 0.125, 0.161, 0.131, 0.187, 0.162, 0.228, 0.214, 0.829, 1.013, 1.249, 0.747, 0.757, 1.468]


In [140]:
# Number of windows for which an RMSD was extracted. Windows whose ChimeraX
# output contained no RMSD line add nothing to rmsd_results, so this count can
# be smaller than the number of windows that were run.
print(len(rmsd_results))

126


In [141]:
# Plot the RMSD values

# plt.plot(rmsd_results)
# plt.xlabel('Window Start Position')
# plt.ylabel('RMSD')
# plt.title(f'RMSD over 10-residue windows with 3-residue steps')
# plt.show()