In [1]:
import os
import json
import uuid
import subprocess

import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Mount the GPCRmd directory with: 

`sshfs aperalta@gwaihir.prib.upf.edu:/data/GPCRmd/ /home/alex/sshfs_mountpoints/gpcrmd/`

Mount verde home with:

`sshfs aperalta@verde.prib.upf.edu:/home/aperalta/ /home/alex/sshfs_mountpoints/verde`

In [2]:
# Paths to run locally.
# GPCRMD_ROOT and ALIGNED_POCKETS_DIR live under sshfs mountpoints, so the remote
# directories have to be mounted before any cell that reads from them is run.

# Root of the local project checkout; data/ and results/ paths below hang off it.
PROJECT_ROOT = "/home/alex/Documents/pocket_tool"
# "media" directory inside the mounted GPCRmd tree.
GPCRMD_ROOT = "/home/alex/sshfs_mountpoints/gpcrmd/media"
# Directory prepended to every file name when building the PyMOL command line.
ALIGNED_POCKETS_DIR = "/home/alex/sshfs_mountpoints/verde/combine_pockets/results/aligned/aligned_pockets"
# pocket_dir = "/home/alex/Desktop/mdpocket_oversized_mount/"

# JSON file that is loaded into `compl_info` (one entry per "dyn<ID>" key).
COMPL_INFO_PATH = os.path.join(GPCRMD_ROOT, "files/Precomputed/compl_info.json")

# Output directory of this notebook; aligned PDBs are written to its aligned_pdbs/ subfolder.
RESULTS_DIR = os.path.join(PROJECT_ROOT, "results/06_gpcrmd_crystals_overlap/output")
# Reference structure opened as model #0 in the Chimera alignment script.
REFERENCE_GPCR = os.path.join(PROJECT_ROOT, "data/ref_gpcr/data/a2a_6gdg_opm_rotated.pdb")
# UCSF Chimera executable launched through subprocess.
CHIMERA = "/home/alex/.local/UCSF-Chimera64-1.17.3/bin/chimera"
# Where the temporary Chimera command scripts are written.
TMP_DIR = "/home/alex/Desktop"
# Directory holding the input "<pdb_code>.pdb" files that get aligned.
PDB_DIR = os.path.join(PROJECT_ROOT, "data/pdbs/data")



In [None]:
def read_dynids():
    """
    Returns the lines of the working-simulations dynids.csv file as a list of
    strings (one element per line, line endings removed).
    """
    csv_path = os.path.join(PROJECT_ROOT, "data/working_sims/data/dynids.csv")
    with open(csv_path, 'r') as handle:
        return handle.read().splitlines()


def read_trajids():
    """
    Returns the lines of the working-simulations trajids.csv file as a list of
    strings (one element per line, line endings removed).
    """
    csv_path = os.path.join(PROJECT_ROOT, "data/working_sims/data/trajids.csv")
    with open(csv_path, 'r') as handle:
        return handle.read().splitlines()


def read_tm_allosteric_modulators():
    """
    Loads the semicolon-separated TM allosteric modulators CSV into a pandas
    DataFrame and returns it.
    """
    return pd.read_csv(
        os.path.join(
            PROJECT_ROOT,
            "data/tm_allosteric_modulators_list/data/tm_allosteric_modulators.csv",
        ),
        sep=';',
    )


def read_json_into_dict(json_path):
    """
    Opens the JSON file at `json_path` and returns its parsed content.
    """
    with open(json_path, 'r') as handle:
        return json.load(handle)


def correct_wrong_uniprot_names_complinfo(uniprot_name):
    """
    Maps the known wrong UniProt name "pd2r2_human" to "pd2r_human".
    Any other value is returned unchanged.
    """
    return "pd2r_human" if uniprot_name == "pd2r2_human" else uniprot_name
    

# TODO IMPORTANT: If I want this to work properly, I have to align to the TM region.
def chimera_align(target_pdb):
    """
    Runs UCSF Chimera (no GUI) to superimpose `target_pdb` and the reference
    GPCR, and saves the target structure in the reference frame.

    The output is written to `<RESULTS_DIR>/aligned_pdbs/` using the input
    basename with ".pdb" replaced by "_all_aligned.pdb". If that file already
    exists, Chimera is not run again and the existing path is returned.

    Parameters
    ----------
    target_pdb : str
        Path to the PDB file to align.

    Returns
    -------
    str
        Path of the aligned PDB file. Chimera's exit status is not checked, so
        the path is returned even if Chimera failed to produce the file.
    """
    # Construct aligned PDB path.
    aligned_pdb_basename = os.path.basename(target_pdb).replace(".pdb", "_all_aligned.pdb")
    aligned_pdb_path = f'{RESULTS_DIR}/aligned_pdbs/{aligned_pdb_basename}'

    # Cached result from a previous run: nothing to do.
    if os.path.exists(aligned_pdb_path):
        return aligned_pdb_path

    # Chimera's `write` does not create missing folders, so make sure the
    # output directory exists before launching it.
    os.makedirs(os.path.dirname(aligned_pdb_path), exist_ok=True)

    chimera_script = (
        f'open {REFERENCE_GPCR}; '                    # Model #0
        f'open {target_pdb}; '                        # Model #1
        f'mm #1 #0:; '  # Align model #1 to #0
        f'select #1; write selected relative 0 #1 {aligned_pdb_path};' # Save aligned target GPCR
    )

    # Writing and executing the chimera align script. The uuid keeps the
    # script name unique so repeated calls do not overwrite each other.
    tmp_script = os.path.join(TMP_DIR, f'tmp_chimera_script_{uuid.uuid4()}')
    try:
        with open(tmp_script, "w") as fh:
            print(chimera_script, file=fh)

        subprocess.run(
            [CHIMERA, '--nogui', f'cmd:{tmp_script}'],
            stdout=subprocess.PIPE,  # Capture standard output
            text=True                # Decode output as string
        )
    finally:
        # Clean up the temporary script even when writing it or launching
        # Chimera raised.
        if os.path.exists(tmp_script):
            os.remove(tmp_script)

    return aligned_pdb_path


def get_trajetory_ids_from_dynid(dynid, compl_info):
    """
    Returns the trajectory IDs listed for `dynid` in `compl_info`.

    The entry is looked up under the key "dyn<dynid>"; for each path in its
    'traj_f' list, the ID is the part of the file's basename before the first
    underscore.
    """
    entry_key = 'dyn' + str(dynid)
    return [
        os.path.basename(traj_path).split("_")[0]
        for traj_path in compl_info[entry_key]['traj_f']
    ]


def create_pymol_startup_line(pdbs):
    """
    Builds a "pymol ..." command line that opens the given files.

    Every entry of `pdbs` is joined onto ALIGNED_POCKETS_DIR with
    os.path.join, and the resulting paths are separated by single spaces.
    """
    resolved_paths = []
    for pdb_name in pdbs:
        resolved_paths.append(os.path.join(ALIGNED_POCKETS_DIR, pdb_name))
    return "pymol " + " ".join(resolved_paths)


In [33]:
# Loading stuff
# DataFrame read from the semicolon-separated TM allosteric modulators CSV.
allosteric_df = read_tm_allosteric_modulators()
# Lines of the working-simulations dynids.csv / trajids.csv files, as lists of strings.
dynids = read_dynids()
trajids = read_trajids()
# Parsed content of the GPCRmd compl_info.json file.
compl_info = read_json_into_dict(COMPL_INFO_PATH)

In [35]:
# For every row of the allosteric modulators table, collect the simulations in
# compl_info that belong to the same receptor, have no ligand, come from a
# different PDB and are in our working list; then align the crystal structure
# and print a PyMOL command line that loads it next to the aligned pocket files.

# Built once: set membership instead of scanning the list for every entry.
working_dynids = set(dynids)

for allosteric_df_entry in allosteric_df.itertuples():
    uniprot_name = allosteric_df_entry.uniprot_name.strip().lower()
    pdb_code = allosteric_df_entry.pdb.strip().lower()

    dynds_and_trajids = []
    for dynid_key, dyn_info in compl_info.items():

        # Ignore if fields not correctly completed
        if not dyn_info['up_name']:
            continue

        # Remove only the leading "dyn" prefix. str.strip('dyn') would strip any
        # of the characters d/y/n from both ends of the key instead.
        dynid = dynid_key[len('dyn'):] if dynid_key.startswith('dyn') else dynid_key
        compl_uniprot_name = dyn_info['up_name'].strip().lower()
        compl_uniprot_name = correct_wrong_uniprot_names_complinfo(compl_uniprot_name)
        compl_pdb_code = dyn_info['pdb_id'].strip().lower()

        # Check the uniprot names match
        if compl_uniprot_name != uniprot_name:
            continue

        # Check if it's apoform (lig_lname is False when there is no ligand)
        if dyn_info["lig_lname"] != False:
            continue

        # Don't compare the PDB with self simulation
        if pdb_code == compl_pdb_code:
            continue

        # Check the dynID is on our working list of dynids
        if dynid not in working_dynids:
            continue

        # Separate name on purpose: reusing `trajids` here would overwrite the
        # notebook-level list loaded by read_trajids().
        dyn_trajids = get_trajetory_ids_from_dynid(dynid, compl_info)
        dynds_and_trajids.append((dynid, dyn_trajids))

    if dynds_and_trajids:
        print(f"Processing {uniprot_name} ({pdb_code}) with dynids and trajids: {dynds_and_trajids}")
        aligned_pdb = chimera_align(target_pdb=os.path.join(PDB_DIR, f'{pdb_code}.pdb'))

        aligned_files = [aligned_pdb]
        for dynid, dyn_trajids in dynds_and_trajids:
            for trajid in dyn_trajids:
                # The "*" is not expanded here; it is printed as part of the
                # command line for the shell to expand.
                aligned_pocket_file = f"dyn{dynid}_traj{trajid}_pocket*tmaligned.pdb"
                aligned_files.append(aligned_pocket_file)
        print(create_pymol_startup_line(aligned_files), end="\n\n")


Processing adrb2_human (6n48) with dynids and trajids: [('751', ['15441', '15442', '15443']), ('769', ['15567', '15568', '15569']), ('763', ['15525', '15526', '15527']), ('829', ['15972', '15973', '15974']), ('863', ['16210', '16211', '16212']), ('962', ['16872', '16873', '16874']), ('970', ['16923', '16924', '16925']), ('951', ['16795', '16796', '16797']), ('926', ['16630', '16631', '16632']), ('979', ['16976', '16977', '16978']), ('1003', ['17144', '17145', '17146']), ('11', ['10185', '10186', '10187']), ('100', ['10953', '10954', '10955']), ('96', ['10916', '10917', '10918']), ('107', ['11015', '11016', '11017']), ('113', ['11073', '11074', '11075']), ('82', ['10799', '10800', '10801']), ('65', ['10647', '10648', '10649']), ('114', ['11085', '11086', '11087']), ('159', ['11490', '11491', '11492']), ('88', ['10850', '10851', '10852']), ('116', ['11105', '11106', '11107'])]
pymol /home/alex/Documents/pocket_tool/results/06_gpcrmd_crystals_overlap/output/aligned_pdbs/6n48_all_aligned.p