In [1]:
from pyrosetta import *
import os
import string
import argparse
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline

In [2]:
# Initialise PyRosetta with its default options (prints the banner shown in the cell output).
init()

┌──────────────────────────────────────────────────────────────────────────────┐
│                                 PyRosetta-4                                  │
│              Created in JHU by Sergey Lyskov and PyRosetta Team              │
│              (C) Copyright Rosetta Commons Member Institutions               │
│                                                                              │
│ NOTE: USE OF PyRosetta FOR COMMERCIAL PURPOSES REQUIRE PURCHASE OF A LICENSE │
│         See LICENSE.PyRosetta.md or email license@uw.edu for details         │
└──────────────────────────────────────────────────────────────────────────────┘
PyRosetta-4 2024 [Rosetta PyRosetta4.Release.python311.ubuntu 2024.39+release.59628fbc5bc09f1221e1642f1f8d157ce49b1410 2024-09-23T07:49:48] retrieved from: http://www.pyrosetta.org
core.init: Checking for fconfig files in pwd and ./rosetta/flags
core.init: Rosetta version: PyRosetta4.Release.python311.ubuntu r387 2024.39+release.59628fbc5b 59628fbc5b

In [3]:
# NOTE(review): hard-coded absolute path. The relative paths used by later cells
# (input_pdbs/, shifted/, results/) and by the output files written in myrmsd are
# resolved against this working directory.
os.chdir("/home/cadeniran/storage/cadeniran/mpp/memscan-prot/")
# Project-local helpers; presumably found via the working directory set above — confirm.
from modules import AddSpanlessMembraneMover
from modules import HelixTools
from modules import HydrophobicMoment
from modules import analyze_y
from modules import sns_heatmap
from modules import AH_CA_rmsd

In [56]:
def _print_axis_angles(helix_tools, pose):
    """Print and return the angles of the pose's screw axis with the x, y and z axes."""
    screw_axis = helix_tools.calculate_screw_axis(pose)
    angles = (
        helix_tools.calc_angle(screw_axis, 'x'),
        helix_tools.calc_angle(screw_axis, 'y'),
        helix_tools.calc_angle(screw_axis, 'z'),
    )
    print(*angles)
    return angles


def _rotate_about_center(pose, axis_xyz, angle):
    """Rotate `pose` in place by `angle` about `axis_xyz`, through the centre of mass of residues 1..size-1."""
    axis = pyrosetta.rosetta.numeric.xyzVector_double_t(*axis_xyz)
    center = pyrosetta.rosetta.core.pose.center_of_mass(pose, 1, pose.size()-1)
    pyrosetta.rosetta.protocols.rigid.WholeBodyRotationMover(axis, center, angle).apply(pose)


def myrmsd(native, best):
    """
    Re-orient a copy of `native` and return its CA root-mean-square deviation from `best`.

    Steps performed on a private copy of `native` (the caller's pose is left untouched):
      1. apply AddSpanlessMembraneMover (add_membrane_virtual, then apply);
      2. translate so the centre of mass of residues 1..size-1 has x = y = 0;
      3. apply three whole-body rotations driven by screw-axis angle differences;
      4. collect CA coordinates of residues 1..size-1 of both poses and compute the RMSD.

    Parameters:
        native: pose to be re-oriented; it is cloned, never modified.
        best:   pose to compare against. Its last residue is skipped, exactly like the
                last residue of the membrane-augmented native copy, so it presumably
                already carries a trailing membrane residue — confirm with the caller.

    Returns:
        Square root of the mean, over residues, of the summed squared per-axis CA
        differences.

    Raises:
        ValueError: if either pose does not yield exactly `native.size()` CA positions
                    (or that number is zero).

    Side effects:
        Prints five lines of screw-axis angles and writes "shifted_native.pdb",
        "shifted.csv" and "best.csv" into the current working directory.
    """
    number_of_residues = native.size()

    # Work on a clone so that repeated calls with the same `native` start from the same
    # state (the original code applied the membrane mover to the caller's pose).
    shifted = native.clone()

    #initiate the spanless membrane mover
    fm = AddSpanlessMembraneMover()
    fm.add_membrane_virtual(shifted)
    fm.apply(shifted)

    #move the protein along the x and y axes to keep embedding the same
    cmass = pyrosetta.rosetta.core.pose.center_of_mass(shifted, 1, shifted.size()-1)
    move_xy = pyrosetta.rosetta.numeric.xyzVector_double_t(-cmass[0], -cmass[1],0)

    #run the translation
    translation_mover = pyrosetta.rosetta.protocols.rigid.WholeBodyTranslationMover(move_xy)
    translation_mover.apply(shifted)

    ht = HelixTools()
    start_x, start_y, start_z = _print_axis_angles(ht, shifted)
    best_x, _, _ = _print_axis_angles(ht, best)

    # First rotation: about the y axis, by the x-angle difference between best and native.
    _rotate_about_center(shifted, (0,1,0), best_x - start_x)
    _, current_y, _ = _print_axis_angles(ht, shifted)

    # NOTE(review): this rotation reuses the y axis and its angle is the rotated pose's
    # y angle minus its own pre-rotation y angle (not an angle of `best`). Both look
    # unintended, but the intended geometry is not derivable from this file, so the
    # original behaviour is kept — confirm.
    _rotate_about_center(shifted, (0,1,0), current_y - start_y)
    _, _, current_z = _print_axis_angles(ht, shifted)

    # NOTE(review): as above, the z difference compares the rotated pose with its own
    # starting orientation rather than with `best` — confirm.
    _rotate_about_center(shifted, (0,0,1), current_z - start_z)
    _print_axis_angles(ht, shifted)

    shifted.dump_pdb("shifted_native.pdb")

    #calculate the CA positions for the shifted original and the best structures
    #(the last residue of each pose is skipped)
    ca_shifted = [np.array(shifted.residue(i).xyz('CA')) for i in range(1, shifted.size())]
    ca_best = [np.array(best.residue(j).xyz('CA')) for j in range(1, best.size())]

    shifted_df = pd.DataFrame(ca_shifted)
    best_df = pd.DataFrame(ca_best)

    shifted_df.to_csv('shifted.csv', sep=' ', index=False, header=False)
    best_df.to_csv('best.csv', sep=' ', index=False, header=False)

    # The sum below runs over `number_of_residues` entries of both lists; a mismatch used
    # to end in an IndexError or in a mean taken over the wrong count.
    if (number_of_residues == 0
            or len(ca_shifted) != number_of_residues
            or len(ca_best) != number_of_residues):
        raise ValueError(
            "CA count mismatch: native has %d residues, shifted yields %d CA atoms, best yields %d"
            % (number_of_residues, len(ca_shifted), len(ca_best))
        )

    # Loop-invariant test, evaluated once: do both structures agree on whether *all* of
    # their CA z-coordinates are negative?
    same_side = all(ca[2] < 0 for ca in ca_shifted) == all(ca[2] < 0 for ca in ca_best)

    total = 0
    for k in range(number_of_residues):
        if same_side:
            delta = ca_best[k] - ca_shifted[k]
        else:
            # The structures disagree on the sign test: compare coordinate magnitudes.
            delta = abs(ca_shifted[k]) - abs(ca_best[k])
        total = total + np.square(delta[0]) + np.square(delta[1]) + np.square(delta[2])

    return np.sqrt(total/number_of_residues)

In [46]:
def as_matrix(pose, mask = None):
    """
    Return an (n, 3) matrix, where n is the number of selected atoms in the pose and 3
    refers to the 3 dimensional X, Y and Z degrees of freedom of each atom.

    Atoms are visited residue by residue (1-based), atom by atom (1-based). Atoms that
    the residue type flags as virtual are skipped and do not consume a mask slot.

    Parameters:
        pose: object exposing total_residue() and residue(i); each residue exposes
              natoms(), type().is_virtual(i) and xyz(i).
        mask: optional indexable (list, tuple or NumPy array) with one entry per
              non-virtual atom in traversal order. An atom is kept when its entry
              equals 1 (True also qualifies). None keeps every non-virtual atom.

    Returns:
        numpy array built from the selected xyz values.
    """

    coords = []
    index = 0
    for res_index in range(1, pose.total_residue() + 1):
        residue = pose.residue(res_index)
        for atom_index in range(1, residue.natoms() + 1):
            if residue.type().is_virtual(atom_index):
                continue
            # Identity test: `mask == None` is element-wise for NumPy arrays and made the
            # `or` below raise "truth value of an array is ambiguous".
            if mask is None or mask[index] == 1:
                coords.append(residue.xyz(atom_index))
            index += 1
    return np.array(coords)

In [47]:
def rmsd(pose, reference_pose, movable_mask = None, reference_mask = None):
    """
    Compute the root-mean-square deviation between `pose` and `reference_pose`.

    Coordinates of both poses are gathered with as_matrix(); when a mask is given for
    a pose, only the atoms selected by that mask take part. No superposition is done:
    the coordinates are compared as they are.

    Returns a tuple (number of atoms compared, RMSD). An AssertionError is raised when
    the two selections do not contain the same number of atoms.
    """

    mov_xyz = as_matrix(pose, movable_mask)
    ref_xyz = as_matrix(reference_pose, reference_mask)

    n_mov = len(mov_xyz)
    n_ref = len(ref_xyz)
    assert n_mov == n_ref, \
        "Movable (%d) and Reference (%d) number of atoms do not match" % \
        (n_mov, n_ref)

    # Per-coordinate offsets, squared and averaged over the number of atoms.
    delta = ref_xyz - mov_xyz
    mean_square = np.sum(delta * delta) / n_ref

    return n_ref, np.sqrt(mean_square)

### RMSD

In [36]:
# Identifier of the structure; its first two underscore-separated fields select the files.
protein = "1eak_A_renum"
protein_tag, multiple_tag = protein.split(sep='_')[:2]

# Input structure, pre-shifted structure and best output pose for this protein.
native = pose_from_pdb(
    'input_pdbs/{}_{}_renum.pdb'.format(protein_tag,multiple_tag)
)
shifted = pose_from_pdb(
    'shifted/shift_{}.pdb'.format(protein_tag)
)
best = pose_from_pdb(
    'results/{}_{}/output_pdbs/{}_{}_renum_best_pose_overall.pdb'.format(protein_tag,multiple_tag,protein_tag,multiple_tag)
)

core.import_pose.import_pose: File 'input_pdbs/1eak_A_renum.pdb' automatically determined to be of type PDB
core.conformation.Conformation: Found disulfide between residues 29 34
core.conformation.Conformation: Found disulfide between residues 202 228
core.conformation.Conformation: Found disulfide between residues 216 243
core.conformation.Conformation: Found disulfide between residues 260 286
core.conformation.Conformation: Found disulfide between residues 274 301
core.conformation.Conformation: Found disulfide between residues 318 344
core.conformation.Conformation: Found disulfide between residues 332 359
core.import_pose.import_pose: File 'shifted/shift_1eak.pdb' automatically determined to be of type PDB
core.conformation.Conformation: Found disulfide between residues 29 34
core.conformation.Conformation: Found disulfide between residues 202 228
core.conformation.Conformation: Found disulfide between residues 216 243
core.conformation.Conformation: Found disulfide between residue

In [37]:
# Report the value returned by the project helper AH_CA_rmsd for the native/best pair.
ah_ca_result = AH_CA_rmsd(native, best)
print("The RMSD is: {}".format(ah_ca_result))

WELCOME TO THE WORLD OF MEMBRANE PROTEINS...

Setting initial membrane center and normal to position used by the user-provided membrane residue
9.9713079900983 93.26343851084194 99.41187784869511
19.1799589965513 81.11276203288571 73.14531032855724
18.909501568985434 93.26343851084195 108.60462796585799
18.909501568985494 93.26343851084194 108.60462796585806
19.442570414908797 84.55220655192204 108.60462796585804
The RMSD is: 24.40341556572148


In [61]:
# Identifier of the structure; its first two underscore-separated fields select the files.
protein = "6ei6_A_renum"
protein_tag, multiple_tag = protein.split(sep='_')[:2]

# Input structure, pre-shifted structure and best output pose for this protein.
native = pose_from_pdb(
    'input_pdbs/{}_{}_renum.pdb'.format(protein_tag,multiple_tag)
)
shifted = pose_from_pdb(
    'shifted/shift_{}.pdb'.format(protein_tag)
)
best = pose_from_pdb(
    'results/{}_{}/output_pdbs/{}_{}_renum_best_pose_overall.pdb'.format(protein_tag,multiple_tag,protein_tag,multiple_tag)
)

core.import_pose.import_pose: File 'input_pdbs/6ei6_A_renum.pdb' automatically determined to be of type PDB
core.conformation.Conformation: Found disulfide between residues 1 8
core.import_pose.import_pose: File 'shifted/shift_6ei6.pdb' automatically determined to be of type PDB
core.conformation.Conformation: Found disulfide between residues 1 8
core.import_pose.import_pose: File 'results/6ei6_A/output_pdbs/6ei6_A_renum_best_pose_overall.pdb' automatically determined to be of type PDB
core.conformation.Conformation: Found disulfide between residues 1 8


In [60]:
# Report the value returned by myrmsd (defined in this notebook) for the native/best pair.
myrmsd_result = myrmsd(native, best)
print("The RMSD is: {}".format(myrmsd_result))

WELCOME TO THE WORLD OF MEMBRANE PROTEINS...

Setting initial membrane center and normal to position used by the user-provided membrane residue
66.81190641123241 156.79378378735262 89.13212284059695
58.451960855816964 143.76023828065902 74.02717032297913
67.20937795711127 156.79378378735265 85.85769740967923
67.20937795711137 156.79378378735274 85.85769740967913
70.47396369375566 160.00420592060772 85.85769740967912
The RMSD is: 21.32092833706637


In [62]:
# No masks: every non-virtual atom of both poses enters the comparison.
movable_mask = reference_mask = None

count, rms = rmsd(best, shifted, movable_mask = movable_mask, reference_mask = reference_mask)
print("RMSD: %6.3f angstrom (%3d atoms)" % (rms, count))

RMSD: 21.516 angstrom (3982 atoms)
