# ALIGN ALL CHAINS ON THE SAME REFERENCE

## Imports

In [72]:
from JSU_lib import *

## Functions

In [79]:
def align_structures(struc_files, struc_dir, trans_dir):
    """
    Centre every structure on its centroid and rotate it onto its PCA axes.

    For each file in `struc_files` (processed in sorted order) the atom
    coordinates are read from `struc_dir`, translated so that their centroid
    lies at the origin, and rotated with a matrix built from a 3-component
    PCA of the translated coordinates. The transformed structure is written
    to `trans_dir` as ``<name>.trans.<ext>`` unless that file already exists,
    and the RMSD between the input file and the output file is printed.

    Parameters
    ----------
    struc_files : iterable of str
        Structure file names, e.g. ``3f5o_E.clean.pdb``. The structure name
        is the first two "_"-separated fields of the text before the first
        dot; the extension is the text after the last dot.
    struc_dir : str
        Directory containing the input files.
    trans_dir : str
        Directory receiving the transformed files.

    Returns
    -------
    right_handed : list of str
        Names whose initial axis matrix had a non-negative determinant.
    left_handed : list of str
        Names whose initial axis matrix had a negative determinant (the
        matrix is negated for these before being applied and stored).
    orig_centroids : dict
        Structure name -> centroid of the original coordinates.
    rot_matrices : dict
        Structure name -> 3x3 matrix actually applied to the coordinates.
    """
    coord_cols = ('Cartn_x', 'Cartn_y', 'Cartn_z')
    left_handed = []
    right_handed = []
    orig_centroids = {}
    rot_matrices = {}
    for i, struc_file in enumerate(sorted(struc_files)):
        # Lightweight progress indicator.
        if i % 100 == 0:
            print(i)
        pdb_in = os.path.join(struc_dir, struc_file)
        # Take the first and last dot-separated fields so that names with a
        # different number of dots than "<id>_<chain>.clean.pdb" still parse.
        name_parts = struc_file.split(".")
        struc_name = "_".join(name_parts[0].split("_")[:2])
        ext = name_parts[-1]
        pdb_out = os.path.join(trans_dir, f'{struc_name}.trans.{ext}')
        df = PDBXreader(inputfile=pdb_in).atoms(format_type="pdb", excluded=())

        # Step 1: translate the points so their centroid is at (0, 0, 0)
        points = np.column_stack([df[col] for col in coord_cols])
        centroid = points.mean(axis=0)
        translated_points = points - centroid
        orig_centroids[struc_name] = centroid

        # Step 2: PCA to find the principal components
        pca = PCA(n_components=3)  # 3 components to keep the full 3D space
        pca.fit(translated_points)

        # Step 3: build the axis matrix from the first two components; the
        # third axis is their cross product, which guarantees orthogonality.
        pc1 = pca.components_[0]
        pc2 = pca.components_[1]
        pc3 = np.cross(pc1, pc2)

        # Row order is (pc2, pc1, pc3): PC2 maps to x, PC1 to y, pc3 to z.
        rotation_matrix = np.array([pc2, pc1, pc3])

        # With pc3 = pc1 x pc2 and rows ordered (pc2, pc1, pc3) the
        # determinant is -1 for orthonormal pc1/pc2, so the correction branch
        # is the one normally taken.
        determinant = np.linalg.det(rotation_matrix)

        # A negative determinant means the matrix contains a reflection.
        # Multiplying by -I negates all three axes, which flips the sign of
        # the determinant of a 3x3 matrix and yields a proper rotation.
        if determinant < 0:
            correction_matrix = np.diag([-1, -1, -1])
            rotation_matrix = np.dot(rotation_matrix, correction_matrix)
            left_handed.append(struc_name)
        else:
            right_handed.append(struc_name)

        # Step 4: apply the matrix; points are row vectors, hence the
        # transpose.
        rotated_points = np.dot(translated_points, rotation_matrix.T)

        for axis, col in enumerate(coord_cols):
            df[col] = rotated_points[:, axis]

        # Existing output files are left untouched.
        if not os.path.isfile(pdb_out):
            w = PDBXwriter(outputfile=pdb_out)
            w.run(df, format_type="pdb")

        RMSD = round(calculate_rmsd(pdb_in, pdb_out), 4)

        print(struc_name, RMSD)

        rot_matrices[struc_name] = rotation_matrix

    return right_handed, left_handed, orig_centroids, rot_matrices

## Input data

In [74]:
# Directory holding the cleaned representative chains (input).
rep_chains_dir = "./../DATA/clean_rep_chains"
# Directory receiving the centred and rotated chains (output).
trans_chains_dir = "./../DATA/trans_rep_chains_V2"

In [75]:
# Names of all ".pdb" files found in the representative-chains directory.
rep_chains = list(
    filter(lambda fname: fname.endswith(".pdb"), os.listdir(rep_chains_dir))
)

In [76]:
# Number of ".pdb" files collected in rep_chains.
print(len(rep_chains))

7950


In [77]:
# Preview the first five file names (in os.listdir order, i.e. unsorted).
print(rep_chains[:5])

['3f5o_E.clean.pdb', '3q2g_B.clean.pdb', '8qyt_A.clean.pdb', '2hgs_A.clean.pdb', '3e04_A.clean.pdb']


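ChimeraX command (run in ChimeraX, not in this notebook) to show the per-chain inertia ellipsoids of models #1-50:

    measure inertia #1-50 perChain true showEllipsoid true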

## Transforming clean structures

In [80]:
# Align every representative chain. RH / LH are the names whose initial axis
# matrix had a non-negative / negative determinant; orig_centroids and
# rot_matrices are dicts keyed by structure name.
RH, LH, orig_centroids, rot_matrices = align_structures(sorted(rep_chains), rep_chains_dir, trans_chains_dir)

0
1a52_A 0.0005
1a5h_B 0.0005
1a5r_A 0.0005
1a7s_A 0.0005
1a9w_E 0.0005
1ags_B 0.0005
1aii_A 0.0005
1aje_A 0.0005
1ajj_A 0.0005
1aly_A 0.0005
1am4_A 0.0005
1aqd_K 0.0005
1au1_A 0.0005
1au1_B 0.0005
1aue_B 0.0005
1auk_A 0.0005
1avr_A 0.0005
1aye_A 0.0005
1ayk_A 0.0005
1aze_A 0.0005
1b1c_A 0.0005
1b2i_A 0.0005
1b3o_B 0.0005
1b3u_A 0.0005
1b50_B 0.0005
1b55_B 0.0005
1b6a_A 0.0005
1b6c_F 0.0005
1b8m_A 0.0005
1b9o_A 0.0005
1bbc_A 0.0005
1bbo_A 0.0005
1bci_A 0.0005
1bd9_A 0.0005
1bda_A 0.0005
1bf9_A 0.0005
1bhg_B 0.0005
1bhi_A 0.0005
1bhx_F 0.0005
1bla_A 0.0005
1bld_A 0.0005
1blx_A 0.0005
1bm6_A 0.0005
1bmo_A 0.0005
1bnl_A 0.0005
1bo1_A 0.0005
1bor_A 0.0005
1boy_A 0.0005
1bp3_B 0.0005
1bp5_A 0.0005
1bqt_A 0.0005
1buv_M 0.0005
1byg_A 0.0005
1bzs_A 0.0005
1c07_A 0.0005
1c15_A 0.0005
1c4z_A 0.0005
1c5n_L 0.0005
1c7p_A 0.0005
1c7u_A 0.0005
1c9q_A 0.0005
1c9y_A 0.0005
1cb6_A 0.0005
1cdb_A 0.0005
1cee_B 0.0005
1cf4_A 0.0005
1cjy_B 0.0005
1ck7_A 0.0005
1ckl_A 0.0005
1cm0_B 0.0005
1cm8_A 0.0005
1cn4

In [81]:
# Sanity check: number of structure names with a stored rotation matrix.
len(rot_matrices)

7950

In [82]:
# Sanity check: number of structure names with a stored original centroid.
len(orig_centroids)

7950

In [83]:
# Number of structures classified as right-handed and as left-handed.
print(len(RH), len(LH))

0 7950


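ChimeraX command (run in ChimeraX, not in this notebook) to colour models #1-50 by the atom `bfactor` attribute:

    color byattribute a:bfactor #!1-50 target abcs palette 0,blue:0.25,green:0.5,yellow:0.75,orange:1,red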

In [84]:
# Total number of entries in the output directory.
len(os.listdir(trans_chains_dir))

7950

In [85]:
# Number of ".pdb" files in the output directory. The suffix includes the dot
# so that only real .pdb files are counted, matching the input-file filter.
len([el for el in os.listdir(trans_chains_dir) if el.endswith(".pdb")])

7950

In [86]:
# Store the per-structure rotation matrices and original centroids under
# ./results. save_to_pickle comes from the JSU_lib star import; presumably it
# serialises the object with pickle -- TODO confirm.
save_to_pickle(rot_matrices, "./results/PDB_rot_matrices_ALL_CLUST.pkl")
save_to_pickle(orig_centroids, "./results/PDB_orig_centroids_ALL_CLUST.pkl")