In [1]:
from parsing.parse_functions import parse_pdb_files
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
import matplotlib
from matplotlib.patches import Circle
from mpl_toolkits.mplot3d import Axes3D   # noqa: F401  (needed for 3-D projection)

from utils.scale_low_res_coordinates import scale_low_res_coords
from utils.pucker_data_functions import determine_pucker_data

In [2]:
input_pdb_dir = "/Users/kaisardauletbek/Documents/GitHub/RNA-Classification/data/rna2020_pruned_pdbs/"
suites = parse_pdb_files(input_pdb_dir, input_pdb_folder=input_pdb_dir)

puckers = ['c2c2', 'c2c3', 'c3c2', 'c3c3']
pucker_indices = {}
for pucker in puckers:
    indices, _ = determine_pucker_data(suites, pucker)
    pucker_indices[pucker] = indices

# -- ensure every index array is really integer-typed  -----------------
pucker_indices = {k: np.asarray(v, dtype=np.intp)    # <-- np.intp = “platform int”
                  for k, v in pucker_indices.items()}


In [3]:
# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------
def spherical_to_vec(theta_deg: np.ndarray, phi_deg: np.ndarray) -> np.ndarray:
    t, p = np.radians(theta_deg), np.radians(phi_deg)
    return np.column_stack([np.sin(t) * np.cos(p),
                            np.sin(t) * np.sin(p),
                            np.cos(t)])

def arc_distance(a: np.ndarray, b: float) -> np.ndarray:
    """Shortest signed arc distance between vectors of angles a and scalar b (radians)."""
    d = (a - b + np.pi) % (2 * np.pi) - np.pi
    return d

def exponential_map(V, p):
    """
    V - point cloud N x R^m
    p - point of tangency R^m
    """
    N, M = V.shape[0], V.shape[1]
    V_mean = V.mean(axis=0)
    # check if points are centered at 0, center them
    V -= V_mean
    V = np.column_stack([V, np.zeros(N)])
    V_norm = np.linalg.norm(V, axis=1)[:, None]
    
    return np.cos(V_norm) * p + np.sin(V_norm) * (V / V_norm)
    

In [4]:
# scale only the distance‐variance, leave α‐variance and both means alone
scaled_coords, lambda_d, lambda_alpha = scale_low_res_coords(
    suites,
    scale_distance_variance=True,
    scale_alpha_variance=False,
    preserve_distance_mean=True,
    preserve_alpha_mean=True,
    store_attr="scaled_dvar_only"
)

d2_s, d3_s, alpha_s, theta1, phi1, theta2, phi2 = scaled_coords.T
N = len(d2_s)

In [5]:
x = np.array([d2_s[0], d3_s[0], alpha_s[0], theta1[0], phi1[0], theta2[0], phi2[0]])
x_prime = np.array([d2_s[1], d3_s[1], alpha_s[1], theta1[1], phi1[1], theta2[1], phi2[1]])

In [6]:
np.dot(x, x_prime)

20242.6872313057

In [9]:
def get_distance(x, x_prime):
    return np.dot(x, x_prime)

def get_distance_matrix(scaled_coords):
    d2_s, d3_s, alpha_s, theta1, phi1, theta2, phi2 = scaled_coords.T
    N = len(d2_s)
    distance_matrix = np.zeros((N, N))
    for i in range(N):
        for j in range(i+1, N):
            x = np.array([d2_s[i], d3_s[i], alpha_s[i], theta1[i], phi1[i], theta2[i], phi2[i]])
            x_prime = np.array([d2_s[j], d3_s[j], d3_s[j], theta1[j], phi1[j], theta2[j], phi2[j]])
            distance_matrix[i, j] = get_distance(x, x_prime)
    return distance_matrix

distance_matrix = get_distance_matrix(scaled_coords)


array([[    0.        , 16270.74695766, 18219.56040179, ...,
        17364.28387503, 17031.3551825 , 17960.32560052],
       [    0.        ,     0.        , 18255.48784867, ...,
        17376.94080798, 17057.06637522, 18008.67226883],
       [    0.        ,     0.        ,     0.        , ...,
        19495.24407555, 19116.32419855, 20188.06514501],
       ...,
       [    0.        ,     0.        ,     0.        , ...,
            0.        , 18181.73247492, 19196.02614517],
       [    0.        ,     0.        ,     0.        , ...,
            0.        ,     0.        , 18818.5342806 ],
       [    0.        ,     0.        ,     0.        , ...,
            0.        ,     0.        ,     0.        ]])