In [2]:
%pip install biopandas prody

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
from biopandas.pdb import PandasPdb
from prody import parsePDBHeader
from typing import Optional

def read_pdb_to_dataframe(
    pdb_path: Optional[str] = None,
    model_index: int = 1,
    parse_header: bool = True,
    ) -> pd.DataFrame:
    """
    Read a PDB file, and return a Pandas DataFrame containing the atomic coordinates and metadata.

    Args:
        pdb_path (str, optional): Path to a local PDB file to read. Defaults to None.
        model_index (int, optional): Index of the model to extract from the PDB file, in case
            it contains multiple models. Defaults to 1.
        parse_header (bool, optional): Whether to parse the PDB header and extract metadata.
            Defaults to True.

    Returns:
        pd.DataFrame: A DataFrame containing the atomic coordinates and metadata, with one row
            per atom
    """
    atomic_df = PandasPdb().read_pdb(pdb_path)
    if parse_header:
        header = parsePDBHeader(pdb_path)
    else:
        header = None
    atomic_df = atomic_df.get_model(model_index)
    if len(atomic_df.df["ATOM"]) == 0:
        raise ValueError(f"No model found for index: {model_index}")

    return pd.concat([atomic_df.df["ATOM"], atomic_df.df["HETATM"]]), header

In [6]:
df, df_header = read_pdb_to_dataframe('ranked_0.pdb')
df.head(1000)

Unnamed: 0,record_name,atom_number,blank_1,atom_name,alt_loc,residue_name,blank_2,chain_id,residue_number,insertion,...,y_coord,z_coord,occupancy,b_factor,blank_4,segment_id,element_symbol,charge,line_idx,model_id
0,ATOM,1,,N,,MET,,A,1,,...,7.386,2.590,1.0,96.19,,,N,,0,1
1,ATOM,2,,H,,MET,,A,1,,...,7.164,2.418,1.0,96.19,,,H,,1,1
2,ATOM,3,,H2,,MET,,A,1,,...,6.910,1.885,1.0,96.19,,,H,,2,1
3,ATOM,4,,H3,,MET,,A,1,,...,8.373,2.438,1.0,96.19,,,H,,3,1
4,ATOM,5,,CA,,MET,,A,1,,...,6.994,3.950,1.0,96.19,,,C,,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,ATOM,996,,HE22,,GLN,,A,62,,...,-4.281,9.124,1.0,96.94,,,H,,995,1
996,ATOM,997,,OE1,,GLN,,A,62,,...,-2.590,9.825,1.0,96.94,,,O,,996,1
997,ATOM,998,,N,,LYS,,A,63,,...,2.124,7.416,1.0,97.73,,,N,,997,1
998,ATOM,999,,H,,LYS,,A,63,,...,1.758,7.395,1.0,97.73,,,H,,998,1


In [None]:
%pip install plotly
%pip install nbformat 

In [None]:
import plotly.express as px

fig = px.scatter_3d(df, x='x_coord', y='y_coord', z='z_coord', color='element_symbol')
fig.update_traces(marker_size = 4)

fig.show()

In [16]:
X = df[["x_coord", "y_coord", "z_coord"]].values
X

array([[-12.696,   7.386,   2.59 ],
       [-13.667,   7.164,   2.418],
       [-12.151,   6.91 ,   1.885],
       ...,
       [ 24.255,  -1.284,  -9.781],
       [ 23.914,  -2.472,  -9.573],
       [ 25.122,  -0.873, -10.573]])

In [17]:
residues = df["residue_number"]
residues

0        1
1        1
2        1
3        1
4        1
        ..
1226    76
1227    76
1228    76
1229    76
1230    76
Name: residue_number, Length: 1231, dtype: int64

In [31]:
import numpy as np
def k(x, y, sig=1):
    r = np.linalg.norm(x - y)
    return np.exp(-0.5 * np.square(r) / np.square(sig))
def GaussianKernel(X, sig=1):
    n = X.shape[0]
    kernel = np.empty((n,n))
    for i in range(n):
        for j in range(n):
            kernel[i, j] = k(X[i], X[j], sig)
    return kernel / n

K = GaussianKernel(X)

In [33]:
print(np.linalg.matrix_rank(K))
print(K.shape)
print(np.linalg.eig(K))
# import matplotlib.pyplot as plt
# xx = np.linspace(0,100,101)
# plt.plot(xx, np.linalg.eig(K)[:101])
# plt.show()

1231
(1231, 1231)
(array([0.00221413, 0.0022034 , 0.00220167, ..., 0.00063048, 0.00062096,
       0.00062392]), array([[-1.18849772e-03, -5.39121262e-03,  4.97941846e-03, ...,
         7.77202130e-05,  3.83087825e-06, -2.43578048e-04],
       [-7.86261072e-04, -3.57281368e-03,  3.30337817e-03, ...,
        -7.49990392e-03, -1.31726431e-04, -1.67599225e-02],
       [-9.72574467e-04, -4.21717791e-03,  3.92374118e-03, ...,
         3.83366006e-03,  2.14125233e-05,  1.25289776e-02],
       ...,
       [-1.78655527e-06, -2.68456228e-07, -5.02279670e-08, ...,
         4.26022931e-06, -1.71599814e-08,  6.94956464e-08],
       [-8.81368892e-07, -1.32482414e-07, -2.47888575e-08, ...,
         1.03277212e-05, -6.50858432e-08,  2.36012544e-07],
       [-7.88820637e-07, -1.19142008e-07, -2.23099389e-08, ...,
        -1.13100722e-05,  6.56225823e-08, -2.44082803e-07]]))
