In [9]:
import numpy as np
from tqdm.auto import tqdm

In [10]:
import pandas as pd
from pathlib import Path

In [11]:
from Bio.PDB import PDBParser
import nglview as nv

In [12]:
def structs_to_pd(structs):
    """Flatten the residues of a parsed structure into a DataFrame.

    One row is produced per residue yielded by ``structs.get_residues()``.

    Columns:
        nuc: element 2 of ``residue.get_full_id()``.
        pos: element 1 of element 3 of ``residue.get_full_id()``.
        <atom name>: one column per atom name, each cell holding that
            atom's ``coord`` value for the residue.

    NOTE(review): for Biopython residues the full id is documented as
    (structure id, model id, chain id, residue id), which would make 'nuc'
    the chain identifier rather than a nucleotide name -- confirm intent.

    Args:
        structs: object exposing ``get_residues()`` (e.g. the result of
            ``PDBParser().get_structure``).

    Returns:
        pandas.DataFrame built from the per-residue records.
    """
    def _as_record(res):
        # Identifier fields are inserted first so they lead the column order;
        # atom entries follow in iteration order.
        ident = res.get_full_id()
        row = {'nuc': ident[2], 'pos': ident[3][1]}
        row.update((atom.name, atom.coord) for atom in res.get_atoms())
        return row

    return pd.DataFrame([_as_record(res) for res in structs.get_residues()])

In [13]:
from scipy.spatial.distance import pdist, squareform, euclidean, cosine

In [14]:
def df_to_distance_map(df):
    """Return the pairwise Euclidean distance matrix between C2 coordinates.

    Args:
        df: DataFrame whose 'C2' column holds one coordinate vector per row.

    Returns:
        Square symmetric ndarray; entry (i, j) is the Euclidean distance
        between the 'C2' coordinates of rows i and j.
    """
    # Stack the per-row coordinate vectors into a single 2-D array.
    coords = np.vstack(df['C2'].values)
    condensed = pdist(coords, metric='euclidean')
    # Expand the condensed upper-triangle vector into a full square matrix.
    return squareform(condensed)


In [15]:
def df_to_angles_map(df):
    """Return pairwise cosine similarity between per-row plane normals.

    For every row, a normal vector is formed as the cross product of
    (C4 - C2) and (C6 - C2). The result holds, for each pair of rows, one
    minus the cosine distance between their normals, i.e. the cosine of the
    angle between them (not the angle itself).

    Args:
        df: DataFrame with 'C2', 'C4' and 'C6' columns, each holding one
            coordinate vector per row.

    Returns:
        Square symmetric ndarray of cosine similarities. The diagonal is 1
        because ``squareform`` fills the diagonal with zero distances.
    """
    c2_xyz, c4_xyz, c6_xyz = (
        np.vstack(df[atom_name].values) for atom_name in ('C2', 'C4', 'C6')
    )
    # Two in-plane vectors anchored at C2; their cross product is normal to
    # the plane through the three atoms.
    plane_normals = np.cross(c4_xyz - c2_xyz, c6_xyz - c2_xyz)
    cosine_dist = squareform(pdist(plane_normals, metric='cosine'))
    return 1 - cosine_dist

In [19]:
!mkdir ../data/nsp_distances_angles2

In [20]:
# Convert every PDB file in ../nsp_optim_full/ into an (n, n, 2) feature array
# (channel 0: C2-C2 distances, channel 1: cosine similarity of plane normals)
# and save it as ../data/nsp_distances_angles2/<name>.npy.
out_dir = Path('../data/nsp_distances_angles2')
# Make the cell self-sufficient and re-runnable: no error if it already exists.
out_dir.mkdir(parents=True, exist_ok=True)

# Materialise and sort the glob so tqdm knows the total and the processing
# order is deterministic.
pdb_files = sorted(Path('../nsp_optim_full/').glob('*.pdb'))

# The parser is loop-invariant; build it once and reuse it for every file.
parser = PDBParser()

for file in tqdm(pdb_files):
    # The first 12 characters of the file name are used as the output stem.
    name = file.name[:12]
    # Use the computed name as the structure id (not the literal string 'name').
    struct = parser.get_structure(name, file)
    df = structs_to_pd(struct)
    distances = df_to_distance_map(df)
    angles = df_to_angles_map(df)
    # Stack the two square maps along a new trailing axis.
    features = np.stack([distances, angles], axis=-1)
    np.save(out_dir / (name + '.npy'), features, allow_pickle=False)

HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…






