In [39]:
import numpy as np
import pandas as pd

### Load the training data:
* normalize XYZ with the 1st Atom
* get the distance of each 2 Atoms
* get the angle of each 3 Atoms
* get the angle of each 5 Atoms

In [108]:
PATH = 'test01.pdb'

In [40]:
data = pd.read_csv(PATH,sep='\s+')

data.columns = ['REMARK','4','1DPP','COMPLIES','X','Y','Z','2.0,','18-NOV-1998']
display(data)

Unnamed: 0,REMARK,4,1DPP,COMPLIES,X,Y,Z,"2.0,",18-NOV-1998
ATOM,1,C1,OLA,1,44.29,24.62,58.04,1.0,0.0
ATOM,2,C2,OLA,1,44.63,25.19,56.66,1.0,0.0
ATOM,3,C3,OLA,1,46.04,24.71,56.29,1.0,0.0
ATOM,4,C4,OLA,1,46.63,25.14,54.94,1.0,0.0
ATOM,5,C5,OLA,1,48.05,24.57,54.94,1.0,0.0
ATOM,6,C6,OLA,1,48.83,25.06,53.72,1.0,0.0
ATOM,7,C7,OLA,1,48.14,24.56,52.45,1.0,0.0
ATOM,8,C8,OLA,1,48.63,24.69,51.0,1.0,0.0
ATOM,9,C9,OLA,1,47.74,24.05,49.94,1.0,0.0
ATOM,10,C10,OLA,1,47.84,24.38,48.59,1.0,0.0


### Get the angle of each 3 Atoms

In [41]:
def get_angle(a, b, c):
    ba = a - b
    bc = c - b
    cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    angle = np.arccos(cosine_angle)
    return(np.degrees(angle))

def dots_angle(data):
    data.index = range(len(data))
    angles = [0]
    for k in range(len(data)-2):
        a = data.loc[k, ['X', 'Y', 'Z']]
        b = data.loc[k+1, ['X', 'Y', 'Z']]
        c = data.loc[k+2, ['X', 'Y', 'Z']]
        angles.append(get_angle(a, b, c))
    angles.append(0)
    return(angles)

In [44]:
data['dots-angle'] = dots_angle(data)
display(data)

Unnamed: 0,REMARK,4,1DPP,COMPLIES,X,Y,Z,"2.0,",18-NOV-1998,dots-angle
0,1,C1,OLA,1,44.29,24.62,58.04,1.0,0.0,0.0
1,2,C2,OLA,1,44.63,25.19,56.66,1.0,0.0,107.748079
2,3,C3,OLA,1,46.04,24.71,56.29,1.0,0.0,118.529798
3,4,C4,OLA,1,46.63,25.14,54.94,1.0,0.0,104.618734
4,5,C5,OLA,1,48.05,24.57,54.94,1.0,0.0,110.738952
5,6,C6,OLA,1,48.83,25.06,53.72,1.0,0.0,109.130631
6,7,C7,OLA,1,48.14,24.56,52.45,1.0,0.0,127.755005
7,8,C8,OLA,1,48.63,24.69,51.0,1.0,0.0,115.752242
8,9,C9,OLA,1,47.74,24.05,49.94,1.0,0.0,122.155375
9,10,C10,OLA,1,47.84,24.38,48.59,1.0,0.0,126.320389


### Normalize XYZ with the 1st Atom

In [45]:
data[['X','Y','Z']] = data[['X','Y','Z']] - data.loc[0, ['X','Y','Z']]
display(data)

Unnamed: 0,REMARK,4,1DPP,COMPLIES,X,Y,Z,"2.0,",18-NOV-1998,dots-angle
0,1,C1,OLA,1,0.0,0.0,0.0,1.0,0.0,0.0
1,2,C2,OLA,1,0.34,0.57,-1.38,1.0,0.0,107.748079
2,3,C3,OLA,1,1.75,0.09,-1.75,1.0,0.0,118.529798
3,4,C4,OLA,1,2.34,0.52,-3.1,1.0,0.0,104.618734
4,5,C5,OLA,1,3.76,-0.05,-3.1,1.0,0.0,110.738952
5,6,C6,OLA,1,4.54,0.44,-4.32,1.0,0.0,109.130631
6,7,C7,OLA,1,3.85,-0.06,-5.59,1.0,0.0,127.755005
7,8,C8,OLA,1,4.34,0.07,-7.04,1.0,0.0,115.752242
8,9,C9,OLA,1,3.45,-0.57,-8.1,1.0,0.0,122.155375
9,10,C10,OLA,1,3.55,-0.24,-9.45,1.0,0.0,126.320389


### Get the distance of each 2 Atoms

In [46]:
def dots_distance(data):
    data.index = range(len(data))
    distances = []
    for k in range(len(data)-1):
        p1 = data.loc[k, ['X', 'Y', 'Z']]
        p2 = data.loc[k+1, ['X', 'Y', 'Z']]
        squared_dist = np.sum((p1-p2)**2, axis=0)
        dist = np.sqrt(squared_dist)
        distances.append(dist)
    distances.append(0)
    return(distances)

In [48]:
data['dots-distance'] = dots_distance(data)
display(data)

Unnamed: 0,REMARK,4,1DPP,COMPLIES,X,Y,Z,"2.0,",18-NOV-1998,dots-angle,dots-distance
0,1,C1,OLA,1,0.0,0.0,0.0,1.0,0.0,0.0,1.531307
1,2,C2,OLA,1,0.34,0.57,-1.38,1.0,0.0,107.748079,1.534731
2,3,C3,OLA,1,1.75,0.09,-1.75,1.0,0.0,118.529798,1.534764
3,4,C4,OLA,1,2.34,0.52,-3.1,1.0,0.0,104.618734,1.530131
4,5,C5,OLA,1,3.76,-0.05,-3.1,1.0,0.0,110.738952,1.528692
5,6,C6,OLA,1,4.54,0.44,-4.32,1.0,0.0,109.130631,1.529379
6,7,C7,OLA,1,3.85,-0.06,-5.59,1.0,0.0,127.755005,1.536066
7,8,C8,OLA,1,4.34,0.07,-7.04,1.0,0.0,115.752242,1.524893
8,9,C9,OLA,1,3.45,-0.57,-8.1,1.0,0.0,122.155375,1.393341
9,10,C10,OLA,1,3.55,-0.24,-9.45,1.0,0.0,126.320389,1.523975


### Get the angle of each 5 Atoms

In [67]:
import math

# Equation of a plane through three points
# https://kitchingroup.cheme.cmu.edu/blog/2015/01/18/Equation-of-a-plane-through-three-points/
def dots_plane(a, b, c):
    p1 = np.array(list(a))
    p2 = np.array(list(b))
    p3 = np.array(list(c))
    v1 = p3 - p1
    v2 = p2 - p1
    return(np.cross(v1, v2))

# Find the Angle between two planes in 3D.
# https://www.geeksforgeeks.org/angle-between-two-planes-in-3d/
def planes_angle(a1, b1, c1, a2, b2, c2):
	
	d = ( a1 * a2 + b1 * b2 + c1 * c2 )
	e1 = math.sqrt( a1 * a1 + b1 * b1 + c1 * c1)
	e2 = math.sqrt( a2 * a2 + b2 * b2 + c2 * c2)
	d = d / (e1 * e2)
	A = math.degrees(math.acos(d))
	return(A)

def surface_angle(data):
    data.index = range(len(data))
    angles = [0,0]
    for k in range(len(data)-4):
        a = data.loc[k, ['X', 'Y', 'Z']]
        b = data.loc[k+1, ['X', 'Y', 'Z']]
        c = data.loc[k+2, ['X', 'Y', 'Z']]
        d = data.loc[k+3, ['X', 'Y', 'Z']]
        e = data.loc[k+4, ['X', 'Y', 'Z']]
        x1, y1, z1 = dots_plane(a,b,c)
        x2, y2, z2 = dots_plane(c,b,e)
        A = planes_angle(x1, y1, z1, x2, y2, z2)
        angles.append(A)
    angles.append(0)
    angles.append(0)
    return(angles)

In [105]:
data['planes-angle'] = surface_angle(data)
display(data)

Unnamed: 0,REMARK,4,1DPP,COMPLIES,X,Y,Z,"2.0,",18-NOV-1998,dots-angle,dots-distance,planes-angle
0,1,C1,OLA,1,0.0,0.0,0.0,1.0,0.0,0.0,1.531307,0.0
1,2,C2,OLA,1,0.34,0.57,-1.38,1.0,0.0,107.748079,1.534731,0.0
2,3,C3,OLA,1,1.75,0.09,-1.75,1.0,0.0,118.529798,1.534764,5.07524
3,4,C4,OLA,1,2.34,0.52,-3.1,1.0,0.0,104.618734,1.530131,9.337508
4,5,C5,OLA,1,3.76,-0.05,-3.1,1.0,0.0,110.738952,1.528692,24.068684
5,6,C6,OLA,1,4.54,0.44,-4.32,1.0,0.0,109.130631,1.529379,113.983438
6,7,C7,OLA,1,3.85,-0.06,-5.59,1.0,0.0,127.755005,1.536066,9.021707
7,8,C8,OLA,1,4.34,0.07,-7.04,1.0,0.0,115.752242,1.524893,11.185341
8,9,C9,OLA,1,3.45,-0.57,-8.1,1.0,0.0,122.155375,1.393341,20.419322
9,10,C10,OLA,1,3.55,-0.24,-9.45,1.0,0.0,126.320389,1.523975,162.484362


In [111]:
data.to_csv('new'+PATH)