In [1]:
import numpy as np
import pandas as pd
import numpy as np
from scipy.spatial.transform import Rotation as R
from tqdm.auto import tqdm

def angle(vector_1, vector_2):
    """Return the angle between two vectors, in radians.

    Parameters
    ----------
    vector_1, vector_2 : non-zero vectors with the same number of components

    Returns
    -------
    The angle between the vectors, in the range [0, pi].
    """
    unit_vector_1 = vector_1 / np.linalg.norm(vector_1)
    unit_vector_2 = vector_2 / np.linalg.norm(vector_2)
    dot_product = np.dot(unit_vector_1, unit_vector_2)
    # Rounding can leave the cosine marginally outside [-1, 1] for parallel or
    # antiparallel vectors; arccos would then return NaN.
    return np.arccos(np.clip(dot_product, -1.0, 1.0))

def rotate_system(a: np.array, b: np.array, coord: np.array) -> np.array:
    """
    Rotate a set of coordinates by the rotation that turns vector a onto vector b.

    Parameters
    ----------
    a : vector to be rotated (three components; a single-row 2-D array or
        DataFrame is accepted as well)
    b : target direction vector (three components)
    coord : coordinates of the system, a single 3-vector or an (N, 3) array

    Returns
    -------
    The coordinates after applying the rotation that makes a parallel to b.
    If a already points along b the coordinates are returned unrotated; if a
    points opposite to b the system is turned by pi about an axis normal to a.

    Raises
    ------
    ValueError
        If a or b does not have exactly three components, or has zero length.
    """
    a_vec = np.asarray(a, dtype=float).reshape(-1)
    b_vec = np.asarray(b, dtype=float).reshape(-1)
    if a_vec.size != 3 or b_vec.size != 3:
        raise ValueError('a and b must each have exactly three components')

    norm_product = np.linalg.norm(a_vec) * np.linalg.norm(b_vec)
    if norm_product == 0:
        raise ValueError('a and b must have non-zero length')

    normal = np.cross(a_vec, b_vec)
    normal_length = np.linalg.norm(normal)
    cos_term = np.dot(a_vec, b_vec)

    # Relative tolerance: an exact "== 0" test misses vectors that are parallel
    # only up to rounding, for which the normalised cross product is noise.
    if normal_length <= 1e-12 * norm_product:
        if cos_term > 0:
            # Same direction already: nothing to rotate.
            rv = np.zeros(3)
        elif a_vec[0] == 0:
            # Opposite direction, a lies in the yz-plane: the x axis is normal to it.
            rv = np.array([np.pi, 0., 0.])
        else:
            # Opposite direction: (-(ay + az) / ax, 1, 1) is normal to a.
            axis = np.array([-(a_vec[1] + a_vec[2]) / a_vec[0], 1., 1.])
            rv = axis / np.linalg.norm(axis) * np.pi
    else:
        # arctan2 stays accurate for angles close to 0 and pi, unlike arccos.
        rv = normal / normal_length * np.arctan2(normal_length, cos_term)

    # A 2-D input used to yield a stack holding one rotation; keep that so the
    # shape of the result does not change for existing callers.
    if np.ndim(a) > 1 or np.ndim(b) > 1:
        rv = rv[np.newaxis, :]

    r = R.from_rotvec(rv)
    return r.apply(coord)


# |------------------------- readers -------------------------|


def read_xyz(file):
    """Read the atoms of an XYZ file into a DataFrame.

    The first two lines (atom count and comment) are skipped. Blank lines are
    ignored and only the first four fields of each atom line are used, so
    trailing empty lines or extra per-atom columns do not break parsing.

    Parameters
    ----------
    file : path of the XYZ file

    Returns
    -------
    DataFrame with columns atom (str) and x, y, z (float).
    """
    records = []
    with open(file) as handle:
        for cnt, line in enumerate(handle):
            if cnt < 2:
                continue
            fields = line.split()
            if not fields:
                continue
            records.append(fields[:4])
    df = pd.DataFrame(records, columns=['atom', 'x', 'y', 'z'])
    return df.astype({'atom': str, 'x': float, 'y': float, 'z': float})


def read_mol2(file):
    """Read the @<TRIPOS>ATOM section of a mol2 file into a DataFrame.

    Reading starts after the '@<TRIPOS>ATOM' line and stops at the next
    '@<TRIPOS>' record of any kind, not only '@<TRIPOS>BOND'. Blank lines and
    '#' comment lines inside the section are skipped, and only the first nine
    fields of each atom line are kept.

    Parameters
    ----------
    file : path of the mol2 file

    Returns
    -------
    DataFrame with columns atnum, atom, x, y, z, atType, resnum, res, something.
    atnum and resnum are uint32, x/y/z are float, atom is str; the remaining
    columns are left as read.
    """
    columns = ['atnum', 'atom', 'x', 'y', 'z', 'atType',
               'resnum', 'res', 'something']
    records = []
    in_atoms = False
    with open(file) as handle:
        for line in handle:
            stripped = line.strip()
            if stripped.startswith('@<TRIPOS>'):
                # Any record header closes the atom section; only ATOM opens it.
                in_atoms = stripped.startswith('@<TRIPOS>ATOM')
                continue
            if in_atoms and stripped and not stripped.startswith('#'):
                records.append(stripped.split()[:len(columns)])
    df = pd.DataFrame(records, columns=columns)
    return df.astype({'atnum': np.uint32, 'resnum': np.uint32, 'atom': str, 'x': float, 'y': float, 'z': float})
        

def read_pdb(file, first_word='HETATM'):
    """Read atom records of a PDB file into a DataFrame.

    Only lines that start with ``first_word`` are parsed; a line that merely
    mentions the word somewhere (for example inside a REMARK) is ignored.
    Records are split on whitespace, so each one must consist of exactly ten
    fields after the record name.

    Parameters
    ----------
    file : path of the PDB file
    first_word : record name to read, e.g. 'HETATM' or 'ATOM'

    Returns
    -------
    DataFrame with columns atnum, atom, res, resnum, x, y, z, smth0, smth1,
    atType. resnum is overwritten with 1 for every row.
    """
    records = []
    with open(file) as handle:
        for line in handle:
            if line.startswith(first_word):
                records.append(line.split()[1:])
    df = pd.DataFrame(records, columns=['atnum', 'atom', 'res', 'resnum', 'x', 'y', 'z',
                                        'smth0', 'smth1', 'atType'])
    # Every atom read here is assigned to residue 1, whatever the file says.
    df = df.assign(resnum=[1] * df.shape[0])
    return df.astype({'atnum': np.uint32, 'resnum': np.uint32, 'atom': str, 'x': float, 'y': float, 'z': float})


def read_gro(file):
    """Read a single-frame GRO file.

    The atom count on the second line decides which lines are atoms and which
    line is the box, instead of guessing from line length and token count.

    Parameters
    ----------
    file : path of the GRO file

    Returns
    -------
    (df, box) where df has columns resnum, res, atom, atnum, x, y, z taken from
    the fixed-width fields of each atom line, and box is the raw box line
    (including its line ending) that follows the atoms.

    Raises
    ------
    ValueError
        If the atom count line is missing or not an integer, or the file is
        shorter than the atom count requires.
    """
    with open(file) as handle:
        lines = handle.readlines()

    try:
        n_atoms = int(lines[1])
    except (IndexError, ValueError) as err:
        raise ValueError('second line of a gro file must hold the atom count') from err
    if n_atoms < 0 or len(lines) < n_atoms + 3:
        raise ValueError('gro file is shorter than its atom count requires')

    records = [[line[0:5], line[5:10].strip(), line[10:15].strip(),
                line[15:20], line[20:28], line[28:36], line[36:44]]
               for line in lines[2:2 + n_atoms]]
    box = lines[2 + n_atoms]

    df = pd.DataFrame(records, columns=['resnum', 'res', 'atom', 'atnum', 'x', 'y', 'z'])
    return df.astype({'atnum': np.uint32, 'resnum': np.uint32, 'atom': str, 'x': float, 'y': float, 'z': float}), box

# |------------------------- writers -------------------------|


def write_xyz(df, filename='test.xyz'):
    """Write the atom, x, y and z columns of df as an XYZ file.

    The file starts with the number of rows and an empty comment line, then
    holds one fixed-width line per atom.
    """
    row_template = '{:3s}{:17.9f}{:17.9f}{:17.9f}\n'
    n_atoms = df.shape[0]
    with open(filename, 'w') as out:
        out.write(f'{n_atoms}\n\n')
        for name, x, y, z in zip(df.atom, df.x, df.y, df.z):
            out.write(row_template.format(name, x, y, z))


def write_pdb(system, file='test.pdb'):
    """Write a system as HETATM records of a PDB file.

    A 'TER' line is written wherever resnum changes between consecutive rows,
    including between the first and the second row, and the file is closed
    with 'END'. The last field of each record is the first character of the
    atType column. The DataFrame passed in is not modified.

    Parameters
    ----------
    system : DataFrame with columns atnum, atom, res, resnum, x, y, z, atType
    file : path of the file to write
    """
    columns = ['atnum', 'atom', 'res', 'resnum', 'x', 'y', 'z']
    # Derived separately rather than written back into the caller's frame;
    # slicing keeps an empty atType from raising.
    elements = system.atType.apply(lambda x: x[:1])
    line_fmt = 'HETATM{:5d} {:>4s} {:<4s} {:<4d}    {:8.3f}{:8.3f}{:8.3f}  0.00  0.00               {:s}\n'

    with open(file, 'w') as handle:
        first_row = True
        previous_resnum = None
        rows = system[columns].itertuples(index=False, name=None)
        for row, element in zip(rows, elements):
            resnum = row[3]
            if not first_row and resnum != previous_resnum:
                handle.write('TER\n')
            handle.write(line_fmt.format(*row, element))
            previous_resnum = resnum
            first_row = False
        handle.write('END\n')


def write_gro(system, file='test.gro', box='10 10 10\n', x_shift=7.4):
    """Write a system as a GRO file.

    Coordinates are divided by 10, shifted so that the minimum of every axis
    is 0, and then moved by ``x_shift`` along x before being written. The
    DataFrame passed in is left untouched, so calling the function twice on
    the same frame gives the same file.

    Parameters
    ----------
    system : DataFrame with columns resnum, res, atom, atnum, x, y, z
    file : path of the file to write
    box : box line written verbatim as the last line (must end with a newline)
    x_shift : offset added to x after the shift to the origin
    """
    # NOTE(review): the division by 10 presumably converts angstrom to nm - confirm.
    coords = system[['x', 'y', 'z']] / 10
    coords = coords - coords.min()
    coords['x'] = coords['x'] + x_shift

    line_fmt = '{:5d}{:<5s}{:5s}{:5d}{:8.3f}{:8.3f}{:8.3f}\n'
    n_atoms = system.shape[0]
    rows = zip(system['resnum'], system['res'], system['atom'], system['atnum'],
               coords['x'], coords['y'], coords['z'])
    with open(file, 'w') as handle:
        handle.write(f'!comment\n{n_atoms}\n')
        for row in tqdm(rows, total=n_atoms):
            handle.write(line_fmt.format(*row))

        handle.write(box)


# |------------------------- build mems -------------------------|


def farest_atom(df, n=0):
    """Return the position of the atom that lies farthest from atom n.

    Parameters
    ----------
    df : DataFrame with columns x, y, z
    n : positional index of the reference atom

    Returns
    -------
    Positional index of the row with the largest distance to row n.
    """
    coords = df[['x', 'y', 'z']].values
    # Only the distances from atom n are needed, so the full N x N distance
    # matrix is never built.
    dist = np.sum((coords - coords[n]) ** 2, axis=-1) ** 0.5
    return dist.argmax()


def random_move(coords, dxyz):
    """Shift all atoms by one random offset and return the same frame.

    The offset along each axis is drawn uniformly from [0, 1) and scaled by
    the matching entry of dxyz. The x, y and z columns of ``coords`` are
    updated in place.
    """
    axes = ['x', 'y', 'z']
    offset = np.random.random(3) * np.array(dxyz)
    coords[axes] = coords[axes] + offset
    return coords


def check_min_dist(coords0, coords1, r_c):
    """Tell whether every point of coords0 is farther than r_c from every point of coords1.

    Parameters
    ----------
    coords0, coords1 : (N, 3) and (M, 3) coordinate arrays
    r_c : minimum allowed distance

    Returns
    -------
    True when the smallest pairwise distance exceeds r_c, or when either set
    is empty (no pair can clash).
    """
    coords0 = np.asarray(coords0, dtype=float)
    coords1 = np.asarray(coords1, dtype=float)
    if coords0.size == 0 or coords1.size == 0:
        return True
    squared = np.sum((coords0 - coords1[:, np.newaxis]) ** 2, axis=-1)
    # Squared distances are compared with the squared cutoff, which avoids
    # taking a square root of the whole matrix.
    return squared.min() > r_c ** 2


def random_box(df, box_size, random_angle, r_c=1, iters=1000):
    """Fill a box with randomly placed copies of a molecule.

    Starting from a copy of ``df``, ``iters`` candidate copies are generated.
    Each candidate is translated by random_move, rotated by rotate_system and
    kept only if check_min_dist accepts it against everything placed so far.

    Parameters
    ----------
    df : DataFrame of one molecule with columns x, y, z, resnum and atnum
    box_size : maximum random translation along x, y and z
    random_angle : per-axis scale of the random vector handed to rotate_system
    r_c : minimum distance passed to check_min_dist
    iters : number of placement attempts

    Returns
    -------
    DataFrame holding the original molecule and every accepted copy, with
    resnum increasing per molecule and atnum renumbered from 1.
    """
    system = df.copy()
    # Kept under its own name: overwriting random_angle inside the loop made
    # every draw scale the previous one, so the vectors shrank towards zero.
    angle_scale = np.array(random_angle)
    target = np.array([0, 1, 1.])
    for _ in tqdm(range(iters)):
        new_mol = random_move(df.copy(), box_size)
        rotation_vector = np.random.random(3) * angle_scale
        # NOTE(review): the rotation is applied about the origin after the
        # translation, so a molecule can end up outside box_size - confirm
        # that this is intended.
        new_mol[['x', 'y', 'z']] = rotate_system(rotation_vector, target, new_mol[['x', 'y', 'z']])
        if check_min_dist(new_mol[['x', 'y', 'z']].values, system[['x', 'y', 'z']].values, r_c):
            new_mol['resnum'] = system.resnum.iloc[-1] + 1
            system = pd.concat([system, new_mol])

    system['atnum'] = np.arange(1, system.shape[0] + 1)
    return system


def generate_boxes(n_iters, top, *args, **kwargs):
    """Generate random boxes and keep the most densely filled ones.

    Parameters
    ----------
    n_iters : number of boxes to generate with random_box
    top : number of boxes to keep
    *args, **kwargs : forwarded unchanged to random_box

    Returns
    -------
    List of at most ``top`` DataFrames, largest (most rows) first.
    """
    def by_size(box):
        return -box.shape[0]

    systems = []
    for i in tqdm(range(n_iters)):
        systems.append(random_box(*args, **kwargs))
        if i % 10 == 0:
            # Prune now and then so that only the best candidates stay in memory.
            systems.sort(key=by_size)
            systems = systems[:top]
    # Boxes appended after the last prune have not been ranked yet.
    systems.sort(key=by_size)
    return systems[:top]


def make_raws_box(df, system, r_c=14.5, d_y=7, d_z=7, min_dist=1.2, max_tries=2000):
    """Place copies of a molecule on a d_y x d_z lattice in the yz-plane.

    Lattice site (i, j) lies at y = i * r_c, z = j * r_c. Site (0, 0) is
    skipped. For every other site the molecule is rotated about the x axis by
    a random angle until check_min_dist accepts it against everything placed
    so far.

    Parameters
    ----------
    df : DataFrame of one molecule with columns x, y, z
    system : DataFrame of the atoms already placed (needs x, y, z, resnum)
    r_c : lattice spacing along y and z
    d_y, d_z : number of lattice sites along y and z
    min_dist : minimum distance passed to check_min_dist
    max_tries : random rotations tried per site before giving up

    Returns
    -------
    A new DataFrame with ``system`` followed by every placed copy, resnum
    increasing per copy and atnum renumbered from 1; or None when some site
    could not be filled within ``max_tries`` rotations.
    """
    xyz = ['x', 'y', 'z']
    template_xyz = df[xyz].values
    # Placed molecules are collected and concatenated once at the end;
    # `occupied` carries the coordinates needed for the clash test meanwhile.
    pieces = [system]
    occupied = system[xyz].values
    next_resnum = system.resnum.iloc[-1] + 1

    for i in tqdm(range(d_y)):
        for j in range(d_z):
            if i + j == 0:
                continue
            offset = np.array([0, r_c, r_c]) * np.array([0, i, j])
            for _ in range(max_tries):
                random_angle = np.random.random() * 360
                r = R.from_euler('x', random_angle, degrees=True)
                candidate_xyz = r.apply(template_xyz) + offset
                if check_min_dist(candidate_xyz, occupied, min_dist):
                    break
            else:
                # No orientation fits at this site.
                return None

            new_mol = df.copy()
            new_mol[xyz] = candidate_xyz
            new_mol['resnum'] = next_resnum
            next_resnum = next_resnum + 1
            pieces.append(new_mol)
            occupied = np.vstack([occupied, candidate_xyz])

    system = pd.concat(pieces)
    system['atnum'] = np.arange(1, system.shape[0] + 1)
    return system


def make_layer(*args, **kmargs):
    """Call make_raws_box until it succeeds and return its result.

    All arguments are forwarded unchanged. After each failed call (a None
    result) the number of the next attempt is printed. There is no upper
    limit on the number of attempts.
    """
    attempt = 1
    layer = make_raws_box(*args, **kmargs)
    while layer is None:
        attempt += 1
        print(f'attempt {attempt}')
        layer = make_raws_box(*args, **kmargs)
    return layer

# Alternative input: df = read_mol2('AMA.mol2')
df = read_pdb('AMA_charmm.pdb', 'ATOM')

# Row labels of the two atoms that define the molecular axis (C3 -> C28).
atoms = (df[df.atom == 'C3'].index[0],
         df[df.atom == 'C28'].index[0])

# Move C3 to the origin, then turn the molecule so that C3 -> C28 points along +x.
# The axis is handed over as a plain 1-D vector, not as a one-row DataFrame.
df.loc[:, ['x', 'y', 'z']] = df.loc[:, ['x', 'y', 'z']] - df.loc[[atoms[0]], ['x', 'y', 'z']].values
df[['x', 'y', 'z']] = rotate_system(df.loc[atoms[1], ['x', 'y', 'z']].to_numpy(dtype=float),
                                    np.array([1, 0, 0.]), df[['x', 'y', 'z']])

n = 10        # lattice sites along y and along z
step = 11.1   # lattice spacing, passed to make_raws_box as r_c
system0 = make_layer(df, df.copy(), step, n, n)

# Flip the molecule so that C3 -> C28 points along -x and slide it along x so
# that it overlaps the top of the first layer by d.
df[['x', 'y', 'z']] = rotate_system(df.loc[atoms[1], ['x', 'y', 'z']].to_numpy(dtype=float),
                                    np.array([-1, 0, 0.]), df[['x', 'y', 'z']])
d = 1
df['x'] += system0.x.max() - df.x.min() - d
# Continue the residue numbering after the first layer; otherwise the second
# layer would restart at the template's resnum and reuse numbers of system0.
df['resnum'] = system0.resnum.max() + 1
system = make_layer(df, pd.concat([system0, df]), step, n, n)
write_pdb(system, 'test.pdb')
# df, box = read_gro('em_20.gro')
# df.head()

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

attempt 2


  0%|          | 0/10 [00:00<?, ?it/s]

In [2]:
# Cyclically relabel the axes (old x -> z, old z -> y, old y -> x) and scale
# the coordinates by 10; write_gro divides them by 10 again when writing.
# This cell is not idempotent: every run permutes and rescales once more.
# NOTE(review): the recorded output below has the column layout produced by
# read_gro, so this cell appears to have been run on a frame loaded with
# read_gro rather than on the one built in the first cell - confirm.
axis_relabel = {'x': 'z', 'z': 'y', 'y': 'x'}
df = df.rename(columns=axis_relabel)
df[['x', 'y', 'z']] = df[['x', 'y', 'z']] * 10
df.head()

Unnamed: 0,resnum,res,atom,atnum,z,x,y
0,1,AMA,H783,1,77.08,4.36,6.99
1,1,AMA,C78,2,77.27,3.51,6.3
2,1,AMA,H781,3,77.33,3.9,5.25
3,1,AMA,H782,4,78.26,3.05,6.56
4,1,AMA,C77,5,76.17,2.47,6.41


In [3]:
# NOTE(review): `box` is not assigned by any uncommented code in this
# notebook; it presumably comes from an earlier `df, box = read_gro(...)` run,
# so this cell fails on a fresh kernel - confirm and load the box explicitly.
write_gro(df, file='out_file.gro', box=box)

  0%|          | 0/195013 [00:00<?, ?it/s]

In [16]:
# NOTE: this rebinds check_min_dist to a two-argument function that returns
# the smallest squared distance. After this cell has run, random_box and
# make_raws_box, which call check_min_dist with three arguments, fail until
# the definitions cell is executed again.
def check_min_dist(coords0, coords1):
    """Return the smallest squared distance between two sets of points."""
    return np.sum((coords0 - coords1[:, np.newaxis]) ** 2, axis=-1).min()

# Smallest distance between the rows from position 23300 onwards and the rows before it.
xyz = system[['x', 'y', 'z']].values
check_min_dist(xyz[23300:], xyz[:23300]) ** 0.5

0.6679384884553858