In [1]:
import pandas as pd
import numpy as np

from scipy.spatial import distance

import joblib

from reader_writer import xyz_to_df

In [4]:
# Read the ethanol geometry with the project-local XYZ reader.
# NOTE(review): the bond-matrix cell expects 'atom', 'x', 'y', 'z' columns — confirm xyz_to_df provides them.
df = xyz_to_df('ethanol.xyz')
# Nested dict of reference bond lengths: r_c[element][partner], keyed by lowercase symbols.
# NOTE(review): values look like ångström — confirm they use the same unit as the XYZ coordinates.
# NOTE(review): joblib.load unpickles arbitrary objects; only load r_c.pkl from a trusted source.
r_c = joblib.load('r_c.pkl')
# Bare last expression so the notebook displays the loaded table.
r_c

{'c': {'h': 1.09, 'c': 1.54, 'o': 1.42, 'n': 1.47},
 'o': {'h': 0.97, 'c': 1.42, 'n': 1.43, 'o': 1.48},
 'h': {'h': 0.74, 'c': 1.09, 'n': 1.01, 'o': 0.97},
 'n': {'n': 1.45, 'c': 1.47, 'o': 1.43, 'h': 1.01}}

In [5]:
def create_bond_matrix(df, r_c, tol=0.2):
    """Build a symmetric boolean bond (adjacency) table for the atoms in ``df``.

    Two atoms are marked as bonded when their Euclidean distance differs from
    the reference bond length of their element pair by less than ``tol``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per atom, with an ``atom`` column holding element symbols
        (any case) and numeric ``x``, ``y``, ``z`` coordinate columns.
    r_c : dict
        Nested mapping ``r_c[element][partner] -> reference bond length``,
        keyed by lowercase element symbols. Lengths must use the same unit
        as the coordinates. A pair only needs to be stored in one
        orientation; the reverse orientation is consulted as a fallback.
    tol : float, default 0.2
        Maximum absolute deviation (exclusive) between the measured distance
        and the reference length for the pair to count as bonded.

    Returns
    -------
    pandas.DataFrame
        Square boolean frame whose index and columns are the values of
        ``df['atom']`` in row order (duplicate labels are kept as-is). The
        diagonal is always ``False``.

    Notes
    -----
    The acceptance window is two-sided: a pair that is much *shorter* than
    the reference length (e.g. a multiple bond measured against a
    single-bond reference) is not flagged. Pairs with no reference length in
    either orientation are left unbonded.
    """
    coords = df[['x', 'y', 'z']].values
    atoms = df['atom'].values

    dist_matrix = distance.cdist(coords, coords, 'euclidean')
    bond_matrix = np.zeros(dist_matrix.shape, dtype=bool)

    # Normalise the symbols once per atom instead of once per pair.
    symbols = [atom.lower() for atom in atoms]

    n = len(symbols)
    for i in range(n):
        sym_i = symbols[i]
        for j in range(i + 1, n):  # Upper triangle only; the result is mirrored below
            sym_j = symbols[j]

            # Bond lengths are symmetric, so accept the table entry in either
            # orientation; skip pairs the table does not cover at all.
            if sym_i in r_c and sym_j in r_c[sym_i]:
                ref_length = r_c[sym_i][sym_j]
            elif sym_j in r_c and sym_i in r_c[sym_j]:
                ref_length = r_c[sym_j][sym_i]
            else:
                continue

            if abs(dist_matrix[i, j] - ref_length) < tol:
                bond_matrix[i, j] = bond_matrix[j, i] = True  # Atoms are likely bonded

    df_bonds = pd.DataFrame(bond_matrix, index=atoms, columns=atoms)

    return df_bonds

# Bare call as the cell's last expression so the notebook renders the bond table (default tol=0.2).
create_bond_matrix(df, r_c)

Unnamed: 0,H,C,H.1,C.1,O,H.2,H.3,H.4,H.5
H,False,True,False,False,False,False,False,False,False
C,True,False,True,True,False,False,False,False,True
H,False,True,False,False,False,False,False,False,False
C,False,True,False,False,True,True,False,True,False
O,False,False,False,True,False,False,True,False,False
H,False,False,False,True,False,False,False,False,False
H,False,False,False,False,True,False,False,False,False
H,False,False,False,True,False,False,False,False,False
H,False,True,False,False,False,False,False,False,False
