In [41]:
import pandas as pd
import numpy as np

from scipy.spatial import distance
from scipy.optimize import minimize

import joblib

from reader_writer import xyz_to_df

In [36]:
# Read the ethanol geometry through the project helper `xyz_to_df`.
# Downstream code reads the columns 'atom', 'x', 'y' and 'z' from this frame.
df = xyz_to_df('ethanol.xyz')
# Reference bond-length table, indexed as r_c[element_a][element_b] with
# lower-case element symbols (units presumably angstrom -- TODO confirm).
# NOTE(review): joblib.load unpickles the file and can execute arbitrary code;
# only load 'r_c.pkl' from a trusted source.
r_c = joblib.load('r_c.pkl')
# Echo the table as the cell output.
r_c

{'c': {'h': 1.09, 'c': 1.54, 'o': 1.42, 'n': 1.47},
 'o': {'h': 0.97, 'c': 1.42, 'n': 1.43, 'o': 1.48},
 'h': {'h': 0.74, 'c': 1.09, 'n': 1.01, 'o': 0.97},
 'n': {'n': 1.45, 'c': 1.47, 'o': 1.43, 'h': 1.01}}

In [52]:
class Molecule:
    """Molecule with a harmonic bond-stretch energy built from reference bond lengths.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per atom with the columns 'atom', 'x', 'y' and 'z'.
    r_c : mapping
        Nested table ``r_c[a][b]`` giving the reference length for the pair of
        lower-case atom labels ``a`` and ``b``.
    k_E_r : float, default 100
        Factor multiplying the squared bond-length deviation in
        :meth:`energy_distance`.

    Attributes
    ----------
    df_bonds : pandas.DataFrame
        Boolean n x n matrix, True where two atoms are considered bonded.
    df_dist_mask : pandas.DataFrame
        Float n x n matrix holding the reference length of every atom pair
        found in ``r_c`` and 0 elsewhere (including the diagonal).
    """

    def __init__(self, df, r_c, k_E_r=100):
        self.df = df
        self.r_c = r_c
        self.k_E_r = k_E_r
        # Derived matrices are computed once from the geometry passed in.
        self.create_bond_matrix()
        self.create_distance_mask()

    def _reference_length(self, atom_i, atom_j):
        """Return ``r_c[atom_i][atom_j]`` for lower-cased labels, or None if absent."""
        key_i = atom_i.lower()
        key_j = atom_j.lower()
        if key_i in self.r_c and key_j in self.r_c[key_i]:
            return self.r_c[key_i][key_j]
        return None

    def create_bond_matrix(self, tol=0.2):
        """Build ``self.df_bonds`` from the current coordinates in ``self.df``.

        Two atoms are flagged as bonded when their Euclidean distance differs
        from the tabulated reference length by less than ``tol``. Pairs with
        no entry in ``r_c`` are never flagged.
        """
        coords = self.df[['x', 'y', 'z']].values
        atoms = self.df['atom'].values

        dist_matrix = distance.cdist(coords, coords, 'euclidean')
        bond_matrix = np.zeros(dist_matrix.shape, dtype=bool)

        n = len(atoms)
        for i in range(n):
            for j in range(i + 1, n):  # upper triangle only; result is mirrored
                reference = self._reference_length(atoms[i], atoms[j])
                if reference is None:
                    continue
                if abs(dist_matrix[i, j] - reference) < tol:
                    bond_matrix[i, j] = bond_matrix[j, i] = True
        self.df_bonds = pd.DataFrame(bond_matrix, index=atoms, columns=atoms)

    def create_distance_mask(self):
        """Build ``self.df_dist_mask``: the reference length for every atom pair.

        Entries stay 0 on the diagonal and for pairs missing from ``r_c``.
        """
        atoms = self.df['atom'].values
        n = len(atoms)

        dist_mask = np.zeros((n, n), dtype=float)

        for i in range(n):
            for j in range(i + 1, n):  # upper triangle only; result is mirrored
                reference = self._reference_length(atoms[i], atoms[j])
                if reference is not None:
                    dist_mask[i, j] = dist_mask[j, i] = reference

        self.df_dist_mask = pd.DataFrame(dist_mask, index=atoms, columns=atoms)

    def energy_distance(self, coords):
        """Return the bond-stretch energy for a flat coordinate vector.

        Parameters
        ----------
        coords : array-like
            Coordinates of all atoms, convertible to shape ``(n_atoms, 3)``
            (for example the flattened 'x', 'y', 'z' columns).

        Returns
        -------
        float
            ``sum(k_E_r * (d_ij - r_ij)**2)`` over bonded pairs. The sum runs
            over the full symmetric matrix, hence the division by 2.
        """
        # Use this instance's own atom count instead of a notebook-global
        # DataFrame, so the method works for any molecule and any kernel state.
        coords = np.asarray(coords, dtype=float).reshape(len(self.df), 3)
        dist_matrix = distance.cdist(coords, coords, 'euclidean')
        deviation = dist_matrix - self.df_dist_mask.values
        return np.sum(self.k_E_r * deviation ** 2 * self.df_bonds.values) / 2


# Build the molecule; the constructor derives the bond matrix and the
# reference-length mask from the loaded geometry.
ethanol = Molecule(df, r_c)
# NOTE(review): a bare expression in the middle of a cell is not displayed
# (only the last expression is echoed); move it to its own cell to inspect it.
ethanol.df_bonds
# Bond-stretch energy of the input geometry, passed as a flat coordinate vector.
ethanol.energy_distance(ethanol.df[['x', 'y', 'z']].values.flatten())

0.062286068720076485

In [53]:
def find_minima(f, initial_guess):
    """Minimize ``f`` starting from ``initial_guess`` with SciPy's 'trust-constr'.

    Parameters
    ----------
    f : callable
        Objective taking a 1-D array and returning a scalar.
    initial_guess : array-like
        Starting point handed to ``scipy.optimize.minimize``.

    Returns
    -------
    tuple
        ``(x, fun)``: the minimizer found and the objective value there.

    Raises
    ------
    RuntimeError
        If the optimizer reports ``success`` as false. The optimizer's status
        and message are included so the failure can be diagnosed.
    """
    result = minimize(f, initial_guess, method='trust-constr')
    if not result.success:
        # RuntimeError is an Exception subclass, so existing
        # `except Exception` handlers keep working.
        raise RuntimeError(
            "The optimization process was not successful. "
            f"(status={getattr(result, 'status', None)}, "
            f"message={getattr(result, 'message', '')})"
        )
    return result.x, result.fun
    
# Relax the geometry: minimize the bond-stretch energy starting from the loaded
# coordinates. The cell output is the (flat coordinates, energy) pair.
find_minima(ethanol.energy_distance, ethanol.df[['x', 'y', 'z']].values.flatten())

(array([-2.07873172,  0.4328589 ,  0.07216627, -1.2203252 , -0.23396007,
        -0.00901677, -1.26462523, -0.95287648,  0.80908878,  0.09534658,
         0.56396543,  0.05361414,  1.22833655, -0.27287322, -0.12648248,
         0.14876096,  1.11753864,  0.99105939,  1.2473821 , -0.8990527 ,
         0.61408252,  0.13183892,  1.28388441, -0.76399516, -1.27362396,
        -0.7747082 , -0.95392455]),
 2.1578724683049923e-14)