# NONCOVToolbox: Step 1
## Displace Optimized Molecular Fragments with @StructureModifier

### Centroid-to-centroid displacement

Take a relaxed xyz structure with two interacting fragments and displace them relative to each other along the vector connecting the two centroids:

$$ 
p_{new} = p_{old} + \hat{d} \cdot s \cdot i
$$

where $p_{new}$ and $p_{old}$ are the new and old coordinates respectively, $\hat{d}$ is the normalized displacement direction vector connecting the two centroids, $s$ is the step size and $i$ is the iteration index, which runs up to the number of displacement steps you want.

### Aromatic_plane-to-centroid displacement

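Take a relaxed xyz structure with two interacting fragments, compute the normal vector with respect to the aromatic ring plane of the first fragment and displace the second fragment along the direction of the normal vector (in the formula below, $\hat{d}$ is then the unit normal to the ring plane instead of the centroid-to-centroid direction):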

$$ 
p_{new} = p_{old} + \hat{d} \cdot s \cdot i
$$


### Load necessary modules from the NONCOVToolbox src

In [None]:
# Get the NONCOVToolbox library and print header
import sys
import os
import pandas as pd
import matplotlib.pyplot as plt
import glob
import numpy as np
from sklearn.cluster import KMeans
import pathlib as Path

# Absolute path of the sibling '../src' folder, resolved against the current
# working directory; the `noncov` imports below rely on it being importable.
path_noncov = os.path.abspath(os.path.join('..', 'src'))

# Append only once so that re-running the cell does not duplicate the entry
if path_noncov not in sys.path:
    sys.path.append(path_noncov)

# These imports must stay below the sys.path update above
from noncov import NONCOVToolbox, NONCOVHeader

noncov = NONCOVToolbox()

# Uncomment to print the toolbox header
#NONCOVHeader.print_header()

# Pre work on molecular geometries
from noncov import StructureModifier

# OrcaAnalysis module used for converting paths
from noncov import OrcaAnalysis

# State shared by blockPrint() / enablePrint(): the stream that was active
# before printing was silenced and the devnull handle installed in its place.
# Both are None while printing is enabled.
_saved_stdout = None
_devnull_stdout = None


# Disable printing
def blockPrint():
    """Silence ``print`` by redirecting ``sys.stdout`` to the null device.

    The stream that is active at call time is remembered so that
    ``enablePrint`` can put back exactly that stream. Calling this function
    again while printing is already blocked does nothing, so the remembered
    stream is never overwritten by the devnull handle.
    """
    global _saved_stdout, _devnull_stdout

    if _devnull_stdout is not None:
        # Already blocked: keep the originally saved stream
        return

    _saved_stdout = sys.stdout
    _devnull_stdout = open(os.devnull, 'w')
    sys.stdout = _devnull_stdout


# Restore printing
def enablePrint():
    """Undo ``blockPrint`` and restore the stream that was active before it.

    The saved stream is restored instead of ``sys.__stdout__`` because the
    two differ whenever the host replaced ``sys.stdout`` (as Jupyter kernels
    do); restoring ``sys.__stdout__`` there would send output away from the
    notebook. The devnull handle opened by ``blockPrint`` is closed. If
    printing is not currently blocked the call is a no-op.
    """
    global _saved_stdout, _devnull_stdout

    if _devnull_stdout is None:
        # Nothing to restore
        return

    sys.stdout = _saved_stdout
    _devnull_stdout.close()
    _saved_stdout = None
    _devnull_stdout = None
    
# Show NumPy arrays with 5 decimals and without scientific notation
np.set_printoptions(precision=5, suppress=True)

### Get directories

In [None]:
# Get work directory and scratch folder for the output data
current_dir = os.getcwd()
print(f'Current work directory is: {current_dir}')

# '../scratch' and '../tests' are resolved relative to the current working
# directory, then passed through OrcaAnalysis.convert_path (project helper).
scratch_dir = os.path.abspath(os.path.join('..', 'scratch'))
print(f'Current scratch directory is: {scratch_dir}')
scratch_dir = OrcaAnalysis().convert_path(scratch_dir)

test_dir = os.path.abspath(os.path.join('..', 'tests'))
print(f'Current test directory is: {test_dir}')
test_dir = OrcaAnalysis().convert_path(test_dir)

# TODO: extend this so that all structures in the folder are displaced in turn
# (the matching input file has to change for each structure as well).
# NOTE: despite its name, mol_dir holds the path of a single .xyz file.
mol_dir = os.path.join(scratch_dir, 'StructureModifier/input_structures/df_cut_4_n1_opt.xyz')
print(f'Current molecule directory is: {mol_dir}')
mol_dir = OrcaAnalysis().convert_path(mol_dir)

In [None]:
# Users specify relative paths (all are joined onto scratch_dir)
start_structure = os.path.join(scratch_dir, 'StructureModifier/input_structures/df_cut_4_n1_opt.xyz')
# File name of the structure without directory and extension; used to name the outputs
molecule_name = os.path.splitext(os.path.basename(start_structure))[0]

# Output file for the centroid coordinates and input file with the fragment /
# displacement keywords ($fragment, $displacement, $diss_lim are read below)
centroid_out = os.path.join(scratch_dir, f'StructureModifier/centroid_output/{molecule_name}_centroid_file.xyz')
input_file = os.path.join(scratch_dir, 'StructureModifier/input_file/input_df_cut_4_n1_opt.txt')

In [None]:
# Read xyz file: this should be either a fully optimized geometry or one with relaxed H or a CREST conformer
# Returns the coordinates and the atom identities of start_structure
# (presumably one element label per atom - confirm against StructureModifier).
coordinates, atom_identities = StructureModifier().read_atomic_coord(start_structure)

In [None]:
# Assign coordinates to molecular fragments
# coords1 / coords2 are the two fragments, ring_coords the points later used
# to define the aromatic plane (assignment rules live in StructureModifier).
coords1, coords2, ring_coords = StructureModifier().assign_molecule_fragments(coordinates, input_file)

# Concatenate coordinates for k-means clustering (fragment 1 rows first, then fragment 2)
all_coords = np.concatenate((coords1, coords2), axis=0)

# Count how many fragments you have defined in the input file, important for accurate K-means clustering
n_fragments = StructureModifier().count_fragments(input_file)
print(f"Number of '$fragment': {n_fragments}")

In [None]:
# Read displacement step size from input file: the value is the first token of
# the first non-empty line that follows the "$displacement" keyword line
displacement_step = None
read_displacement = False

with open(input_file, 'r') as f:
    lines = f.readlines()

for line in lines:
    if not read_displacement:
        # Still searching for the keyword line
        if line.strip() == "$displacement":
            read_displacement = True
        continue

    # Keyword already seen: skip blank lines, take the first value found
    displacement_values = line.strip().split()
    if displacement_values:
        displacement_step = float(displacement_values[0])
        break

if displacement_step is None:
    print('ERROR: displacement step size not found in input file, please specify it! Syntax => $displacement + number')
print(f'Displacement step is: {displacement_step} Angstroem')

In [None]:
# Read dissociation limit value from input file: the value is the first token
# of the first non-empty line that follows the "$diss_lim" keyword line.
# It is the number of displacement steps, so it is stored as an int
# (it is used as the upper bound of range() in the displacement loops).
diss_lim = None
with open(input_file, 'r') as f:
    lines = f.readlines()
    read_diss_lim = False
    for line in lines:
        if read_diss_lim:
            diss_lim_values = line.strip().split()
            if diss_lim_values:
                # Accept "20" as well as "20.0", but refuse fractional step counts
                diss_lim_value = float(diss_lim_values[0])
                if not diss_lim_value.is_integer():
                    raise ValueError(f'$diss_lim must be a whole number of steps, got {diss_lim_values[0]}')
                diss_lim = int(diss_lim_value)
                break
        elif line.strip() == "$diss_lim":
            # Keyword found: the value is expected on one of the next lines
            read_diss_lim = True

if diss_lim is None:
    print('ERROR: Dissociation limit not found in input file, please specify it! Syntax => $diss_lim + number')
print(f'Dissociation limit is: {diss_lim} steps')

### Here is the section for the CentroidtoCentroid displacement

In [None]:
# Perform k-means clustering to compute centroids
# NOTE(review): no random_state is passed, so the cluster centers (and their
# order in `centroids`) may differ between runs - confirm this is acceptable,
# since `centroids` is reused for the distance files and the plots below.
kmeans = KMeans(n_clusters=n_fragments) # K-means clusters = number of centroids = number of fragments
kmeans.fit(all_coords)
centroids = kmeans.cluster_centers_

# Compute centroids for each fragment
# These come from the fragment assignment (coords1, coords2), independently of
# the k-means centers above; index 0 is used for coords1 and index 1 for
# coords2 in the cells below.
fragment_centroids = StructureModifier().calculate_centroids([coords1, coords2])

# Write centroid coordinates to file
StructureModifier().write_centroids(centroid_out, fragment_centroids)
#print(f'Centroid coordinates: {fragment_centroids}')

In [None]:
# Calculate displacement direction (line connecting centroids)
# The vector is (centroid 0) - (centroid 1) and is normalized in place to unit length.
# NOTE(review): with this sign the vector points from centroid 1 towards
# centroid 0; whether the second fragment ends up moving away from or towards
# the first one depends on how StructureModifier.displace_fragment applies it - confirm.
displacement_direction = fragment_centroids[0,:] - fragment_centroids[1,:]
displacement_direction /= np.linalg.norm(displacement_direction)
print(f'Displacement direction:{displacement_direction}')

In [None]:
# Displace the second fragment iteratively and save each structure
displaced_fragment_coords = coords2.copy()  # Make a copy of the original coordinates of the fragment that is displaced

# Initialize the coordinates for the fixed fragment (e.g., coords1)
coords_fixed = coords1.copy() # make a copy of the fixed fragment coordinates to append to the displaced ones

# Entry 0 of each list below is the undisplaced starting structure
all_displaced_fragment_coords = [displaced_fragment_coords]  # List to store all displaced structures

# Combine displaced coordinates with original ones
all_combined_coords = [np.concatenate((coords_fixed, displaced_fragment_coords), axis=0)]  # List to store all combined structures

# NOTE: fragment_centroids is rebound from the centroid array to a plain list
# here, so the displacement-direction cell (which indexes it with [0,:]) cannot
# be re-run after this cell without recomputing the centroids first.
fragment_centroids = [fragment_centroids[0]]  # List to store all centroids

In [None]:
# Displacement in action
# One displaced structure and one distance file are written per iteration.
# diss_lim is converted with int() because range() rejects floats, which is
# what a value parsed with float() from the input file would be.
# NOTE(review): the first iteration passes i = 0 to displace_fragment; whether
# that reproduces the starting geometry depends on StructureModifier - confirm.

for i in range(int(diss_lim)):  # Iterate diss_lim times (change $diss_lim in the input file to tune it)

    # Compute new set of coordinates for displaced fragments, change $displacement value in input file to tune the displacement
    displaced_fragment_coords = StructureModifier().displace_fragment(coords2, displacement_direction, displacement_step, i)

    combined_coords = np.concatenate((coords_fixed, displaced_fragment_coords), axis=0)
    all_combined_coords.append(combined_coords)

    # Update centroids for the displaced structure
    fragment_centroid = StructureModifier().calculate_centroids([displaced_fragment_coords])
    fragment_centroids.append(fragment_centroid[0])

    # Write displaced structure to file
    output_file = os.path.join(scratch_dir, f'StructureModifier/CentroidtoCentroid/{molecule_name}_disp_struct_{i}.xyz')
    StructureModifier().write_displaced_xyz_file(output_file, coords_fixed, displaced_fragment_coords, atom_identities)

    all_displaced_fragment_coords.append(displaced_fragment_coords)

    # Compute distance between the centroids and all the atoms from the displaced fragment
    # NOTE(review): `centroids` holds the k-means cluster centers, not the
    # per-fragment centroids - confirm this is the intended reference point.
    centroid_to_displaced_distance = StructureModifier().compute_distance_from_centroid(displaced_fragment_coords, centroids)

    # Write distances to file - needed for DFT calculations outputs
    distance_output_file = os.path.join(scratch_dir, f'StructureModifier/distance_files/{molecule_name}_distances_{i}.xyz')
    StructureModifier().write_distances_file(distance_output_file, displaced_fragment_coords, centroid_to_displaced_distance, atom_identities, displacement_step)

In [None]:
# Starting topology: both fragments together with the k-means centers
fig = StructureModifier().plot_starting_molecular_fragments(coords1, coords2, centroids)

# One viridis color per stored structure (entry 0 is the starting geometry)
num_iterations = len(all_displaced_fragment_coords)
colors = plt.cm.viridis(np.linspace(0.2, 1.0, num_iterations))

# New 3D figure for the displaced geometries
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Original fragments share the first color; only these two entries carry a
# legend label. Centroid markers are intentionally not drawn.
for fragment, legend_text in (
    (coords1, 'Molecule 1 (Original)'),
    (coords2, 'Molecule 2 (Original)'),
):
    ax.scatter(fragment[:, 0], fragment[:, 1], fragment[:, 2], color=colors[0], label=legend_text)

# Displaced copies of the second fragment, colored by iteration number
for i, displaced_coords in enumerate(all_displaced_fragment_coords[1:], start=1):
    ax.scatter(
        displaced_coords[:, 0],
        displaced_coords[:, 1],
        displaced_coords[:, 2],
        color=colors[i],
    )

ax.legend()
plt.show()

### Here is the section for the NormaltoPlane displacement

#### Example of NormaltoPlane pipeline

In [None]:
# Function to calculate the normal vector using the cross product of vectors with the same origin formed by 3 points
def get_norm_arom_plane(ring_coords, moving_frag_centroid, tolerance=1e-1):
    """Return the unit normal of the ring plane and the centroid of the ring.

    The normal is the normalized cross product of two vectors that start at
    the first ring point. It is oriented so that it points to the side of the
    plane on which ``moving_frag_centroid`` lies.

    Parameters
    ----------
    ring_coords : array-like, shape (n_points, 3)
        Coordinates of the ring atoms, at least 3 of them.
    moving_frag_centroid : array-like, shape (3,)
        Centroid of the fragment that will be displaced.
    tolerance : float, optional
        Smallest accepted sine of the angle between the two in-plane vectors.
        Point triples below this value are treated as collinear and skipped.

    Returns
    -------
    normal_dir : numpy.ndarray, shape (3,)
        Unit normal vector of the ring plane.
    ring_centroid : numpy.ndarray, shape (3,)
        Mean position of the ring points.

    Raises
    ------
    ValueError
        If fewer than 3 points are given, or if no triple of points that
        includes the first two is non-collinear.
    """
    # Float arrays: accepts lists and integer input, and keeps the divisions exact
    ring_coords = np.asarray(ring_coords, dtype=float)
    moving_frag_centroid = np.asarray(moving_frag_centroid, dtype=float)

    n_points = len(ring_coords)
    if n_points < 3:
        raise ValueError("At least 3 points are required to define a plane.")

    # Centroid of the ring (mean of the ring points)
    ring_centroid = np.mean(ring_coords, axis=0)

    vec1 = ring_coords[1] - ring_coords[0]
    len1 = np.linalg.norm(vec1)

    # Third point: index 3 is tried first (the point this function always
    # used), then every other index, so a 3-membered input is valid too.
    third_indices = [3] if n_points > 3 else []
    third_indices += [k for k in range(2, n_points) if k != 3]

    normal_dir = None
    for k in third_indices:
        vec2 = ring_coords[k] - ring_coords[0]
        cross = np.cross(vec1, vec2)
        cross_len = np.linalg.norm(cross)
        # |cross| = |vec1| |vec2| sin(angle): scale-independent collinearity test
        if cross_len > tolerance * len1 * np.linalg.norm(vec2):
            normal_dir = cross / cross_len
            break

    if normal_dir is None:
        raise ValueError("Ring points are collinear, the plane normal is undefined.")

    # Flip the normal if it points away from the moving fragment
    if np.dot(normal_dir, moving_frag_centroid - ring_centroid) < 0:
        normal_dir = -normal_dir

    return normal_dir, ring_centroid

# Function to calculate the centroid (middle point) of the plane formed by points
def calculate_centroid(coords):
    """Return the mean of the points in ``coords``, taken along the first axis."""
    points = np.asarray(coords)
    return points.mean(axis=0)

# Function to displace all fragment coordinates along a normal direction
def displace_fragment_along_normal(fragment_coords, normal_dir, displacement_step, diss_lim):
    """Build ``diss_lim`` rigidly shifted copies of a fragment.

    Copy number ``k`` (counting from 1) equals ``fragment_coords`` moved by
    ``k * displacement_step`` along ``normal_dir``; every atom of the fragment
    is shifted by the same vector. The copies are returned stacked in one
    array, in order of increasing displacement.
    """
    shifted_copies = [
        fragment_coords + normal_dir * displacement_step * step_number
        for step_number in range(1, diss_lim + 1)
    ]
    return np.array(shifted_copies)

# Function to save coordinates to an XYZ file in the specified directory
def save_xyz(filename, coords1, displaced_coords2, step, save_dir):
    """Write fragment 1 followed by the displaced fragment 2 to an XYZ file.

    The file ``save_dir/filename`` gets the atom count on the first line, a
    comment line containing ``step`` on the second, and then one line per
    atom with coordinates printed with 5 decimals. ``save_dir`` is created
    when it does not exist yet.

    Every atom is written with the element label "C": the real element
    symbols are not available to this function.
    """
    # Make sure the output folder is there
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    atom_count = len(coords1) + len(displaced_coords2)
    target = os.path.join(save_dir, filename)

    with open(target, 'w') as xyz:
        # Header: number of atoms, then the comment line
        xyz.write(f"{atom_count}\n")
        xyz.write(f"Step {step}: Combined Fragment 1 and Displaced Fragment 2 Coordinates\n")

        # Atom records: fragment 1 first, displaced fragment 2 second
        xyz.writelines(
            f"C {atom[0]:.5f} {atom[1]:.5f} {atom[2]:.5f}\n"
            for fragment in (coords1, displaced_coords2)
            for atom in fragment
        )

# Main function to process the fragments
def process_fragments(coords1, coords2, ring_coords, save_dir):
    """Displace fragment 2 along the ring normal and write every step to disk.

    For each displacement step two files are written: a generic XYZ file in
    ``save_dir`` (via ``save_xyz``) and a structure file in the
    'StructureModifier/NormaltoPlane' folder of the scratch directory (via
    ``StructureModifier.write_displaced_xyz_file``).

    Besides its arguments the function reads these notebook globals:
    ``displacement_step``, ``diss_lim``, ``scratch_dir``, ``molecule_name``
    and ``atom_identities``.

    Parameters
    ----------
    coords1 : fixed fragment coordinates.
    coords2 : coordinates of the fragment that is displaced.
    ring_coords : points that define the aromatic plane.
    save_dir : folder for the generic XYZ files.

    Returns
    -------
    The coordinates of fragment 2 at the last displacement step, or ``None``
    when a ``ValueError`` was raised on the way (its message is printed).
    """
    # Calculate the centroid of fragment 2
    moving_frag_centroid = calculate_centroid(coords2)

    try:
        # Calculate the normal vector and centroid of the plane for fragment 1 (ring)
        normal_dir, ring_centroid = get_norm_arom_plane(ring_coords, moving_frag_centroid)

        # Output the normal vector direction
        print(f"Normal Vector: {normal_dir}")
        print(f"Centroid of the Plane: {ring_centroid}")

        # Displace the fragment 2 coordinates along the normal direction
        displaced_fragments = displace_fragment_along_normal(coords2, normal_dir, displacement_step, diss_lim)

        # Without any step there is no "last" structure to return
        if len(displaced_fragments) == 0:
            raise ValueError("No displacement steps requested: diss_lim must be at least 1.")

        # Save the coordinates for each displacement step
        for i, displaced_fragment in enumerate(displaced_fragments):
            # Save to an XYZ file, naming the file based on the iteration (step number, counted from 1)
            filename = f"fragment_combined_step_{i+1}.xyz"
            save_xyz(filename, coords1, displaced_fragment, i+1, save_dir)

            # Write displaced structure to file (this file name counts from 0)
            output_file = os.path.join(scratch_dir, f'StructureModifier/NormaltoPlane/{molecule_name}_disp_struct_{i}.xyz')
            StructureModifier().write_displaced_xyz_file(output_file, coords1, displaced_fragment, atom_identities)

        # Return the final displaced coordinates of fragment 2
        return displaced_fragments[-1]

    except ValueError as e:
        # Report the problem and signal the failure to the caller explicitly
        print(e)
        return None


#### Example of NormaltoPlane from src code

In [None]:
# Number of displacement steps for this example; this assignment replaces the
# value of diss_lim read from the input file earlier in the notebook.
diss_lim = 20

# Folder that receives the displaced structures
output_dir = os.path.join(scratch_dir, f'StructureModifier/NormaltoPlane')

# Run the library implementation of the normal-to-plane displacement
StructureModifier().NormaltoPlane(coords1, coords2, ring_coords, output_dir, molecule_name, diss_lim, displacement_step, atom_identities)

#### NormaltoPlane runner

In [25]:
# Get the NONCOVToolbox library and print header
import sys
import os
import pandas as pd
import matplotlib.pyplot as plt
import glob
import numpy as np
from sklearn.cluster import KMeans
import pathlib as Path

# Absolute path of the sibling '../src' folder, resolved against the current
# working directory; the `noncov` imports below rely on it being importable.
path_noncov = os.path.abspath(os.path.join('..', 'src'))

# Append only once so that re-running the cell does not duplicate the entry
if path_noncov not in sys.path:
    sys.path.append(path_noncov)

# These imports must stay below the sys.path update above
from noncov import NONCOVToolbox, NONCOVHeader

noncov = NONCOVToolbox()

# Uncomment to print the toolbox header
#NONCOVHeader.print_header()

# Pre work on molecular geometries
from noncov import StructureModifier

# OrcaAnalysis module used for converting paths
from noncov import OrcaAnalysis

# State shared by blockPrint() / enablePrint(): the stream that was active
# before printing was silenced and the devnull handle installed in its place.
# Both are None while printing is enabled.
_saved_stdout = None
_devnull_stdout = None


# Disable printing
def blockPrint():
    """Silence ``print`` by redirecting ``sys.stdout`` to the null device.

    The stream that is active at call time is remembered so that
    ``enablePrint`` can put back exactly that stream. Calling this function
    again while printing is already blocked does nothing, so the remembered
    stream is never overwritten by the devnull handle.
    """
    global _saved_stdout, _devnull_stdout

    if _devnull_stdout is not None:
        # Already blocked: keep the originally saved stream
        return

    _saved_stdout = sys.stdout
    _devnull_stdout = open(os.devnull, 'w')
    sys.stdout = _devnull_stdout


# Restore printing
def enablePrint():
    """Undo ``blockPrint`` and restore the stream that was active before it.

    The saved stream is restored instead of ``sys.__stdout__`` because the
    two differ whenever the host replaced ``sys.stdout`` (as Jupyter kernels
    do); restoring ``sys.__stdout__`` there would send output away from the
    notebook. The devnull handle opened by ``blockPrint`` is closed. If
    printing is not currently blocked the call is a no-op.
    """
    global _saved_stdout, _devnull_stdout

    if _devnull_stdout is None:
        # Nothing to restore
        return

    sys.stdout = _saved_stdout
    _devnull_stdout.close()
    _saved_stdout = None
    _devnull_stdout = None
    
# Set print options to suppress scientific notation
np.set_printoptions(suppress=True, precision=5)

# Get work directory and scratch folder for the output data
current_dir = os.getcwd()
print(f'Current work directory is: {current_dir}')

# '../scratch' and '../tests' are resolved relative to the current working
# directory, then passed through OrcaAnalysis.convert_path (project helper).
scratch_dir = os.path.abspath(os.path.join('..', 'scratch'))
print(f'Current scratch directory is: {scratch_dir}')
scratch_dir = OrcaAnalysis().convert_path(scratch_dir)

test_dir = os.path.abspath(os.path.join('..', 'tests'))
print(f'Current test directory is: {test_dir}')
test_dir = OrcaAnalysis().convert_path(test_dir)

# TODO: extend this so that all structures in the folder are displaced in turn
# (the matching input file has to change for each structure as well).
# The structure path is asked interactively; despite its name, mol_dir is
# expected to be the path of a single .xyz file.
mol_dir = input("Enter the path for structure: ")
#mol_dir = os.path.join(scratch_dir, 'StructureModifier/input_structures/df_cut_4_n1_opt.xyz')
print(f'Current molecule directory is: {mol_dir}')
mol_dir = OrcaAnalysis().convert_path(mol_dir)

# Users specify relative paths
#start_structure = os.path.join(scratch_dir, 'StructureModifier/input_structures/df_cut_4_n1_opt.xyz')
start_structure = mol_dir
# File name of the structure without directory and extension; used to name the outputs
molecule_name = os.path.splitext(os.path.basename(start_structure))[0]

centroid_out = os.path.join(scratch_dir, f'StructureModifier/centroid_output/{molecule_name}_centroid_file.xyz')
# The input file with the $fragment / $displacement keywords is asked interactively as well
input_file = input("Enter the path to the input file: ")
input_file = OrcaAnalysis().convert_path(input_file)

#input_file = os.path.join(scratch_dir, 'StructureModifier/input_file/input_df_cut_4_n1_opt.txt')

# Read xyz file: this should be either a fully optimized geometry or one with relaxed H or a CREST conformer
coordinates, atom_identities = StructureModifier().read_atomic_coord(start_structure)

# Assign coordinates to molecular fragments
coords1, coords2, ring_coords = StructureModifier().assign_molecule_fragments(coordinates, input_file)

# Concatenate coordinates for k-means clustering
all_coords = np.concatenate((coords1, coords2), axis=0)

# Count how many fragments you have defined in the input file, important for accurate K-means clustering
n_fragments = StructureModifier().count_fragments(input_file)
print(f"Number of '$fragment': {n_fragments}")

# Read displacement step size from input file: the value is the first token of
# the first non-empty line that follows the "$displacement" keyword line
displacement_step = None
with open(input_file, 'r') as f:
    lines = f.readlines()
    read_displacement = False
    for line in lines:
        if read_displacement:
            displacement_values = line.strip().split()
            if displacement_values:
                displacement_step = float(displacement_values[0])
                break
        elif line.strip() == "$displacement":
            read_displacement = True

# NOTE: a missing value is only reported; execution continues with
# displacement_step = None, which is then passed on to NormaltoPlane below.
if displacement_step is None:
    print('ERROR: displacement step size not found in input file, please specify it! Syntax => $displacement + number')
print(f'Displacement step is: {displacement_step} Angstroem') 

# Number of displacement steps; fixed here, not read from the input file
diss_lim = 20

# Folder that receives the displaced structures
output_dir = os.path.join(scratch_dir, f'StructureModifier/NormaltoPlane')

# Run the library implementation of the normal-to-plane displacement
StructureModifier().NormaltoPlane(coords1, coords2, ring_coords, output_dir, molecule_name, diss_lim, displacement_step, atom_identities)

Current work directory is: C:\Users\ettor\Desktop\NONCOV\results
Current scratch directory is: C:\Users\ettor\Desktop\NONCOV\scratch
Normalized path using os.path: C:/Users/ettor/Desktop/NONCOV/scratch
Current test directory is: C:\Users\ettor\Desktop\NONCOV\tests
Normalized path using os.path: C:/Users/ettor/Desktop/NONCOV/tests
Enter the path for structure: "C:\Users\ettor\Desktop\NONCOV\scratch\StructureModifier\input_structures\ry_cut_5_p1_opt.xyz"
Current molecule directory is: "C:\Users\ettor\Desktop\NONCOV\scratch\StructureModifier\input_structures\ry_cut_5_p1_opt.xyz"
Normalized path using os.path: C:/Users/ettor/Desktop/NONCOV/scratch/StructureModifier/input_structures/ry_cut_5_p1_opt.xyz
Enter the path to the input file: "C:\Users\ettor\Desktop\NONCOV\scratch\StructureModifier\input_file\input_ry_cut_5_p1_opt.txt"
Normalized path using os.path: C:/Users/ettor/Desktop/NONCOV/scratch/StructureModifier/input_file/input_ry_cut_5_p1_opt.txt
Number of '$fragment': 2
Displacement st

array([[-0.12043, 11.03436,  4.50627],
       [-0.67448, 11.44003,  3.65147],
       [-0.81917, 10.66064,  5.26156],
       [ 0.81084,  9.98249,  4.10051],
       [ 1.78648, 10.22501,  4.04541],
       [ 0.46193,  8.77023,  3.64828],
       [ 1.41608,  7.89445,  3.31763],
       [ 1.16916,  6.96929,  2.976  ],
       [-0.81552,  8.40308,  3.50929],
       [-1.5566 ,  9.08167,  3.55965],
       [-1.03696,  7.47446,  3.17182],
       [ 0.45572, 11.84303,  4.95563],
       [ 2.37033,  8.05328,  3.59434]])