In [2]:
import os
import time

import numpy as np
import pandas as pd
from ccdc.crystal import PackingSimilarity
from ccdc.io import CrystalReader

def _list_cif_files(directory):
    """Return the sorted names of the ``.cif`` files located directly in ``directory``."""
    # Filtering on the extension keeps the CSV files this tool writes into the
    # same directory (and any other stray file) out of the comparison on re-runs.
    return sorted(
        name for name in os.listdir(directory)
        if name.lower().endswith('.cif')
        and os.path.isfile(os.path.join(directory, name))
    )


def _read_first_crystal(path):
    """Return the first crystal stored in the structure file at ``path``."""
    # The reader is closed as soon as the crystal has been extracted.
    # NOTE(review): assumes CrystalReader supports the ``with`` protocol and that
    # a crystal stays usable after its reader is closed -- confirm against the
    # CCDC API version in use.
    with CrystalReader(path) as reader:
        return reader[0]


def calc_RMSD(directory, **kwargs):
    """Compare the crystal packing of every pair of CIF files in ``directory``.

    The first crystal of each ``.cif`` file is compared against the first
    crystal of every file that sorts after it, using the CCDC
    ``PackingSimilarity`` tool. Two square tables, labelled by file name, are
    written into ``directory``:

    * ``00_output_RMSD.csv`` -- RMSD reported for each comparison
    * ``00_output_MM.csv``   -- number of matched molecules for each comparison

    Each unordered pair is compared only once, so only the upper triangle of
    each table is filled; the lower triangle stays NaN. The diagonal holds
    0 (RMSD) and the packing shell size (matched molecules). A pair for which
    the comparison returns no result is left as NaN.

    Parameters
    ----------
    directory : str
        Folder containing the CIF files; the output CSVs are written here too.
    **kwargs
        PSS : packing shell size (default 20)
        DT  : distance tolerance (default 0.2)
        AT  : angle tolerance (default 20)
        Any other keyword is ignored.

    Returns
    -------
    None
        Results are written to disk and progress is printed.
    """
    # CCDC API Packing Similarity tool
    print('RMSD Calculation Started')
    PS = PackingSimilarity()
    print('Packing Similarity Object Created')

    # Comparison parameters: taken from the keyword arguments, else defaults.
    PSS = kwargs.get('PSS', 20)
    DT = kwargs.get('DT', 0.2)
    AT = kwargs.get('AT', 20)
    PS.settings.packing_shell_size = PSS
    PS.settings.distance_tolerance = DT
    PS.settings.angle_tolerance = AT
    print('Parameters Updated:')
    print('Packing Shell Size: ', PSS)
    print('Distance Tolerance: ', DT)
    print('Angle Tolerance: ', AT)

    # One listing, sorted, is used both for the comparisons and for the output
    # labels, so rows/columns are guaranteed to line up with the data.
    cifs = _list_cif_files(directory)
    n_cifs = len(cifs)

    # Output matrices start as NaN so that "not compared" is distinguishable
    # from a genuine 0.
    rmsd_matrix = np.full((n_cifs, n_cifs), np.nan)
    matchedMolecules_matrix = np.full((n_cifs, n_cifs), np.nan)

    print('Starting Comparisons')
    starttime = time.time()
    print('Start Time', starttime)

    # Parse every file exactly once instead of re-reading it for each pair.
    crystals = [_read_first_crystal(os.path.join(directory, name)) for name in cifs]

    for i in range(n_cifs):
        # A structure compared with itself: perfect match by definition.
        rmsd_matrix[i, i] = 0
        matchedMolecules_matrix[i, i] = PSS  # default MM value
        # Only j > i: each unordered pair is evaluated a single time.
        for j in range(i + 1, n_cifs):
            comp = PS.compare(crystals[i], crystals[j])
            if comp is None:
                # No comparison result available for this pair; keep NaN.
                continue
            rmsd_matrix[i, j] = comp.rmsd
            matchedMolecules_matrix[i, j] = comp.nmatched_molecules

    endtime = time.time()
    print('End Time: ', endtime)
    print('Taken: ', endtime - starttime, 's')

    # Label rows and columns with the file names and save next to the inputs.
    rmsd_df = pd.DataFrame(rmsd_matrix, index=cifs, columns=cifs)
    matchedMolecule_df = pd.DataFrame(matchedMolecules_matrix, index=cifs, columns=cifs)
    rmsd_df.to_csv(os.path.join(directory, '00_output_RMSD.csv'))
    matchedMolecule_df.to_csv(os.path.join(directory, '00_output_MM.csv'))
    print('Saved to File')
    print('==== COMPLETE ====')


In [3]:
# Run the pairwise packing comparison on the demonstration crystals,
# spelling out every comparison parameter explicitly.
calc_RMSD(
    r'Masters\Demonstration Crystals',
    PSS=15,
    DT=0.2,
    AT=20,
)

RMSD Calculation Started
Packing Similarity Object Created
Parameters Updated:
Packing Shell Size:  15
Distance Tolerance:  0.2
Angle Tolerance:  20
Starting Comparisons
Start Time 1631894246.3648229
End Time:  1631894489.2821727
Taken:  242.91734981536865 s
Saved to File
==== COMPLETE ====
