# Calculating the RACs for some MOFs ...

In [7]:
from molSimplify.Informatics.MOF.MOF_descriptors import get_MOF_descriptors
import os
from glob import glob
from pathlib import Path
import pandas as pd 

The first step is to get some MOF CIFs as primitive structures. For this, we can use some of your favorite structures and the following utility function.

In [None]:
from pymatgen.io.cif import CifParser
def get_primitive(datapath, writepath):
    """Write the primitive cell of the first structure in a CIF to a new CIF.

    Parameters
    ----------
    datapath : str
        Path of the CIF file to read.
    writepath : str
        Path the primitive-cell CIF is written to.

    Returns
    -------
    None
    """
    # Only the first structure parsed from the file is used.
    s = CifParser(datapath, occupancy_tolerance=1).get_structures()[0]
    sprim = s.get_primitive_structure()
    # Pass by keyword: the positional order of Structure.to() differs between
    # pymatgen releases ((fmt, filename) vs (filename, fmt)).
    sprim.to(fmt="cif", filename=writepath)

In [4]:
# Reduce each example CIF in 'structures' to its primitive cell.
for mof_name in ('UiO66', 'hkust1'):
    get_primitive(
        os.path.join('structures', mof_name + '.cif'),
        os.path.join('structures', mof_name + '_primitive.cif'),
    )

Now we can compute the RACs.

In [6]:
# Build one {descriptor name: value} record per primitive CIF.
featurization_list = []
output_dir = os.path.join('structures', 'output')
xyz_dir = os.path.join(output_dir, 'xyz')
# Create the dump directories up front so the cell also runs on a fresh checkout.
os.makedirs(xyz_dir, exist_ok=True)
# glob returns matches in arbitrary order; sort case-insensitively so the
# row order of the resulting table is reproducible across machines.
primitive_cifs = sorted(glob(os.path.join('structures', '*_primitive.cif')), key=str.lower)
for cif_file in primitive_cifs:
    full_names, full_descriptors = get_MOF_descriptors(
        cif_file,  # input structure
        3,  # scope
        path=output_dir,  # stuff will be dumped here
        xyzpath=os.path.join(xyz_dir, Path(cif_file).stem + '.xyz'))
    featurization = dict(zip(full_names, full_descriptors))
    # Record which file the descriptors belong to without mutating the returned lists.
    featurization['filename'] = cif_file
    featurization_list.append(featurization)


('cell vectors: ', 'alpha, beta, gamma = 89.99975933, 120.0001 ,120.00041684')
[18.5949, 0, 0]
[-9.297584317777133, 16.103617861180716, 0]
[-9.297528106318486, -5.367930352512864, 15.18273097445874]
Making the SBU RACs worked
176 176
('cell vectors: ', 'alpha, beta, gamma = 60.0, 60.0 ,60.0')
[14.63739321, 0, 0]
[7.318696605000002, 12.67635436504185, 0]
[7.318696605000002, 4.225451455013951, 11.951381509659711]
Making the SBU RACs worked
176 176


In [13]:
# One row per featurized CIF; columns are the descriptor names plus 'filename'.
df = pd.DataFrame(featurization_list)

In [14]:
# Display the featurization table.
df

Unnamed: 0,f-chi-0-all,f-chi-1-all,f-chi-2-all,f-chi-3-all,f-Z-0-all,f-Z-1-all,f-Z-2-all,f-Z-3-all,f-I-0-all,f-I-1-all,...,D_func-T-3-all,D_func-S-0-all,D_func-S-1-all,D_func-S-2-all,D_func-S-3-all,D_func-alpha-0-all,D_func-alpha-1-all,D_func-alpha-2-all,D_func-alpha-3-all,filename
0,127.8988,244.928,456.1952,525.632,2338.0,4480.0,4832.0,6016.0,14.0,32.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,structures/hkust1_primitive.cif
1,467.3186,860.2752,4181.3352,3624.384,12080.0,33024.0,71424.0,90624.0,50.0,144.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,structures/UiO66_primitive.cif


In [15]:
# (rows, columns): one row per featurized CIF; the columns include 'filename'.
df.shape

(2, 177)

Let's select the 156 RACs reported in the [paper](https://chemrxiv.org/articles/Understanding_the_Diversity_of_the_Metal-Organic_Framework_Ecosystem/12251186). The SBU RACs are redundant with the mc RACs and are therefore not included.

In [10]:
# Keep only the columns whose name contains one of the selected descriptor tags,
# and put 'filename' first.
keep = [
    column
    for column in df.columns.values
    if any(tag in column for tag in ('mc', 'lc', 'f-lig', 'func'))
]
df = df[['filename'] + keep]

In [11]:
# Shape after filtering: the kept descriptor columns plus 'filename'.
df.shape

(2, 157)