In [1]:
from omsdetector import MofCollection 
import warnings
import os
from pymatgen.core import Structure
from pymatgen.io.vasp import Poscar
from pymatgen.io.cif import CifWriter




## Creating a MOF Collection
### Loading from path names.

We can build a **MofCollection** from a list of paths to MOF CIF files. We can also specify the **analysis_folder** where all the results of the analyses performed on this MOF collection will be stored. The default value is analysis_folder = 'analysis_folder'.

In [2]:
# POSCAR to cif
# Folder that holds the input POSCAR and receives the generated CIF.
# The location can be overridden with the OMS_TEST_DIR environment variable so
# the notebook is not tied to a single machine; when the variable is unset the
# original path is used, so existing runs behave exactly as before.
path2test = os.environ.get('OMS_TEST_DIR',
                           '/Users/gjonesresearch/FeO_Local/Zeolite/test_zeolites')

# Read the structure from the POSCAR file and write it back out next to it as
# 'zsmfixed.cif', which is the file the following cells load.
structure = Structure.from_file(os.path.join(path2test, 'POSCAR'))
structure.to(filename=os.path.join(path2test, 'zsmfixed.cif'))

In [6]:
# Explicit list of CIF paths to load; here it holds only the zsmfixed.cif file
# located in path2test.
path_list = [
    os.path.join(path2test, 'zsmfixed.cif'),
]

# Alternative input (disabled):
# path_list = ['cif_files_example/HKUST-1_ASR_FIQCEN_clean.cif',
#              'cif_files_example/MgMOF-74_ASR_RAVVUH_clean.cif',
#              'cif_files_example/MOF-5_ASR_MIBQAR_clean.cif']

# Analysis results for this collection are stored under the analysis folder.
a_mof_collection = MofCollection(
    path_list=path_list,
    analysis_folder="analysis_folder_example",
)

print("There are {} MOFs in the collection.".format(len(a_mof_collection)))

Loading CIF files...
100.0 %
All Done.
There are 1 MOFs in the collection.


In [7]:
# Printing the collection shows the number of MOFs, the analysis folder and the
# CIF paths it holds.
print(a_mof_collection)

--------------------------------------------------
This collection holds information for 1 MOFs.
Analysis folder is: /Users/gjonesresearch/open_metal_detector/examples/analysis_folder_example

List of cif files in collection:

/Users/gjonesresearch/FeO_Local/Zeolite/test_zeolites/zsmfixed.cif
--------------------------------------------------


### Loading CIFs from folder.

It is often convenient to load all the CIF files located in a specific folder. 

We can do this by using the **from_folder()** method to create the MofCollection object.

In [9]:
# Build the collection from the CIF files found in path2test instead of
# listing the files one by one.
a_mof_collection = MofCollection.from_folder(
    collection_folder=path2test,
    analysis_folder="analysis_folder_example",
)

print("There are {} MOFs in the collection.".format(len(a_mof_collection)))

Loading CIF files...
100.0 %
All Done.
There are 1 MOFs in the collection.


### Loading selected CIFs from folder.

We might have a large number of CIFs in a folder but are only interested in examining a small subset of them. To accomplish this we can create a MofCollection from a folder as before, but this time provide the additional argument __name_list__, which is a list of the names of the MOFs we are interested in. 

For example:

In [10]:
# Same folder as before, but restricted to the entries given in name_list.
a_mof_collection_name_list = MofCollection.from_folder(
    collection_folder=path2test,
    analysis_folder="analysis_folder_example",
    name_list=["zsmfixed.cif"],
)

print("There are {} MOFs in the collection.".format(len(a_mof_collection_name_list)))

--------------------------------------------------
Using only MOFs in the name list.
--------------------------------------------------
Loading CIF files...
100.0 %
All Done.
There are 1 MOFs in the collection.


## Filtering a Collection
Before analyzing the MOFs in the collection we created we might want to filter the collection and keep only MOFs with certain characteristics, for example MOFs that contain certain metal atoms.

We can do this using the **filter_collection()** method on the collection object. The CIF files that match the filter are returned as a new collection. 

### Keep CIF files in original location

By default the filtered collection still points to the CIF files in their original location. This might be useful if we want to analyze a subset of MOFs without copying over the files.

In [11]:
# Filter on the "metal_species" property, asking for Fe. No new folder is
# given, so the CIF files stay in their original location.
Fe_coll = a_mof_collection.filter_collection(using_filter={"metal_species":["Fe"]})

--------------------------------------------------

Validating property : "metal_species"
Validated 100 %                                                                                                      
--------------------------------------------------
Filtering collection.

1 MOFs were matched using the provided filter.
Returning a new collection using the matched MOFs.
Loading CIF files...
100.0 %
All Done.
--------------------------------------------------


### Define a new CIF folder when filtering
If the keyword **new_collection_folder** is set when filtering a collection, the CIF files will be copied to that folder and the paths for the CIF files of the collection will be updated to point to the new location.

In [12]:
# Same Fe filter, but the matched CIF files are also copied into "Fe_mofs" and
# the collection paths are updated to the copies.
Fe_coll = a_mof_collection.filter_collection(
    using_filter={"metal_species": ["Fe"]},
    new_collection_folder="Fe_mofs",
)

--------------------------------------------------

Validating property : "metal_species"
Validated 100 %                                                                                                      
--------------------------------------------------
Filtering collection.

1 MOFs were matched using the provided filter.
Returning a new collection using the matched MOFs.
Loading CIF files...
100.0 %
All Done.
--------------------------------------------------
--------------------------------------------------
The cif files for this collection will be copied to the specified folder:
"/Users/gjonesresearch/open_metal_detector/examples/Fe_mofs"
The cif paths will be updated.
--------------------------------------------------


### Copy filtered collection later

The third option is to create a filtered collection and explicitly copy the files at a later stage.

In [13]:
# Filter on two properties at once (metal and non-metal species). The CIF
# files are left in place here and copied explicitly in the next cell.
Fe_coll = a_mof_collection.filter_collection(
    using_filter={
        "metal_species": ["Fe"],
        "non_metal_species": ["O"],
    }
)
print(Fe_coll)

--------------------------------------------------

Validating properties : "metal_species, non_metal_species"
Validated 100 %                                                                                                      
--------------------------------------------------
Filtering collection.

1 MOFs were matched using the provided filter.
Returning a new collection using the matched MOFs.
Loading CIF files...
100.0 %
All Done.
--------------------------------------------------
--------------------------------------------------
This collection holds information for 1 MOFs.
Analysis folder is: /Users/gjonesresearch/open_metal_detector/examples/analysis_folder_example

List of cif files in collection:

/Users/gjonesresearch/FeO_Local/Zeolite/test_zeolites/zsmfixed.cif
--------------------------------------------------


In [14]:
# Copy the CIF files of the filtered collection into "Fe_mofs"; the paths held
# by the collection are updated to the copies, as the printout below shows.
Fe_coll.copy_cifs(target_folder="Fe_mofs")
print(Fe_coll)

--------------------------------------------------
The cif files for this collection will be copied to the specified folder:
"/Users/gjonesresearch/open_metal_detector/examples/Fe_mofs"
The cif paths will be updated.
--------------------------------------------------
--------------------------------------------------
This collection holds information for 1 MOFs.
Analysis folder is: /Users/gjonesresearch/open_metal_detector/examples/analysis_folder_example

List of cif files in collection:

/Users/gjonesresearch/open_metal_detector/examples/Fe_mofs/zsmfixed.cif
--------------------------------------------------


The same operations can be performed for any results that might be present using the **new_analysis_folder** keyword  or the **copy_results()** function.

## Analyze a MOF Collection

Once we have a MOF collection we can run the **analyse_mofs()** method on it, which will detect all the open metal sites (OMS) in the collection. 

In [15]:
# Run the open-metal-site analysis with the default settings (one batch, no
# overwrite); results are stored inside the collection's analysis folder.
a_mof_collection.analyse_mofs()

--------------------------------------------------
Running OMS Analysis...
--------------------------------------------------
1 batch requested. 
Overwrite is set to False. 
Storing results in analysis_folder_example/oms_results. 
--------------------------------------------------

Validating property : "load_balancing_index"
Validated 100 %                                                                                                      
--------------------------------------------------
Checking if results for any of the MOFs exist...
Will not skip any MOFs
--------------------------------------------------
Batch 1 has 1 MOFs
--------------------------------------------------
Batch 1 100.00 % : Analysing zsmfixed                                                                                                    



Batch 1 Finished.                                                                                                                        
Validating property : "has_oms"
Validated 100 %                                                                                                      

Analysis Finished. Time required:17.48 sec
--------------------------------------------------


### How to overwrite results

If we try to re-run the analysis code it will by default only analyze MOFs for which no results can be found. This makes it easy to resume a calculation that ended prematurely.

In [16]:
# Re-running with the defaults skips every MOF for which results already exist.
a_mof_collection.analyse_mofs()

--------------------------------------------------
Running OMS Analysis...
--------------------------------------------------
1 batch requested. 
Overwrite is set to False. 
Storing results in analysis_folder_example/oms_results. 
--------------------------------------------------

Validating property : "load_balancing_index"
Validated 100 %                                                                                                      
--------------------------------------------------
Checking if results for any of the MOFs exist...
Skipping 1 MOFs because results were found. 
--------------------------------------------------
Batch 1 has 0 MOFs
--------------------------------------------------
Batch 1 Finished.                                                                                                    



Batch 1 Finished.                                                                                                    
Validating property : "has_oms"
Validated 100 %                                                                                                      

Analysis Finished. Time required:3.67 sec
--------------------------------------------------


To control this behavior and force all the MOFs for which results exist to be reanalyzed we can set the keyword **overwrite** to True.

In [17]:
# overwrite=True forces MOFs with existing results to be analysed again.
a_mof_collection.analyse_mofs(overwrite=True)

--------------------------------------------------
Running OMS Analysis...
--------------------------------------------------
1 batch requested. 
Overwrite is set to True. 
Storing results in analysis_folder_example/oms_results. 
--------------------------------------------------

Validating property : "load_balancing_index"
Validated 100 %                                                                                                      
--------------------------------------------------
--------------------------------------------------
Batch 1 has 1 MOFs
--------------------------------------------------
Batch 1 100.00 % : Analysing zsmfixed                                                                                                    



Batch 1 Finished.                                                                                                                        
Validating property : "has_oms"
Validated 100 %                                                                                                      

Analysis Finished. Time required:17.22 sec
--------------------------------------------------


### Run analysis in parallel

Since every MOF can be analyzed separately, we can parallelize the analysis by splitting the structures into batches and running each batch as a separate process. The number of batches is specified using the **num_batches** keyword, for which the default value is 1. The structures are first ordered based on the square of the number of atoms and then split into batches. This ensures that all the batches take roughly the same time to run, which results in a time-efficient completion of the analysis.

In [23]:
# Left disabled in this example; uncomment to re-run the analysis split into 3 batches.
# a_mof_collection.analyse_mofs(num_batches=3, overwrite=True)

## Summarizing Results

### Summary for each metal type

We can get a table that summarizes the findings for each metal type using the __summarize_results()__ function.

In [19]:
# Print overall counts followed by a summary table with one row per metal type.
a_mof_collection.summarize_results()


Validating property : "has_oms"
Validated 100 %                                                                                                      
--------------------------------------------------
Number of total MOFs: 1
Number of total MOFs with open metal sites: 0
Number of total unique sites: 40
Number of total unique open metal sites: 0
--------------------------------------------------
Summary Table

       MOFs  MOFs_with_OMS  Metal Sites  OMS MOFs_with_OMS(%) OMS (%)
metal                                                                
Al        1              0            2    0           0.00 %  0.00 %
Fe        1              0            2    0           0.00 %  0.00 %
Si        1              0           36    0           0.00 %  0.00 %


### Summary for each MOF

We can obtain a DataFrame for the OMS for each MOF using the __mof_oms_df__ variable of the __MofCollection__ object.

In [20]:
# mof_oms_df is a DataFrame with one row per MOF and the columns
# "Metal Types", "Has OMS" and "OMS Types"; it is reused in the next cell.
mofs_df = a_mof_collection.mof_oms_df
print(mofs_df)


Validating property : "has_oms"
Validated 100 %                                                                                                      
         Metal Types Has OMS OMS Types
zsmfixed    Al,Fe,Si      No       N/A


We can then use standard pandas operations to select MOFs, for example, with certain metals or all MOFs that have OMS.

In [21]:
# Boolean row masks over the per-MOF table.
contains_fe = mofs_df["Metal Types"].str.contains("Fe")  # "Fe" appears in the metal list
has_open_site = mofs_df["Has OMS"] == "Yes"              # flagged as having an OMS

print("MOFs that contain Fe")
print(mofs_df[contains_fe])

print("\nMOFs that have OMS")
print(mofs_df[has_open_site])

MOFs that contain Fe
         Metal Types Has OMS OMS Types
zsmfixed    Al,Fe,Si      No       N/A

MOFs that have OMS
Empty DataFrame
Columns: [Metal Types, Has OMS, OMS Types]
Index: []


### Filter collection using results

Finally, we can use the filter function to isolate, for example, MOFs that have Fe metal sites and that contain open metal sites. We can also copy the cif files and result files for this subset to new locations by providing values for the **new_collection_folder** and **new_analysis_folder** keywords. If no MOF matches the filter, no collection is returned and the call evaluates to `None`, as in the example below.

In [22]:
# Combine a structural property (metal species) with an analysis result
# (has_oms), and request new folders for both the CIF files and the results.
Fe_oms = a_mof_collection.filter_collection(
    using_filter={
        "metal_species": ["Fe"],
        "has_oms": True,
    },
    new_collection_folder='Fe_oms',
    new_analysis_folder='Fe_oms_analysis',
)

print(Fe_oms)

--------------------------------------------------

Validating properties : "metal_species, has_oms"
Validated 100 %                                                                                                      
--------------------------------------------------
Filtering collection.

No MOFs were matched using the provided filter.
No collection returned.
None
