In [14]:
import pathlib
import sys
import os

import copy
import itertools
import random
import collections
import pickle
from collections import defaultdict
from pathlib import Path
from itertools import chain

import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

import ase
import ase.io
import ase.data
import ase.utils
import ase.visualize
import ase.neighborlist
try:
    from ase.utils import natural_cutoffs
except Exception as e:
    from ase.neighborlist import natural_cutoffs

import pymatgen as mg

import pormake
from pormake import *

import rodmof
from rodmof import *
from rodmof.rod_utils import *
pormake.log.disable_print()
pormake.log.disable_file_print()



In [15]:
def pick(list_like):
    """Return a single element chosen at random from ``list_like``."""
    # Draw a one-element sample and unpack its only member.
    (chosen,) = random.sample(list_like, 1)
    return chosen

def count_normal_atoms(bb):
    """
    Count the atoms of ``bb`` whose chemical symbol is not "X".

    Returns 0 when ``bb`` is None; otherwise the count is taken over
    ``bb.atoms.get_chemical_symbols()``.
    """
    if bb is None:
        return 0

    symbols = bb.atoms.get_chemical_symbols()
    # Element-wise comparison against the "X" marker symbol.
    is_normal_atom = symbols != np.array("X")
    return np.sum(is_normal_atom)

def calculate_n_atoms_of_mof(_topology, _node_bbs, _edge_bbs):
    """
    Total number of non-"X" atoms for a topology filled with building blocks.

    _node_bbs maps node type -> building block, _edge_bbs maps edge type
    (as a tuple) -> building block. Each building block's atom count (from
    count_normal_atoms) is weighted by how often its type occurs in
    _topology.
    """
    # Number of nodes of each unique node type.
    node_multiplicity = {
        node_type: np.sum(_topology.node_types == node_type)
        for node_type in _topology.unique_node_types
    }

    # Number of edges of each unique edge type; an edge matches when every
    # component of its type row equals the unique type.
    edge_multiplicity = {
        tuple(edge_type): np.sum(
            np.all(_topology.edge_types == edge_type[np.newaxis, :], axis=1)
        )
        for edge_type in _topology.unique_edge_types
    }

    total = 0
    for node_type, node_bb in _node_bbs.items():
        total += node_multiplicity[node_type] * count_normal_atoms(node_bb)
    for edge_type, edge_bb in _edge_bbs.items():
        total += edge_multiplicity[edge_type] * count_normal_atoms(edge_bb)

    return total

In [16]:
class RodTopology(Topology):
    """Topology that also remembers which indices belong to rods."""

    def add_rod_info(self, indices):
        """Store ``indices`` as ``self.rod_edge_indices``."""
        self.rod_edge_indices = indices

    def get_rod_permutation_info(self):
        """
        Build {n: [T, F, T, F]} for every node index n.

        Each flag corresponds to one entry of get_neighbor_indices(n):
            True  -> the neighbor index is in rod_edge_indices (rod)
            False -> ordinary edge
        """
        return {
            node: [
                neighbor in self.rod_edge_indices
                for neighbor in self.get_neighbor_indices(node)
            ]
            for node in self.node_indices
        }


class RodBuildingBlock(BuildingBlock):
    """Building block that also remembers which connection points are rods."""

    def add_rod_info(self, indices):
        """Store ``indices`` as ``self.rod_indices``."""
        self.rod_indices = indices

    def get_permutation_for_slot(self, topo_info):
        """
        Assign one connection-point position to every entry of topo_info.

        topo_info : list of flags (truthy -> rod, falsy -> edge)
        return    : list of the same length; entry i is the position (within
                    connection_point_indices) given to slot i. Rod slots
                    consume rod positions in ascending order, edge slots
                    consume the remaining positions in ascending order.
        """
        permutation = [None] * len(topo_info)

        # Split connection-point positions into two first-in-first-out queues.
        rod_queue = []
        edge_queue = []
        for position, atom_index in enumerate(self.connection_point_indices):
            queue = rod_queue if atom_index in self.rod_indices else edge_queue
            queue.append(position)

        for slot, is_rod in enumerate(topo_info):
            queue = rod_queue if is_rod else edge_queue
            permutation[slot] = queue.pop(0)

        return permutation

In [17]:
# Per-topology rod information: which indices are rods ('rod_edge_index')
# and which node types are occupied by the rod ('rod_node_index').
topo_data = {
    'cds': {
        'rod_edge_index': [2, 3],
        'rod_node_index': [0],
    },
    'dia': {
        'rod_edge_index': [19, 8, 23, 15, 13, 22, 10, 17],
        'rod_node_index': [0],
    },
    'mog': {
        'rod_edge_index': [14, 15, 16, 17],
        'rod_node_index': [1],
    },
}

In [18]:
# One RodTopology per entry of topo_data, loaded from the matching .cgd file
# and annotated with that entry's rod edge indices.
topology_names = [*topo_data]

topologies = {}
for n in topology_names:
    cgd_path = '../topos_and_bbs/topologies/%s.cgd' % n
    t = RodTopology(cgd_path)
    t.add_rod_info(topo_data[n]['rod_edge_index'])
    topologies[n] = t

print(topologies)
print("%d topologies " % len(topologies))

{'cds': Topology cds, (4)-cn, num edge types: 1, 'dia': Topology dia, (4)-cn, num edge types: 1, 'mog': Topology mog, (4,4)-cn, num edge types: 2}
3 topologies 


In [19]:
# Load every rod building block found in the rodbbs directory.
rod_bbs = []

# Path.glob yields entries in arbitrary (filesystem-dependent) order; sorting
# makes rod_bbs -- and every name list derived from it -- reproducible.
for xyz_path in sorted(Path("../topos_and_bbs/rodbbs").glob("*.xyz")):
    bb = RodBuildingBlock(xyz_path)
    bb.has_metal = True
    # The last two '_'-separated fields of the building block name are parsed
    # as integers and stored as its rod indices.
    indices = [int(i) for i in bb.name.split('_')[-2:]]
    bb.add_rod_info(indices)
    rod_bbs.append(bb)

In [20]:
# Gather asymmetric linkers: their names are the index (first column) of a
# header-less, comma-separated file.
asym_linkers_df = pd.read_csv(
    "./asymmetric_linker_name.csv",
    sep=",",
    header=None,
    index_col=0,
)
asym_bbs = list(asym_linkers_df.index)

In [21]:
# Regard the porphyrin building block and the "si" building blocks as
# organic building blocks.

# Single definition of the building-block database location (previously
# repeated verbatim for the node and the edge scan).
BB_DATABASE_DIR = Path("/home/lim/works/wrMOFs_CH4/rodmof_gen/PORMAKE/pormake/database/bbs/")

porphyrin_bb_names = [
    'N13',
]

si_bb_names = [
    "N96",
    "N601",
    "N585",
]

remove_bbs = [
    'N575', 'E106',  'E118', 'E157',
    'E183', 'E196', 'E26', 'E58', 'E6', 'E94',
    'N20', 'N353', 'N57', 'E114', 'E122', 'E15',
    'E192', 'E220', 'E42', 'E67', 'E76', 'N255', 
    'N468', 'N82', 'E116', 'E146', 'E171', 'E193', 
    'E232', 'E46', 'E69', 'E83', 'N160', 'N28', 'N488',
    'N99', 'N190', 'N484', 'N403',
]

organic_bb_names = porphyrin_bb_names + si_bb_names
# Asymmetric linkers are excluded as well; duplicates are dropped.
remove_bbs += asym_bbs
remove_bbs = list(set(remove_bbs))

# Node building blocks: keep only those whose has_metal flag ends up False.
# Sorting the glob makes the resulting order independent of the filesystem.
node_bbs = []
for xyz_path in sorted(BB_DATABASE_DIR.glob("N*.xyz")):
    bb = BuildingBlock(xyz_path)
    if bb.name in organic_bb_names:
        bb.has_metal = False
    # Flagging an excluded building block as metal-containing makes the
    # filter below drop it.
    if bb.name in remove_bbs:
        bb.has_metal = True
    if not bb.has_metal:
        node_bbs.append(bb)

# Edge building blocks: same exclusion rule, no organic override.
edge_bbs = []
for xyz_path in sorted(BB_DATABASE_DIR.glob("E*.xyz")):
    bb = BuildingBlock(xyz_path)
    if bb.name in remove_bbs:
        bb.has_metal = True
    if not bb.has_metal:
        edge_bbs.append(bb)

In [22]:
# Lookup table from building block name to object; 'E0' maps to None.
name2bb = {'E0': None}
for bb in itertools.chain(rod_bbs, node_bbs, edge_bbs):
    name2bb[bb.name] = bb

rod_names = [rod.name for rod in rod_bbs]
node_names = [node.name for node in node_bbs]
edge_names = [edge.name for edge in edge_bbs]

# Summary of what was loaded.
print("Rod BBs : %d" % len(rod_bbs))
print("Normal Node BBs : %d" % len(node_bbs))
print("Edge BBs : %d\n" % len(edge_bbs))
print("Total BBs : %d" % len(name2bb))

Rod BBs : 19
Normal Node BBs : 103
Edge BBs : 143

Total BBs : 266


In [23]:
# RMSD thresholds below which a building block is accepted for a local
# structure of a topology.
ROD_RMSD_CUT = 0.3
NODE_RMSD_CUT = 0.3

locator = pormake.Locator()


def _match_bbs_to_local_structures(topo, candidate_bbs, rmsd_cut):
    """
    For each unique local structure of ``topo`` list the matching bb names.

    A candidate matches when it has exactly as many connection points as the
    local structure has positions and ``locator.calculate_rmsd`` is strictly
    below ``rmsd_cut``.

    Returns a list with one list of names per entry of
    ``topo.unique_local_structures`` (same order).
    """
    seed = []
    for local in topo.unique_local_structures:
        matched_bbs = []
        for bb in candidate_bbs:
            # Skip the RMSD evaluation when the connectivity already differs.
            if len(local.positions) != bb.n_connection_points:
                continue
            if locator.calculate_rmsd(local, bb) < rmsd_cut:
                matched_bbs.append(bb.name)
        seed.append(matched_bbs)
    return seed


# Rod candidates first, then node candidates (same evaluation order as two
# consecutive passes over the topologies).
rod_seed = {
    toponame: _match_bbs_to_local_structures(topologies[toponame], rod_bbs, ROD_RMSD_CUT)
    for toponame in topology_names
}

node_seed = {
    toponame: _match_bbs_to_local_structures(topologies[toponame], node_bbs, NODE_RMSD_CUT)
    for toponame in topology_names
}

  U, rmsd = scipy.spatial.transform.Rotation.align_vectors(p, q)


# MOF name generation

In [24]:
# Enumerate "<topology>+<rod>+<node>+<edge>" names for every topology.
mof_names = []
mof_names_per_topology = {}

for toponame in topology_names:
    topo = topologies[toponame]

    # Node types not occupied by the rod; only the first one is used to look
    # up candidates. Without such a type the node token is the empty string.
    node_sites = [
        node_type for node_type in topo.unique_node_types
        if node_type not in topo_data[toponame]['rod_node_index']
    ]
    node_candidates = node_seed[toponame][node_sites[0]] if node_sites else ['']

    # Node types occupied by the rod.
    rod_sites = [
        node_type for node_type in topo.unique_node_types
        if node_type in topo_data[toponame]['rod_node_index']
    ]
    rod_candidates = rod_seed[toponame][rod_sites[0]]

    count = 0
    for r in rod_candidates:
        for c in node_candidates:
            # Organic node exists -> allow the [E0:None] edge in front of the
            # regular edges; no organic node -> regular edges only.
            edge_choices = ['E0'] + edge_names if c else edge_names
            for e in edge_choices:
                name = '+'.join([toponame, r, c, e])
                mof_names.append(name)
                count += 1

    mof_names_per_topology[toponame] = count

In [25]:
# Total number of generated names, then the per-topology breakdown.
print(len(mof_names), mof_names_per_topology, sep="\n")

39005
{'cds': 715, 'dia': 2002, 'mog': 36288}


In [26]:
# Persist the generated names, one per line. The context manager closes the
# file even if a write fails part-way through.
with open("./bulk_generation.txt", "w") as f:
    for n in mof_names:
        f.write(n)
        f.write("\n")

# NAME to CIF

In [27]:
from pormake.framework import Framework

def name_to_mof(_mof_name, _topologies=topologies, _name2bb=name2bb):
    """
    Build a framework from a "+"-separated MOF name.

    Token 0 is the topology name and token 1 the rod building block name;
    of the remaining tokens, those starting with "N" are node names and
    those starting with "E" are edge names (only the first of each is used).
    Returns whatever ``Builder.build`` returns.
    """
    # --- parse the name ---------------------------------------------------
    _tokens = _mof_name.split("+")
    _toponame = _tokens[0]
    _rod_name = _tokens[1]
    _node_names = [_tok for _tok in _tokens[2:] if _tok.startswith("N")]
    _edge_names = [_tok for _tok in _tokens[2:] if _tok.startswith("E")]

    # --- resolve names to objects -----------------------------------------
    _topology = _topologies[_toponame]
    _rod_bb = _name2bb[_rod_name]
    _node_bbs = [_name2bb[_nm] for _nm in _node_names]
    _edge_bbs = [_name2bb[_nm] for _nm in _edge_names]

    # --- nodes: rod node types get the rod, every other type the node bb ---
    _all_nodes = {
        _node_type: (
            _rod_bb
            if _node_type in topo_data[_toponame]['rod_node_index']
            else _node_bbs[0]
        )
        for _node_type in _topology.unique_node_types
    }

    _builder = Builder()
    _bbs = _builder.make_bbs_by_type(_topology, _all_nodes)

    # --- edges: every edge index that is not a rod edge gets the edge bb ---
    for _edge_index in _topology.edge_indices:
        if _edge_index in _topology.rod_edge_indices:
            continue
        _bbs[_edge_index] = _edge_bbs[0]

    # --- permutations: only for rods with four connection points, and only
    #     for slots that have exactly two rod neighbors ----------------------
    if _rod_bb.n_connection_points == 4:
        _permutation = {
            _slot: _rod_bb.get_permutation_for_slot(_flags)
            for _slot, _flags in _topology.get_rod_permutation_info().items()
            if sum(_flags) == 2
        }
    else:
        _permutation = None

    # Position of each rod index within the rod's connection point indices.
    _rod_connection_index = [
        np.where(_rod_bb.connection_point_indices == _rod_index)[0][0]
        for _rod_index in _rod_bb.rod_indices
    ]

    return _builder.build(
        _topology,
        _bbs,
        permutations=_permutation,
        rod_connection_indices=_rod_connection_index,
    )

def clean_rod_mof(mof):
    """
    Collapse metal-metal bonded atom pairs of ``mof`` and rebuild the bonds.

    Two modes, chosen by the chemical symbols present:

    * If 'Lr' or 'No' occurs, those two symbols act as the "metal" markers.
      Every bond touching a marker atom is set aside; it is only re-emitted
      if that marker atom takes part in a marker-marker bond.
    * Otherwise METAL_LIKE symbols are used, and only bonds touching an atom
      that takes part in a metal-metal bond are set aside.

    For each metal-metal bond (a, b): atom b's symbol is set to 'He', and the
    set-aside bonds of both a and b are re-attached to a. Afterwards every
    'No' becomes 'Cl' and every 'Lr' becomes 'O'.

    Note: ``mof.atoms`` is modified in place.

    Returns a new Framework built from the modified atoms, the rebuilt bond
    list and bond types, and ``mof.info`` (with wrap=True).
    """
    MM_bonds = []
    # metal atom index -> list of [neighbor index, bond type] set aside
    MM_neighbors = defaultdict(list)
    new_bonds = []
    new_bonds_types = []

    # Symbols are read once; nothing is mutated until classification is done.
    symbols = mof.atoms.get_chemical_symbols()

    if 'Lr' in symbols or 'No' in symbols:
        markers = ('No', 'Lr')
        for (a, b), t in zip(mof.bonds, mof.bond_types):
            a_is_marker = symbols[a] in markers
            b_is_marker = symbols[b] in markers
            if a_is_marker and b_is_marker:
                MM_bonds.append([a, b])
            elif a_is_marker:
                MM_neighbors[a].append([b, t])
            elif b_is_marker:
                MM_neighbors[b].append([a, t])
            else:
                new_bonds.append((a, b))
                new_bonds_types.append(t)
    else:
        # First pass: find the metal-metal bonds.
        for (a, b), t in zip(mof.bonds, mof.bond_types):
            if symbols[a] in METAL_LIKE and symbols[b] in METAL_LIKE:
                MM_bonds.append([a, b])

        # Built once instead of calling np.unique(MM_bonds) twice per bond.
        mm_atoms = {index for pair in MM_bonds for index in pair}

        # Second pass: set aside bonds that touch a metal-metal bonded atom.
        for (a, b), t in zip(mof.bonds, mof.bond_types):
            if a in mm_atoms or b in mm_atoms:
                a_is_metal = symbols[a] in METAL_LIKE
                b_is_metal = symbols[b] in METAL_LIKE
                if a_is_metal and b_is_metal:
                    # The metal-metal bond itself is dropped.
                    continue
                elif a_is_metal:
                    MM_neighbors[a].append([b, t])
                elif b_is_metal:
                    MM_neighbors[b].append([a, t])
            else:
                new_bonds.append((a, b))
                new_bonds_types.append(t)

    # Merge each metal-metal pair onto its first atom.
    for a, b in MM_bonds:
        mof.atoms[b].symbol = 'He'
        for i, t in MM_neighbors[b]:
            new_bonds.append((a, i))
            new_bonds_types.append(t)
        for i, t in MM_neighbors[a]:
            new_bonds.append((a, i))
            new_bonds_types.append(t)

    # Replace the marker symbols.
    for a in mof.atoms:
        if a.symbol == 'No':
            a.symbol = 'Cl'
        elif a.symbol == 'Lr':
            a.symbol = 'O'

    new_mof = Framework(
        mof.atoms,
        new_bonds,
        new_bonds_types,
        mof.info,
        wrap=True
    )
    return new_mof

In [28]:
def calculate_n_atoms_of_rodmof(_mof_name, _topologies=topologies, _name2bb=name2bb):
    """
    Count the non-"X" atoms a rod-MOF name would produce, without building it.

    The name is parsed like "<topology>+<rod>+<tokens...>": tokens starting
    with "N" are node names, tokens starting with "E" are edge names (only
    the first of each is used). Node contributions are weighted by how often
    each node type occurs; the edge contribution is weighted by the number
    of edge indices minus the number of rod edge indices.
    """
    # --- parse the name ---------------------------------------------------
    _tokens = _mof_name.split("+")
    _toponame = _tokens[0]
    _rod_name = _tokens[1]
    _node_names = [_tok for _tok in _tokens[2:] if _tok.startswith("N")]
    _edge_names = [_tok for _tok in _tokens[2:] if _tok.startswith("E")]

    # --- resolve names to objects -----------------------------------------
    _topology = _topologies[_toponame]
    _rod_bb = _name2bb[_rod_name]
    _node_bbs = [_name2bb[_nm] for _nm in _node_names]
    _edge_bbs = [_name2bb[_nm] for _nm in _edge_names]

    # Rod node types get the rod, every other node type the node bb.
    _all_nodes = {
        _node_type: (
            _rod_bb
            if _node_type in topo_data[_toponame]['rod_node_index']
            else _node_bbs[0]
        )
        for _node_type in _topology.unique_node_types
    }

    # Number of nodes of each type.
    _node_multiplicity = {
        _node_type: np.sum(_topology.node_types == _node_type)
        for _node_type in _topology.unique_node_types
    }

    # Edges that are not rod edges.
    _n_plain_edges = len(_topology.edge_indices) - len(_topology.rod_edge_indices)

    _total = 0
    for _node_type, _bb in _all_nodes.items():
        _total += _node_multiplicity[_node_type] * count_normal_atoms(_bb)

    _total += _n_plain_edges * count_normal_atoms(_edge_bbs[0])
    return _total

## Load MOF names from text file

In [29]:
# Read the generated names back, one per line; the context manager closes
# the file handle that the bare open(...).readlines() left open.
with open("./bulk_generation.txt", 'r') as names_file:
    lines = names_file.readlines()
name_list = [line.rstrip() for line in lines]

In [30]:
# Root directory under which one sub-directory per topology is created.
MOF_DIR = "/home/lim/works/wrMOFs_CH4/Re-rodmof_gen/bulk_gen/"
# File that collects the names of MOFs whose generation failed.
ERROR_LOG = "error_log.txt"

## Generate Rod-MOFs

In [31]:
# Frameworks with more atoms than this are skipped.
MAX_N_ATOMS = 1000

mog_token = []

# The context manager guarantees the error log is flushed and closed even if
# the run is interrupted (e.g. KeyboardInterrupt, which `except Exception`
# does not catch).
with open(ERROR_LOG, 'w') as error_file:
    for i, name in enumerate(name_list):
        try:
            tokens = name.split("+")
            topo = tokens[0]

            # One output directory per topology; exist_ok avoids the
            # check-then-create race of os.path.exists + os.makedirs.
            _MOF_DIR = MOF_DIR+f"{topo}/"
            os.makedirs(_MOF_DIR, exist_ok=True)

            # Cheap size estimate before the expensive build.
            n_atoms = calculate_n_atoms_of_rodmof(name)
            if n_atoms > MAX_N_ATOMS:
                continue

            mof = name_to_mof(name)
            new_mof = clean_rod_mof(mof)

            new_mof.write_cif(_MOF_DIR+name+'.cif')

        except Exception as e:
            # Best effort: report the failure, record the name, keep going.
            print(name)
            print(e)
            print(name, file=error_file)

