In [1]:
import networkx as nx
import numpy as np
import os
import veloxchem as vlx
import re
import time
import datetime
from tests.MOF_builder.functions.bbcif_properties import selected_type_vecs
from tests.MOF_builder.functions.place_bbs import superimpose
from tests.MOF_builder.functions.frag_recognizer import process_linker_molecule
from tests.MOF_builder.functions._readcif import extract_type_atoms_fcoords_in_primitive_cell,read_cif,extract_atoms_fcoords_from_lines,extract_atoms_ccoords_from_lines
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage, fcluster
from scipy.optimize import minimize
from tests.MOF_builder.functions.chainnode import process_chain_node
from tests.MOF_builder.functions.learn_template import extract_cluster_center_from_templatecif,make_supercell_3x3x3,find_pair_v_e,add_ccoords,set_DV_V,set_DE_E
from tests.MOF_builder.functions.learn_template import sort_nodes_by_type_connectivity,find_and_sort_edges_bynodeconnectivity
from tests.MOF_builder.functions.pillarstack_check import check_if_pillarstack,rotate_node_for_pillar
from tests.MOF_builder.functions.place_node_edge import addidx
from tests.MOF_builder.functions.node_rotation_matrix_optimizer import axis_optimize_rotations,apply_rotations_to_atom_positions,apply_rotations_to_xxxx_positions,update_ccoords_by_optimized_cell_params
from tests.MOF_builder.functions.place_node_edge import get_edge_lengths,update_node_ccoords,unit_cell_to_cartesian_matrix,cartesian_to_fractional,fractional_to_cartesian
from tests.MOF_builder.functions.place_node_edge import place_edgeinnodeframe
from tests.MOF_builder.functions.scale_cif_optimizer import optimize_cell_parameters

In [2]:
start_time = time.time()

# ---------------------------------------------------------------------------
# Inputs and settings
# ---------------------------------------------------------------------------
linker_file = 'ndi.xyz'
linker_topic = 2
chain_node_cif = '21Alchain.cif'
template_cif = 'chain_rna.cif'
linker_cif = 'edges/diedge.cif'
node_target_type = 'Al'
template_cif_file = 'MIL53templatecif.cif'
cluster_distance_threshhold = 0.1

# Pillar-stack workflow:
#   * chain-node CIF : the pillar direction is defined by the Al-Al vectors.
#   * linker CIF     : the linker length is the maximum length spanned by its X vectors.
#   * template CIF   : the pillar direction runs along the pure V-V direction
#                      (no Er node in between).
#   * the template unit cell is scaled to fit the linker length, while the box
#     vector along the pillar direction stays fixed.

# ---------------------------------------------------------------------------
# Linker
# ---------------------------------------------------------------------------
molecule = vlx.Molecule.read_xyz_file(linker_file)
(
    center_frag_nodes_num,
    center_Xs,
    single_frag_nodes_num,
    frag_Xs,
) = process_linker_molecule(molecule, linker_topic)

# A ditopic linker only has two X vectors; the edge length is the X-X distance
# in the linker CIF (angstrom).
linker_x_vecs = selected_type_vecs(linker_cif, '.', 'X', False)
linker_length = np.linalg.norm(linker_x_vecs[0] - linker_x_vecs[1])
linker_cell_info, _, linker_atom_site_sector = read_cif(linker_cif)
ll, linker_atom, linker_ccoords = extract_atoms_ccoords_from_lines(
    linker_cell_info, linker_atom_site_sector
)
print(linker_length, 'linker_length')

# ---------------------------------------------------------------------------
# Chain node
# ---------------------------------------------------------------------------
(
    node_unit_cell,
    node_atom,
    node_pillar_fvec,
    node_x_vecs,
    chain_node_fcoords,
) = process_chain_node(chain_node_cif, node_target_type)

# ---------------------------------------------------------------------------
# Template net
# ---------------------------------------------------------------------------
# 'YY' sites give the node centres and 'XX' sites the edge centres of the template.
vvnode, cell_info, unit_cell = extract_cluster_center_from_templatecif(template_cif_file, 'YY', 1)
eenode, _, _ = extract_cluster_center_from_templatecif(template_cif_file, 'XX', 1)

# Search the 3x3x3 supercells of both site sets for the node pairs joined through an edge site.
vvnode333 = make_supercell_3x3x3(vvnode)
eenode333 = make_supercell_3x3x3(eenode)
pair_vv_e, _, G = find_pair_v_e(vvnode333, eenode333)
G = add_ccoords(G, unit_cell)
G = set_DV_V(G)
G = set_DE_E(G)

# Order the nodes by type/connectivity first, then order the edges accordingly.
sorted_nodes = sort_nodes_by_type_connectivity(G)
sorted_edges = find_and_sort_edges_bynodeconnectivity(G, sorted_nodes)

# One box direction is fixed parallel to the pillar direction; the chain node is
# rotated onto that direction and expressed in Cartesian coordinates.
PILLAR, pillar_vec = check_if_pillarstack(G)
pillar_oriented_node_xcoords, pillar_oriented_node_coords = rotate_node_for_pillar(
    G,
    node_unit_cell,
    node_pillar_fvec,
    pillar_vec,
    node_x_vecs,
    chain_node_fcoords,
)

# Not read anywhere in the visible part of the notebook; kept so that cells relying on
# these names keep working.
nodexxxx = []
chain_node_positions = []

# Re-index the graph nodes by their position in sorted_nodes, e.g. graph nodes [2, 5, 7]
# become [0, 1, 2]. The lookup table is built once instead of calling
# sorted_nodes.index() (a linear scan) for every node and every edge endpoint.
# Walking the list in reverse lets the earliest position win, exactly like list.index().
node_rank = {n: i for i, n in reversed(list(enumerate(sorted_nodes)))}

xxxx_positions_dict = {
    node_rank[n]: addidx(G.nodes[n]['ccoords'] + pillar_oriented_node_xcoords)
    for n in sorted_nodes
}
chain_node_positions_dict = {
    node_rank[n]: G.nodes[n]['ccoords'] + pillar_oriented_node_coords
    for n in sorted_nodes
}

# Same re-indexing applied to both endpoints of every edge.
sorted_edges_of_sortednodeidx = [(node_rank[e[0]], node_rank[e[1]]) for e in sorted_edges]

def save_xyz(filename, rotated_positions_dict):
    """
    Dump a ``{node: positions}`` mapping to an XYZ file for visualization.

    The header holds the total number of positions followed by a fixed comment
    line; each position becomes one line labelled ``X<node>`` with its first
    three components printed with eight decimals.
    """
    with open(filename, "w") as handle:
        atom_total = 0
        for coords in rotated_positions_dict.values():
            atom_total += len(coords)

        handle.write(f"{atom_total}\n")
        handle.write("Optimized structure\n")
        handle.writelines(
            f"X{label}   {xyz[0]:.8f} {xyz[1]:.8f} {xyz[2]:.8f}\n"
            for label, coords in rotated_positions_dict.items()
            for xyz in coords
        )

# Optimize rotations
num_nodes = G.number_of_nodes()

# Axis rotation (the "2D" variant): every node is rotated about the pillar vector
# returned by check_if_pillarstack, not about a hard-coded Cartesian axis.
axis = pillar_vec
optimized_rotations = axis_optimize_rotations(
    axis, num_nodes, G, sorted_nodes, sorted_edges_of_sortednodeidx, xxxx_positions_dict
)

# Apply the rotations to the node atoms and save them for inspection.
rotated_node_positions = apply_rotations_to_atom_positions(
    optimized_rotations, G, sorted_nodes, chain_node_positions_dict
)
save_xyz("optimized_nodesstructure.xyz", rotated_node_positions)

rotated_xxxx_positions_dict, optimized_pair = apply_rotations_to_xxxx_positions(
    optimized_rotations, G, sorted_nodes, sorted_edges_of_sortednodeidx, xxxx_positions_dict
)

# Edge lengths are the distances between the Cartesian coordinates of the two end nodes.
start_node = sorted_edges[0][0]
edge_lengths, lengths = get_edge_lengths(G)

# Target edge length: linker X-X length plus, at each end, a fixed offset and the mean
# length of the node X vectors.
constant_length = 1.6
x_com_length = np.mean([np.linalg.norm(i) for i in pillar_oriented_node_xcoords])
new_edge_length = linker_length + 2 * constant_length + 2 * x_com_length

# Walk the edges from start_node and move each connected node to the new edge length.
updated_ccoords, original_ccoords = update_node_ccoords(G, edge_lengths, start_node, new_edge_length)

# The start node is excluded from both coordinate sets before the cell fit.
updated_ccoords = {k: v for k, v in updated_ccoords.items() if k != start_node}
original_ccoords = {k: v for k, v in original_ccoords.items() if k != start_node}

# Fit the cell parameters to the moved nodes, then update every node coordinate in the graph.
optimized_params = optimize_cell_parameters(cell_info, original_ccoords, updated_ccoords)
sG, scaled_ccoords = update_ccoords_by_optimized_cell_params(G, optimized_params)

# Position of each node in sorted_nodes, computed once instead of one linear
# sorted_nodes.index() scan per node. Reverse iteration keeps the first occurrence,
# which is what list.index() returns.
scaled_node_rank = {n: i for i, n in reversed(list(enumerate(sorted_nodes)))}
scaled_chain_node_positions_dict = {
    scaled_node_rank[n]: sG.nodes[n]['ccoords'] + pillar_oriented_node_coords
    for n in sorted_nodes
}
scaled_xxxx_positions_dict = {
    scaled_node_rank[n]: addidx(sG.nodes[n]['ccoords'] + pillar_oriented_node_xcoords)
    for n in sorted_nodes
}

# Apply the same rotations to the scaled structure and save it.
scaled_rotated_chain_node_positions = apply_rotations_to_atom_positions(
    optimized_rotations, sG, sorted_nodes, scaled_chain_node_positions_dict
)
scaled_rotated_xxxx_positions, optimized_pair = apply_rotations_to_xxxx_positions(
    optimized_rotations, sG, sorted_nodes, sorted_edges_of_sortednodeidx, scaled_xxxx_positions_dict
)
save_xyz("scale_optimized_nodesstructure.xyz", scaled_rotated_chain_node_positions)

# Place the linker on every optimized X-X pair in the frame of the rotated, scaled nodes.
placed_node, placed_edge = place_edgeinnodeframe(
    sorted_nodes,
    optimized_pair,
    node_atom,
    linker_atom,
    linker_x_vecs,
    linker_ccoords,
    scaled_rotated_xxxx_positions,
    scaled_rotated_chain_node_positions,
)

center is a cycle
[9, 14]
ditopic linker: center is a cycle
find connected X in edge:   24
find connected X in edge:   39
edges/diedge.cif is writen
center_frag: 46 [18, 33]
15.754259612561931 linker_length
apply 7 symmetry operation
0  no symmetry operation
apply 7 symmetry operation
0  no symmetry operation
apply 16 symmetry operation
16  symmetry operation
apply 16 symmetry operation
16  symmetry operation
[[0.  0.5 0. ]
 [0.  1.  0. ]
 [0.5 0.  0.5]
 [0.5 0.5 0.5]]
[0.  0.5 0. ] E
[0. 1. 0.] E
[0.5 0.  0.5] E
[0.5 0.5 0.5] E
[0.5 1.  0.5] DE
[1.  0.5 0. ] DE
[1. 1. 0.] DE
[0. 0. 1.] DE
[0.  0.5 1. ] DE
[1. 0. 1.] DE
[1.  0.5 1. ] DE
pillar stack [1. 0. 0.]
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =           16     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  5.25093D+03    |proj g|=  1.13165D+02

At iterate    1    f=  4.91013D+03    |proj g|=  1.26809D+02

At iterate    2    f=  4.25032D+03    |

 This problem is unconstrained.



At iterate   10    f=  3.78395D+03    |proj g|=  9.99080D-02

At iterate   11    f=  3.52336D+03    |proj g|=  1.46960D+02

At iterate   12    f=  3.52336D+03    |proj g|=  1.46945D+02

At iterate   13    f=  3.39166D+03    |proj g|=  1.31556D+02

At iterate   14    f=  3.26670D+03    |proj g|=  8.72173D+01

At iterate   15    f=  3.21842D+03    |proj g|=  1.75605D+01

At iterate   16    f=  3.21698D+03    |proj g|=  1.64241D+01

At iterate   17    f=  3.21561D+03    |proj g|=  2.23890D+00

At iterate   18    f=  3.21554D+03    |proj g|=  4.95675D-01

At iterate   19    f=  3.21554D+03    |proj g|=  9.85892D-02

At iterate   20    f=  3.21554D+03    |proj g|=  1.25056D-02

At iterate   21    f=  3.21554D+03    |proj g|=  2.31921D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized 

In [3]:
# Show the keys of placed_edge (returned by place_edgeinnodeframe in the previous cell);
# the next cell looks each of these keys up in G.edges.
placed_edge.keys()



dict_keys([(2, 3), (1, 72), (1, 84), (2, 19), (3, 14), (0, 53), (0, 37), (5, 76), (5, 92), (4, 61), (4, 41)])

In [4]:

# Save results to XYZ
# Each record appended to placed_all has the layout
#   [label, x, y, z, 0, label, owner index, 'NODE' | 'EDGE'].
placed_all = []
xyz_atom_lines = []

# Nodes whose 'type' contains 'DV' are skipped entirely.
for node, positions in placed_node.items():
    if 'DV' in G.nodes[node]['type']:
        continue
    for pos in positions:
        xyz_atom_lines.append(f"{pos[0]}   {pos[1]:.8f} {pos[2]:.8f} {pos[3]:.8f}\n")
        line = np.array([pos[0], pos[1], pos[2], pos[3], 0, pos[0], node, 'NODE'])
        placed_all.append(line)
node_num_atoms = len(xyz_atom_lines)

# Edges whose 'type' contains 'DE' are skipped; every written edge gets its own
# negative index, counting down from -2.
edge_idx = -2
for edge, positions in placed_edge.items():
    if 'DE' in G.edges[edge]['type']:
        continue
    for pos in positions:
        xyz_atom_lines.append(f"{pos[0]}   {pos[1]:.8f} {pos[2]:.8f} {pos[3]:.8f}\n")
        line = np.array([pos[0], pos[1], pos[2], pos[3], 0, pos[0], edge_idx, 'EDGE'])
        placed_all.append(line)
    edge_idx -= 1
edge_num_atoms = len(xyz_atom_lines) - node_num_atoms

# The XYZ header must state the number of atom lines that follow. Count only the
# atoms that were actually kept: counting every atom in placed_node/placed_edge
# (including the skipped DV/DE entries) produces a header that disagrees with the body.
num_atoms = node_num_atoms + edge_num_atoms
with open("placed_structure.xyz", "w") as file:
    file.write(f"{num_atoms}\n")
    file.write("Optimized structure\n")
    file.writelines(xyz_atom_lines)

end_time =  time.time()
print('time cost:',end_time-start_time)


# Cartesian cell matrix built from the six optimized cell parameters.
sc_unit_cell = unit_cell_to_cartesian_matrix(optimized_params[0],optimized_params[1],optimized_params[2],optimized_params[3],optimized_params[4],optimized_params[5])

def write_cif_nobond(placed_all, scaled_params, cifname, unit_cell=None):
    """Write the placed atoms to ``output_cifs/<cifname>`` as a P1 CIF without bonds.

    Parameters
    ----------
    placed_all : iterable of sequences
        One record per atom; element 0 is the atom label and elements 1-3 are
        the Cartesian x, y, z coordinates (anything ``float()`` accepts).
    scaled_params : sequence of six values
        ``a, b, c, alpha, beta, gamma`` written verbatim into the cell section.
    cifname : str
        Output file name inside ``output_cifs``. A trailing ``.cif`` is removed
        to form the ``data_`` block name.
    unit_cell : array-like of shape (3, 3), optional
        Cartesian cell matrix used to convert coordinates to fractional ones
        (``frac = inv(unit_cell) @ cart``). When omitted, the module-level
        ``sc_unit_cell`` is used, which is what earlier versions always did.

    The atom-site loop is followed by an empty ``_geom_bond`` loop header.
    The ``output_cifs`` directory is created if it does not exist.
    """
    sc_a, sc_b, sc_c, sc_alpha, sc_beta, sc_gamma = scaled_params

    if unit_cell is None:
        # Backward-compatible fallback to the notebook-level cell matrix.
        unit_cell = sc_unit_cell
    # The inverse does not depend on the atom, so compute it once instead of per record.
    inv_cell = np.linalg.inv(unit_cell)

    # Strip the extension only when it is really there; blind slicing of the last
    # four characters would truncate names without a '.cif' suffix.
    data_name = cifname[:-4] if cifname.lower().endswith('.cif') else cifname

    os.makedirs('output_cifs', exist_ok=True)
    opath = os.path.join('output_cifs', cifname)

    header_lines = [
        'data_' + data_name,
        '_audit_creation_date              ' + datetime.datetime.today().strftime('%Y-%m-%d'),
        "_audit_creation_method            'MOFbuilder_1.0'",
        "_symmetry_space_group_name_H-M    'P1'",
        '_symmetry_Int_Tables_number       1',
        '_symmetry_cell_setting            triclinic',
        'loop_',
        '_symmetry_equiv_pos_as_xyz',
        '  x,y,z',
        '_cell_length_a                    ' + str(sc_a),
        '_cell_length_b                    ' + str(sc_b),
        '_cell_length_c                    ' + str(sc_c),
        '_cell_angle_alpha                 ' + str(sc_alpha),
        '_cell_angle_beta                  ' + str(sc_beta),
        '_cell_angle_gamma                 ' + str(sc_gamma),
        'loop_',
        '_atom_site_label',
        '_atom_site_type_symbol',
        '_atom_site_fract_x',
        '_atom_site_fract_y',
        '_atom_site_fract_z',
    ]
    bond_loop_lines = [
        'loop_',
        '_geom_bond_atom_site_label_1',
        '_geom_bond_atom_site_label_2',
        '_geom_bond_distance',
        '_geom_bond_site_symmetry_2',
        '_ccdc_geom_bond_type',
    ]

    with open(opath, 'w') as out:
        for text in header_lines:
            out.write(text + '\n')

        for l in placed_all:
            vec = list(map(float, l[1:4]))
            cvec = np.dot(inv_cell, vec)
            cvec = np.mod(cvec, 1)  # makes sure that all fractional coordinates are in [0,1)
            # The type symbol is the label with its digits removed.
            out.write('{:7} {:>4} {:>15} {:>15} {:>15}'.format(
                l[0],
                re.sub('[0-9]', '', l[0]),
                "%.10f" % np.round(cvec[0], 10),
                "%.10f" % np.round(cvec[1], 10),
                "%.10f" % np.round(cvec[2], 10),
            ))
            out.write('\n')

        for text in bond_loop_lines:
            out.write(text + '\n')
# Export the placed atoms together with the optimized cell parameters as a CIF.
write_cif_nobond(
    placed_all,
    scaled_params=optimized_params,
    cifname='placed_structure.cif',
)

time cost: 1.034386396408081


In [5]:
import os
import networkx as nx
import numpy as np
import re
import glob
import py3Dmol as p3d
from tests.MOF_builder.functions.ciftemplate2graph import ct2g
from tests.MOF_builder.functions.remove_dummy_atoms import remove_Fr
from tests.MOF_builder.functions.adjust_edges import adjust_edges,addxoo2edge,superimpose
from tests.MOF_builder.functions.write_cifs import write_cif_nobond, merge_catenated_cifs #write_check_cif,bond_connected_components, distance_search_bond, fix_bond_sym, merge_catenated_cifs
#from functions.scale_animation import scaling_callback_animation, write_scaling_callback_animation, animate_objective_minimization
import itertools
#from random import choice
from tests.MOF_builder.functions.cluster import cluster_supercell,placed_arr
from tests.MOF_builder.functions.supercell import find_new_node_beginning,Carte_points_generator
from tests.MOF_builder.functions.output import tempgro,temp_xyz,viewgro#,viewxyz
from tests.MOF_builder.functions.terminations import terminate_nodes,terminate_unsaturated_edges,add_node_terminations,exposed_xoo_cc,Xpdb,terminate_unsaturated_edges_CCO2
#from functions.filtX import filt_nodex_fvec,filt_closest_x_angle,filt_outside_edgex
#unctions.multitopic import 
from tests.MOF_builder.functions.isolated_node_cleaner import reindex_frag_array,get_frag_centers_fc,calculate_eG_net_ditopic,calculate_eG_net_ditopic_PILLAR
from tests.MOF_builder.functions.replace import fetch_by_idx_resname,sub_pdb


####### Global options #######


pi = np.pi

# Integer id for each vertex/element name. The ids run from 1 to 30 without gaps;
# 'Sc' used to share id 24 with 'Ca' while 25 was unused, so it is now 25.
vname_dict = {'V':1,'Er':2,'Ti':3,'Ce':4,'S':5,
              'H':6,'He':7,'Li':8,'Be':9,'B':10,
              'C':11,'N':12,'O':13,'F':14,'Ne':15,
              'Na':16,'Mg':17,'Al':18,'Si':19,'P':20 ,
              'Cl':21,'Ar':22,'K':23,'Ca':24,'Sc':25,
              'Cr':26,'Mn':27,'Fe':28,'Co':29,'Ni':30}

# Element symbols treated as metals.
metal_elements = ['Ac','Ag','Al','Am','Au','Ba','Be','Bi',
                  'Bk','Ca','Cd','Ce','Cf','Cm','Co','Cr',
                  'Cs','Cu','Dy','Er','Es','Eu','Fe','Fm',
                  'Ga','Gd','Hf','Hg','Ho','In','Ir',
                  'K','La','Li','Lr','Lu','Md','Mg','Mn',
                  'Mo','Na','Nb','Nd','Ni','No','Np','Os',
                  'Pa','Pb','Pd','Pm','Pr','Pt','Pu','Ra',
                  'Rb','Re','Rh','Ru','Sc','Sm','Sn','Sr',
                  'Ta','Tb','Tc','Th','Ti','Tl','Tm','U',
                  'V','W','Y','Yb','Zn','Zr']




class MOF_ditopic:
    """Supercell construction and defect modelling for a MOF with ditopic linkers.

    Call order used in this notebook: ``load`` -> ``basic_supercell`` ->
    ``defect_missing`` -> ``term_defective_model`` -> (optionally)
    ``defect_replace_linker``. The ``write_*`` methods dump the current state
    to disk and open a viewer.

    ``node_topics`` may be passed to the constructor or assigned on the
    instance afterwards; it must be set before ``defect_missing`` is called.
    """

    def __init__(self, node_topics=None):
        """Create an empty builder; ``linker_topics`` is fixed to 2 (ditopic)."""
        self.linker_topics = 2
        self.node_topics = node_topics

    @staticmethod
    def _fractional_to_cartesian(rows_fc, sc_unit_cell):
        """Convert the last three columns of ``rows_fc`` from fractional to Cartesian.

        Uses ``cart = sc_unit_cell @ frac``, the inverse of the
        ``frac = inv(sc_unit_cell) @ cart`` conversion in ``basic_supercell``.
        All other columns are passed through unchanged.
        """
        return np.hstack((rows_fc[:, :-3], np.dot(sc_unit_cell, rows_fc[:, -3:].T).T))

    def load(self, TG, placed_all, sc_unit_cell, placed_nodes, placed_edges):
        """Store the template graph, the placed atom records and the cell matrix."""
        self.TG = TG
        self.placed_all = placed_all
        self.sc_unit_cell = sc_unit_cell
        self.placed_nodes = placed_nodes
        self.placed_edges = placed_edges

    def basic_supercell(self, supercell, term_file='data/methyl.pdb', boundary_cut_buffer=0.00,
                        edge_center_check_buffer=0.0, cutxyz=(True, True, True)):
        """Build the supercell, cut it at the box boundary and terminate boundary nodes.

        Parameters
        ----------
        supercell : array-like supporting ``supercell + 1``
            Passed to ``Carte_points_generator``; ``supercell + 1`` is used as box bound.
        term_file : str
            Termination file handed to ``terminate_nodes``.
        boundary_cut_buffer, edge_center_check_buffer : float
            Forwarded unchanged to ``cluster_supercell``.
        cutxyz : sequence of three bools
            Per-axis cut flags forwarded to ``cluster_supercell``.

        Sets ``all_connected_node_edge_cc``, ``all_terms_cc_loose``,
        ``all_N_E_T_cc`` and ``bare_nodeedge_fc``.
        """
        cutx, cuty, cutz = cutxyz
        sc_unit_cell = self.sc_unit_cell
        # One inverse serves both the node and the edge conversion below.
        inv_unit_cell = np.linalg.inv(sc_unit_cell)

        frame_node_fc = np.asarray([self.TG.nodes[fn]['fcoords'] for fn in self.TG.nodes()])
        new_beginning_fc = find_new_node_beginning(frame_node_fc)

        placed_nodes_arr, nodes_id = placed_arr(self.placed_nodes)
        placed_edges_arr, edges_id = placed_arr(self.placed_edges)

        # Columns 1..3 hold Cartesian coordinates; convert them to fractional ones
        # relative to the new origin and keep the remaining columns untouched.
        placed_nodes_fc = np.hstack((
            placed_nodes_arr[:, 0:1],
            np.dot(inv_unit_cell, placed_nodes_arr[:, 1:4].T).T - new_beginning_fc,
            placed_nodes_arr[:, 4:],
        ))
        placed_edges_fc = np.hstack((
            placed_edges_arr[:, 0:1],
            np.dot(inv_unit_cell, placed_edges_arr[:, 1:4].T).T - new_beginning_fc,
            placed_edges_arr[:, 4:],
        ))

        # Nodes are kept with their X/O atoms because node termination needs them.
        target_all_fc = np.vstack((placed_nodes_fc, placed_edges_fc))
        box_bound = supercell + 1
        supercell_Carte = Carte_points_generator(supercell)
        connected_nodeedge_fc, boundary_connected_nodes_res, eG, bare_nodeedge_fc_loose = cluster_supercell(
            sc_unit_cell, supercell_Carte, self.linker_topics, target_all_fc, box_bound,
            boundary_cut_buffer, cutx, cuty, cutz, edge_center_check_buffer)
        terms_cc_loose = terminate_nodes(
            term_file, boundary_connected_nodes_res, connected_nodeedge_fc, sc_unit_cell, box_bound)

        connected_nodeedge_cc = self._fractional_to_cartesian(connected_nodeedge_fc, sc_unit_cell)

        self.all_connected_node_edge_cc = connected_nodeedge_cc
        self.all_terms_cc_loose = terms_cc_loose
        self.all_N_E_T_cc = np.vstack((connected_nodeedge_cc, terms_cc_loose))
        self.bare_nodeedge_fc = bare_nodeedge_fc_loose

    def write_basic_supercell(self, gro, xyz):
        """Write the supercell to ``gro``/``xyz`` (plus a fixed '20test.gro') and view both."""
        tempgro('20test.gro', self.all_connected_node_edge_cc[self.all_connected_node_edge_cc[:, 5] == 1])
        tempgro(gro, self.all_N_E_T_cc)
        temp_xyz(xyz, self.all_N_E_T_cc)
        viewgro("20test.gro")
        viewgro(gro)

    # Defective model: node/edge missing
    #   delete nodes or edges
    #   terminate nodes, ROUND 1
    #     find main fragment
    #       find unsaturated node uN1
    #         find uN1 neighbours and extract X in neighbour edge (E + int)
    #           filter exposed X sites in uN1
    #             add terminations
    #   terminate edges, ROUND 2
    #     find unsaturated edge uE1
    #       find uE1 neighbours and extract X in neighbour node 'int'
    #         filter exposed X sites in uE1
    #           add terminations (-OH)
    #
    # Defective model: linker exchange
    #   termination OO does not change, use X to set a range
    #   atoms in outX_range stay
    #   then superimpose by X (find X and superimpose for the replacement)

    def defect_missing(self, remove_node_list=None, remove_edge_list=None):
        """Remove the listed nodes/edges and analyse the remaining framework.

        Parameters
        ----------
        remove_node_list, remove_edge_list : list, optional
            Fragment indices (column 5 after re-indexing) to delete. ``None``
            means "remove nothing".

        Raises
        ------
        AttributeError
            If ``node_topics`` has not been set on the instance.

        Prints a summary of fragments and unsaturated sites, and sets ``eG``,
        ``main_frag_nodes``, ``main_frag_edges``, ``unsaturated_main_frag_nodes``,
        ``unsaturated_main_frag_edges``, ``node_xoo_dict``, ``con_nodes_x_dict``,
        ``main_frag_nodes_cc`` and ``main_frag_edges_cc``.
        """
        node_topics = getattr(self, 'node_topics', None)
        if node_topics is None:
            raise AttributeError(
                "node_topics is not set; pass MOF_ditopic(node_topics=...) or assign "
                "mof.node_topics before calling defect_missing()")
        remove_node_list = [] if remove_node_list is None else remove_node_list
        remove_edge_list = [] if remove_edge_list is None else remove_edge_list

        bare_nodeedge_fc_loose = self.bare_nodeedge_fc
        linker_topics = self.linker_topics
        sc_unit_cell = self.sc_unit_cell

        # Index fragments, drop the requested ones, then re-index what is left.
        renode1_fcarr = reindex_frag_array(bare_nodeedge_fc_loose, 'NODE')
        reedge1_fcarr = reindex_frag_array(bare_nodeedge_fc_loose, 'EDGE')
        defective_node_fcarr = np.vstack(([i for i in renode1_fcarr if i[5] not in remove_node_list]))
        defective_edge_fcarr = np.vstack(([i for i in reedge1_fcarr if i[5] not in remove_edge_list]))
        renode_fcarr = reindex_frag_array(defective_node_fcarr, 'NODE')
        reedge_fcarr = reindex_frag_array(defective_edge_fcarr, 'EDGE')
        edgefc_centers = get_frag_centers_fc(reedge_fcarr)
        nodefc_centers = get_frag_centers_fc(renode_fcarr)

        eG = calculate_eG_net_ditopic_PILLAR(edgefc_centers, nodefc_centers, linker_topics)

        # Connected components, largest first; computed once and reused.
        components = sorted(nx.connected_components(eG), key=len, reverse=True)
        eG_subparts = [len(c) for c in components]
        if len(eG_subparts) > 1:
            print(f'this MOF has {len(eG_subparts)} seperated fragments: {eG_subparts}')
        else:
            print(f'this MOF has {len(eG_subparts)} fragment')

        # Integer members are nodes; members named 'E<number>' are edges.
        main_frag = list(components[0])
        main_frag_nodes = [i for i in main_frag if isinstance(i, int)]
        main_frag_edges = [i for i in main_frag if re.sub('[0-9]', '', str(i)) == 'E']

        # Delete "virtual" edges. They are collected first because removing edges
        # while iterating over eG.edges() mutates the dicts being iterated.
        # Edges without a 'type' attribute are left alone.
        virtual_edges = [(u, v) for u, v, kind in eG.edges(data='type') if kind == 'virtual']
        eG.remove_edges_from(virtual_edges)

        unsaturated_nodes = [(n, d) for n, d in eG.degree() if d < node_topics and isinstance(n, int)]
        unsaturated_edges = [(n, d) for n, d in eG.degree() if d < linker_topics and isinstance(n, str)]

        if len(unsaturated_edges) > 0:
            print(f"UNsaturated edges(linkers) exist, need linker_termination <= {len(unsaturated_edges)}")
        else:
            print("only saturated edges(linkers) exist")

        if len(unsaturated_nodes) > 0:
            print(f"UNsaturated nodes exist, <={len(unsaturated_nodes)} nodes need node_termination")
            print(unsaturated_nodes)
        else:
            print("only saturated nodes exist")

        unsaturated_main_frag_nodes = [i for i in unsaturated_nodes if i[0] in main_frag_nodes]
        unsaturated_main_frag_edges = [i for i in unsaturated_edges if i[0] in main_frag_edges]

        main_frag_half_edges_fc = np.vstack(
            ([reedge_fcarr[reedge_fcarr[:, 5] == int(ei[1:])] for ei in main_frag_edges]))
        main_frag_nodes_fc = np.vstack(
            ([renode_fcarr[renode_fcarr[:, 5] == ni] for ni in main_frag_nodes]))
        main_frag_edges_fc, xoo_dict, con_nodes_x_dict = addxoo2edge(
            eG, main_frag_nodes, main_frag_nodes_fc, main_frag_edges, main_frag_half_edges_fc, sc_unit_cell)

        # Same fractional->Cartesian convention as basic_supercell (cell @ frac).
        # Multiplying as frac @ cell instead only agrees with it for a symmetric cell matrix.
        main_frag_nodes_cc = self._fractional_to_cartesian(main_frag_nodes_fc, sc_unit_cell)
        main_frag_edges_cc = self._fractional_to_cartesian(main_frag_edges_fc, sc_unit_cell)

        self.eG = eG
        self.main_frag_nodes = main_frag_nodes
        self.main_frag_edges = main_frag_edges
        self.unsaturated_main_frag_nodes = unsaturated_main_frag_nodes
        self.unsaturated_main_frag_edges = unsaturated_main_frag_edges
        self.node_xoo_dict = xoo_dict
        self.con_nodes_x_dict = con_nodes_x_dict
        self.main_frag_nodes_cc = main_frag_nodes_cc
        self.main_frag_edges_cc = main_frag_edges_cc

    def term_defective_model(self, n_term_file='data/methyl.pdb', e_termfile='data/CCO2.pdb'):
        """Terminate the exposed node and edge sites found by ``defect_missing``.

        ``n_term_file`` is used for node terminations. ``e_termfile`` is used for
        unsaturated edges: a file named ``CCO2.pdb`` selects
        ``terminate_unsaturated_edges_CCO2``, anything else
        ``terminate_unsaturated_edges``.

        Sets ``n_terms_cc``, ``t_edges`` and ``tn_te_cc`` (cleaved nodes +
        terminated edges + node terminations, stacked in that order).
        """
        eG = self.eG
        main_frag_nodes_cc = self.main_frag_nodes_cc
        xoo_dict = self.node_xoo_dict

        # Row indices, within one node, of every X atom and its attached O atoms.
        xoo_ind = []
        for key in list(xoo_dict):
            xoo_ind.append(key)
            xoo_ind += xoo_dict[key]

        # The first node serves as template: all nodes are taken to have the same
        # number of rows, so the per-node "stay" indices are shifted by the node size.
        single_node = main_frag_nodes_cc[main_frag_nodes_cc[:, 5] == main_frag_nodes_cc[0, 5]]
        node_nums = len(single_node)
        single_node_stay_ind = np.asarray([i for i in range(node_nums) if i not in xoo_ind])
        metal_node_indices = np.hstack(
            [node_nums * node + single_node_stay_ind for node in range(len(self.main_frag_nodes))])

        # Node terminations on the exposed X/O sites of unsaturated nodes.
        ex_node_cxo_cc_loose = exposed_xoo_cc(
            eG, self.unsaturated_main_frag_nodes, main_frag_nodes_cc, self.con_nodes_x_dict, xoo_dict)
        n_terms_loose = add_node_terminations(n_term_file, ex_node_cxo_cc_loose)
        if len(n_terms_loose) > 0:
            n_terms_cc = np.vstack((n_terms_loose))
        else:
            n_terms_cc = np.empty((0, 9))
        self.n_terms_cc = n_terms_cc

        # Edge terminations; only the terminating routine depends on the file name.
        cleaved_metal_node = main_frag_nodes_cc[metal_node_indices]
        if os.path.basename(e_termfile) == 'CCO2.pdb':
            terminate_edges = terminate_unsaturated_edges_CCO2
        else:
            terminate_edges = terminate_unsaturated_edges
        t_edges = terminate_edges(
            e_termfile, self.unsaturated_main_frag_edges, eG, self.main_frag_edges_cc, self.linker_topics)

        self.t_edges = t_edges
        self.tn_te_cc = np.vstack((cleaved_metal_node, t_edges, n_terms_cc))

    def write_tntemof(self, gro):
        """Write the terminated model (``tn_te_cc``) to ``gro`` and view it."""
        tempgro(gro, self.tn_te_cc)
        viewgro(gro)

    def defect_replace_linker(self, sub_file, sub_class, candidate_res_idx_list, sub_res_newname='SUB'):
        """Replace selected residues by the molecule in ``sub_file``.

        For every index in ``candidate_res_idx_list`` the residue selected by
        ``fetch_by_idx_resname(..., res_idx, sub_class)`` is looked up; indices
        that match nothing are skipped. The substitute is superimposed onto the
        residue's X atoms, the residue atoms located after the first X atom
        (excluding X atoms) are kept, and the merged residue is renamed to
        ``sub_res_newname`` with renumbered atom names. The result is stored in
        ``replaced_tn_te_cc``.
        """
        node_edge_term_cc = self.tn_te_cc
        for res_idx in candidate_res_idx_list:
            fetch_res_mask, other_mask = fetch_by_idx_resname(node_edge_term_cc, res_idx, sub_class)
            other_res = node_edge_term_cc[other_mask]
            selected_res = node_edge_term_cc[fetch_res_mask]
            if len(selected_res) == 0:
                continue

            # Column 2 is the atom name; X atoms are the anchors for the superposition.
            X_atoms_ind = [i for i in range(len(selected_res)) if selected_res[i, 2][0] == 'X']
            outer_atoms_ind = [
                j for j in range(len(selected_res))
                if j > X_atoms_ind[0] and j not in X_atoms_ind
            ]
            stay_outer_atoms = selected_res[outer_atoms_ind]
            X_atoms = selected_res[X_atoms_ind]

            sub_data = sub_pdb(sub_file)
            subX, subX_ind = Xpdb(sub_data, 'X')
            subX_coords_cc = subX[:, -3:].astype('float')
            X_atoms_coords_cc = X_atoms[:, -3:].astype('float')

            # Rigid transform that maps the substitute's X atoms onto the residue's X atoms.
            _, rot, trans = superimpose(subX_coords_cc, X_atoms_coords_cc)
            sub_coords = sub_data[:, -3:].astype('float')
            placed_sub_data = np.hstack((sub_data[:, :-3], np.dot(sub_coords, rot) + trans))

            sub_edge = np.vstack((stay_outer_atoms, placed_sub_data))
            sub_edge[:, 5] = stay_outer_atoms[0, 5]
            sub_edge[:, 4] = sub_res_newname
            # Renumber atom names: element letters followed by the 1-based row number.
            for row in range(len(sub_edge)):
                sub_edge[row, 2] = re.sub('[0-9]', '', sub_edge[row, 2]) + str(row + 1)
            node_edge_term_cc = np.vstack((other_res, sub_edge))
        self.replaced_tn_te_cc = node_edge_term_cc

    def write_view_replaced(self, gro):
        """Write the linker-replaced model (``replaced_tn_te_cc``) to ``gro`` and view it."""
        tempgro(gro, self.replaced_tn_te_cc)
        viewgro(gro)
        

In [6]:
# Stack the per-atom records into one array and split it by the tag stored in column 7.
placed_all = np.array(placed_all)
record_tag = placed_all[:, 7]
placed_N = placed_all[record_tag == 'NODE']
placed_E = placed_all[record_tag == 'EDGE']


In [11]:

# Termination fragments: one for exposed node sites, one for unsaturated linkers.
n_term_file = 'data/terminations_database/methyl.pdb'
e_termfile = 'data/terminations_database/CCO2.pdb'

supercell = [2,1,1]

# Build the supercell and write it out. node_topics has to be set before defect_missing().
mof = MOF_ditopic()
mof.node_topics = 2
mof.load(sG,placed_all,sc_unit_cell,placed_N,placed_E)
# The node termination file is taken from n_term_file so the path is defined in one place.
mof.basic_supercell(
    np.asarray(supercell),
    term_file = n_term_file,
    boundary_cut_buffer = -0.6,
    edge_center_check_buffer = 0.20,
    cutxyz=[True,True,True],
)
mof.write_basic_supercell('20test.gro','20test.xyz')

# First model: nothing removed, exposed sites terminated.
mof.defect_missing()
mof.term_defective_model(n_term_file=n_term_file,e_termfile=e_termfile)
mof.write_tntemof('30.gro')

# Second model: cleave all unsaturated linkers. Their names look like 'E<number>';
# the number is the edge index expected by defect_missing().
u_edge_idx = [int(edge_name[1:]) for edge_name, _degree in mof.unsaturated_main_frag_edges]
print(u_edge_idx)

mof.defect_missing([],u_edge_idx)
mof.term_defective_model(n_term_file=n_term_file,e_termfile=e_termfile)
mof.write_tntemof('33.gro')
 

row_diff_idx_loose [0, 1, 2, 3, 4, 5, 6, 7, 8, 18, 19, 20, 21, 22, 23, 24, 25, 26, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 204, 205, 206, 207, 208, 209, 211, 212, 213, 214, 215, 216, 217, 218, 219, 440, 441, 442, 443, 444, 445, 446, 447, 448, 458, 459, 460, 461, 462, 463, 464, 465, 466

E1 [ 0.497  0.751 -0.749] 1.1713287326792594
E2 [ 0.004  0.729 -1.236] 1.4349749126726918
E3 [-0.004  1.271 -1.236] 1.7728939618600994
E4 [ 0.497  0.232 -0.74 ] 0.9211042286299634
E5 [0.497 0.751 0.251] 0.9348855544931689
E6 [ 0.004  0.729 -0.236] 0.766259094562668
E7 [-0.004  1.271 -0.236] 1.2927308304515677
E8 [0.497 0.232 0.26 ] 0.6069868202852513
E9 [ 0.497  1.751 -0.749] 1.968250746221121
E10 [ 0.004  1.729 -1.236] 2.125359499002463
E11 [-0.004  2.271 -1.236] 2.585566282267774
E12 [ 0.497  1.232 -0.74 ] 1.5206686029506888
E13 [0.497 1.751 0.251] 1.837392445831864
E14 [ 0.004  1.729 -0.236] 1.745036675832345
E15 [-0.004  2.271 -0.236] 2.2832330148278777
E16 [0.497 1.232 0.26 ] 1.353673889827236
E17 [ 1.497  0.751 -0.749] 1.8346691799885886
E18 [ 1.004  0.729 -1.236] 1.7513289239888663
E19 [ 0.996  1.271 -1.236] 2.0335075608416116
E20 [ 1.497  0.232 -0.74 ] 1.685951660042482
E21 [1.497 0.751 0.251] 1.6935202980773512
E22 [ 1.004  0.729 -0.236] 1.2629936658590177
E23 [ 0.996  1.271 -

[2, 3, 4, 8, 10, 11, 15, 18, 19, 20, 24, 26, 27, 31, 34, 35, 36, 40, 42, 43, 47, 49, 53, 55, 56, 60, 62, 63, 67, 69]
E1 [ 0.497  0.751 -0.749] 1.1713287326792594
E2 [0.497 0.751 0.251] 0.9348855544931689
E3 [ 0.004  0.729 -0.236] 0.766259094562668
E4 [-0.004  1.271 -0.236] 1.2927308304515677
E5 [ 0.497  1.751 -0.749] 1.968250746221121
E6 [ 0.497  1.232 -0.74 ] 1.5206686029506888
E7 [0.497 1.751 0.251] 1.837392445831864
E8 [ 0.004  1.729 -0.236] 1.745036675832345
E9 [0.497 1.232 0.26 ] 1.353673889827236
E10 [ 1.497  0.751 -0.749] 1.8346691799885886
E11 [1.497 0.751 0.251] 1.6935202980773512
E12 [ 1.004  0.729 -0.236] 1.2629936658590177
E13 [ 0.996  1.271 -0.236] 1.6319169709271364
E14 [ 1.497  1.751 -0.749] 2.422397779061069
E15 [ 1.497  1.232 -0.74 ] 2.075194689661671
E16 [1.497 1.751 0.251] 2.317328418675264
E17 [ 1.004  1.729 -0.236] 2.013244396490401
E18 [1.497 1.232 0.26 ] 1.9561270408641664
E19 [ 2.497  0.751 -0.749] 2.712934020576247
E20 [2.497 0.751 0.251] 2.6195440442947318
E21