In [1]:
import os
import networkx as nx
import numpy as np
import re
import glob

####### Global options #######
import configuration

pi = np.pi

# Maps the element symbol used as a vertex label to the integer that ends up
# in the 'v<N>' vertex tag of the output name (see the vertex-assignment code,
# which builds 'v' + str(vname_dict[symbol])).
# 'Sc' used to share the value 24 with 'Ca' (25 was skipped), which made two
# different vertex types indistinguishable in the tag; every symbol now maps
# to a unique integer in 1..30.
vname_dict = {'V':1,'Er':2,'Ti':3,'Ce':4,'S':5,
              'H':6,'He':7,'Li':8,'Be':9,'B':10,
              'C':11,'N':12,'O':13,'F':14,'Ne':15,
              'Na':16,'Mg':17,'Al':18,'Si':19,'P':20,
              'Cl':21,'Ar':22,'K':23,'Ca':24,'Sc':25,
              'Cr':26,'Mn':27,'Fe':28,'Co':29,'Ni':30}

# Element symbols treated as metals. The build loop intersects the elements of
# each assigned node building block with this list to apply the
# SINGLE_METAL_MOFS_ONLY / MOFS_ONLY filters.
metal_elements = ['Ac','Ag','Al','Am','Au','Ba','Be','Bi',
				  'Bk','Ca','Cd','Ce','Cf','Cm','Co','Cr',
				  'Cs','Cu','Dy','Er','Es','Eu','Fe','Fm',
				  'Ga','Gd','Hf','Hg','Ho','In','Ir',
				  'K','La','Li','Lr','Lu','Md','Mg','Mn',
				  'Mo','Na','Nb','Nd','Ni','No','Np','Os',
				  'Pa','Pb','Pd','Pm','Pr','Pt','Pu','Ra',
				  'Rb','Re','Rh','Ru','Sc','Sm','Sn','Sr',
				  'Ta','Tb','Tc','Th','Ti','Tl','Tm','U',
				  'V','W','Y','Yb','Zn','Zr']


####### Global options #######
# Copy the run-time switches of the project-local `configuration` module into
# notebook globals so the cells below can use the bare names.
IGNORE_ALL_ERRORS = configuration.IGNORE_ALL_ERRORS
# NOTE: PRINT is hard-coded below instead of being read from the configuration
# module (the original assignment is kept commented out for reference).
#PRINT = configuration.PRINT
PRINT =True
CONNECTION_SITE_BOND_LENGTH = configuration.CONNECTION_SITE_BOND_LENGTH
WRITE_CHECK_FILES = configuration.WRITE_CHECK_FILES
WRITE_CIF = configuration.WRITE_CIF
ALL_NODE_COMBINATIONS = configuration.ALL_NODE_COMBINATIONS
USER_SPECIFIED_NODE_ASSIGNMENT = configuration.USER_SPECIFIED_NODE_ASSIGNMENT
COMBINATORIAL_EDGE_ASSIGNMENT = configuration.COMBINATORIAL_EDGE_ASSIGNMENT
# NOTE: CHARGES is likewise forced to False here, so the charge-handling
# branches of the build loop are never taken regardless of the configuration.
#CHARGES = configuration.CHARGES
CHARGES = False
SCALING_ITERATIONS = configuration.SCALING_ITERATIONS
SYMMETRY_TOL = configuration.SYMMETRY_TOL
BOND_TOL = configuration.BOND_TOL
ORIENTATION_DEPENDENT_NODES = configuration.ORIENTATION_DEPENDENT_NODES
PLACE_EDGES_BETWEEN_CONNECTION_POINTS = configuration.PLACE_EDGES_BETWEEN_CONNECTION_POINTS
RECORD_CALLBACK = configuration.RECORD_CALLBACK
OUTPUT_SCALING_DATA = configuration.OUTPUT_SCALING_DATA
FIX_UC = configuration.FIX_UC
MIN_CELL_LENGTH = configuration.MIN_CELL_LENGTH
OPT_METHOD = configuration.OPT_METHOD
PRE_SCALE = configuration.PRE_SCALE
SINGLE_METAL_MOFS_ONLY = configuration.SINGLE_METAL_MOFS_ONLY
MOFS_ONLY = configuration.MOFS_ONLY
MERGE_CATENATED_NETS = configuration.MERGE_CATENATED_NETS
RUN_PARALLEL = configuration.RUN_PARALLEL
REMOVE_DUMMY_ATOMS = configuration.REMOVE_DUMMY_ATOMS

from ciftemplate2graph import ct2g
from vertex_edge_assign import vertex_assign, assign_node_vecs2edges
from cycle_cocyle import cycle_cocyle, Bstar_alpha
from bbcif_properties import cncalc, bbelems
from SBU_geometry import SBU_coords
from scale import scale
from scaled_embedding2coords import omega2coords
from place_bbs import scaled_node_and_edge_vectors, place_nodes_tetra, place_edges,place_nodes_tri
from remove_net_charge import fix_charges
from remove_dummy_atoms import remove_Fr
from adjust_edges import adjust_edges
from write_cifs import write_check_cif, write_cif_nobond, bond_connected_components, distance_search_bond, fix_bond_sym, merge_catenated_cifs
from scale_animation import scaling_callback_animation, write_scaling_callback_animation, animate_objective_minimization
import itertools
from random import choice
from ditopic import *
from supercell import Carte_points_generator
from output import temp_xyz,tempgro
from terminations import *

In [2]:
# --- Input locations ---------------------------------------------------------
templates_dir = 'template_database'
nodes_dir, edges_dir = 'nodes', 'edges'

# --- Net template to build (file name inside templates_dir) ------------------
template = 'spn.cif'  # alternative used previously: 'rna.cif'

# --- Supercell request -------------------------------------------------------
# box_bound is derived as supercell + 1 and is passed to the supercell /
# termination helpers in later cells.
supercell = np.array([0, 0, 0])
box_bound = supercell + 1

# --- Saturation settings -----------------------------------------------------
nodes_saturation, linker_saturation = 6, 3


In [None]:

# Cell-local overrides of the values loaded from `configuration` in the first
# cell: edges are always re-placed between connection points, and the verbose
# voltage-graph dump is switched off.
PLACE_EDGES_BETWEEN_CONNECTION_POINTS = True
PRINT=False
# Banner announcing which template is being processed.
print()
print('=========================================================================================================')
print('template :',template)                                          
print('=========================================================================================================')
print()

# Incremented once per net yielded by ct2g in the loop that follows; also used
# afterwards in the '_CAT<n>' output-name suffix and when merging catenated cifs.
cat_count = 0
# Main build loop: for every net yielded by ct2g, try every vertex assignment
# (vas) and every edge assignment (eas), scale the embedding, place the node and
# edge building blocks and optionally write the resulting cif.
#
# NOTE: later cells read TG, sc_unit_cell, tri_node_name, frame_nbb_node,
# cleaved_placed_nodes and cleaved_placed_edges as they are left by the LAST
# iteration that reached the placement step.
#
# Fixes relative to the previous version of this loop:
#   * an empty vertex assignment now really skips the net (the old `continue`
#     sat inside an inner `for` and therefore skipped nothing);
#   * the collapsed-cell flag is no longer reset on every cell parameter, so a
#     collapse of `a` or `b` is no longer masked by a healthy `c`;
#   * the output-name truncation acts on `outcifname` (it used to test the
#     stale loop variable `cifname`, i.e. the last edge cif name);
#   * the result of fix_charges is no longer overwritten unconditionally;
#   * edge files are listed from `edges_dir` instead of a hard-coded 'edges'.
for net in ct2g(template,templates_dir):

    cat_count += 1
    TG, start, unit_cell, TVT, TET, TNAME, a, b, c, ang_alpha, ang_beta, ang_gamma, max_le, catenation = net

    TVT = sorted(TVT, key=lambda x:x[0], reverse=True) # sort node with connected degree, the first one is the highest(full)-coordinated node
    TET = sorted(TET, reverse=True) #sort node_pair by the node_index

    # get node cif information from node dir: (cncalc result, file name) pairs
    node_cns = [(cncalc(node, nodes_dir), node) for node in os.listdir(nodes_dir)]

    print('Number of vertices = ', len(TG.nodes()))
    print('Number of edges = ', len(TG.edges()))
    print()

    # number of edges of each edge type in the template graph
    edge_counts = dict((data['type'],0) for e0,e1,data in TG.edges(data=True))
    for e0,e1,data in TG.edges(data=True):
        edge_counts[data['type']] += 1

    if PRINT:

        print('There are', len(TG.nodes()), 'vertices in the voltage graph:')
        print()
        v = 0

        for node in TG.nodes():
            v += 1
            print(v,':',node)
            node_dict = TG.nodes[node]
            print('type : ', node_dict['type'])
            print('cartesian coords : ', node_dict['ccoords'])
            print('fractional coords : ', node_dict['fcoords'])
            #print('degree : ', node_dict['cn'][0])
            print()

        print('There are', len(TG.edges()), 'edges in the voltage graph:')
        print()

        for edge in TG.edges(data=True,keys=True):
            edge_dict = edge[3]
            ind = edge[2]
            print(ind,':',edge[0],edge[1])
            print('length : ',edge_dict['length'])
            print('type : ',edge_dict['type'])
            print('label : ',edge_dict['label'])
            print('positive direction :',edge_dict['pd'])
            print('cartesian coords : ',edge_dict['ccoords'])
            print('fractional coords : ',edge_dict['fcoords'])
            print()

    vas = vertex_assign(nodes_dir,TG, TVT, node_cns, unit_cell, USER_SPECIFIED_NODE_ASSIGNMENT, SYMMETRY_TOL, ALL_NODE_COMBINATIONS)
    CB,CO = cycle_cocyle(TG)

    # An empty assignment means some vertex has no matching building block;
    # skip this net entirely, as the message announces.
    if any(len(va) == 0 for va in vas):
        print('At least one vertex does not have a building block with the correct number of connection sites.')
        print('Moving to the next template...')
        print()
        continue

    if len(CB) != (len(TG.edges()) - len(TG.nodes()) + 1):
        print('The cycle basis is incorrect.')
        print('The number of cycles in the cycle basis does not equal the rank of the cycle space.')
        print('Moving to the next template...')
        continue

    num_edges = len(TG.edges())
    Bstar, alpha = Bstar_alpha(CB,CO,TG,num_edges)

    if PRINT:
        print('B* (top) and alpha (bottom) for the barycentric embedding are:')
        print()
        for i in Bstar:
            print(i)
        print()
        for i in alpha:
            print(i)
        print()

    num_vertices = len(TG.nodes())

    if COMBINATORIAL_EDGE_ASSIGNMENT:
        # every combination of edge files over the edge types
        eas = list(itertools.product([e for e in os.listdir(edges_dir)], repeat = len(TET)))
    else:
        # a single assignment: cycle through the sorted edge files until every
        # edge type has one
        edge_files = sorted([e for e in os.listdir(edges_dir)])
        eas = []
        i = 0
        while len(eas) < len(TET):
            eas.append(edge_files[i])
            i += 1
            if i == len(edge_files):
                i = 0
        eas = [eas]

    g = 0

    for va in vas:
        #check if assigned node has metal element
        node_elems = [bbelems(i[1], nodes_dir) for i in va]
        metals = [[i for i in j if i in metal_elements] for j in node_elems]
        metals = list(set([i for j in metals for i in j]))
        #set node cif files as vertex assignment
        v_set0 = [('v' + str(vname_dict[re.sub('[0-9]','',i[0])]), i[1]) for i in va]
        v_set1 = sorted(list(set(v_set0)), key=lambda x: x[0])
        v_set = [v[0] + '-' + v[1] for v in v_set1]

        print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
        print('vertex assignment : ',v_set)
        print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
        print()

        if SINGLE_METAL_MOFS_ONLY and len(metals) != 1:
            print(v_set, 'contains no metals or multiple metal elements, no cif will be written')
            print()
            continue

        if MOFS_ONLY and len(metals) < 1:
            print(v_set, 'contains no metals, no cif will be written')
            print()
            continue

        # add cifname to TG.nodes
        for v in va:
            for n in TG.nodes(data=True):
                if v[0] == n[0]:
                    n[1]['cifname'] = v[1]

        for ea in eas:

            g += 1

            print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
            print('edge assignment : ',ea)
            print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
            print()

            type_assign = dict((k,[]) for k in sorted(TET, reverse=True))
            for k,m in zip(TET,ea):
                type_assign[k] = m

            # add cifname to TG.edge (edge types match in either orientation)
            for e in TG.edges(data=True):
                ty = e[2]['type']
                for k in type_assign:
                    if ty == k or (ty[1],ty[0]) == k:
                        e[2]['cifname'] = type_assign[k]

            # Only consumed by the (currently commented-out) bond check below.
            num_possible_XX_bonds = 0
            for edge_type, cifname in zip(TET, ea):
                if cifname == 'ntn_edge.cif':
                    factor = 1
                else:
                    factor = 2
                edge_type_count = edge_counts[edge_type]
                num_possible_XX_bonds += factor * edge_type_count

            ea_dict = assign_node_vecs2edges(nodes_dir,TG, unit_cell, SYMMETRY_TOL, template)
            all_SBU_coords = SBU_coords(TG, ea_dict, CONNECTION_SITE_BOND_LENGTH)
            sc_a, sc_b, sc_c, sc_alpha, sc_beta, sc_gamma, sc_covar, Bstar_inv, max_length, callbackresults, ncra, ncca, scaling_data = scale(all_SBU_coords,a,b,c,ang_alpha,ang_beta,ang_gamma,max_le,num_vertices,Bstar,alpha,num_edges,FIX_UC,SCALING_ITERATIONS,PRE_SCALE,MIN_CELL_LENGTH,OPT_METHOD)

            print('*******************************************')
            print('The scaled unit cell parameters are : ')
            print('*******************************************')
            print('a    :', np.round(sc_a, 5))
            print('b    :', np.round(sc_b, 5))
            print('c    :', np.round(sc_c, 5))
            print('alpha:', np.round(sc_alpha, 5))
            print('beta :', np.round(sc_beta, 5))
            print('gamma:', np.round(sc_gamma, 5))
            print()

            # A cell length sitting exactly on MIN_CELL_LENGTH is treated as a
            # collapse; the flag is set once for ANY of a, b, c.
            cflag = False
            for sc, name in zip((sc_a, sc_b, sc_c), ('a', 'b', 'c')):
                if sc == MIN_CELL_LENGTH:
                    print('unit cell parameter', name, 'may have collapsed during scaling!')
                    print('try re-running with', name, 'fixed or a larger MIN_CELL_LENGTH')
                    print('no cif will be written')
                    cflag = True

            if cflag:
                continue

            scaled_params = [sc_a,sc_b,sc_c,sc_alpha,sc_beta,sc_gamma]

            sc_Alpha = np.r_[alpha[0:num_edges-num_vertices+1,:], sc_covar]
            sc_omega_plus = np.dot(Bstar_inv, sc_Alpha)

            # Cartesian cell vectors from the scaled lengths/angles (degrees);
            # the vectors are the COLUMNS of sc_unit_cell.
            ax = sc_a
            ay = 0.0
            az = 0.0
            bx = sc_b * np.cos(sc_gamma * pi/180.0)
            by = sc_b * np.sin(sc_gamma * pi/180.0)
            bz = 0.0
            cx = sc_c * np.cos(sc_beta * pi/180.0)
            cy = (sc_c * sc_b * np.cos(sc_alpha * pi/180.0) - bx * cx) / by
            cz = (sc_c ** 2.0 - cx ** 2.0 - cy ** 2.0) ** 0.5
            sc_unit_cell = np.asarray([[ax,ay,az],[bx,by,bz],[cx,cy,cz]]).T

            scaled_coords = omega2coords(start, TG, sc_omega_plus, (sc_a,sc_b,sc_c,sc_alpha,sc_beta,sc_gamma), num_vertices,templates_dir, template, g, WRITE_CHECK_FILES)
            nvecs,evecs,node_placed_edges = scaled_node_and_edge_vectors(scaled_coords, sc_omega_plus, sc_unit_cell, ea_dict)

            placed_nodes, tri_node,frame_nbb_node,tri_node_name,node_bonds = place_nodes_tri(nvecs, nodes_dir,CHARGES, ORIENTATION_DEPENDENT_NODES)
            placed_edges, edge_bonds = place_edges(evecs, edges_dir,CHARGES, len(placed_nodes))

            if RECORD_CALLBACK:

                vnames = '_'.join([v.split('.')[0] for v in v_set])

                if len(ea) <= 5:
                    enames = '_'.join([e[0:-4] for e in ea])
                else:
                    enames = str(len(ea)) + '_edges'

                prefix = template[0:-4] + '_' +  vnames + '_' + enames

                frames = scaling_callback_animation(callbackresults, alpha, Bstar_inv, ncra, ncca, num_vertices, num_edges, TG, template, g, False)
                write_scaling_callback_animation(frames, prefix)
                animate_objective_minimization(callbackresults, prefix)

            # NOTE(review): cleaved_placed_nodes / cleaved_placed_edges are only
            # bound in this branch; the flag is forced to True just before the
            # loop, so the lines below rely on that override.
            if PLACE_EDGES_BETWEEN_CONNECTION_POINTS:
                placed_edges,cleaved_placed_edges,cleaved_placed_nodes,X_Opair = adjust_edges(placed_edges, placed_nodes, sc_unit_cell)

            # append a classification column to every atom row
            cleaved_placed_nodes = np.c_[cleaved_placed_nodes, np.array(['NODE' for i in range(len(cleaved_placed_nodes))])]
            cleaved_placed_edges = np.c_[cleaved_placed_edges, np.array(['EDGE' for i in range(len(cleaved_placed_edges))])]
            placed_nodes = np.c_[placed_nodes, np.array(['node' for i in range(len(placed_nodes))])]
            placed_edges = np.c_[placed_edges, np.array(['edge' for i in range(len(placed_edges))])]

            placed_all = list(placed_nodes) + list(placed_edges)
            bonds_all = node_bonds + edge_bonds

            if WRITE_CHECK_FILES:
                write_check_cif(template, placed_nodes, placed_edges, g, scaled_params, sc_unit_cell)

            if REMOVE_DUMMY_ATOMS:
                placed_all, bonds_all, nconnections = remove_Fr(placed_all,bonds_all)

            print('computing X-X bonds...')
            print()
            print('*******************************************')
            print('Bond formation : ')
            print('*******************************************')

            # Bond detection is disabled; cifs are written without bonds.
            #fixed_bonds, nbcount, bond_check_passed = bond_connected_components(placed_all, bonds_all, sc_unit_cell, max_length, BOND_TOL, nconnections, num_possible_XX_bonds)
            #print('there were ', nbcount, ' X-X bonds formed')
            #bond_check_passed =False
            #if bond_check_passed:
            #	print('bond check passed')
            #	bond_check_code = ''
            #else:
            #	print('bond check failed, attempting distance search bonding...')
            #	fixed_bonds, nbcount = distance_search_bond(placed_all, bonds_all, sc_unit_cell, 2.5)
            #	bond_check_code = '_BOND_CHECK_FAILED'
            #	print('there were', nbcount, 'X-X bonds formed')
            #print()

            if CHARGES:
                fc_placed_all, netcharge, onetcharge, rcb = fix_charges(placed_all)
            else:
                fc_placed_all = placed_all

            #fixed_bonds = fix_bond_sym(fixed_bonds, placed_all, sc_unit_cell)

            if CHARGES:
                print('*******************************************')
                print('Charge information :                       ')
                print('*******************************************')
                print('old net charge                  :', np.round(onetcharge, 5))
                print('rescaling magnitude             :', np.round(rcb, 5))

                # dump the residual net charge on one randomly chosen atom
                remove_net = choice(range(len(fc_placed_all)))
                fc_placed_all[remove_net][4] -= np.round(netcharge, 4)

                print('new net charge (after rescaling):', np.sum([li[4] for li in fc_placed_all]))
                print()

            # output name: <template>_<vertex tags>_<run-length encoded edge names>nobond[_CAT<n>].cif
            vnames = '_'.join([v.split('.')[0] for v in v_set])
            enames_list = [e[0:-4] for e in ea]
            enames_grouped = [list(edge_gr) for ind,edge_gr in itertools.groupby(enames_list)]
            enames_grouped = [(len(edge_gr), list(set(edge_gr))) for edge_gr in enames_grouped]
            enames_flat = [str(L) + '-' + '_'.join(names) for L,names in enames_grouped]
            enames = '_'.join(enames_flat)
            bond_check_code = 'nobond'
            if catenation:
                outcifname = template[0:-4] + '_' +  vnames + '_' + enames + bond_check_code + '_' + 'CAT' + str(cat_count) + '.cif'
            else:
                outcifname = template[0:-4] + '_' +  vnames + '_' + enames + bond_check_code + '.cif'

            if WRITE_CIF:
                print('writing cif...')
                print()
                # keep the file name within the common 255-character limit
                if len(outcifname) > 255:
                    outcifname = outcifname[0:241]+'_truncated.cif'
                write_cif_nobond(fc_placed_all, scaled_params, sc_unit_cell, outcifname, CHARGES, wrap_coords=False)

# Merge the per-net cifs of a catenated template into single files.
# `catenation` is only bound inside the template loop, so it is read only when
# at least one net was processed (cat_count > 0); previously an empty template
# iterator ended in a NameError here.
if cat_count > 0 and catenation and MERGE_CATENATED_NETS:

    print('merging catenated cifs...')
    cat_cifs = glob.glob('output_cifs/*_CAT*.cif')

    for comb in itertools.combinations(cat_cifs, cat_count):

        # Strip the trailing '_CAT<n>.cif' to recover the build name. A regex
        # is used so that two-digit net counters are handled as well (the old
        # fixed 9-character slice only worked for a single digit).
        builds = {re.sub(r'_CAT\d+\.cif$', '', name) for name in comb}

        print(builds)

        # only merge files that belong to one and the same build
        if len(builds) != 1:
            continue

        merge_catenated_cifs(comb, CHARGES)

    #for cif in cat_cifs:
    #	os.remove(cif)

In [4]:
def limit_x(x):
    """Shift x by whole units until it lies in the closed interval [-0.5, 0.5].

    Values already inside the interval (including both end points) are
    returned unchanged.
    """
    wrapped = x
    while True:
        if wrapped > 0.5:
            wrapped = wrapped - 1
        elif wrapped < -0.5:
            wrapped = wrapped + 1
        else:
            return wrapped

def centerize_edges_cc(target_edges_list,target_node_c,sc_unit_cell):
    """Translate each edge block to the periodic image closest to a node centre.

    Works on Cartesian coordinates held in columns 1:4 of every array in
    target_edges_list. For each block, the mean position relative to
    target_node_c is converted to fractional coordinates with the inverse of
    sc_unit_cell, each component is wrapped with limit_x, and the whole block
    is shifted by the resulting difference. All other columns are carried over
    unchanged. Returns the shifted blocks stacked vertically.
    """
    shifted_blocks = []
    for block in target_edges_list:
        rel_cc = block[:,1:4] - target_node_c
        rel_fc = np.dot(rel_cc, np.linalg.inv(sc_unit_cell))
        centre_fc = np.mean(rel_fc, axis=0).tolist()
        fx, fy, fz = centre_fc
        wrapped_fc = np.asarray([limit_x(fx), limit_x(fy), limit_x(fz)])
        # lattice translation (in fractional units) that brings the centre home
        shift_fc = wrapped_fc - np.asarray(centre_fc)
        moved_cc = rel_cc + np.dot(shift_fc, sc_unit_cell) + target_node_c
        shifted_blocks.append(np.hstack((block[:,0:1], moved_cc, block[:,4:])))
    return np.vstack((shifted_blocks))

def centerize_edges_fc(target_edges_list,target_node_c_fc):
    """Fractional-coordinate counterpart of centerize_edges_cc.

    Columns 1:4 of every array in target_edges_list are taken as fractional
    coordinates. The mean position of each block relative to target_node_c_fc
    is wrapped component-wise with limit_x and the block is shifted by the
    resulting difference. Returns the shifted blocks stacked vertically.
    """
    shifted_blocks = []
    for block in target_edges_list:
        rel_fc = block[:,1:4] - target_node_c_fc
        centre_fc = np.mean(rel_fc, axis=0).tolist()
        fx, fy, fz = centre_fc
        wrapped_fc = np.asarray([limit_x(fx), limit_x(fy), limit_x(fz)])
        shift_fc = wrapped_fc - np.asarray(centre_fc)
        moved_fc = rel_fc + shift_fc + target_node_c_fc
        shifted_blocks.append(np.hstack((block[:,0:1], moved_fc, block[:,4:])))
    return np.vstack((shifted_blocks))

def merge_porphyrin_node_edge(TG,porphyrin_node_name,placed_nodes_arr,placed_edges_arr,sc_unit_cell):
    """Fuse each listed node with its incident edges into one 'EDGE' residue.

    For every node name in porphyrin_node_name the node atoms are fetched by
    the node's 'index' attribute, translated so that their mean position has
    fractional coordinates in [0, 1), and stacked with the incident edges
    after those were re-centred around the node (centerize_edges_cc).
    Column 6 of the merged block is set to -2, -3, ... (one value per node, in
    input order) and column 7 to 'EDGE'. Works in Cartesian coordinates.
    Returns all merged blocks stacked vertically.
    """
    all_edges = list(TG.edges(data=True,keys=True))

    # (node name, node index, first key entry of every edge touching the node)
    porphyrins = [
        (
            name,
            TG.nodes[name]['index'],
            [edge[2][0] for edge in all_edges if name in edge[0:2]],
        )
        for name in porphyrin_node_name
    ]

    merged_blocks = []
    for n, (_, node_index, edge_indices) in enumerate(porphyrins):
        target_nodes = fetch_node_withidx(placed_nodes_arr,[node_index])

        # move the node so that its centre has fractional coordinates in [0, 1)
        centre_cc = np.mean(target_nodes[:,1:4],axis=0)
        centre_fc = np.dot(centre_cc,np.linalg.inv(sc_unit_cell))
        wrap_fc = np.mod(centre_fc,1) - centre_fc
        target_nodes[:,1:4] = target_nodes[:,1:4] + np.dot(wrap_fc,sc_unit_cell)

        edge_blocks = fetch_edge_withidx_sep(placed_edges_arr,edge_indices)

        # pull every incident edge to the image nearest the moved node
        centre_cc = np.mean(target_nodes[:,1:4],axis=0)
        target_edges = centerize_edges_cc(edge_blocks,centre_cc,sc_unit_cell)

        block = np.vstack((target_nodes,target_edges))
        block[:,6] = [-1*n-2]*len(block)
        block[:,7] = ['EDGE']*len(block)
        merged_blocks.append(block)

    return np.vstack(merged_blocks)

def merge_multitopic_node_edge_fc(TG,multi_node_name,placed_nodes_arr_fc,placed_edges_arr_fc):
    """Fuse each listed node with its incident edges, in fractional coordinates.

    Same procedure as merge_porphyrin_node_edge but columns 1:4 are treated as
    fractional coordinates, so no unit-cell matrix is needed. Column 6 of each
    merged block is set to -2, -3, ... (one value per node, in input order)
    and column 7 to 'EDGE'. Returns all merged blocks stacked vertically.
    """
    all_edges = list(TG.edges(data=True,keys=True))

    # (node name, node index, first key entry of every edge touching the node)
    multitopics = [
        (
            name,
            TG.nodes[name]['index'],
            [edge[2][0] for edge in all_edges if name in edge[0:2]],
        )
        for name in multi_node_name
    ]

    merged_blocks = []
    for n, (_, node_index, edge_indices) in enumerate(multitopics):
        target_nodes = fetch_node_withidx(placed_nodes_arr_fc,[node_index])

        # move the node so that its centre lies in [0, 1) on every axis
        centre_fc = np.mean(target_nodes[:,1:4],axis=0)
        wrap_fc = np.mod(centre_fc,1) - centre_fc
        target_nodes[:,1:4] = target_nodes[:,1:4] + wrap_fc

        edge_blocks = fetch_edge_withidx_sep(placed_edges_arr_fc,edge_indices)
        centre_fc = np.mean(target_nodes[:,1:4],axis=0)
        target_edges = centerize_edges_fc(edge_blocks,centre_fc)

        block = np.vstack((target_nodes,target_edges))
        block[:,6] = [-1*n-2]*len(block)
        block[:,7] = ['EDGE']*len(block)
        merged_blocks.append(block)

    return np.vstack(merged_blocks)

In [5]:
def find_new_node_beginning(arr):
    """Pick the row of arr to be used as the new origin.

    The column-wise minimum is returned when it is an actual row of arr.
    Otherwise the search is narrowed to the rows with the smallest first
    column, and, if still necessary, to those that additionally have the
    smallest second column; the column-wise minimum of the narrowed set is
    returned.
    """
    def _is_row(candidate):
        return any(np.array_equal(row, candidate) for row in arr)

    corner = np.min(arr, axis=0)
    if _is_row(corner):
        return corner

    # restrict to the rows sharing the smallest x
    x_rows = arr[arr[:,0] == np.min(arr[:,0])]
    corner = np.min(x_rows, axis=0)
    if _is_row(corner):
        return corner

    # restrict further to the rows sharing the smallest y among those
    xy_rows = x_rows[x_rows[:,1] == np.min(x_rows[:,1])]
    return np.min(xy_rows, axis=0)



In [6]:

# Frame nodes are all template vertices that are not tritopic (merged) nodes;
# one of them is chosen as the new origin of the fractional coordinates.
frame_node_name = [name for name in TG.nodes() if name not in tri_node_name]
frame_node_fc = np.asarray([TG.nodes[name]['fcoords'] for name in frame_node_name])

new_beginning_fc = find_new_node_beginning(frame_node_fc)

placed_nodes_arr, nodes_id = placed_arr(cleaved_placed_nodes)
placed_edges_arr, edges_id = placed_arr(cleaved_placed_edges)

# Cartesian -> fractional (columns 1:4), shifted so that the chosen frame node
# sits at the origin; every other column is carried over unchanged.
placed_nodes_fc, placed_edges_fc = (
    np.hstack((
        atoms[:,0:1],
        np.dot(atoms[:,1:4], np.linalg.inv(sc_unit_cell)) - new_beginning_fc,
        atoms[:,4:],
    ))
    for atoms in (placed_nodes_arr, placed_edges_arr)
)

frame_node_ccoords = np.c_[frame_nbb_node, ['NODE']*len(frame_nbb_node)]
placed_frame_node, _ = placed_arr(frame_node_ccoords)
placed_frame_node_fc = np.hstack((
    placed_frame_node[:,0:1],
    np.dot(placed_frame_node[:,1:4], np.linalg.inv(sc_unit_cell)) - new_beginning_fc,
    placed_frame_node[:,4:],
))

tritopic_edges_fcoords = merge_multitopic_node_edge_fc(TG, tri_node_name, placed_nodes_fc, placed_edges_fc)

# The frame-node version (rather than placed_nodes_fc) is used because the
# X/O atoms of the nodes are needed later when terminations are added.
target_all_fc = np.vstack((placed_frame_node_fc, tritopic_edges_fcoords))
#target_all_fc = np.vstack((placed_nodes_fc,tritopic_edges_fcoords))
supercell_Carte = Carte_points_generator(supercell)


In [7]:
def no_overlap_atoms_indices(arr):
    """Return the first-occurrence row indices of the distinct rows of arr.

    Rows are compared after conversion to float and rounding to 4 decimals,
    so rows that differ by less than that count as the same atom position.
    The indices come back in the order of the sorted unique rows
    (np.unique with return_index=True).
    """
    rounded = np.round(arr.astype(float), 4)
    return np.unique(rounded, axis=0, return_index=True)[1]

In [8]:
def outxyz(output,all_array):
    """Write the rows of all_array to `output` in XYZ format.

    Each row supplies the atom label in column 0 (digits are stripped from
    it) and x, y, z in columns 1-3. The file starts with the atom count and
    the comment line 'generated by MOF_BUILD'.
    """
    header = str(len(all_array)) + "\n" + "generated by MOF_BUILD" + "\n"
    with open(output, "w") as fp:
        lines = [header]
        for row in all_array:
            element = re.sub(r"\d", "", row[0])
            x, y, z = float(row[1]), float(row[2]), float(row[3])
            lines.append("%-5s%8.3f%8.3f%8.3f" % (element, x, y, z) + "\n")
        fp.writelines(lines)


In [None]:
from isolated_node_cleaner import *
# Parameters of the supercell clustering / termination steps in this cell.
linker_topics = 3                  # passed to filter_connected_node_loose and filt_outside_edgex
term_file = 'methyl.pdb'           # termination template handed to add_terminations
scalar = boundary_scalar = 0.00    # forwarded unchanged to the *_loose_check helpers
cutx = cuty = cutz = True          # per-axis flags forwarded to the *_loose_check helpers


def cluster_supercell(supercell_Carte,linker_topics,target_all_fc,box_bound,scalar,cutx,cuty,cutz):
    """Run the three-step supercell pipeline and return its final result.

    1. supercell_nodeedge_fc_loose_check: build the supercell candidates.
    2. filt_boundary_res_loose_check: split them into safe, extra and
       boundary-node residues.
    3. filter_connected_node_loose: applied to the stacked safe + extra
       residues together with the boundary nodes.

    Returns (connected_nodeedge_fc_loose, boundary_connected_nodes_res, eG).

    NOTE: `boundary_scalar` is not a parameter; it is read from the notebook's
    global namespace at call time.
    """
    cut_flags = (cutx, cuty, cutz)

    s_fvec_all_loose, row_diff_idx_loose = supercell_nodeedge_fc_loose_check(
        supercell_Carte, target_all_fc, box_bound, scalar, *cut_flags
    )

    safe_res, extra_res, boundary_res = filt_boundary_res_loose_check(
        s_fvec_all_loose, row_diff_idx_loose, box_bound, scalar, *cut_flags, boundary_scalar
    )
    bare_nodeedge_fc_loose = np.vstack((safe_res, extra_res))

    connected, boundary_connected, graph = filter_connected_node_loose(
        bare_nodeedge_fc_loose, boundary_res, linker_topics
    )
    return connected, boundary_connected, graph


# Build the connected supercell cluster from the settings defined at the top of
# this cell; eG is the third value returned by filter_connected_node_loose.
connected_nodeedge_fc_loose, boundary_connected_nodes_res,eG=cluster_supercell(supercell_Carte,linker_topics,target_all_fc,box_bound,scalar,cutx,cuty,cutz)



def terminate_nodes(term_file,boundary_connected_nodes_res,connected_nodeedge_fc_loose,sc_unit_cell,box_bound):
    """Build termination groups for the boundary nodes and stack them.

    exposed_Xs_Os_boundary_node collects the exposed sites of the boundary
    nodes, add_terminations builds one termination per site from term_file,
    and the resulting arrays are stacked vertically into a single array.
    """
    exposed_sites = exposed_Xs_Os_boundary_node(
        boundary_connected_nodes_res, connected_nodeedge_fc_loose, sc_unit_cell, box_bound
    )
    return np.vstack(add_terminations(term_file, exposed_sites))

# Terminations for the exposed sites of the boundary nodes of the cluster.
terms_cc_loose = terminate_nodes(term_file,boundary_connected_nodes_res,connected_nodeedge_fc_loose,sc_unit_cell,box_bound)



# Cluster atoms followed by the termination atoms.
# NOTE(review): the names suggest fractional ('_fc_') rows are stacked with
# Cartesian ('_cc_') rows - confirm both arrays use the same coordinate system.
node_edge_term_cc_loose = np.vstack((connected_nodeedge_fc_loose,terms_cc_loose))

# Export the bare cluster ('30te.gro') and the terminated cluster ('31te.gro',
# '31term_supercell.xyz') through the project's output helpers.
tempgro('30te.gro',connected_nodeedge_fc_loose)
tempgro('31te.gro',node_edge_term_cc_loose)
temp_xyz("31term_supercell.xyz",node_edge_term_cc_loose)
import py3Dmol as p3d
def viewgro(groname):
    """Show the structure stored in the .gro file `groname` in a py3Dmol viewer.

    The file is read through a context manager so the handle is closed right
    after reading (the previous `open(...).read()` left it to the garbage
    collector). The model is drawn as sticks plus small spheres, hydrogens
    included, in a 600 x 600 view.
    """
    with open(groname, "r") as gro_file:
        gro_text = gro_file.read()

    viewer = p3d.view(width=600, height=600)
    viewer.addModelsAsFrames(gro_text, "gro", {"keepH": True})
    viewer.setStyle({"stick": {}, "sphere": {"scale": 0.25}})
    viewer.zoomTo()
    viewer.show()
# Display the bare cluster first, then the terminated cluster (the two .gro
# files written a few lines above).
viewgro("30te.gro")
viewgro("31te.gro")




In [10]:
def exposed_Xs_Os_mainfrag_node(unsaturated_main_frag_nodes,eG,node_fc,edge_fc,sc_unit_cell):
    '''Look for the two nearest O atoms of every exposed (unsaturated) X site
    in the unsaturated main-fragment nodes.

    Parameters
    ----------
    unsaturated_main_frag_nodes : iterable whose items carry the node's
        residue number (also its name in eG) at position 0.
    eG : networkx graph; the neighbours of a node are its edges, whose names
        contain the edge residue number.
    node_fc, edge_fc : atom arrays with the atom name in column 2, the residue
        number in column 5 and fractional coordinates in the last 3 columns.
    sc_unit_cell : matrix used to turn fractional into Cartesian coordinates.

    Returns
    -------
    list of tuples
        (node centre (fractional), number of X sites of the node, 'exposed_X',
        X position (Cartesian), 'node_Opair', nearest O positions (Cartesian),
        their indices among the node's O atoms), one per X site that does not
        overlap an X of a neighbouring edge.

    Notes
    -----
    * The regex that extracts the edge number is now a raw string; the former
      '\\D' in a normal string literal is an invalid escape sequence.
    * When a node has fewer than three O atoms all of them are used; the
      previous code raised IndexError in that case.
    '''
    ex_node_cxo_cc=[]

    for uN in unsaturated_main_frag_nodes:
        i = uN[0] #res_number
        degree_of_nodes=nx.degree(eG,i)
        neighbor_edges = list(nx.neighbors(eG,i))

        print(f'degree of node {i} is {degree_of_nodes}, neighbor eadges are {neighbor_edges}')
        node=node_fc[node_fc[:,5]==i]
        node_center_fc = np.mean(node[:,-3:],axis=0)
        Xs_fc = np.asarray([k[-3:] for k in node if re.sub(r'\d','',k[2]) == 'X'])
        Os_fc = np.asarray([g[-3:] for g in node if re.sub(r'\d','',g[2]) == 'O'])
        Os_cc = np.dot(Os_fc,sc_unit_cell)
        Xs_fc = Xs_fc.astype(float)
        exposed_Xs_cc=np.dot(Xs_fc,sc_unit_cell)

        # X atoms of all edges attached to this node, in Cartesian coordinates
        neighbor_edges_list = [int(re.sub(r'\D','',e)) for e in neighbor_edges]
        neighbor_edge_fc=np.vstack(([edge_fc[edge_fc[:,5]==ei]for ei in neighbor_edges_list]))
        neighbor_edgex_fvec = filt_edgex_fvec(neighbor_edge_fc)
        edgex_cvec_array = np.dot(neighbor_edgex_fvec[:,-3:],sc_unit_cell)

        for x in exposed_Xs_cc:
            # an X that overlaps an edge X is an occupied connection site
            if check_overlapX(edgex_cvec_array,x):
                continue

            # unsaturated site: keep the O atoms strictly closer than the
            # third-nearest one (i.e. the two nearest, fewer on ties)
            cdist_xos = [np.linalg.norm(cvec_o-x) for cvec_o in Os_cc]
            if len(cdist_xos) < 3:
                node_ovecs_idx = list(range(len(cdist_xos)))
            else:
                cdist_xos_sort3rd = sorted(cdist_xos)[2]
                node_ovecs_idx=[index for index,value in enumerate(cdist_xos) if value < cdist_xos_sort3rd]
            node_ovecs_cc=[Os_cc[o] for o in node_ovecs_idx]

            ex_node_cxo_cc.append((node_center_fc,len(exposed_Xs_cc),'exposed_X',x,'node_Opair',node_ovecs_cc,node_ovecs_idx))

    return ex_node_cxo_cc

In [11]:

def filt_nodex_fvec(array):
    """Return the rows of `array` that are X atoms of a node.

    A row qualifies when column 4 equals 'NODE' and the atom name in column 2
    is 'X' once its digits are removed.
    """
    kept_rows = []
    for row in array:
        if row[4] != 'NODE':
            continue
        if re.sub('[0-9]','',row[2]) != 'X':
            continue
        kept_rows.append(row)
    return np.asarray(kept_rows)


def check_overlapX2(edgex_cvec,nodex_cvec):
    """Return True if any row of edgex_cvec lies closer than 2 to nodex_cvec.

    The comparison is strict: a distance of exactly 2 does not count as an
    overlap. An input without rows yields False.
    """
    offsets = edgex_cvec-nodex_cvec
    return any(np.linalg.norm(offset) < 2 for offset in offsets)


def filt_outside_edgex(Xs_fc,edge_center_fc,linker_topics):
    """Select the X sites lying farthest from the centre of an edge.

    Parameters
    ----------
    Xs_fc : sequence of X positions.
    edge_center_fc : position of the edge centre.
    linker_topics : number of outer sites expected.

    Returns
    -------
    (lcs, outside_edgex_indices)
        lcs is the list of all X-to-centre distances sorted in descending
        order; outside_edgex_indices lists, in input order, the indices whose
        distance is strictly greater than the (linker_topics + 1)-th largest
        distance.

    When there are at most `linker_topics` X sites no such threshold exists;
    every index is then returned (the previous code raised IndexError).
    """
    dists = [np.linalg.norm(x_fc-edge_center_fc) for x_fc in Xs_fc]
    lcs = sorted(dists, reverse=True)

    if len(lcs) <= linker_topics:
        return lcs, list(range(len(dists)))

    threshold = lcs[linker_topics]
    outside_edgex_indices = [idx for idx, dist in enumerate(dists) if dist > threshold]
    return lcs,outside_edgex_indices

def exposed_Os_mainfrag_edge(unsaturated_main_frag_edges,eG,node_fc,edge_fc,sc_unit_cell):
    '''Look for the two nearest O atoms of every exposed (unsaturated) X site
    in the unsaturated main-fragment edges and place an H atom 1 A beyond each
    of those O atoms as termination.

    Parameters
    ----------
    unsaturated_main_frag_edges : iterable whose items carry the edge name at
        position 0; the residue number is the name without its first character.
    eG : networkx graph; the neighbours of an edge are node residue numbers.
    node_fc, edge_fc : atom arrays with the atom name in column 2, the residue
        number in column 5 and fractional coordinates in the last 3 columns.
    sc_unit_cell : matrix used to turn fractional into Cartesian coordinates.

    Returns
    -------
    list of tuples
        (edge centre (fractional), edge name, 'exposed_X', last outer X
        examined (Cartesian) or None when the edge has none, 'node_hpair',
        list of H positions (Cartesian)), one per input edge.

    Notes
    -----
    * `linker_topics` is read from the notebook's global namespace.
    * The 'last X' entry used to be taken from the loop variable after the
      loop, which raised NameError (or silently reused the value of a previous
      edge) when an edge had no outer X; it is now reset for every edge.
    * With fewer than three distinct O atoms all of them are used; the
      previous code raised IndexError in that case.
    '''
    ex_edge_xh=[]

    for uE in unsaturated_main_frag_edges:
        i = uE[0] #res_number
        degree_of_edges=nx.degree(eG,i)
        neighbor_nodes = list(nx.neighbors(eG,i))
        print(f'degree of edge {i} is {degree_of_edges}, neighbor nodes are {neighbor_nodes}')
        edge=edge_fc[edge_fc[:,5]==int(i[1:])]
        edge_center_fc = np.mean(edge[:,-3:],axis=0)
        Xs_fc = np.asarray([k[-3:] for k in edge if re.sub(r'\d','',k[2]) == 'X'])
        Os_fc = np.asarray([g[-3:] for g in edge if re.sub(r'\d','',g[2]) == 'O'])
        Os_fc=Os_fc.astype('float')
        # drop O atoms that coincide after rounding to 6 decimals
        unique_Os_fc=np.unique(np.round(Os_fc,6),axis=0)
        Os_cc = np.dot(unique_Os_fc,sc_unit_cell)
        Xs_fc = Xs_fc.astype(float)
        # only the outermost X sites of the edge can be connection points
        _,outside_edgex_indices=filt_outside_edgex(Xs_fc,edge_center_fc,linker_topics)
        exposed_Xs_cc=np.dot(Xs_fc[outside_edgex_indices],sc_unit_cell)

        # X atoms of all nodes attached to this edge, in Cartesian coordinates
        neighbor_nodes_fc=np.vstack(([node_fc[node_fc[:,5]==ei]for ei in neighbor_nodes]))
        neighbor_nodex_fvec = filt_nodex_fvec(neighbor_nodes_fc)
        nodex_cvec_array = np.dot(neighbor_nodex_fvec[:,-3:],sc_unit_cell)

        h_ccs=[]
        x = None
        for x in exposed_Xs_cc:
            # an X that overlaps a node X is an occupied connection site
            if check_overlapX2(nodex_cvec_array,x):
                continue

            # unsaturated site: keep the O atoms strictly closer than the
            # third-nearest one (i.e. the two nearest, fewer on ties)
            cdist_xos = [np.linalg.norm(cvec_o-x) for cvec_o in Os_cc]
            if len(cdist_xos) < 3:
                edge_ovecs_idx = list(range(len(cdist_xos)))
            else:
                cdist_xos_sort3rd = sorted(cdist_xos)[2]
                edge_ovecs_idx=[index for index,value in enumerate(cdist_xos) if value < cdist_xos_sort3rd]
            edge_ovecs_cc=[Os_cc[o] for o in edge_ovecs_idx]

            #add extra H in edge as termination: one unit along X -> O, beyond O
            for eo_cc in edge_ovecs_cc:
                h_cc=eo_cc+(eo_cc-x)/np.linalg.norm(eo_cc-x)
                h_ccs.append(h_cc)

        ex_edge_xh.append((edge_center_fc,uE[0],'exposed_X',x,'node_hpair',h_ccs))
    return ex_edge_xh



def terminate_exedge(ex_edge_xh,unsaturated_edges,main_frag_edges_cc):
    """Append terminating H atoms to exposed edges and relabel those edges 'HEDGE'.

    Parameters
    ----------
    ex_edge_xh : sequence
        One entry per exposed edge. Element ``[1]`` is the edge label (one
        prefix character followed by the integer edge index, e.g. ``'E12'``)
        and the last element is a list of H positions (x, y, z).
    unsaturated_edges : sequence
        ``(label, degree)`` pairs; only the integer part of ``label`` is used.
    main_frag_edges_cc : 2-D array
        Atom rows of the main-fragment edges. Column 5 holds the edge index,
        column 4 is overwritten with ``'HEDGE'`` for terminated edges, and the
        last three columns are coordinates. H rows are built with 9 fields, so
        the rows are expected to have 9 columns.

    Returns
    -------
    2-D array
        Rows of the saturated edges followed by the rows of every edge listed
        in ``ex_edge_xh`` (original atoms + new H atoms, relabelled 'HEDGE').
        Unsaturated edges that do not appear in ``ex_edge_xh`` are not returned.
        If nothing is terminated, only the saturated-edge rows are returned.
    """
    unsaturated_edges_idx = [int(ue[0][1:]) for ue in unsaturated_edges]
    is_unsaturated = np.isin(main_frag_edges_cc[:, 5], unsaturated_edges_idx)
    sa_edges = main_frag_edges_cc[~is_unsaturated]
    usa_edges = main_frag_edges_cc[is_unsaturated]

    t_usa_edges = []
    for entry in ex_edge_xh:
        edge_idx = int(entry[1][1:])
        exedge = usa_edges[usa_edges[:, 5] == edge_idx]
        if len(exedge) == 0:
            # No atoms for this label: nothing to copy the residue fields from.
            continue

        hs_cc = entry[-1]
        blocks = [exedge]
        for i_h, h in enumerate(hs_cc):
            hx, hy, hz = h
            # Build the row as an object array so every field keeps its type.
            # np.hstack on mixed str/number scalars either stringifies every
            # field (edge index and coordinates included) or, on NumPy versions
            # that refuse str/number promotion, raises.
            fields = ['H' + str(i_h + 1 + len(exedge)), 0.00, 'H',
                      exedge[0, 3], exedge[0, 4], exedge[0, 5],
                      float(hx), float(hy), float(hz)]
            t_h = np.empty(len(fields), dtype=object)
            t_h[:] = fields
            blocks.append(t_h)

        # With an empty H list this is just the edge's own atoms.
        t_usa_edges.append(np.vstack(blocks))

    if not t_usa_edges:
        # np.vstack([]) raises; with nothing to terminate only saturated edges remain.
        return sa_edges

    t_usa_edges_arr = np.vstack(t_usa_edges)
    t_usa_edges_arr[:, 4] = 'HEDGE'

    t_edges = np.vstack((sa_edges, t_usa_edges_arr))
    return t_edges

In [None]:
# Re-index the node and edge fragments of the loose framework
# (presumably renumbers the residue index in column 5 -- confirm in reindex_frag_array).
renode1_fcarr = reindex_frag_array(bare_nodeedge_fc_loose, 'NODE')
reedge1_fcarr = reindex_frag_array(bare_nodeedge_fc_loose, 'EDGE')

#
## make defective node and edge
# Fragment indices (column 5) to delete in order to create the defects.
remove_node_list = [1, 2]  # [1]
remove_edge_list = []  # [1,2,3,4,5]
defective_node_fcarr = np.vstack(([i for i in renode1_fcarr if i[5] not in remove_node_list]))
defective_edge_fcarr = np.vstack(([i for i in reedge1_fcarr if i[5] not in remove_edge_list]))

# Re-index again after the removal.
renode_fcarr = reindex_frag_array(defective_node_fcarr, 'NODE')
reedge_fcarr = reindex_frag_array(defective_edge_fcarr, 'EDGE')
#

edgefc_centers = get_frag_centers_fc(reedge_fcarr)
nodefc_centers = get_frag_centers_fc(renode_fcarr)

eG = calculate_eG_net(edgefc_centers, nodefc_centers, linker_topics)

# Connected components, largest first. Computed once and reused below
# (previously the same sorted list was rebuilt three times).
eG_components = sorted(nx.connected_components(eG), key=len, reverse=True)

eG_subparts = [len(c) for c in eG_components]
if len(eG_subparts) > 1:
    print(f'this MOF has {len(eG_subparts)} seperated fragments: {eG_subparts}')
else:
    print(f'this MOF has {len(eG_subparts)} fragment')

# Graph vertices that are ints are treated as nodes, str vertices as edges (linkers).
unsaturated_nodes = [(n, d) for n, d in eG.degree() if d < nodes_saturation and isinstance(n, int)]
unsaturated_edges = [(n, d) for n, d in eG.degree() if d < linker_saturation and isinstance(n, str)]
if len(unsaturated_edges) > 0:
    print(f"UNsaturated edges(linkers) exist, need linker_termination <= {len(unsaturated_edges)}")
else:
    print("only saturated edges(linkers) exist")

if len(unsaturated_nodes) > 0:
    print(f"UNsaturated nodes exist, <={len(unsaturated_nodes)} nodes need node_termination")
else:
    print("only saturated nodes exist")


# The main fragment is the largest connected component.
frags = [(len(c), c) for c in eG_components]
main_frag = list(eG_components[0])
main_frag_nodes = [i for i in main_frag if isinstance(i, int)]
main_frag_edges = [i for i in main_frag if re.sub('[0-9]', '', str(i)) == 'E']
unsaturated_main_frag_nodes = [i for i in unsaturated_nodes if i[0] in main_frag_nodes]
unsaturated_main_frag_edges = [i for i in unsaturated_edges if i[0] in main_frag_edges]

# Collect the atom rows belonging to the main fragment (edge labels are 'E<int>').
main_frag_edges_fc = np.vstack(([reedge_fcarr[reedge_fcarr[:, 5] == int(ei[1:])] for ei in main_frag_edges]))
main_frag_nodes_fc = np.vstack(([renode_fcarr[renode_fcarr[:, 5] == ni] for ni in main_frag_nodes]))

# Multiply the last three columns by sc_unit_cell (fractional -> Cartesian, per the _fc/_cc naming).
main_frag_nodes_cc = np.hstack((main_frag_nodes_fc[:, :-3], np.dot(main_frag_nodes_fc[:, -3:], sc_unit_cell)))
main_frag_edges_cc = np.hstack((main_frag_edges_fc[:, :-3], np.dot(main_frag_edges_fc[:, -3:], sc_unit_cell)))

ex_node_cxo_cc_loose = exposed_Xs_Os_mainfrag_node(unsaturated_main_frag_nodes, eG, renode_fcarr, reedge_fcarr, sc_unit_cell)


In [None]:
# Node terminations: built from the template file for every exposed node site
# found in the previous cell.
term_file = 'methyl.pdb'
terms_loose = add_terminations(term_file, ex_node_cxo_cc_loose)
terms_cc_loose = np.vstack(terms_loose)

# Edge terminations: add H to the exposed edges and rename those edges to HEDGE.
ex_edge_xh = exposed_Os_mainfrag_edge(
    unsaturated_main_frag_edges,
    eG,
    renode_fcarr,
    reedge_fcarr,
    sc_unit_cell,
)
t_edges = terminate_exedge(ex_edge_xh, unsaturated_edges, main_frag_edges_cc)
# Edge rows filtered by no_overlap_atoms_indices on their coordinates.
# NOTE(review): the assembled structure below uses t_edges, not ct_edges -- confirm this is intended.
ct_edges = t_edges[no_overlap_atoms_indices(t_edges[:, -3:])]

# Full structure: main-fragment nodes + terminated edges + node terminations.
node_edge_term_cc_loose = np.vstack(
    (main_frag_nodes_cc, t_edges, terms_cc_loose)
)

tempgro('311te.gro', node_edge_term_cc_loose)
temp_xyz("311term_supercell.xyz", node_edge_term_cc_loose)

import py3Dmol as p3d
def viewgro(groname):
    """Display the structure stored in the .gro file `groname` with py3Dmol.

    The file is read inside a ``with`` block so the handle is closed
    deterministically instead of being left to the garbage collector.
    """
    with open(groname, "r") as gro_file:
        gro_text = gro_file.read()

    viewer = p3d.view(width=600, height=600)
    viewer.addModelsAsFrames(gro_text, "gro", {"keepH": True})
    viewer.setStyle({"stick": {}, "sphere": {"scale": 0.25}})
    viewer.zoomTo()
    viewer.show()
# Render the .gro file written by tempgro in this cell; the commented call targets a different file name.
#viewgro("301te.gro")
viewgro("311te.gro")

In [14]:
#Defective model: node/edge missing
    #delete nodes or edges
    #terminate nodes ROUND1
    # find main fragment
        # find unsaturated node uN1
            # find uN1 neighbors and extract X in neighbor edge(E+int)
                # filter exposed X sites in uN1
                    # add terminations

    #terminate edge ROUND2
        #find unsaturated edge uE1
            #find uE1 neighbors and extract X in neighbor node 'int'
                # filter exposed X sites in uE1
                    # add terminations (-OH)

#Defective model: linker exchange
#   the terminating O atoms do not change; use the X atoms to define a range
#   atoms outside the X range stay
#   then superimpose the new linker on the X atoms



In [None]:

# Dump the terminated edges to an XYZ file: atom count, a comment line, then
# one "label x y z" line per atom (digits are stripped from the atom label).
output = 'Xs2.xyz'
placed_all = t_edges

atoms_number = len(placed_all)
newxyz = []
with open(output, "w") as fp:
    newxyz.append(str(atoms_number) + "\n" + "generated by MOF_BUILD" + "\n")
    for i in range(atoms_number):
        atom_row = placed_all[i]
        value_label = re.sub('[0-9]', '', atom_row[0])  # atom label without numbering
        # the last three fields are the coordinates
        value_x, value_y, value_z = (float(coord) for coord in atom_row[-3:])
        formatted_line = "%-5s%8.3f%8.3f%8.3f" % (value_label, value_x, value_y, value_z)
        newxyz.append(formatted_line + "\n")
    fp.writelines(newxyz)

def viewxyz(xyzname):
    """Display the structure stored in the .xyz file `xyzname` with py3Dmol.

    The file is read inside a ``with`` block so the handle is closed
    deterministically instead of being left to the garbage collector.
    """
    with open(xyzname, "r") as xyz_file:
        xyz_text = xyz_file.read()

    viewer = p3d.view(width=600, height=600)
    viewer.addModelsAsFrames(xyz_text, "xyz", {"keepH": True})
    viewer.setStyle({"stick": {}, "sphere": {"scale": 0.25}})
    viewer.zoomTo()
    viewer.show()
# Render the XYZ file written in this cell (`output`); the commented call is a .gro alternative.
#viewgro("301te.gro")
viewxyz(output)

In [None]:
def readpdb(filename):
    """Read the ATOM/HETATM records of a PDB-style file.

    Fields are taken from fixed columns of each (whitespace-stripped) line:
    atom label [12:16], residue number [22:26], x [30:38], y [38:46],
    z [46:54], charge [61:66] and a free-form note [67:80].

    Returns a list of rows
    ``[atom, x, y, z, charge, note, res_num, 'TERM']``.
    A residue number that cannot be parsed as an int defaults to 1.
    Raises ValueError if a coordinate or the charge field is not a number.
    """
    inputfile = str(filename)
    with open(inputfile, "r") as fp:
        content = fp.readlines()

    data = []
    for line in content:
        line = line.strip()
        if not line:  # skip blank line
            continue

        # The record name fills the first six columns and is space padded
        # ("ATOM  "), so it must be stripped before comparing; comparing the
        # raw slice with "ATOM" never matched and dropped every ATOM record.
        record_name = line[0:6].strip()
        if record_name not in ("ATOM", "HETATM"):
            continue

        value_atom = line[12:16].strip()  # atom_label
        value_x = float(line[30:38])  # x
        value_y = float(line[38:46])  # y
        value_z = float(line[46:54])  # z
        value_charge = float(line[61:66])
        value_note = line[67:80].strip()  # atom_note
        try:
            value_res_num = int(line[22:26])
        except ValueError:
            value_res_num = 1
        data.append([value_atom, value_x, value_y, value_z, value_charge, value_note, value_res_num, 'TERM'])
    return data
# TODO: `new_edge = ` was left without a right-hand side; as written it was a SyntaxError that kept this whole cell (including readpdb) from running.

In [139]:
def seperate_atoms_by_cos(arr,arr_center,arr_x,lc_max):
    """Return indices of `arr` rows lying beyond `lc_max` in the direction of an `arr_x` point.

    For every pair (row i of `arr`, row j of `arr_x`) the index i is recorded
    when both hold:
      * the distance of arr[i] from `arr_center` is greater than `lc_max`;
      * dot(arr[i]-center, arr_x[j]-center) is greater than |arr_x[j]-center|.
    An index is recorded once per matching `arr_x` row, so the returned list
    may contain duplicates.
    """
    selected = []
    for atom_idx, atom_pos in enumerate(arr):
        for anchor_pos in arr_x:
            to_atom = atom_pos - arr_center
            to_anchor = anchor_pos - arr_center
            beyond_radius = np.linalg.norm(to_atom) > lc_max
            if beyond_radius and np.dot(to_atom, to_anchor) > np.linalg.norm(to_anchor):
                selected.append(atom_idx)
    return selected

In [None]:
# Debug output: distance of the X atom at row index 3 of x_edge from center_of_edge.
# NOTE(review): x_edge, o_edge and center_of_edge are assigned inside the loop of a
# cell that appears further down, so this cell only works after that cell has run
# and reflects the last residue processed there.
print(np.linalg.norm(x_edge[3,-3:]-center_of_edge))

# Offset vector and distance of every O atom of that edge from the same center.
for i in range(len(o_edge)):
    d=o_edge[i,-3:]-center_of_edge
    print(i,d,np.linalg.norm(d))


In [None]:
# Residue (edge) indices whose linker is to be processed.
res_index=[1,2,3,4,5,6,7,8,9,10,11]


or_edges=ct_edges[np.isin(ct_edges[:,5],res_index)]

for ix in res_index:
    or_edge=or_edges[or_edges[:,5]==ix]

    x_edge = np.asarray([row for row in or_edge if row[2].startswith('X')]) # X atoms
    o_edge = np.asarray([row for row in or_edge if row[2].startswith('O')]) # O atoms
    center_of_edge= np.mean(x_edge[:,-3:],axis=0) # center of all X atoms
    #
    print(len(o_edge),o_edge)
    # find the outside X atoms, used as location-assistance atoms for the superimpose;
    # x_indices index the rows of x_edge (the array whose coordinates are passed in)
    lc_max,x_indices=filt_outside_edgex(x_edge[:,-3:],center_of_edge,linker_topics)


    # filter the O atoms of the old edge by the range of the X atoms: atoms beyond it stay
    # NOTE(review): lc_max[3] is a hard-coded element of the first return value -- confirm its meaning
    outside_atom_indices = seperate_atoms_by_cos(o_edge[:,-3:],center_of_edge,x_edge[x_indices][:,-3:],lc_max[3])

    #print(len(outside_atom_indices),o_edge[outside_atom_indices])
    out_atoms=o_edge[outside_atom_indices]
    out_atoms_cc = np.round(out_atoms[:,-3:].astype('float'),4)
    # seperate_atoms_by_cos can return an index more than once; keep unique coordinates
    v,out_indices=np.unique(out_atoms_cc,axis=0,return_index=True)
    #print(len(out_atoms_cc)-len(v),'\n')#,out_atoms[out_indices])


    # x_indices refer to rows of x_edge, so the anchor X atoms must be taken from
    # x_edge (indexing or_edge with them selects unrelated rows unless the X atoms
    # happen to come first in or_edge).
    or_xs=x_edge[x_indices]
    or_xs_cc=or_xs[:,-3:]
    #_,rot,trans = superimpose(or_xs_cc,new_xs_cc)
    



In [15]:
# superimpose for replacement
    # find the X atoms and superimpose on them

In [None]:
# Keep the rows of `data` whose field at index 4 is a string starting with 'E'.
# NOTE(review): `data` is not assigned in the visible cells -- confirm where it comes from and that row[4] is a string.
x_data = np.asarray([row for row in data if row[4].startswith('E')])