In [1]:
# import sys
# sys.path.append('/Applications/anaconda3/lib/python3.8/site-packages')

from rdkit import Chem 
from rdkit.Chem import AllChem as rdkit
from collections import defaultdict
from rdkit.Chem import rdFMCS
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import rdDistGeom
IPythonConsole.ipython_3d = True

import py3Dmol
from IPython.display import Image
import matplotlib.pyplot as plt
import subprocess
import time
import stk
import stko
import os
import spindry as spd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdDistGeom
from rdkit.Chem import rdMolAlign
from rdkit import RDLogger
import logging

# Silence RDKit: only messages at CRITICAL level get through its logger.
rdkit_logger = RDLogger.logger()
rdkit_logger.setLevel(RDLogger.CRITICAL)

# Notebook logging: root handler at INFO plus a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
%matplotlib inline

def show_stk_mol(stk_mol):
    """Display an stk molecule as a py3Dmol stick model.

    The molecule is converted with ``to_rdkit_mol()``, serialised to a MOL
    block and shown in a 400x400 viewer with background colour ``0xeeeeee``.
    Nothing is returned; showing the viewer is the only effect.
    """
    mol_block = rdkit.MolToMolBlock(stk_mol.to_rdkit_mol())
    stick_style = {'stick': {'colorscheme': 'cyanCarbon'}}

    viewer = py3Dmol.view(
        data=mol_block,
        style=stick_style,
        width=400,
        height=400,
    )
    viewer.setBackgroundColor('0xeeeeee')
    viewer.zoomTo()
    viewer.show()



def rdkit_op(bb):
    """Return a clone of ``bb`` carrying an MMFF-optimised geometry.

    The building block is converted to an RDKit molecule, sanitised and
    passed to ``MMFFOptimizeMolecule``. The resulting conformer coordinates
    are copied onto a clone of ``bb``.

    The optimiser's status code is no longer discarded. Per the RDKit
    documentation, 0 means converged, 1 means more iterations are required
    and -1 means the force field could not be set up. Non-zero codes are
    logged as warnings, and the function still returns a building block in
    both cases, so existing callers keep working.
    """
    rdkit_bb = bb.to_rdkit_mol()
    rdkit.SanitizeMol(rdkit_bb)

    status = rdkit.MMFFOptimizeMolecule(rdkit_bb)
    if status == -1:
        logger.warning(
            'MMFF force field could not be set up; '
            'returning the unoptimised geometry.'
        )
    elif status != 0:
        logger.warning(
            'MMFF optimisation did not converge (status %s); '
            'returning the partially optimised geometry.',
            status,
        )

    # stk molecules are immutable: with_position_matrix returns a clone
    # holding the new position matrix.
    return bb.with_position_matrix(
        position_matrix=rdkit_bb.GetConformer().GetPositions(),
    )

In [2]:
metal_name = ['Fe','Co','Ni','Cu','Zn']


# Metal centres for the porphyrin complexes, one per symbol in metal_name.
# Each is a single +2 atom at the origin, exposed through six SingleAtom
# functional groups. The list is derived from metal_name so the two can never
# get out of step (the original spelled out five near-identical literals).
# getattr(stk, symbol) looks up the stk atom class of that name (stk.Fe, ...).
metal_centers = [
    stk.BuildingBlock(
        smiles=f'[{symbol}+2]',
        # Materialised as a tuple so every group is created while `symbol`
        # still refers to this metal.
        functional_groups=tuple(
            stk.SingleAtom(getattr(stk, symbol)(0, charge=2))
            for _ in range(6)
        ),
        position_matrix=[[0, 0, 0]],
    )
    for symbol in metal_name
]

# Porphyrin ligand building block, optimised with UFF straight after
# construction. The SMARTS factory matches carbon~nitrogen~carbon patterns and
# marks the nitrogen (index 1) as the bonder; no atoms are deleted.
TPP = stko.UFF().optimize(
    stk.BuildingBlock(
        smiles='C1(/C2=CC=CC=C2)=C3C=CC(/C(C4=CC=CC=C4)=C5C=C/C([N]/5)=C(C6=CC=CC=C6)/C(C=C/7)=NC7=C(C8=CC=CC=C8)/C9=CC=C1[N]/9)=N/3',
        functional_groups=(
            stk.SmartsFunctionalGroupFactory(
                smarts='[#6!H]~[#7]~[#6!H]',
                bonders=(1,),
                deleters=(),
            ),
        ),
    )
)

# Build, name and display the porphyrin complex of every metal centre.
for symbol, centre in zip(metal_name, metal_centers):
    M_TPP = stk.ConstructedMolecule(
        topology_graph=stk.metal_complex.Porphyrin(
            metals=centre,
            ligands=TPP,
            optimizer=stk.MCHammer(),
            # alternative: optimizer=stk.Collapser(scale_steps=False),
        )
    )

    molecule_name = f'{symbol}_TPP'
    print(molecule_name)

    # Disabled export / xtb run, kept for reference (later cells read the
    # output_*.txt and vipea_*.txt files this pipeline would produce):
    #porphyrin_noM.write(f'{molecule_name}.mol')
    #M_TPP.write(f'{molecule_name}.xyz')
    #os.makedirs(f'{molecule_name}')
    #os.chdir(f'{molecule_name}')
    #os.system(f'mv ../{molecule_name}.xyz .')
    #os.environ['XTBHOME'] = "/home/xwu/miniconda3/pkgs/xtb-6.4.1-hf06ca72_0/share/xtb"
    #os.system(f'xtb {molecule_name}.xyz --gfn 1 --opt > output_{molecule_name}.txt && xtb xtbopt.xyz --gfn 1 --vipea > vipea_{molecule_name}.txt')
    #os.chdir('../')
    show_stk_mol(M_TPP)
  



Fe_TPP


Co_TPP


Ni_TPP


Cu_TPP


Zn_TPP


In [61]:
# Echo the molecule_name left behind by the construction loop above.
# NOTE(review): the saved output below reads Fe_TPP although that loop ends on
# Zn; looks like out-of-order execution. Confirm with Restart & Run All.
print(molecule_name)

Fe_TPP


In [39]:
import glob
#print(os.getcwd())
#os.chdir('Other')
#print(os.getcwd())

# Collect the HOMO-LUMO gap line from <metal>_TPP/output_<metal>_TPP.txt for
# every metal and store the formatted entries in all_HL-gap.txt.
#
# Fixes relative to the earlier version of this cell:
#   * the output file is written once, after all metals are parsed, instead of
#     being reopened (and truncated) for every metal and rewritten per match;
#   * every entry is written (the old `list[1:]` slice dropped the first metal);
#   * the builtin `list` is no longer shadowed;
#   * files are handled with `with`, so they are closed even on errors.
# The progress prints are unchanged.
all_gap_value = []
for metal in metal_name:
    file_location = os.path.join(f'{metal}_TPP',f'output_{metal}_TPP.txt')
    print(file_location)
    filenames = glob.glob(file_location)
    print(filenames)
    with open(file_location, 'r') as output:
        for line in output:
            if 'HOMO-LUMO GAP' not in line:
                continue
            print(line)
            # The numeric value is the 4th whitespace-separated token,
            # e.g. "| HOMO-LUMO GAP   0.2185 eV |".
            gap_value = float(line.split()[3])
            print(gap_value)
            gap_value_metal = f'H-L gap {metal} = {gap_value}'
            print(gap_value_metal)
            all_gap_value.append(gap_value_metal)
            print(all_gap_value)

with open('all_HL-gap.txt', 'w') as datafile:
    datafile.write("\n".join(all_gap_value))



Fe_TPP/output_Fe_TPP.txt
['Fe_TPP/output_Fe_TPP.txt']
          | HOMO-LUMO GAP               0.218572427439 eV   |

0.218572427439
H-L gap Fe = 0.218572427439
['H-L gap Fe = 0.218572427439']
Co_TPP/output_Co_TPP.txt
['Co_TPP/output_Co_TPP.txt']
          | HOMO-LUMO GAP               1.355585721611 eV   |

1.355585721611
H-L gap Co = 1.355585721611
['H-L gap Fe = 0.218572427439', 'H-L gap Co = 1.355585721611']
Ni_TPP/output_Ni_TPP.txt
['Ni_TPP/output_Ni_TPP.txt']
          | HOMO-LUMO GAP               1.695807179461 eV   |

1.695807179461
H-L gap Ni = 1.695807179461
['H-L gap Fe = 0.218572427439', 'H-L gap Co = 1.355585721611', 'H-L gap Ni = 1.695807179461']
Cu_TPP/output_Cu_TPP.txt
['Cu_TPP/output_Cu_TPP.txt']
          | HOMO-LUMO GAP               0.572075849502 eV   |

0.572075849502
H-L gap Cu = 0.572075849502
['H-L gap Fe = 0.218572427439', 'H-L gap Co = 1.355585721611', 'H-L gap Ni = 1.695807179461', 'H-L gap Cu = 0.572075849502']
Zn_TPP/output_Zn_TPP.txt
['Zn_TPP/output_Zn_TP

In [58]:
# Gather the HOMO-LUMO gap token (kept as a string) from every matching line
# of output_Zn_TPP.txt, printing the running list after each match.
gap_value = []
with open('output_Zn_TPP.txt', 'r') as zn_output:
    zn_lines = zn_output.readlines()

for entry in zn_lines:
    if 'HOMO-LUMO GAP' not in entry:
        continue
    gap_value.append(entry.split()[3])
    print(gap_value)

['1.598943650332']


In [1]:
metal_name = ['Fe','Co','Ni','Cu','Zn']

import glob  # no longer used in this cell; kept in case other cells rely on it


def _collect_xtb_values(metals, file_prefix, marker, token_index, label):
    """Pull one numeric value per matching line out of per-metal text files.

    For every metal the file ``<metal>_TPP/<file_prefix>_<metal>_TPP.txt`` is
    scanned. On each line containing ``marker`` the whitespace-separated
    token at ``token_index`` is converted to float and formatted as
    ``'<label> <metal>_TPP = <value>'``.

    Returns the formatted entries in metal order. A file with no matching
    line contributes nothing, exactly as before.
    """
    entries = []
    for metal in metals:
        file_location = os.path.join(
            f'{metal}_TPP', f'{file_prefix}_{metal}_TPP.txt'
        )
        with open(file_location, 'r') as output:
            for line in output:
                if marker in line:
                    value = float(line.split()[token_index])
                    entries.append(f'{label} {metal}_TPP = {value}')
    return entries


def _write_entries(path, entries):
    """Write the entries to ``path``, newline-separated, no trailing newline."""
    with open(path, 'w') as datafile:
        datafile.write("\n".join(entries))


# Each result file is now written exactly once. The old code reopened it in
# 'w' mode for every metal and rewrote the whole list on every match, which
# left the file empty when the last metal had no match and corrupted it when
# a file matched twice. It also shadowed the builtin `list`.
all_gap_value = _collect_xtb_values(
    metal_name, 'output', 'HOMO-LUMO GAP', 3, 'H-L gap',
)
_write_entries('all_HL-gap_TPP.txt', all_gap_value)

all_EA_value = _collect_xtb_values(
    metal_name, 'vipea', 'delta SCC EA (eV):', 4, 'EA',
)
_write_entries('all_EA_TPP.txt', all_EA_value)

all_IP_value = _collect_xtb_values(
    metal_name, 'vipea', 'delta SCC IP (eV):', 4, 'IP',
)
_write_entries('all_IP_TPP.txt', all_IP_value)



In [2]:
from rdkit import Chem 
from rdkit.Chem import Descriptors
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.ML.Descriptors import MoleculeDescriptors
import numpy as np

def get_descriptors(rdmols):
    """Return the selected RDKit descriptor values for a molecule.

    ``rdmols`` is handed unchanged to
    ``MolecularDescriptorCalculator.CalcDescriptors``. The result is a list
    with one value per name in ``selected`` (same order), each rounded to
    two decimals.
    """
    selected = [
        'ExactMolWt',           # The exact molecular weight of the molecule
        'NumValenceElectrons',  # The number of valence electrons the molecule has
        'HeavyAtomCount',       # Number of heavy atoms a molecule.
        'NHOHCount',            # Number of NHs or OHs
        'NOCount',              # Number of Ns and Os
        'NumHAcceptors',        # Number of Hydrogen Bond Acceptors
        'NumHDonors',           # Number of Hydrogen Bond Donors
        'NumHeteroatoms',       # Number of Heteroatoms
        'fr_Al_COO',            # Number of aliphatic carboxylic acids
        'fr_Al_OH',             # Number of aliphatic hydroxyl groups
        'fr_Al_OH_noTert',      # Number of aliphatic hydroxyl groups excluding tert-OH
        'fr_COO',               # Number of carboxylic acids
        'fr_COO2',              # Number of carboxylic acids
        'fr_C_O',               # Number of carbonyl O
        'fr_C_O_noCOO',         # Number of carbonyl O, excluding COOH
        'fr_NH0',               # Number of Tertiary amines
        'fr_NH1',               # Number of Secondary amines
        'fr_NH2',               # Number of Primary amines
        'fr_allylic_oxid',      # Number of allylic oxidation sites excluding steroid dienone
        'fr_dihydropyridine',   # Number of dihydropyridines
        'fr_methoxy',           # Number of methoxy groups -OCH3
        'fr_nitrile',           # Number of nitriles
        'fr_nitro',             # Number of nitro groups
        'fr_nitroso',           # Number of nitroso groups, excluding NO2
        'fr_piperdine',         # Number of piperdine rings
    ]
    # Descriptors that were listed but switched off in the original selection
    # (add the name to `selected` to include it):
    #   BertzCT, Ipc, NumAliphaticCarbocycles, NumAliphaticHeterocycles,
    #   NumAliphaticRings, NumAromaticCarbocycles, NumAromaticHeterocycles,
    #   NumAromaticRings, NumRotatableBonds, NumSaturatedCarbocycles,
    #   NumSaturatedHeterocycles, NumSaturatedRings, RingCount, MolLogP,
    #   fr_ArN, fr_Ar_COO, fr_Ar_N, fr_Ar_NH, fr_Ar_OH, fr_C_S, fr_HOCCN,
    #   fr_Imine, fr_N_O, fr_Ndealkylation1, fr_Ndealkylation2, fr_Nhpyrrole,
    #   fr_SH, fr_aldehyde, fr_alkyl_carbamate, fr_alkyl_halide, fr_amide,
    #   fr_amidine, fr_aniline, fr_aryl_methyl, fr_benzene, fr_benzodiazepine,
    #   fr_bicyclic, fr_diazo, fr_hdrzine, fr_hdrzone, fr_imidazole, fr_imide,
    #   fr_morpholine, fr_nitro_arom, fr_nitro_arom_nonortho, fr_piperzine,
    #   fr_priamide, fr_pyridine, fr_quatN, fr_unbrch_alkane

    calc = MoleculeDescriptors.MolecularDescriptorCalculator(selected)
    return [round(value, 2) for value in calc.CalcDescriptors(rdmols)]


In [28]:

import glob  # no longer used in this cell; kept in case other cells rely on it

# Descriptor vector for each metal complex, read from <metal>_TPP/xtbtopo.mol.
all_desc_value = []
for metal in metal_name:
    file_location = os.path.join(f'{metal}_TPP','xtbtopo.mol')
    output = Chem.MolFromMolFile(file_location, sanitize=False, strictParsing=False)
    if output is None:
        # Fail loudly rather than computing descriptors on a missing molecule.
        raise ValueError(f'RDKit could not parse {file_location}')
    all_desc_value.append(get_descriptors(output))


def _read_column(path, column):
    """Read ``path`` and return, per line, the space-separated field at
    ``column`` as a float rounded to three decimals.

    Uses ``with`` so the file is closed; the original comprehensions left
    their handles open.
    """
    with open(path) as handle:
        return [
            round(float(x.split(' ')[column].strip('\n')), 3)
            for x in handle
        ]


HLvalue = _read_column('all_HL-gap_TPP.txt', 4)
IPvalue = _read_column('all_IP_TPP.txt', 3)
EAvalue = _read_column('all_EA_TPP.txt', 3)

# One [IP, EA, H-L gap] row per entry, as numbers (together_V2) and as their
# string form (together).
together_V2 = [
    [IPvalue[i], EAvalue[i], HLvalue[i]]
    for i in range(len(HLvalue))
]
together = [str(values) for values in together_V2]

# Written once. The old loop reopened the file on every iteration without
# closing the previous handle, and failed with NameError on close() when
# there were no rows.
with open('all_data_TPP.txt', 'w') as all_values:
    all_values.write('IP, EA, H-L gap\n')
    all_values.write("\n".join(together))


# Per-metal label: [atomic number, 0, 1].
name_no = []
metal_AN = ['26','27','28','29','30']
for AN in metal_AN:
    name_no.append([int(AN), 0, 1])


# Full feature row per metal: descriptors + label + [IP, EA, H-L gap].
total = [
    str(all_desc_value[i] + name_no[i] + together_V2[i])
    for i in range(len(HLvalue))
]
with open('total_data_TPP.txt', 'w') as tot_values:
    tot_values.write("\n".join(total))

