In [1]:
import os
 
import pandas as pd
import numpy as np
import plotly.express as px
 
import rdkit.Chem as Chem
from rdkit.Chem import MACCSkeys, AllChem
from rdkit.DataStructs.cDataStructs import ExplicitBitVect
from rdkit.Chem.AtomPairs import Pairs, Torsions
from rdkit.Chem import rdMolDescriptors
from rdkit.Avalon import pyAvalonTools
from rdkit.Chem.Draw import IPythonConsole
 
from mordred import Calculator, descriptors
 
import pickle
 
# Mordred Descriptor Calculation Function
 
def mordred(smiles_list, name='', dropna=False):
    """
    Compute all mordred descriptors for a list of SMILES strings.

    Parameters
    ----------
    smiles_list : iterable of str
        SMILES strings, one molecule per entry. The iterable is materialised
        into a list so it can be traversed and also stored in the output.
    name : str, optional
        Prefix prepended to every output column name, including the
        ``SMILES`` column.
    dropna : bool, optional
        If true, drop every column that contains at least one missing value.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES. The first column is ``name + 'SMILES'``;
        it is followed by one column per descriptor of ``Calculator(descriptors)``,
        named ``name + str(descriptor)``. An entry whose descriptor calculation
        raised an exception is kept as a row filled with NaN.
    """
    smiles_list = list(smiles_list)

    # Initialize descriptor calculator with all descriptors
    calc = Calculator(descriptors)
    descriptor_names = list(calc.descriptors)
    n_descriptors = len(descriptor_names)

    output = []
    for entry in smiles_list:
        try:
            data_i = calc(Chem.MolFromSmiles(entry)).fill_missing()
        except Exception:
            # Best effort: keep the row (as NaN) so the output stays aligned
            # with smiles_list. `Exception` rather than a bare `except` so
            # that KeyboardInterrupt / SystemExit still propagate.
            # `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
            data_i = np.full(n_descriptors, np.nan)

        output.append(list(data_i))

    columns = [name + str(entry) for entry in descriptor_names]

    df = pd.DataFrame(data=output, columns=columns)
    df.insert(0, name + 'SMILES', smiles_list)

    if dropna:
        # axis=1: remove descriptor columns, never rows.
        df = df.dropna(axis=1)

    return df
 
def Pred_RXN_NiBor(smiles_list,
                   pred_space_csv='Prediction_RXN_NiBorylationPredictionSet.csv',
                   colnames_csv='Prediction_RXN_NiBorylationColnames.csv',
                   model_path='Predict_RXN_NiBorylation.sav'):
    """
    Predict an outcome for one electrophile under every condition listed in
    the prediction-set CSV, using the pickled model.

    Parameters
    ----------
    smiles_list : iterable of str
        Must contain exactly one SMILES string (the electrophile). Its mordred
        descriptors are repeated once per row of the prediction set.
    pred_space_csv : str, optional
        CSV with one row per condition. Must contain the columns
        ``Ligand_inchi``, ``MeOH`` and ``Ligand``; any column whose name
        matches ``Electrophile`` is discarded and replaced by the descriptors
        computed here.
    colnames_csv : str, optional
        CSV whose header (minus ``Electrophile_inchi`` and ``Ligand_inchi``)
        gives the feature columns, in the order the model expects.
    model_path : str, optional
        Pickled model exposing ``predict``.

    Returns
    -------
    pandas.DataFrame
        One row per condition with the columns ``Ligand_inchi``, ``MeOH``,
        ``Ligand``, ``Solvent`` and ``GBM_Prediction``.

    Raises
    ------
    ValueError
        If ``smiles_list`` does not contain exactly one entry.
    KeyError
        If a required column is missing, e.g. because a descriptor the model
        needs could not be computed for this molecule and was dropped.
    """
    smiles_list = list(smiles_list)
    if len(smiles_list) != 1:
        # More than one molecule would produce more feature rows than
        # conditions and fail later with an opaque length mismatch.
        raise ValueError(
            'Pred_RXN_NiBor expects exactly one SMILES string, got %d'
            % len(smiles_list)
        )

    pred_space = pd.read_csv(pred_space_csv)

    # Result table. `.copy()` makes it independent of `pred_space`, so the
    # column assignments below do not raise SettingWithCopyWarning.
    pred_df = pred_space[['Ligand_inchi', 'MeOH', 'Ligand']].copy()

    # Human-readable solvent label derived from the MeOH flag. The column is
    # object-typed up front so writing strings into it is not a
    # dtype-incompatible assignment; values other than 0/1 are left unchanged.
    pred_df['Solvent'] = pred_df['MeOH'].astype(object)
    pred_df.loc[pred_df['MeOH'] == 1, 'Solvent'] = 'Methanol'
    pred_df.loc[pred_df['MeOH'] == 0, 'Solvent'] = 'Ethanol'

    # Electrophile descriptors; columns containing missing values are dropped.
    electrophile = mordred(smiles_list, dropna=True)
    electrophile.columns = ['Electrophile_' + str(col) for col in electrophile.columns]
    electrophile = electrophile.drop(columns=['Electrophile_SMILES'])

    # Repeat the single descriptor row once per condition (previously a
    # hard-coded 48) so the positional join below lines up row for row.
    electrophile = pd.concat([electrophile] * len(pred_space), ignore_index=True)

    # Condition-side features: everything in the prediction set except the
    # stale electrophile columns and the ligand identifiers.
    stale_columns = list(pred_space.filter(regex='Electrophile').columns)
    condition_features = pred_space.drop(columns=stale_columns)
    condition_features = condition_features.drop(['Ligand_inchi', 'Ligand'], axis=1)

    features = electrophile.join(condition_features)

    # Select and order the feature columns as listed in the column-name file.
    colnames = pd.read_csv(colnames_csv)
    colnames = colnames.drop(['Electrophile_inchi', 'Ligand_inchi'], axis=1)
    features = features[list(colnames.columns.values)]

    # NOTE: pickle.load can execute arbitrary code contained in the file;
    # only load model files that come from a trusted source.
    with open(model_path, 'rb') as file:
        model = pickle.load(file)

    pred_df['GBM_Prediction'] = model.predict(features)
    return pred_df

In [7]:
# Run the prediction for one electrophile (given as SMILES) and label every
# row of the result with that electrophile's InChI.
smiles_21 = 'BrC1=CC=C(C(F)(F)F)C=C1'
inchi_21 = 'InChI=1S/C7H4BrF3/c8-6-3-1-5(2-4-6)7(9,10)11/h1-4H'
df_21 = Pred_RXN_NiBor([smiles_21])
df_21['Electrophile_inchi'] = inchi_21
df_21.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


Unnamed: 0,Ligand_inchi,MeOH,Ligand,Solvent,GBM_Prediction,Electrophile_inchi
0,InChI=1S/C24H31P/c1-4-12-20(13-5-1)23-18-10-11...,0,Cy-JohnPhos,Ethanol,66.010356,"InChI=1S/C7H4BrF3/c8-6-3-1-5(2-4-6)7(9,10)11/h..."
1,InChI=1S/C26H23O2P/c1-27-23-17-11-18-24(28-2)2...,0,Ph-SPhos,Ethanol,62.374434,"InChI=1S/C7H4BrF3/c8-6-3-1-5(2-4-6)7(9,10)11/h..."
2,InChI=1S/C36H39N4P/c1-6-16-28(17-7-1)34-36(35(...,0,Cy-BippyPhos,Ethanol,73.763022,"InChI=1S/C7H4BrF3/c8-6-3-1-5(2-4-6)7(9,10)11/h..."
3,InChI=1S/C35H39OP/c1-36-31-23-13-12-22-30(31)3...,0,MeO-KITPHOS,Ethanol,66.412118,"InChI=1S/C7H4BrF3/c8-6-3-1-5(2-4-6)7(9,10)11/h..."
4,InChI=1S/C25H33P/c1-20-12-8-9-17-23(20)24-18-1...,0,MePhos,Ethanol,55.115893,"InChI=1S/C7H4BrF3/c8-6-3-1-5(2-4-6)7(9,10)11/h..."


In [8]:
# Run the prediction for one electrophile (given as SMILES) and label every
# row of the result with that electrophile's InChI.
smiles_6 = 'O=S(OC1=CC=C(C2=CC=CC=C2)C=C1)(N(C)C)=O'
inchi_6 = 'InChI=1S/C14H15NO3S/c1-15(2)19(16,17)18-14-10-8-13(9-11-14)12-6-4-3-5-7-12/h3-11H,1-2H3'
df_6 = Pred_RXN_NiBor([smiles_6])
df_6['Electrophile_inchi'] = inchi_6
df_6.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


Unnamed: 0,Ligand_inchi,MeOH,Ligand,Solvent,GBM_Prediction,Electrophile_inchi
0,InChI=1S/C24H31P/c1-4-12-20(13-5-1)23-18-10-11...,0,Cy-JohnPhos,Ethanol,54.472874,"InChI=1S/C14H15NO3S/c1-15(2)19(16,17)18-14-10-..."
1,InChI=1S/C26H23O2P/c1-27-23-17-11-18-24(28-2)2...,0,Ph-SPhos,Ethanol,42.041647,"InChI=1S/C14H15NO3S/c1-15(2)19(16,17)18-14-10-..."
2,InChI=1S/C36H39N4P/c1-6-16-28(17-7-1)34-36(35(...,0,Cy-BippyPhos,Ethanol,33.576155,"InChI=1S/C14H15NO3S/c1-15(2)19(16,17)18-14-10-..."
3,InChI=1S/C35H39OP/c1-36-31-23-13-12-22-30(31)3...,0,MeO-KITPHOS,Ethanol,44.839523,"InChI=1S/C14H15NO3S/c1-15(2)19(16,17)18-14-10-..."
4,InChI=1S/C25H33P/c1-20-12-8-9-17-23(20)24-18-1...,0,MePhos,Ethanol,35.200716,"InChI=1S/C14H15NO3S/c1-15(2)19(16,17)18-14-10-..."


In [9]:
# Run the prediction for one electrophile (given as SMILES) and label every
# row of the result with that electrophile's InChI.
smiles_23 = 'FC1=CC=C(Cl)C=C1OC'
inchi_23 = 'InChI=1S/C7H6ClFO/c1-10-7-4-5(8)2-3-6(7)9/h2-4H,1H3'
df_23 = Pred_RXN_NiBor([smiles_23])
df_23['Electrophile_inchi'] = inchi_23
df_23.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


Unnamed: 0,Ligand_inchi,MeOH,Ligand,Solvent,GBM_Prediction,Electrophile_inchi
0,InChI=1S/C24H31P/c1-4-12-20(13-5-1)23-18-10-11...,0,Cy-JohnPhos,Ethanol,66.419079,InChI=1S/C7H6ClFO/c1-10-7-4-5(8)2-3-6(7)9/h2-4...
1,InChI=1S/C26H23O2P/c1-27-23-17-11-18-24(28-2)2...,0,Ph-SPhos,Ethanol,60.804868,InChI=1S/C7H6ClFO/c1-10-7-4-5(8)2-3-6(7)9/h2-4...
2,InChI=1S/C36H39N4P/c1-6-16-28(17-7-1)34-36(35(...,0,Cy-BippyPhos,Ethanol,62.468358,InChI=1S/C7H6ClFO/c1-10-7-4-5(8)2-3-6(7)9/h2-4...
3,InChI=1S/C35H39OP/c1-36-31-23-13-12-22-30(31)3...,0,MeO-KITPHOS,Ethanol,62.29279,InChI=1S/C7H6ClFO/c1-10-7-4-5(8)2-3-6(7)9/h2-4...
4,InChI=1S/C25H33P/c1-20-12-8-9-17-23(20)24-18-1...,0,MePhos,Ethanol,53.66936,InChI=1S/C7H6ClFO/c1-10-7-4-5(8)2-3-6(7)9/h2-4...


In [10]:
# Run the prediction for one electrophile (given as SMILES) and label every
# row of the result with that electrophile's InChI.
smiles_19 = 'BrC1=CC(OCO2)=C2C=C1'
inchi_19 = 'InChI=1S/C7H5BrO2/c8-5-1-2-6-7(3-5)10-4-9-6/h1-3H,4H2'
df_19 = Pred_RXN_NiBor([smiles_19])
df_19['Electrophile_inchi'] = inchi_19
df_19.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


Unnamed: 0,Ligand_inchi,MeOH,Ligand,Solvent,GBM_Prediction,Electrophile_inchi
0,InChI=1S/C24H31P/c1-4-12-20(13-5-1)23-18-10-11...,0,Cy-JohnPhos,Ethanol,79.658597,InChI=1S/C7H5BrO2/c8-5-1-2-6-7(3-5)10-4-9-6/h1...
1,InChI=1S/C26H23O2P/c1-27-23-17-11-18-24(28-2)2...,0,Ph-SPhos,Ethanol,77.202233,InChI=1S/C7H5BrO2/c8-5-1-2-6-7(3-5)10-4-9-6/h1...
2,InChI=1S/C36H39N4P/c1-6-16-28(17-7-1)34-36(35(...,0,Cy-BippyPhos,Ethanol,76.627528,InChI=1S/C7H5BrO2/c8-5-1-2-6-7(3-5)10-4-9-6/h1...
3,InChI=1S/C35H39OP/c1-36-31-23-13-12-22-30(31)3...,0,MeO-KITPHOS,Ethanol,72.292269,InChI=1S/C7H5BrO2/c8-5-1-2-6-7(3-5)10-4-9-6/h1...
4,InChI=1S/C25H33P/c1-20-12-8-9-17-23(20)24-18-1...,0,MePhos,Ethanol,62.212495,InChI=1S/C7H5BrO2/c8-5-1-2-6-7(3-5)10-4-9-6/h1...


In [12]:
# Combine the df_21 and df_6 prediction tables row-wise.
# pd.concat replaces DataFrame.append (removed in pandas 2.0). Like append's
# default, it keeps each table's original row index. It already returns a new
# frame, so the former `df_val.copy` line (which lacked parentheses and bound
# the method instead of a DataFrame) is no longer needed.
df_val = pd.concat([df_21, df_6])
df_val.head()

Unnamed: 0,Ligand_inchi,MeOH,Ligand,Solvent,GBM_Prediction,Electrophile_inchi
0,InChI=1S/C24H31P/c1-4-12-20(13-5-1)23-18-10-11...,0,Cy-JohnPhos,Ethanol,66.010356,"InChI=1S/C7H4BrF3/c8-6-3-1-5(2-4-6)7(9,10)11/h..."
1,InChI=1S/C26H23O2P/c1-27-23-17-11-18-24(28-2)2...,0,Ph-SPhos,Ethanol,62.374434,"InChI=1S/C7H4BrF3/c8-6-3-1-5(2-4-6)7(9,10)11/h..."
2,InChI=1S/C36H39N4P/c1-6-16-28(17-7-1)34-36(35(...,0,Cy-BippyPhos,Ethanol,73.763022,"InChI=1S/C7H4BrF3/c8-6-3-1-5(2-4-6)7(9,10)11/h..."
3,InChI=1S/C35H39OP/c1-36-31-23-13-12-22-30(31)3...,0,MeO-KITPHOS,Ethanol,66.412118,"InChI=1S/C7H4BrF3/c8-6-3-1-5(2-4-6)7(9,10)11/h..."
4,InChI=1S/C25H33P/c1-20-12-8-9-17-23(20)24-18-1...,0,MePhos,Ethanol,55.115893,"InChI=1S/C7H4BrF3/c8-6-3-1-5(2-4-6)7(9,10)11/h..."
