In [1]:
import pandas as pd
from rdkit import Chem
import rdkit.Chem.rdMolDescriptors as MolDescriptors
import numpy as np
from copy import deepcopy
# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
dfLNMO = pd.read_excel('measuredPerformance.xlsx')
dfLNMO['Add1Mult'] = dfLNMO['AddWeightPercent1']/0.25    # Normalize to 0.25 wt% increments
dfLNMO['Add1Mult'] = dfLNMO['Add1Mult'].astype(int)
dfLNMO['Add2Mult'] = dfLNMO['AddWeightPercent2']/0.25
dfLNMO['Add2Mult'] = dfLNMO['Add2Mult'].astype(int)
dfLNMO[['Name','AddWeightPercent1','AddWeightPercent2','IniImpedance','FinImpedance','DeltaImpedance']]

Unnamed: 0,Name,AddWeightPercent1,AddWeightPercent2,IniImpedance,FinImpedance,DeltaImpedance
0,LiDFOB_1+LDF_1,1.0,1.0,33.59,50.79,17.2
1,VC_1+LDF_1,1.0,1.0,58.34,65.58,7.24
2,PBE_1+LDF_1,1.0,1.0,43.76,71.36,27.6
3,tVCBO_0.25+LDF_1,0.25,1.0,35.36,61.18,25.82
4,LiDFOB_1+MS_1,1.0,1.0,28.98,44.21,15.23
5,VC_1+MS_1,1.0,1.0,36.17,51.01,14.84
6,PBE_1+MS_1,1.0,1.0,44.63,71.76,27.13
7,tVCBO_0.25+MS_1,0.25,1.0,32.44,51.26,18.82
8,LiDFOB_1+TMSPi_1,1.0,1.0,28.4,49.01,20.61
9,VC_1+TMSPi_1,1.0,1.0,36.65,60.01,23.36


In [3]:
# Load the cathode and anode additives
dfCathodeMatrix = pd.read_excel('AnodeCathodeMatrix.xlsx', sheet_name='cathode additives')
dfAnodeMatrix = pd.read_excel('AnodeCathodeMatrix.xlsx', sheet_name='anode additives')

In [4]:
# Make anode and cathode dictionaries
anodeDict = {}
cathodeDict = {}
allSMILES = []
for name,sml in zip(dfAnodeMatrix.Name, dfAnodeMatrix.SMILES):
    canonSml = Chem.MolToSmiles(Chem.MolFromSmiles(sml))
    anodeDict[name]=canonSml
    allSMILES.append(canonSml)
for name,sml in zip(dfCathodeMatrix.Name, dfCathodeMatrix.SMILES):
    canonSml = Chem.MolToSmiles(Chem.MolFromSmiles(sml))
    cathodeDict[name]=canonSml
    allSMILES.append(canonSml)

In [5]:
atomFeatList = []
for sml in allSMILES:
    mol = Chem.MolFromSmiles(sml)
    atom_seq = mol.GetAtoms()
    for n1, atom in enumerate(atom_seq):
        charge = atom.GetFormalCharge()
        inRing = atom.IsInRing()
        atomID = atom.GetSymbol()
        if charge!=0:
            if charge>0: atomID = atomID + '[+' + str(charge) + ']'
            else: atomID = atomID + '[' + str(charge) + ']'
        if atom.GetDegree()>0:
            atomID = atomID + '_' + str(atom.GetDegree())
        if inRing: atomID += '_inRing'
        if atomID not in atomFeatList: atomFeatList.append(atomID)
atomFeatList += ['aromaticRing', 'aliphaticRing']

In [6]:
featDict  = {feat : np.zeros(dfLNMO.shape[0]).astype(int) for feat in atomFeatList}    # Set up empty dict to count atom features

In [7]:
def computeAtomFeats(smilesList): 
    """Compute features from SMILES based on atom frequency

    Args:
    smilesList: list of SMILES to compute features

    Returns:
    A dataframe of features
    """
    featDict  = {feat : np.zeros(len(smilesList)).astype(int) for feat in atomFeatList}
    for i,sml in enumerate(smilesList):
        atomIDList = []
        n_aromaRing = []
        n_aliphRing = []
        if sml=='None' or pd.isnull(sml):
            for x in range(len(smilesList)):
                atomIDList.append(0)
        else:
            print(sml)
            mol = Chem.MolFromSmiles(sml)
            atom_seq = mol.GetAtoms()
            for n1, atom in enumerate(atom_seq):
                charge = atom.GetFormalCharge()
                inRing = atom.IsInRing()
                atomID = atom.GetSymbol()
                if charge!=0:
                    if charge>0: atomID = atomID + '[+' + str(charge) + ']'
                    else: atomID = atomID + '[' + str(charge) + ']'
                if atom.GetDegree()>0:
                    atomID = atomID + '_' + str(atom.GetDegree())     # GetDegree() does not count bonded Hs
                if inRing: atomID += '_inRing'
                atomIDList.append(atomID)
            n_aromaRing = MolDescriptors.CalcNumAromaticRings(mol)
            for a in range(n_aromaRing):
                atomIDList.append('n_aromaticRing')
            n_aliphRing = MolDescriptors.CalcNumAliphaticRings(mol)
            for a in range(n_aliphRing):
                atomIDList.append('n_aliphaticRing')
        for key in featDict.keys():
            featDict[key][i]=atomIDList.count(key)
        
    return pd.DataFrame.from_dict(featDict)


In [8]:
add1Feats = computeAtomFeats(dfLNMO['SMILES1'])
add1Feats = add1Feats.mul(dfLNMO['Add1Mult'], axis=0)

add2Feats = computeAtomFeats(dfLNMO['SMILES2'])
add2Feats = add2Feats.mul(dfLNMO['Add2Mult'], axis=0)

addFeats = add1Feats + add2Feats

outDF = deepcopy(addFeats)

outDF['Name'] = dfLNMO['Name']
outDF['SMILES1'] = dfLNMO['SMILES1']
outDF['SMILES2'] = dfLNMO['SMILES2']
for col_to_move in ['SMILES2', 'SMILES1', 'Name']:
    outDF.insert(0, col_to_move, outDF.pop(col_to_move))
outDF.to_excel('features.xlsx', index=None)

 O=C1O[B-](F)(OC1=O)F.[Li+]
O=C1OC=CO1
C1(B2OCCCO2)=CC=CC=C1
C=CB1OB(C=C)OB(C=C)O1.C2=CC=CC=N2
O=C1O[B-](F)(OC1=O)F.[Li+]
O=C1OC=CO1
C1(B2OCCCO2)=CC=CC=C1
C=CB1OB(C=C)OB(C=C)O1.C2=CC=CC=N2
O=C1O[B-](F)(OC1=O)F.[Li+]
O=C1OC=CO1
C1(B2OCCCO2)=CC=CC=C1
C=CB1OB(C=C)OB(C=C)O1.C2=CC=CC=N2
C=CB1OB(C=C)OB(C=C)O1.C2=CC=CC=N2
O=C1O[B-](F)(OC1=O)F.[Li+]
O=S1(OCC=C1)=O
[O-]C(C([O-])=O)=O.[Li+].[Li+]
O=C1C(O[B-]2(OC(C(O2)=O)=O)O1)=O.[Li+]
CN(C1)CC(OB(C=C)OC1=O)=O
O=C1C(O[B-]2(OC(C(O2)=O)=O)O1)=O.[Li+]
O=C1O[B-](F)(OC1=O)F.[Li+]
CN(C1)CC(OB(C=C)OC1=O)=O
O=[P-](F)(F)=O.[Li+]
O=[P-](F)(F)=O.[Li+]
O=[P-](F)(F)=O.[Li+]
O=[P-](F)(F)=O.[Li+]
O=C1CC(O[P-](F)(F)(F)(F)O1)=O.[Li+]
O=C1CC(O[P-](F)(F)(F)(F)O1)=O.[Li+]
O=C1CC(O[P-](F)(F)(F)(F)O1)=O.[Li+]
O=C1CC(O[P-](F)(F)(F)(F)O1)=O.[Li+]
C[Si](C)(C)OP(O[Si](C)(C)C)O[Si](C)(C)C
C[Si](C)(C)OP(O[Si](C)(C)C)O[Si](C)(C)C
C[Si](C)(C)OP(O[Si](C)(C)C)O[Si](C)(C)C
C[Si](C)(C)OP(O[Si](C)(C)C)O[Si](C)(C)C
C[Si](C)(C)OP(O[Si](C)(C)C)O[Si](C)(C)C
CC1=CC=C(S(=O)(N=C=O)=O)C=C

In [9]:
import itertools
addMixCombs = list(itertools.product(anodeDict.keys(), cathodeDict.keys()))    # All possible combinations of cathode and anode additives
print(addMixCombs,len(addMixCombs))

nameList = []
smiles1 = []
smiles2 = []
add1wtpc = []
add2wtpc = []
add1mult = []
add2mult = []
for combo in addMixCombs:
    for add1wt in [1.]:
        for add2wt in [1.]:
            name = combo[0]+'_'+str(int(add1wt))+'+'+combo[1]+'_'+str(int(add2wt))
            if name not in dfLNMO['Name'].to_list():
                nameList.append(name)
                smiles1.append(anodeDict[combo[0]])
                smiles2.append(cathodeDict[combo[1]])
                add1wtpc.append(add1wt)
                add1mult.append(int(add1wt/0.25))
                add2wtpc.append(add2wt)
                add2mult.append(int(add2wt/0.25))
            else:
                print(name,'already in dataset')

dfCombo = pd.DataFrame(list(zip(nameList,smiles1,smiles2,add1wtpc,add2wtpc,add1mult,add2mult)), columns=['Name','SMILES1','SMILES2','AddWeightPercent1','AddWeightPercent2','Add1Mult','Add2Mult'])

[('LiDFOB', 'LDF'), ('LiDFOB', 'TMSPi'), ('LiDFOB', 'MS'), ('LiDFOB', 'DDD'), ('LiDFOB', 'LiBFEP'), ('LiDFOB', 'BA'), ('LiDFOB', 'PTSI'), ('LiDFOB', 'MPS'), ('LiDFOB', 'LiOtC4F9'), ('LiDFOB', 'DEPP'), ('LiDFOB', 'DPDMS'), ('LiDFOB', 'SA'), ('LiDFOB', 'TPP'), ('LiDFOB', 'TMSPa'), ('VC', 'LDF'), ('VC', 'TMSPi'), ('VC', 'MS'), ('VC', 'DDD'), ('VC', 'LiBFEP'), ('VC', 'BA'), ('VC', 'PTSI'), ('VC', 'MPS'), ('VC', 'LiOtC4F9'), ('VC', 'DEPP'), ('VC', 'DPDMS'), ('VC', 'SA'), ('VC', 'TPP'), ('VC', 'TMSPa'), ('PBE', 'LDF'), ('PBE', 'TMSPi'), ('PBE', 'MS'), ('PBE', 'DDD'), ('PBE', 'LiBFEP'), ('PBE', 'BA'), ('PBE', 'PTSI'), ('PBE', 'MPS'), ('PBE', 'LiOtC4F9'), ('PBE', 'DEPP'), ('PBE', 'DPDMS'), ('PBE', 'SA'), ('PBE', 'TPP'), ('PBE', 'TMSPa'), ('tVCBO', 'LDF'), ('tVCBO', 'TMSPi'), ('tVCBO', 'MS'), ('tVCBO', 'DDD'), ('tVCBO', 'LiBFEP'), ('tVCBO', 'BA'), ('tVCBO', 'PTSI'), ('tVCBO', 'MPS'), ('tVCBO', 'LiOtC4F9'), ('tVCBO', 'DEPP'), ('tVCBO', 'DPDMS'), ('tVCBO', 'SA'), ('tVCBO', 'TPP'), ('tVCBO', 'TMSP

In [10]:
newAdd1Feats = computeAtomFeats(dfCombo['SMILES1'])
newAdd1Feats = newAdd1Feats.mul(dfCombo['Add1Mult'], axis=0)

newAdd2Feats = computeAtomFeats(dfCombo['SMILES2'])
newAdd2Feats = newAdd2Feats.mul(dfCombo['Add2Mult'], axis=0)

newAddFeats = newAdd1Feats + newAdd2Feats

outDF = deepcopy(newAddFeats)

outDF['Name'] = dfCombo['Name']
outDF['SMILES1'] = dfCombo['SMILES1']
outDF['SMILES2'] = dfCombo['SMILES2']
for col_to_move in ['SMILES2', 'SMILES1', 'Name']:
    outDF.insert(0, col_to_move, outDF.pop(col_to_move))
outDF.to_excel('UnknownFeatures.xlsx', index=None)

O=C1O[B-](F)(F)OC1=O.[Li+]
O=C1O[B-](F)(F)OC1=O.[Li+]
O=C1O[B-](F)(F)OC1=O.[Li+]
O=C1O[B-](F)(F)OC1=O.[Li+]
O=C1O[B-](F)(F)OC1=O.[Li+]
O=C1O[B-](F)(F)OC1=O.[Li+]
O=C1O[B-](F)(F)OC1=O.[Li+]
O=C1O[B-](F)(F)OC1=O.[Li+]
O=C1O[B-](F)(F)OC1=O.[Li+]
O=C1O[B-](F)(F)OC1=O.[Li+]
O=c1occo1
O=c1occo1
O=c1occo1
O=c1occo1
O=c1occo1
O=c1occo1
O=c1occo1
O=c1occo1
O=c1occo1
O=c1occo1
O=c1occo1
c1ccc(B2OCCCO2)cc1
c1ccc(B2OCCCO2)cc1
c1ccc(B2OCCCO2)cc1
c1ccc(B2OCCCO2)cc1
c1ccc(B2OCCCO2)cc1
c1ccc(B2OCCCO2)cc1
c1ccc(B2OCCCO2)cc1
c1ccc(B2OCCCO2)cc1
c1ccc(B2OCCCO2)cc1
c1ccc(B2OCCCO2)cc1
c1ccc(B2OCCCO2)cc1
C=CB1OB(C=C)OB(C=C)O1.c1ccncc1
C=CB1OB(C=C)OB(C=C)O1.c1ccncc1
C=CB1OB(C=C)OB(C=C)O1.c1ccncc1
C=CB1OB(C=C)OB(C=C)O1.c1ccncc1
C=CB1OB(C=C)OB(C=C)O1.c1ccncc1
C=CB1OB(C=C)OB(C=C)O1.c1ccncc1
C=CB1OB(C=C)OB(C=C)O1.c1ccncc1
C=CB1OB(C=C)OB(C=C)O1.c1ccncc1
C=CB1OB(C=C)OB(C=C)O1.c1ccncc1
C=CB1OB(C=C)OB(C=C)O1.c1ccncc1
C=CB1OB(C=C)OB(C=C)O1.c1ccncc1
C=CB1OB(C=C)OB(C=C)O1.c1ccncc1
C=CB1OB(C=C)OB(C=C)O1.c1ccncc1
C=CB1OB(