Skip to content

Commit 7d698ef

Browse files
rajarshi authored and egonw committed
Updated bcut descriptor to check for undefined values before getting eigenvalues. Added test file and test case. Addresses bug 3489559
Change-Id: Ic3fd58f387f8b8fb060d07accb790f27c2d8a473 Signed-off-by: Egon Willighagen <egonw@users.sourceforge.net>
1 parent 7c7248a commit 7d698ef

File tree

3 files changed

+158
-13
lines changed

3 files changed

+158
-13
lines changed

src/main/org/openscience/cdk/qsar/descriptors/molecular/BCUTDescriptor.java

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@
128128
@TestClass("org.openscience.cdk.qsar.descriptors.molecular.BCUTDescriptorTest")
129129
public class BCUTDescriptor implements IMolecularDescriptor {
130130
private static ILoggingTool logger =
131-
LoggingToolFactory.createLoggingTool(BCUTDescriptor.class);
131+
LoggingToolFactory.createLoggingTool(BCUTDescriptor.class);
132132

133133
// the number of negative & positive eigenvalues
134134
// to return for each class of BCUT descriptor
@@ -201,7 +201,7 @@ public Object[] getParameters() {
201201
return (params);
202202
}
203203

204-
@TestMethod(value="testNamesConsistency")
204+
@TestMethod("testNamesConsistency")
205205
public String[] getDescriptorNames() {
206206
String[] names;
207207
String[] suffix = {"w", "c", "p"};
@@ -250,12 +250,20 @@ public Object getParameterType(String name) {
250250
return (object);
251251
}
252252

253+
private boolean hasUndefined(double[][] m) {
254+
for (double[] aM : m) {
255+
for (int j = 0; j < m[0].length; j++) {
256+
if (Double.isNaN(aM[j]) || Double.isInfinite(aM[j])) return true;
257+
}
258+
}
259+
return false;
260+
}
253261

254262
static private class BurdenMatrix {
255263

256264
static double[][] evalMatrix(IAtomContainer atomContainer, double[] vsd) {
257265
IAtomContainer local = AtomContainerManipulator.removeHydrogens(atomContainer);
258-
266+
259267
int natom = local.getAtomCount();
260268
double[][] matrix = new double[natom][natom];
261269
for (int i = 0; i < natom; i++) {
@@ -303,7 +311,7 @@ static double[][] evalMatrix(IAtomContainer atomContainer, double[] vsd) {
303311
* @return An ArrayList containing the descriptors. The default is to return
304312
* all calculated eigenvalues of the Burden matrices in the order described
305313
* above. If a parameter list was supplied, then only the specified number
306-
* of highest and lowest eigenvalues (for each class of BCUT) will be returned.
314+
* of highest and lowest eigenvalues (for each class of BCUT) will be returned.
307315
*/
308316
@TestMethod("testCalculate_IAtomContainer")
309317
public DescriptorValue calculate(IAtomContainer container) {
@@ -318,7 +326,7 @@ public DescriptorValue calculate(IAtomContainer container) {
318326

319327
// add H's in case they're not present
320328
try {
321-
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
329+
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
322330
CDKHydrogenAdder hAdder = CDKHydrogenAdder.getInstance(molecule.getBuilder());
323331
hAdder.addImplicitHydrogens(molecule);
324332
AtomContainerManipulator.convertImplicitToExplicitHydrogens(molecule);
@@ -336,7 +344,7 @@ public DescriptorValue calculate(IAtomContainer container) {
336344
try {
337345
CDKHueckelAromaticityDetector.detectAromaticity(molecule);
338346
} catch (CDKException e) {
339-
return getDummyDescriptorValue(new CDKException("Error in aromaticity perception: "+e.getMessage()));
347+
return getDummyDescriptorValue(new CDKException("Error in aromaticity perception: " + e.getMessage()));
340348
}
341349
}
342350

@@ -364,10 +372,12 @@ public DescriptorValue calculate(IAtomContainer container) {
364372
}
365373

366374
double[][] burdenMatrix = BurdenMatrix.evalMatrix(molecule, diagvalue);
375+
// Report a dummy result when the Burden matrix contains NaN or infinite
// entries, so that such a matrix is never handed to the eigenvalue decomposition.
if (hasUndefined(burdenMatrix)) {
    return getDummyDescriptorValue(new CDKException("Burden matrix has undefined values"));
}
367377
Matrix matrix = new Matrix(burdenMatrix);
368378
EigenvalueDecomposition eigenDecomposition = new EigenvalueDecomposition(matrix);
369379
double[] eval1 = eigenDecomposition.getRealEigenvalues();
370-
380+
371381
// get charge weighted BCUT
372382
LonePairElectronChecker lpcheck = new LonePairElectronChecker();
373383
GasteigerPEPEPartialCharges pepe;
@@ -393,7 +403,9 @@ public DescriptorValue calculate(IAtomContainer container) {
393403
diagvalue[counter] = molecule.getAtom(i).getCharge();
394404
counter++;
395405
}
396-
burdenMatrix = BurdenMatrix.evalMatrix(molecule, diagvalue);
406+
burdenMatrix = BurdenMatrix.evalMatrix(molecule, diagvalue);
407+
// Report a dummy result when the Burden matrix contains NaN or infinite
// entries, so that such a matrix is never handed to the eigenvalue decomposition.
if (hasUndefined(burdenMatrix)) {
    return getDummyDescriptorValue(new CDKException("Burden matrix has undefined values"));
}
397409
matrix = new Matrix(burdenMatrix);
398410
eigenDecomposition = new EigenvalueDecomposition(matrix);
399411
double[] eval2 = eigenDecomposition.getRealEigenvalues();
@@ -405,11 +417,13 @@ public DescriptorValue calculate(IAtomContainer container) {
405417
Polarizability pol = new Polarizability();
406418
counter = 0;
407419
for (int i = 0; i < molecule.getAtomCount(); i++) {
408-
if (molecule.getAtom(i).getSymbol().equals("H")) continue;
420+
if (molecule.getAtom(i).getSymbol().equals("H")) continue;
409421
diagvalue[counter] = pol.calculateGHEffectiveAtomPolarizability(molecule, molecule.getAtom(i), false, topoDistance);
410422
counter++;
411423
}
412424
burdenMatrix = BurdenMatrix.evalMatrix(molecule, diagvalue);
425+
// Report a dummy result when the Burden matrix contains NaN or infinite
// entries, so that such a matrix is never handed to the eigenvalue decomposition.
if (hasUndefined(burdenMatrix)) {
    return getDummyDescriptorValue(new CDKException("Burden matrix has undefined values"));
}
413427
matrix = new Matrix(burdenMatrix);
414428
eigenDecomposition = new EigenvalueDecomposition(matrix);
415429
double[] eval3 = eigenDecomposition.getRealEigenvalues();
@@ -435,7 +449,7 @@ public DescriptorValue calculate(IAtomContainer container) {
435449
enhigh = 0;
436450
}
437451

438-
DoubleArrayResult retval = new DoubleArrayResult( (lnlow+enlow+lnhigh+enhigh) * 3);
452+
DoubleArrayResult retval = new DoubleArrayResult((lnlow + enlow + lnhigh + enhigh) * 3);
439453

440454
for (int i = 0; i < lnlow; i++) retval.add(eval1[i]);
441455
for (int i = 0; i < enlow; i++) retval.add(Double.NaN);
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
2+
OpenBabel02181210582D
3+
4+
10 12 0 0 0 0 0 0 0 0999 V2000
5+
1.7872 -1.2465 0.0000 As 0 0 0 0 0
6+
1.7549 -2.3765 0.0000 O 0 0 0 0 0
7+
2.8039 -2.4283 0.0000 As 0 0 0 0 0
8+
1.4020 -3.2378 0.0000 O 0 0 0 0 0
9+
0.0000 -2.4283 0.0000 As 0 0 0 0 0
10+
0.7414 -1.8034 0.0000 O 0 0 0 0 0
11+
0.0000 -0.8094 0.0000 O 0 0 0 0 0
12+
1.4020 0.0000 0.0000 As 0 0 0 0 0
13+
0.9260 -0.8256 0.0000 O 0 0 0 0 0
14+
2.8039 -0.8094 0.0000 O 0 0 0 0 0
15+
1 2 1 0 0 0
16+
1 6 1 0 0 0
17+
1 9 1 0 0 0
18+
2 3 1 0 0 0
19+
3 4 1 0 0 0
20+
3 10 1 0 0 0
21+
4 5 1 0 0 0
22+
5 6 1 0 0 0
23+
5 7 1 0 0 0
24+
7 8 1 0 0 0
25+
8 9 1 0 0 0
26+
8 10 1 0 0 0
27+
M END
28+
> <DSSTox_RID>
29+
20103
30+
31+
> <DSSTox_CID>
32+
103
33+
34+
> <DSSTox_Generic_SID>
35+
20103
36+
37+
> <DSSTox_FileID>
38+
106_CPDBAS_v5d
39+
40+
> <STRUCTURE_Formula>
41+
As4O6
42+
43+
> <STRUCTURE_MolecularWeight>
44+
395.6828
45+
46+
> <STRUCTURE_ChemicalType>
47+
inorganic
48+
49+
> <STRUCTURE_Shown>
50+
tested chemical
51+
52+
> <TestSubstance_ChemicalName>
53+
Arsenious oxide
54+
55+
> <TestSubstance_CASRN>
56+
1327-53-3
57+
58+
> <TestSubstance_Description>
59+
single chemical compound
60+
61+
> <STRUCTURE_ChemicalName_IUPAC>
62+
tricyclo[3.3.1.1~3,7~]tetraarsoxane
63+
64+
> <STRUCTURE_SMILES>
65+
[As]21O[As]3O[As](O1)O[As](O2)O3
66+
67+
> <STRUCTURE_InChI>
68+
InChI=1/As4O6/c5-1-6-3-8-2(5)9-4(7-1)10-3
69+
70+
> <STRUCTURE_InChIKey>
71+
KTTMEOWBIWLMSE-UHFFFAOYAT
72+
73+
> <StudyType>
74+
Carcinogenicity
75+
76+
> <Endpoint>
77+
TD50; Tumor Target Sites
78+
79+
> <Species>
80+
mouse
81+
82+
> <ActivityScore_CPDBAS_Mouse>
83+
0
84+
85+
> <TD50_Mouse_Note>
86+
no positive results
87+
88+
> <TargetSites_Mouse_Male>
89+
no positive results
90+
91+
> <TargetSites_Mouse_Female>
92+
no positive results
93+
94+
> <ActivityOutcome_CPDBAS_Mouse>
95+
inactive
96+
97+
> <ActivityOutcome_CPDBAS_SingleCellCall>
98+
inactive
99+
100+
> <ActivityOutcome_CPDBAS_MultiCellCall>
101+
inactive
102+
103+
> <ActivityOutcome_CPDBAS_MultiCellCall_Details>
104+
multisex inactive
105+
106+
> <ChemicalPage_URL>
107+
http://potency.berkeley.edu/chempages/ARSENIOUS%20OXIDE.html
108+
109+
$$$$

src/test/org/openscience/cdk/qsar/descriptors/molecular/BCUTDescriptorTest.java

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@
1919
*/
2020
package org.openscience.cdk.qsar.descriptors.molecular;
2121

22-
import java.io.InputStream;
23-
import java.util.List;
24-
2522
import org.junit.Assert;
2623
import org.junit.Before;
2724
import org.junit.Test;
@@ -33,11 +30,15 @@
3330
import org.openscience.cdk.interfaces.IMolecule;
3431
import org.openscience.cdk.io.HINReader;
3532
import org.openscience.cdk.io.ISimpleChemObjectReader;
33+
import org.openscience.cdk.io.MDLV2000Reader;
3634
import org.openscience.cdk.qsar.DescriptorValue;
3735
import org.openscience.cdk.qsar.result.DoubleArrayResult;
3836
import org.openscience.cdk.smiles.SmilesParser;
3937
import org.openscience.cdk.tools.manipulator.ChemFileManipulator;
4038

39+
import java.io.InputStream;
40+
import java.util.List;
41+
4142

4243
/**
4344
* TestSuite that runs all QSAR tests.
@@ -155,5 +156,26 @@ public void testBCUT() throws Exception {
155156
DoubleArrayResult result1 = (DoubleArrayResult) descriptor.calculate(mol).getValue();
156157
for (int i = 0; i < result1.length(); i++) Assert.assertTrue( result1.get(i) != Double.NaN);
157158
}
159+
160+
/**
161+
* @cdk.bug 3489559
162+
*/
163+
@Test
164+
public void testUndefinedValues() throws Exception {
165+
String filename = "data/mdl/burden_undefined.sdf";
166+
InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename);
167+
ISimpleChemObjectReader reader = new MDLV2000Reader(ins);
168+
ChemFile content = reader.read(new ChemFile());
169+
List cList = ChemFileManipulator.getAllAtomContainers(content);
170+
IAtomContainer ac = (IAtomContainer) cList.get(0);
171+
172+
Assert.assertNotNull(ac);
173+
addExplicitHydrogens(ac);
174+
CDKHueckelAromaticityDetector.detectAromaticity(ac);
175+
176+
Exception e = descriptor.calculate(ac).getException();
177+
Assert.assertNotNull(e);
178+
Assert.assertEquals("Burden matrix has undefined values", e.getMessage());
179+
}
158180
}
159181

0 commit comments

Comments
 (0)