Skip to content

Commit 133f314

Browse files
rajarshiegonw
authored and committed
Added similarity method for Lingo's
1 parent f487c30 commit 133f314

File tree

2 files changed

+118
-0
lines changed

2 files changed

+118
-0
lines changed
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/* $Revision$ $Author$ $Date$
2+
*
3+
* Copyright (C) 2010 Rajarshi Guha <rajarshi.guha@gmail.com>
4+
*
5+
* Contact: rajarshi.guha@gmail.com
6+
*
7+
* This program is free software; you can redistribute it and/or
8+
* modify it under the terms of the GNU Lesser General Public License
9+
* as published by the Free Software Foundation; either version 2.1
10+
* of the License, or (at your option) any later version.
11+
* All I ask is that proper credit is given for my work, which includes
12+
* - but is not limited to - adding the above copyright notice to the beginning
13+
* of your source code files, and to any copyright notice that you may distribute
14+
* with programs based on this work.
15+
*
16+
* This program is distributed in the hope that it will be useful,
17+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
18+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19+
* GNU Lesser General Public License for more details.
20+
*
21+
* You should have received a copy of the GNU Lesser General Public License
22+
* along with this program; if not, write to the Free Software
23+
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24+
*/
25+
package org.openscience.cdk.similarity;
26+
27+
import org.openscience.cdk.annotations.TestClass;
28+
import org.openscience.cdk.annotations.TestMethod;
29+
30+
import java.util.Map;
31+
import java.util.TreeSet;
32+
33+
/**
34+
* A class to evaluate the similarity between two LINGO's as described in {@cdk.cite Vidal2005}.
35+
*
36+
* @author Rajarshi Guha
37+
* @cdk.githash
38+
* @cdk.keyword lingo
39+
* @cdk.keyword similarity, tanimoto
40+
* @cdk.module fingerprint
41+
*/
42+
@TestClass("org.openscience.cdk.similarity.LingoTest")
43+
public class LingoSimilarity {
44+
@TestMethod("testLingoSim")
45+
public static float calculate(Map<String, Integer> features1, Map<String, Integer> features2) {
46+
TreeSet<String> keys = new TreeSet<String>(features1.keySet());
47+
keys.addAll(features2.keySet());
48+
49+
float sum = 0.0f;
50+
for (String key : keys) {
51+
Integer c1 = features1.get(key);
52+
Integer c2 = features2.get(key);
53+
54+
c1 = c1 == null ? 0 : c1;
55+
c2 = c2 == null ? 0 : c2;
56+
sum += 1.0 - Math.abs(c1 - c2) / (c1 + c2);
57+
}
58+
59+
return sum / keys.size();
60+
}
61+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/* $Revision$ $Author$ $Date$
2+
*
3+
* Copyright (C) 2010 Rajarshi Guha <rajarshi.guha@gmail.com>
4+
*
5+
* Contact: rajarshi.guha@gmail.com
6+
*
7+
* This program is free software; you can redistribute it and/or
8+
* modify it under the terms of the GNU Lesser General Public License
9+
* as published by the Free Software Foundation; either version 2.1
10+
* of the License, or (at your option) any later version.
11+
* All I ask is that proper credit is given for my work, which includes
12+
* - but is not limited to - adding the above copyright notice to the beginning
13+
* of your source code files, and to any copyright notice that you may distribute
14+
* with programs based on this work.
15+
*
16+
* This program is distributed in the hope that it will be useful,
17+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
18+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19+
* GNU Lesser General Public License for more details.
20+
*
21+
* You should have received a copy of the GNU Lesser General Public License
22+
* along with this program; if not, write to the Free Software
23+
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24+
*/
25+
26+
package org.openscience.cdk.similarity;
27+
28+
import org.junit.Assert;
29+
import org.junit.Test;
30+
import org.openscience.cdk.CDKTestCase;
31+
import org.openscience.cdk.Molecule;
32+
import org.openscience.cdk.fingerprint.LingoFingerprinter;
33+
import org.openscience.cdk.templates.MoleculeFactory;
34+
35+
import java.util.Map;
36+
37+
/**
38+
* @cdk.module test-fingerprint
39+
*/
40+
public class LingoSimilarityTest extends CDKTestCase {
41+
42+
boolean standAlone = false;
43+
44+
@Test
45+
public void testLingoSim() throws Exception {
46+
Molecule mol1 = MoleculeFactory.makeIndole();
47+
Molecule mol2 = MoleculeFactory.makeIndole();
48+
LingoFingerprinter fingerprinter = new LingoFingerprinter();
49+
Map<String, Integer> bs1 = fingerprinter.getRawFingerprint(mol1);
50+
Map<String, Integer> bs2 = fingerprinter.getRawFingerprint(mol2);
51+
float lingosim = LingoSimilarity.calculate(bs1, bs2);
52+
if (standAlone) System.out.println("LingoSimilarity similarity: " + lingosim);
53+
if (!standAlone) Assert.assertEquals(1.0, lingosim, 0.01);
54+
}
55+
56+
57+
}

0 commit comments

Comments
 (0)