Skip to content

Commit

Permalink
new package for weka libio. And tests
Browse files Browse the repository at this point in the history
git-svn-id: https://cdk.svn.sourceforge.net/svnroot/cdk/trunk/cdk@6317 eb4e18e3-b210-0410-a6ab-dec725e4b171
  • Loading branch information
miguelrojasch committed May 26, 2006
1 parent bffad08 commit 5df2c32
Show file tree
Hide file tree
Showing 13 changed files with 419 additions and 1 deletion.
7 changes: 6 additions & 1 deletion build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,7 @@
<antcall target="compile-module"><param name="module" value="extra"/></antcall>
<antcall target="compile-module"><param name="module" value="smiles"/></antcall>
<antcall target="compile-module"><param name="module" value="libio-cml"/></antcall>
<antcall target="compile-module"><param name="module" value="libio-weka"/></antcall>
<antcall target="compile-module"><param name="module" value="pdb-cml"/></antcall>
<antcall target="compile-module"><param name="module" value="reaction"/></antcall>
<antcall target="compile-module"><param name="module" value="charges"/></antcall>
Expand All @@ -616,6 +617,7 @@
<antcall target="compile-module"><param name="module" value="test-io"/></antcall>
<antcall target="compile-module"><param name="module" value="test-extra"/></antcall>
<antcall target="compile-module"><param name="module" value="test-smiles"/></antcall>
<antcall target="compile-module"><param name="module" value="test-libio-weka"/></antcall>
<antcall target="compile-module"><param name="module" value="test-reaction"/></antcall>
<antcall target="compile-module"><param name="module" value="test-forcefield"/></antcall>
<antcall target="compile-module"><param name="module" value="test-valencycheck"/></antcall>
Expand Down Expand Up @@ -644,6 +646,7 @@
<includesfile name="${metainf}/io-jmol.libdepends"/>
<includesfile name="${metainf}/io.libdepends"/>
<includesfile name="${metainf}/libio-cml.libdepends"/>
<includesfile name="${metainf}/libio-weka.libdepends"/>
<includesfile name="${metainf}/nonotify.libdepends"/>
<includesfile name="${metainf}/pdb-cml.libdepends"/>
<includesfile name="${metainf}/pdb.libdepends"/>
Expand Down Expand Up @@ -675,6 +678,7 @@
<include name="cdk-qsar.jar"/>
<include name="cdk-qsar-cml.jar"/>
<include name="cdk-qsar-pdb.jar"/>
<include name="cdk-libio-weka.jar"/>
<include name="cdk-reaction.jar"/>
<include name="cdk-render.jar"/>
<include name="cdk-standard.jar"/>
Expand Down Expand Up @@ -854,7 +858,8 @@
<antcall target="test-module"><param name="module" value="forcefield"/></antcall>
<antcall target="test-module"><param name="module" value="extra"/></antcall>
<antcall target="test-module"><param name="module" value="valencycheck"/></antcall>
<antcall target="test-module"><param name="module" value="reaction"/></antcall>
<antcall target="test-module"><param name="module" value="reaction"/></antcall>
<antcall target="test-module"><param name="module" value="libio-weka"/></antcall>
<antcall target="test-module"><param name="module" value="smiles"/></antcall>
<antcall target="test-module"><param name="module" value="experimental"/></antcall>
<antcall target="test-module"><param name="module" value="qsar"/></antcall>
Expand Down
Binary file added jar/weka.jar
Binary file not shown.
2 changes: 2 additions & 0 deletions src/META-INF/libio-weka.cdkdepends
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@


1 change: 1 addition & 0 deletions src/META-INF/libio-weka.datafiles
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
data/arff/**
2 changes: 2 additions & 0 deletions src/META-INF/libio-weka.libdepends
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
weka.jar

4 changes: 4 additions & 0 deletions src/META-INF/test-libio-weka.cdkdepends
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
cdk-core.jar
cdk-data.jar
cdk-libio-weka.jar
cdk-test-core.jar
1 change: 1 addition & 0 deletions src/META-INF/test-libio-weka.datafiles
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
data/arff/**
2 changes: 2 additions & 0 deletions src/META-INF/test-libio-weka.devellibdepends
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
junit.jar

2 changes: 2 additions & 0 deletions src/META-INF/test-libio-weka.libdepends
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
weka.jar

27 changes: 27 additions & 0 deletions src/data/arff/Table1.arff
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
@relation 'table1'
@attribute 'EffectivePolarizabilityDescriptor' real
@attribute 'SigmaElectronegativityDescriptor' real
@attribute 'ProtonTotalPartialChargeDescriptor' real
@attribute 'predicted valency' real
@data
0.39 , 9.62 , -0.15 ,12.74,
1.64 , 9.77 , -0.13 ,11.3 ,
1.06 , 12.56 , -0.16 ,13.0 ,
1.26 , 10.51 , -0.05 ,12.6 ,
1.45 , 10.15 , -0.09 ,11.9 ,
2.27 , 9.8 , -0.13 ,11.01,
1.89 , 10.54 , -0.05 ,12.5 ,
1.06 , 10.15 , 0.01 ,12.9 ,
2.59 , 9.81 , -0.13 ,10.88,
2.74 , 9.81 , -0.13 ,10.84,
2.79 , 9.85 , -0.12 ,11.21,
0.39 , 9.14 , -0.11 ,11.67,
1.64 , 9.28 , -0.1 ,10.53,
1.06 , 10.34 , 0.03 ,12.0 ,
1.06 , 10.15 , 0.01 ,11.83,
2.27 , 9.31 , -0.09 ,10.28,
2.59 , 9.31 , -0.09 ,10.18,
2.74 , 9.31 , -0.09 ,10.15,
2.79 , 9.35 , -0.09 ,10.43,
0.39 , 9.06 , -0.11 ,10.38,
1.64 , 9.2 , -0.09 ,9.5 ,
7 changes: 7 additions & 0 deletions src/data/arff/Table2.arff
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
@relation 'table1'
@attribute 'EffectivePolarizabilityDescriptor' real
@attribute 'SigmaElectronegativityDescriptor' real
@attribute 'ProtonTotalPartialChargeDescriptor' real
@attribute 'ProtonTotalPa' real
@data
0.39 , 9.06 , -0.11,0.0
226 changes: 226 additions & 0 deletions src/org/openscience/cdk/libio/weka/Weka.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
/* $RCSfile$
* $Author: egonw $
* $Date: 2006-05-01 10:43:42 +0200 (Mo, 01 Mai 2006) $
* $Revision: 6095 $
*
* Copyright (C) 2003-2006 The Chemistry Development Kit (CDK) project
*
* Contact: cdk-devel@lists.sourceforge.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.openscience.cdk.libio.weka;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;

import weka.classifiers.Classifier;
import weka.core.Instance;
import weka.core.Instances;


/**
 * <p>Thin wrapper around the WEKA machine learning library: it trains a
 * {@link Classifier} on a dataset and then uses it to predict values.</p>
 *
 * <p>The dataset can be read from an ARFF resource. Resource names are
 * resolved through the class loader of this class, so they are classpath
 * resource names (no leading slash), not file system paths:</p>
 * <pre>
 * Weka weka = new Weka();
 * Classifier lr = new LinearRegression();
 * weka.setDataset("data/arff/Table1.arff", lr);
 * double[] result = weka.getPrediction("data/arff/Table2.arff");
 * </pre>
 *
 * <p>Alternatively the values can be passed in directly:</p>
 * <pre>
 * Weka weka = new Weka();
 * Classifier lr = new LinearRegression();
 * String[] attrib = {"aX", "bX", "cX", "PY"};
 * int[] typAttrib = {Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC, Weka.NUMERIC};
 * double[][] x = {{0.39, 9.62, -0.15},
 *                 {1.64, 9.77, -0.13},
 *                 {1.06, 12.56, -0.16}};
 * Double[] y = {new Double(12.74), new Double(11.3), new Double(13.0)};
 * weka.setDataset(attrib, typAttrib, y, x, lr);
 * double[] testX = {0.39, 9.06, -0.11};
 * double resultY = weka.getPrediction(testX);
 * </pre>
 *
 * <p>In both cases the last attribute of the training data is used as the
 * class (dependent) attribute.</p>
 *
 * @author      Miguel Rojas
 * @cdk.created 2006-05-23
 * @cdk.module  libio-weka
 * @cdk.keyword weka,Machine Learning
 * @cdk.depends weka.jar
 */
public class Weka {

    /** Attribute type code written to the ARFF header as <code>numeric</code>. */
    public static final int NUMERIC = 0;
    /** Attribute type code written to the ARFF header as <code>string</code>. */
    public static final int NOMINAL = 1;

    /** Classifier trained by the last successful <code>setDataset</code> call. */
    private Classifier classifier;

    /** Training data whose class values were replaced by the classifier's own predictions. */
    private Instances labeled;

    /**
     * Constructor of the Weka object. A dataset has to be set with one of the
     * <code>setDataset</code> methods before predictions can be requested.
     */
    public Weka() {
    }

    /**
     * Trains the classifier on a dataset read from an ARFF resource.
     *
     * @param  pathTable   Classpath resource name of the ARFF file to train on
     * @param  classifier  Classifier to train
     * @return             A copy of the training data in which every class value
     *                     has been replaced by the value the trained classifier
     *                     predicts for that instance
     * @throws Exception   if the resource cannot be found or read, or if the
     *                     classifier cannot be built
     */
    public Instances setDataset(String pathTable, Classifier classifier) throws Exception {
        return train(loadInstances(pathTable), classifier);
    }

    /**
     * Trains the classifier on a dataset given as arrays. Use this variant when
     * no explicit list of class labels is needed; the class attribute is then
     * expected to be the last entry of <code>attrib</code>.
     *
     * @param  attrib      Names of the attributes
     * @param  typAttrib   Type of each attribute: {@link #NUMERIC} or {@link #NOMINAL}
     * @param  y           The dependent variable, one entry per row of <code>x</code>;
     *                     numbers or strings
     * @param  x           The independent variables: observations in the rows,
     *                     variables in the columns
     * @param  classifier  Classifier to train
     * @return             A copy of the training data in which every class value
     *                     has been replaced by the value the trained classifier
     *                     predicts for that instance
     * @throws Exception   if the data cannot be parsed or the classifier cannot be built
     */
    public Instances setDataset(String[] attrib, int[] typAttrib, Object[] y, double[][] x,
            Classifier classifier) throws Exception {
        return setDataset(attrib, typAttrib, null, y, x, classifier);
    }

    /**
     * Trains the classifier on a dataset given as arrays, with an explicit list
     * of class labels. When <code>classAttrib</code> is not null an additional
     * attribute named <code>class</code> is declared after the attributes in
     * <code>attrib</code>, enumerating those labels.
     *
     * @param  attrib      Names of the attributes
     * @param  typAttrib   Type of each attribute: {@link #NUMERIC} or {@link #NOMINAL}
     * @param  classAttrib Labels of the class attribute, or null to declare none
     * @param  y           The dependent variable, one entry per row of <code>x</code>;
     *                     numbers or strings
     * @param  x           The independent variables: observations in the rows,
     *                     variables in the columns
     * @param  classifier  Classifier to train
     * @return             A copy of the training data in which every class value
     *                     has been replaced by the value the trained classifier
     *                     predicts for that instance
     * @throws Exception   if the data cannot be parsed or the classifier cannot be built
     */
    public Instances setDataset(String[] attrib, int[] typAttrib, String[] classAttrib, Object[] y,
            double[][] x, Classifier classifier) throws Exception {
        Reader reader = createAttributes(attrib, typAttrib, classAttrib, y, x);
        return train(new Instances(reader), classifier);
    }

    /**
     * Predicts the class value for a single observation.
     *
     * @param  value      The independent variables of the observation, in the
     *                    order of the attributes of the training data
     * @return            Result of the prediction
     * @throws Exception  if no dataset has been set yet, if more values are given
     *                    than the training data has attributes, or if the
     *                    classifier fails
     */
    public double getPrediction(double[] value) throws Exception {
        requireTrained();
        if (value.length > labeled.numAttributes()) {
            throw new IllegalArgumentException("Got " + value.length
                    + " values but the training data has only "
                    + labeled.numAttributes() + " attributes");
        }
        Instance instance = new Instance(labeled.numAttributes());
        // The instance needs the header of the training data to interpret its values.
        instance.setDataset(labeled);
        // Only the given values are set; remaining attributes (normally the
        // class attribute, which is what gets predicted) are left untouched.
        for (int i = 0; i < value.length; i++) {
            instance.setValue(i, value[i]);
        }
        return classifier.classifyInstance(instance);
    }

    /**
     * Predicts the class value for every instance of an ARFF resource.
     *
     * @param  pathARFF   Classpath resource name of the ARFF file holding the
     *                    instances to predict
     * @return            One prediction per instance, in file order
     * @throws Exception  if no dataset has been set yet, if the resource cannot
     *                    be found or read, or if the classifier fails
     */
    public double[] getPrediction(String pathARFF) throws Exception {
        requireTrained();
        Instances test = loadInstances(pathARFF);
        double[] result = new double[test.numInstances()];
        for (int i = 0; i < result.length; i++) {
            result[i] = classifier.classifyInstance(test.instance(i));
        }
        return result;
    }

    /**
     * Fails with a descriptive message when no dataset has been set yet,
     * instead of letting a NullPointerException escape later.
     */
    private void requireTrained() {
        if (classifier == null || labeled == null) {
            throw new IllegalStateException(
                    "No dataset has been set: call setDataset(...) before getPrediction(...)");
        }
    }

    /**
     * Reads an ARFF resource through the class loader of this class and closes
     * the underlying stream afterwards.
     *
     * @param  path         Classpath resource name of the ARFF file
     * @return              The instances read from the resource
     * @throws IOException  if the resource cannot be found or read
     */
    private Instances loadInstances(String path) throws IOException {
        InputStream ins = this.getClass().getClassLoader().getResourceAsStream(path);
        if (ins == null) {
            throw new IOException("ARFF resource not found on the classpath: " + path);
        }
        Reader reader = new BufferedReader(new InputStreamReader(ins));
        try {
            return new Instances(reader);
        } finally {
            reader.close();
        }
    }

    /**
     * Uses the last attribute as class attribute, builds the classifier and
     * replaces the class value of every instance by the classifier's own
     * prediction. The fields of this object are only updated once everything
     * succeeded, so a failed call leaves the previous state intact.
     *
     * @param  instances      The training data
     * @param  newClassifier  Classifier to train
     * @return                The relabeled copy of the training data
     * @throws Exception      if the classifier cannot be built or applied
     */
    private Instances train(Instances instances, Classifier newClassifier) throws Exception {
        instances.setClassIndex(instances.numAttributes() - 1);

        Instances relabeled = new Instances(instances);
        newClassifier.buildClassifier(relabeled);

        for (int i = 0; i < instances.numInstances(); i++) {
            double clsLabel = newClassifier.classifyInstance(instances.instance(i));
            relabeled.instance(i).setClassValue(clsLabel);
        }

        this.classifier = newClassifier;
        this.labeled = relabeled;
        return relabeled;
    }

    /**
     * Creates a Reader over an in-memory ARFF document built from the given
     * arrays, suitable for constructing a WEKA Instances object.
     *
     * @param  attrib       Names of the attributes
     * @param  typAttrib    Type of each attribute: {@link #NUMERIC} or {@link #NOMINAL}
     * @param  classAttrib  Labels of an additional <code>class</code> attribute,
     *                      or null to declare none
     * @param  y            The dependent variable, one entry per row of <code>x</code>
     * @param  x            The independent variables, one row per observation
     * @return              The Reader containing the ARFF document
     * @throws IOException  declared for callers; not thrown by the current implementation
     */
    private Reader createAttributes(String[] attrib, int[] typAttrib, String[] classAttrib,
            Object[] y, double[][] x) throws IOException {
        StringBuffer arff = new StringBuffer("@relation table1 \n");

        for (int i = 0; i < attrib.length; i++) {
            arff.append("@attribute ").append(attrib[i]);
            if (typAttrib[i] == NUMERIC) {
                arff.append(" numeric \n");
            } else if (typAttrib[i] == NOMINAL) {
                arff.append(" string \n");
            } else {
                // Without a type (and line break) the header would be corrupt.
                throw new IllegalArgumentException("Unknown attribute type " + typAttrib[i]
                        + " for attribute '" + attrib[i] + "'");
            }
        }

        if (classAttrib != null) {
            arff.append("@attribute class {");
            for (int i = 0; i < classAttrib.length; i++) {
                if (i > 0) {
                    arff.append(",");
                }
                arff.append(classAttrib[i]);
            }
            arff.append("}\n");
        }

        arff.append("@data ");
        if (x != null && y != null) {
            if (y.length < x.length) {
                throw new IllegalArgumentException("y has " + y.length
                        + " entries but x has " + x.length + " rows");
            }
            for (int j = 0; j < x.length; j++) {
                // Use the length of each row so jagged input is neither
                // truncated nor read out of bounds.
                for (int i = 0; i < x[j].length; i++) {
                    arff.append(x[j][i]).append(",");
                }
                arff.append(y[j]).append(", \n");
            }
        }

        return new StringReader(arff.toString());
    }
}
Loading

0 comments on commit 5df2c32

Please sign in to comment.