-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
new package for weka libio. And tests
git-svn-id: https://cdk.svn.sourceforge.net/svnroot/cdk/trunk/cdk@6317 eb4e18e3-b210-0410-a6ab-dec725e4b171
- Loading branch information
miguelrojasch
committed
May 26, 2006
1 parent
bffad08
commit 5df2c32
Showing
13 changed files
with
419 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
data/arff/** |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
weka.jar | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
cdk-core.jar | ||
cdk-data.jar | ||
cdk-libio-weka.jar | ||
cdk-test-core.jar |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
data/arff/** |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
junit.jar | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
weka.jar | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
@relation 'table1' | ||
@attribute 'EffectivePolarizabilityDescriptor' real | ||
@attribute 'SigmaElectronegativityDescriptor' real | ||
@attribute 'ProtonTotalPartialChargeDescriptor' real | ||
@attribute 'predicted valency' real | ||
@data | ||
0.39 , 9.62 , -0.15 ,12.74, | ||
1.64 , 9.77 , -0.13 ,11.3 , | ||
1.06 , 12.56 , -0.16 ,13.0 , | ||
1.26 , 10.51 , -0.05 ,12.6 , | ||
1.45 , 10.15 , -0.09 ,11.9 , | ||
2.27 , 9.8 , -0.13 ,11.01, | ||
1.89 , 10.54 , -0.05 ,12.5 , | ||
1.06 , 10.15 , 0.01 ,12.9 , | ||
2.59 , 9.81 , -0.13 ,10.88, | ||
2.74 , 9.81 , -0.13 ,10.84, | ||
2.79 , 9.85 , -0.12 ,11.21, | ||
0.39 , 9.14 , -0.11 ,11.67, | ||
1.64 , 9.28 , -0.1 ,10.53, | ||
1.06 , 10.34 , 0.03 ,12.0 , | ||
1.06 , 10.15 , 0.01 ,11.83, | ||
2.27 , 9.31 , -0.09 ,10.28, | ||
2.59 , 9.31 , -0.09 ,10.18, | ||
2.74 , 9.31 , -0.09 ,10.15, | ||
2.79 , 9.35 , -0.09 ,10.43, | ||
0.39 , 9.06 , -0.11 ,10.38, | ||
1.64 , 9.2 , -0.09 ,9.5 , |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
@relation 'table1' | ||
@attribute 'EffectivePolarizabilityDescriptor' real | ||
@attribute 'SigmaElectronegativityDescriptor' real | ||
@attribute 'ProtonTotalPartialChargeDescriptor' real | ||
@attribute 'ProtonTotalPa' real | ||
@data | ||
0.39 , 9.06 , -0.11,0.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,226 @@ | ||
/* $RCSfile$ | ||
* $Author: egonw $ | ||
* $Date: 2006-05-01 10:43:42 +0200 (Mo, 01 Mai 2006) $ | ||
* $Revision: 6095 $ | ||
* | ||
* Copyright (C) 2003-2006 The Chemistry Development Kit (CDK) project | ||
* | ||
* Contact: cdk-devel@lists.sourceforge.net | ||
* | ||
* This library is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU Lesser General Public | ||
* License as published by the Free Software Foundation; either | ||
* version 2.1 of the License, or (at your option) any later version. | ||
* | ||
* This library is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
* Lesser General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public | ||
* License along with this library; if not, write to the Free Software | ||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
*/ | ||
package org.openscience.cdk.libio.weka; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.io.InputStreamReader; | ||
import java.io.Reader; | ||
import java.io.StringReader; | ||
|
||
import weka.classifiers.Classifier; | ||
import weka.core.Instance; | ||
import weka.core.Instances; | ||
|
||
|
||
/** | ||
* <p>Weka class is a library which use the program WEKA: a Machine Learning Project.</p> | ||
* To inizalizate weka class is typically done like: <pre> | ||
* Classifier lr = new LinearRegression(); | ||
* weka.setDataset("/some/where/dataTraining.arff", lr); | ||
* String testARFF = "/some/where/dataTest.arff"; | ||
* double[] result = weka.getPrediction(testARFF); | ||
* </pre> | ||
* You have also the possibility to introduce directly values, done like: | ||
* <pre> | ||
* Classifier lr = new LinearRegression(); | ||
* String[] attrib = {"aX","bX","cX","PY" }; | ||
* int[] typAttrib = {Weka.NUMERIC,Weka.NUMERIC,Weka.NUMERIC,Weka.NUMERIC, }; | ||
* double[][] x = {{0.39,9.62 ,-0.15 }, | ||
* {1.64,9.77 ,-0.13}, | ||
* {1.06,12.56,-0.16}, | ||
* double[] y = {12.74,11.3 ,13.0}; | ||
* weka.setDataset(attrib, typAttrib, y, x, lr); | ||
* double[] testX = {0.39,9.06,-0.11}; | ||
* double resultY = weka.getPrediction(testX); | ||
* </pre> | ||
* @author Miguel Rojas | ||
* @cdk.created 2006-05-23 | ||
* @cdk.module libio-weka | ||
* @cdk.keyword weka,Machine Learning | ||
* @cdk.depends weka.jar | ||
*/ | ||
public class Weka { | ||
|
||
public static final int NUMERIC = 0; | ||
public static final int NOMINAL = 1; | ||
|
||
/** type of classifier*/ | ||
private Classifier classifier; | ||
|
||
private Instances labeled; | ||
/** | ||
* Constructor of the Weka | ||
*/ | ||
public Weka() { | ||
} | ||
/** | ||
* Set the file format arff to analize which contains the dataset and the type of classifier | ||
* | ||
* @param setDataset Path of the dataset file format arff to train | ||
* @param classifier Type of Classifier | ||
* @return The Instances value | ||
* @throws Exception | ||
*/ | ||
public Instances setDataset(String pathTable, Classifier classifier) throws Exception{ | ||
this.classifier = classifier; | ||
InputStream ins = this.getClass().getClassLoader().getResourceAsStream(pathTable); | ||
Reader insr = new InputStreamReader(ins); | ||
Instances instances = new Instances(new BufferedReader(insr)); | ||
instances.setClassIndex(instances.numAttributes() - 1); | ||
|
||
labeled = new Instances(instances); | ||
classifier.buildClassifier(labeled); | ||
|
||
for (int i = 0; i < instances.numInstances(); i++) { | ||
double clsLabel = classifier.classifyInstance(instances.instance(i)); | ||
labeled.instance(i).setClassValue(clsLabel); | ||
} | ||
return labeled; | ||
} | ||
/** | ||
* | ||
* Set the array which contains the dataset and the type of classifier. This method | ||
* will be used for classifier which work with numerical values. | ||
* | ||
* @param attrib String with the attribut names | ||
* @param typAttrib Attribute type: NUMERICAL or NOMINAL. | ||
* @param x An array of independent variables. The observations should be in the rows | ||
* and the variables should be in the columns | ||
* @param y An array containing the dependent variable. It is possible numeric or string. | ||
* @param classifier Type of Classifier | ||
* @return The Instances value | ||
* @throws Exception | ||
*/ | ||
public Instances setDataset(String[] attrib, int[] typAttrib, Object[]y, double[][] x, Classifier classifier) throws Exception{ | ||
return setDataset(attrib, typAttrib ,null,y,x,classifier); | ||
} | ||
/** | ||
* | ||
* Set the array which contains the dataset and the type of classifier.This method | ||
* will be used for classifier which work with String values. | ||
* | ||
* @param attrib String with the attribut names. | ||
* @param typAttrib Attribute type: NUMERICAL or NOMINAL. | ||
* @param classAttrib String with the attribut class. | ||
* @param x An array of independent variables. The observations should be in the rows | ||
* and the variables should be in the columns | ||
* @param y An array containing the dependent variable. It is possible numeric or string. | ||
* @param classifier Type of classifier | ||
* @return The Instances value | ||
* @throws Exception | ||
*/ | ||
public Instances setDataset(String[] attrib, int[] typAttrib, String[] classAttrib, Object[]y, double[][] x, Classifier classifier) throws Exception{ | ||
this.classifier = classifier; | ||
Reader reader = createAttributes(attrib,typAttrib,classAttrib,y,x); | ||
Instances instances = new Instances(reader); | ||
instances.setClassIndex(instances.numAttributes() - 1); | ||
labeled = new Instances(instances); | ||
classifier.buildClassifier(labeled); | ||
|
||
for (int i = 0; i < instances.numInstances(); i++) { | ||
double clsLabel = classifier.classifyInstance(instances.instance(i)); | ||
labeled.instance(i).setClassValue(clsLabel); | ||
} | ||
return labeled; | ||
} | ||
/** | ||
* Return of the predicted value | ||
* | ||
* @param value An array of independent variables which contians the values with whose to test | ||
* @return Result of the prediction | ||
* @throws Exception | ||
*/ | ||
public double getPrediction(double[] value) throws Exception{ | ||
Instance instance = new Instance(labeled.numAttributes()); | ||
instance.setDataset(labeled); | ||
for(int i = 0 ; i < value.length ; i++) | ||
instance.setValue(i, value[i]); | ||
// instance.setValue(value.length, 0.0); | ||
return classifier.classifyInstance(instance); | ||
} | ||
/** | ||
* Return of the predicted value | ||
* | ||
* @param pathARRF path of the file format arff which contians the values with whose to test. | ||
* @return Result of the prediction. | ||
* @throws Exception | ||
*/ | ||
public double[] getPrediction(String pathARFF) throws Exception{ | ||
InputStream ins = this.getClass().getClassLoader().getResourceAsStream(pathARFF); | ||
Reader insr = new InputStreamReader(ins); | ||
Instances test = new Instances(new BufferedReader(insr)); | ||
double[] result = new double[test.numInstances()]; | ||
for(int i = 0 ; i < test.numInstances(); i++){ | ||
result[i] = classifier.classifyInstance(test.instance(i)); | ||
} | ||
return result; | ||
} | ||
/** | ||
* create a Reader with necessary attributes to iniziate a Instances for weka. | ||
* | ||
* @param attrib String with the attribut class | ||
* @param typAttrib Attribute type: NOMINAL or NUMERIC. | ||
* @param y An array containing the independent variable. | ||
* @param x An array of dependent variables. | ||
* @return The Reader containing the attributes | ||
* @throws IOException | ||
*/ | ||
private Reader createAttributes(String[] attrib, int[] typAttrib, String[] classAttrib, Object[]y, double[][] x) throws IOException{ | ||
String string ="@relation table1 \n"; | ||
for(int i = 0; i < attrib.length ; i++){ | ||
string += ("@attribute "+attrib[i]); | ||
if(typAttrib[i] == NUMERIC) | ||
string += " numeric \n"; | ||
else if(typAttrib[i] == NOMINAL) | ||
string += " string \n"; | ||
} | ||
|
||
if(classAttrib != null){ | ||
string += "@attribute class "; | ||
string += "{"; | ||
for(int i = 0; i < classAttrib.length ; i++){ | ||
string += (classAttrib[i]); | ||
if(i != classAttrib.length -1) | ||
string += ","; | ||
} | ||
string += "}\n"; | ||
} | ||
|
||
string += ("@data "); | ||
if(x != null && y != null){ | ||
for(int j = 0 ; j < x.length; j++){ | ||
for(int i = 0 ; i < x[0].length ; i++){ | ||
string += x[j][i]+","; | ||
} | ||
string += y[j]+", \n"; | ||
} | ||
} | ||
|
||
|
||
Reader reader = new StringReader(string); | ||
return reader; | ||
} | ||
} |
Oops, something went wrong.