Skip to content

Commit

Permalink
KnowledgeBase is no longer serializable. Its serializable fields are …
Browse files Browse the repository at this point in the history
…stored individually into the Database. Restructuring the framework to remove all FindBugs warnings.
  • Loading branch information
datumbox committed Jan 7, 2016
1 parent 082e6c4 commit a378241
Show file tree
Hide file tree
Showing 113 changed files with 540 additions and 541 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Expand Up @@ -48,6 +48,8 @@ Version 0.7.0-SNAPSHOT - Build 20160107
- Changed the way that single vars are stored with DatabaseConnectors. Multiple vars can now be stored. Also the save() and load() methods were renamed to saveObject() and loadObject().
- Changed names of constants and methods that were not compatible with Java's naming conventions.
- Changed the behaviour and the public methods of the DatabaseConnector interface. The dropDatabase() is replaced with a clear() method that deletes all the data but does not close the connection with the database.
- KnowledgeBase is no longer serializable. Its serializable fields are stored individually into the Database.
- Restructuring the framework to remove all FindBugs warnings.

Version 0.6.1 - Build 20160102
------------------------------
Expand Down
3 changes: 3 additions & 0 deletions TODO.txt
@@ -1,6 +1,9 @@
CODE IMPROVEMENTS
=================

- Check out some of the "Load of known null value" warnings; perhaps we can completely remove this parameter if it is always null. MatrixDataframe.newInstance param recordIdsReference.
- Check that the example project passes as is. Check that the hybridized mode is on by default.
- Update the example project to the latest version build and the config file.
- Add a predictRecord() method to BaseMLmodel and refactor the code to be implemented by every algorithm.
- Add multithreading support.
- Update all maven plugins and dependencies to their latest versions.
Expand Down
Expand Up @@ -95,9 +95,9 @@ public BaseMLmodel.ValidationMetrics validate(Dataframe testData) {
protected void _fit(Dataframe trainingData) {

//get the training parameters
Modeler.TrainingParameters trainingParameters = knowledgeBase.getTrainingParameters();
Modeler.TrainingParameters trainingParameters = kb().getTrainingParameters();

DatabaseConfiguration dbConf = knowledgeBase.getDbConf();
DatabaseConfiguration dbConf = kb().getDbConf();

//transform the training dataset
Class dtClass = trainingParameters.getDataTransformerClass();
Expand Down Expand Up @@ -135,10 +135,10 @@ protected void _fit(Dataframe trainingData) {

private BaseMLmodel.ValidationMetrics evaluateData(Dataframe data, boolean estimateValidationMetrics) {
//ensure db loaded
knowledgeBase.load();
Modeler.TrainingParameters trainingParameters = knowledgeBase.getTrainingParameters();
kb().load();
Modeler.TrainingParameters trainingParameters = kb().getTrainingParameters();

DatabaseConfiguration dbConf = knowledgeBase.getDbConf();
DatabaseConfiguration dbConf = kb().getDbConf();

Class dtClass = trainingParameters.getDataTransformerClass();

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/datumbox/applications/nlp/CETR.java
Expand Up @@ -183,7 +183,7 @@ private List<Integer> selectRows(List<String> rows, Parameters parameters) {
dataset.add(new Record(xData, null));
}

G = null;
//G = null;
}
else {
//build dataset for Cluster Analysis by using only TTRlist info
Expand Down Expand Up @@ -262,7 +262,7 @@ private void performClustering(Dataframe dataset, int numberOfClusters) {
//Map<Integer, BaseMLclusterer.Cluster> clusters = instance.getClusters();

instance.delete(); //delete immediately the result
instance = null;
//instance = null;
}

private List<Double> calculateTTRlist(List<String> rows) {
Expand Down
32 changes: 16 additions & 16 deletions src/main/java/com/datumbox/applications/nlp/TextClassifier.java
Expand Up @@ -137,7 +137,7 @@ map should have as index the names of each class and as values the URIs
public void fit(Map<Object, URI> datasets, TrainingParameters trainingParameters) {
//build trainingDataset
TextExtractor textExtractor = TextExtractor.newInstance(trainingParameters.getTextExtractorClass(), trainingParameters.getTextExtractorParameters());
Dataframe trainingData = Dataframe.Builder.parseTextFiles(datasets, textExtractor, knowledgeBase.getDbConf());
Dataframe trainingData = Dataframe.Builder.parseTextFiles(datasets, textExtractor, kb().getDbConf());

fit(trainingData, trainingParameters);

Expand All @@ -153,7 +153,7 @@ public void predict(Dataframe testDataset) {
logger.info("predict()");

//ensure db loaded
knowledgeBase.load();
kb().load();

preprocessTestDataset(testDataset);
mlmodel.predict(testDataset);
Expand All @@ -168,18 +168,18 @@ public void predict(Dataframe testDataset) {
*/
public Dataframe predict(URI datasetURI) {
//ensure db loaded
knowledgeBase.load();
kb().load();

//create a dummy dataset map
Map<Object, URI> dataset = new HashMap<>();
dataset.put(null, datasetURI);

TextClassifier.TrainingParameters trainingParameters = knowledgeBase.getTrainingParameters();
TextClassifier.TrainingParameters trainingParameters = kb().getTrainingParameters();

TextExtractor textExtractor = TextExtractor.newInstance(trainingParameters.getTextExtractorClass(), trainingParameters.getTextExtractorParameters());

//build the testDataset
Dataframe testDataset = Dataframe.Builder.parseTextFiles(dataset, textExtractor, knowledgeBase.getDbConf());
Dataframe testDataset = Dataframe.Builder.parseTextFiles(dataset, textExtractor, kb().getDbConf());

predict(testDataset);

Expand All @@ -196,13 +196,13 @@ public Dataframe predict(URI datasetURI) {
*/
public Record predict(String text) {
//ensure db loaded
knowledgeBase.load();
kb().load();

TextClassifier.TrainingParameters trainingParameters = knowledgeBase.getTrainingParameters();
TextClassifier.TrainingParameters trainingParameters = kb().getTrainingParameters();

TextExtractor textExtractor = TextExtractor.newInstance(trainingParameters.getTextExtractorClass(), trainingParameters.getTextExtractorParameters());

Dataframe testDataset = new Dataframe(knowledgeBase.getDbConf());
Dataframe testDataset = new Dataframe(kb().getDbConf());

testDataset.add(new Record(new AssociativeArray(textExtractor.extract(StringCleaner.clear(text))), null));

Expand All @@ -226,7 +226,7 @@ public BaseMLmodel.ValidationMetrics validate(Dataframe testDataset) {
logger.info("validate()");

//ensure db loaded
knowledgeBase.load();
kb().load();

preprocessTestDataset(testDataset);
BaseMLmodel.ValidationMetrics vm = mlmodel.validate(testDataset);
Expand All @@ -245,14 +245,14 @@ map should have as index the names of each class and as values the URIs
*/
public BaseMLmodel.ValidationMetrics validate(Map<Object, URI> datasets) {
//ensure db loaded
knowledgeBase.load();
kb().load();

TextClassifier.TrainingParameters trainingParameters = knowledgeBase.getTrainingParameters();
TextClassifier.TrainingParameters trainingParameters = kb().getTrainingParameters();

TextExtractor textExtractor = TextExtractor.newInstance(trainingParameters.getTextExtractorClass(), trainingParameters.getTextExtractorParameters());

//build the testDataset
Dataframe testDataset = Dataframe.Builder.parseTextFiles(datasets, textExtractor, knowledgeBase.getDbConf());
Dataframe testDataset = Dataframe.Builder.parseTextFiles(datasets, textExtractor, kb().getDbConf());

BaseMLmodel.ValidationMetrics vm = validate(testDataset);

Expand All @@ -263,8 +263,8 @@ public BaseMLmodel.ValidationMetrics validate(Map<Object, URI> datasets) {

@Override
protected void _fit(Dataframe trainingDataset) {
TextClassifier.TrainingParameters trainingParameters = knowledgeBase.getTrainingParameters();
DatabaseConfiguration dbConf = knowledgeBase.getDbConf();
TextClassifier.TrainingParameters trainingParameters = kb().getTrainingParameters();
DatabaseConfiguration dbConf = kb().getDbConf();
Class dtClass = trainingParameters.getDataTransformerClass();

boolean transformData = (dtClass!=null);
Expand Down Expand Up @@ -298,8 +298,8 @@ protected void _fit(Dataframe trainingDataset) {
}

private void preprocessTestDataset(Dataframe testDataset) {
TextClassifier.TrainingParameters trainingParameters = knowledgeBase.getTrainingParameters();
DatabaseConfiguration dbConf = knowledgeBase.getDbConf();
TextClassifier.TrainingParameters trainingParameters = kb().getTrainingParameters();
DatabaseConfiguration dbConf = kb().getDbConf();

Class dtClass = trainingParameters.getDataTransformerClass();

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/datumbox/common/dataobjects/Dataframe.java
Expand Up @@ -189,8 +189,8 @@ public static Dataframe parseCSVFile(Reader reader, String yVariable, Map<String
private Map<Object, TypeInference.DataType> xDataTypes;
private Map<Integer, Record> records;

private final DatabaseConnector dbc; //declare transient and remove final if serializable
private final DatabaseConfiguration dbConf; //declare transient and remove final if serializable
private final DatabaseConnector dbc;
private final DatabaseConfiguration dbConf;

/**
* Public constructor of Dataframe.
Expand Down
Expand Up @@ -15,6 +15,8 @@
*/
package com.datumbox.common.dataobjects;

import java.util.Locale;

/**
* This class is responsible for inferring the internal DataType of the objects
* and for converting safely their values to a specified type.
Expand Down Expand Up @@ -65,7 +67,7 @@ protected static Object parse(String s, DataType dataType) {
}

if(dataType == DataType.BOOLEAN) {
switch (s.toLowerCase()) {
switch (s.toLowerCase(Locale.ENGLISH)) {
case "1":
case "true":
case "yes":
Expand Down
Expand Up @@ -20,7 +20,6 @@
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Map;
Expand Down Expand Up @@ -86,10 +85,7 @@ public <T extends Serializable> T loadObject(String name, Class<T> klass) {
try {
//read the stored serialized object
Map<String, Object> storedObjects = (Map<String, Object>)DeepCopy.deserialize(Files.readAllBytes(getDefaultPath()));
return (T) storedObjects.get(name);
}
catch (NoSuchFileException ex) {
return null;
return klass.cast(storedObjects.get(name));
}
catch (IOException ex) {
throw new UncheckedIOException(ex);
Expand Down Expand Up @@ -150,7 +146,7 @@ private Path getDefaultPath() {
//get the default filepath of the permanent db file
String outputFolder = this.dbConf.getOutputFolder();

Path filepath = null;
Path filepath;
if(outputFolder == null || outputFolder.isEmpty()) {
filepath= FileSystems.getDefault().getPath(database); //write them to the default accessible path
}
Expand Down
Expand Up @@ -90,7 +90,7 @@ public <T extends Serializable> T loadObject(String name, Class<T> klass) {
assertConnectionOpen();
DB db = openDB(DatabaseType.PRIMARY_DB);
Atomic.Var<T> atomicVar = db.getAtomicVar(name);
return atomicVar.get();
return klass.cast(atomicVar.get());
}

/** {@inheritDoc} */
Expand Down
93 changes: 39 additions & 54 deletions src/main/java/com/datumbox/common/utilities/PHPfunctions.java
Expand Up @@ -272,16 +272,7 @@ public static <T> String var_export(T object) {
* @return
*/
public static <T extends Comparable<T>> Integer[] asort(T[] array) {
//sort the indexes first
ArrayIndexComparator<T> comparator = new ArrayIndexComparator<>(array);
Integer[] indexes = comparator.createIndexArray();
Arrays.sort(indexes, comparator);

//sort the array based on the indexes
//sortArrayBasedOnIndex(array, indexes);
Arrays.sort(array);

return indexes;
return _asort(array, false);
}

/**
Expand All @@ -293,17 +284,50 @@ public static <T extends Comparable<T>> Integer[] asort(T[] array) {
* @return
*/
public static <T extends Comparable<T>> Integer[] arsort(T[] array) {
return _asort(array, true);
}

/**
 * Shared implementation of asort()/arsort(): sorts the provided array
 * in-place (ascending when reverse is false, descending when true) and
 * returns the original positions of the elements in their new order.
 *
 * @param <T>     element type, must be self-comparable
 * @param array   the array to sort in-place
 * @param reverse true for descending order, false for ascending
 * @return the indexes: indexes[i] is the original position of the element
 *         that now sits at position i of the sorted array
 */
private static <T extends Comparable<T>> Integer[] _asort(T[] array, boolean reverse) {
    //create an array with the indexes
    Integer[] indexes = new Integer[array.length];
    for (int i = 0; i < array.length; ++i) {
        indexes[i] = i;
    }

    //sort the indexes by comparing the values they reference
    Comparator<Integer> c = (Integer index1, Integer index2) -> array[index1].compareTo(array[index2]);
    c = reverse?Collections.reverseOrder(c):c;
    Arrays.sort(indexes, c);

    //rearrange the array based on the order of indexes
    arrangeByIndex(array, indexes);

    return indexes;
}

/**
 * Rearranges the array in-place so that position i receives the element
 * that was originally stored at position indexes[i].
 *
 * @param <T>     element type
 * @param array   the array to permute in-place
 * @param indexes the permutation: indexes[i] is the original position of
 *                the element that should end up at position i
 * @throws IllegalArgumentException if the two arrays differ in length
 */
public static <T> void arrangeByIndex(T[] array, Integer[] indexes) {
    if(array.length != indexes.length) {
        throw new IllegalArgumentException("The length of the two arrays must match.");
    }

    //Work on a snapshot of the array: swapping elements in-place corrupts
    //positions that later iterations still need to read (e.g. [30,10,20]
    //with indexes [1,2,0] would end up as [30,20,10] instead of [10,20,30]).
    T[] original = Arrays.copyOf(array, array.length);
    for(int i=0;i<array.length;i++) {
        array[i] = original[indexes[i]];
    }
}

/**
* Copies the elements of double array.
*
Expand Down Expand Up @@ -335,42 +359,3 @@ public static double[][] array_clone(double[][] a) {
}

}

/*
* Modified code found at:
* http://stackoverflow.com/questions/4859261/get-the-indices-of-an-array-after-sorting
*/
class ArrayIndexComparator<T extends Comparable<T>> implements Comparator<Integer> {
protected final T[] array;

protected ArrayIndexComparator(T[] array) {
this.array = array;
}

protected Integer[] createIndexArray() {
Integer[] indexes = new Integer[array.length];
for (int i = 0; i < array.length; ++i) {
indexes[i] = i;
}
return indexes;
}

/** {@inheritDoc} */
@Override
public int compare(Integer index1, Integer index2) {
return array[index1].compareTo(array[index2]);
}
}

/*
 * Descending variant of ArrayIndexComparator: orders indexes by the
 * reverse natural ordering of the array elements they reference.
 */
class ArrayIndexReverseComparator<T extends Comparable<T>> extends ArrayIndexComparator<T> {

protected ArrayIndexReverseComparator(T[] array) {
super(array);
}

/** {@inheritDoc} */
@Override
public int compare(Integer index1, Integer index2) {
//operands swapped relative to the parent class so larger elements sort first
return array[index2].compareTo(array[index1]);
}
}
Expand Up @@ -175,8 +175,8 @@ public AssociativeArray estimateEfficiency(Map<Object, DeaRecord> id2DeaRecordMa
for(int i=0;i<conInput.length;++i) {
currentConstraintBody[conOutput.length+i]=-conInput[i];
}
conOutput=null;
conInput=null;
//conOutput=null;
//conInput=null;

//add the constraint to the list
constraints.add(new LPSolver.LPConstraint(currentConstraintBody, LpSolve.LE, 0.0)); //less than 0
Expand Down Expand Up @@ -211,18 +211,18 @@ public AssociativeArray estimateEfficiency(Map<Object, DeaRecord> id2DeaRecordMa
objectiveFunction[conOutput.length+i]=0.0; //set zeros on objective function for input
denominatorConstraintBody[conOutput.length+i]=conInput[i]; //set the input to the constraint
}
conInput=null;
conOutput=null;
//conInput=null;
//conOutput=null;

//set the denominator equal to 1
constraints.add(new LPSolver.LPConstraint(denominatorConstraintBody, LpSolve.EQ, 1.0));
}


/*
double[] lowBoundsOfVariables = null;
double[] upBoundsOfVariables = null;
boolean[] strictlyIntegerVariables = null;

/*
lowBoundsOfVariables = new double[totalColumns];
upBoundsOfVariables = new double[totalColumns];
strictlyIntegerVariables = new boolean[totalColumns];
Expand All @@ -236,7 +236,7 @@ public AssociativeArray estimateEfficiency(Map<Object, DeaRecord> id2DeaRecordMa
Integer scalingMode = LpSolve.SCALE_GEOMETRIC;

//RUN SOLVE
LPSolver.LPResult result = LPSolver.solve(objectiveFunction, constraints, lowBoundsOfVariables, upBoundsOfVariables, strictlyIntegerVariables, scalingMode);
LPSolver.LPResult result = LPSolver.solve(objectiveFunction, constraints, null, null, null, scalingMode);
Double objectiveValue = result.getObjectiveValue();

if(hasInput) {
Expand Down

0 comments on commit a378241

Please sign in to comment.