Remove Guava from Framework, replace the permutations() method. Move logback dependency to tests and use slf4j in core. Refactor loggers and printing messages.
datumbox committed Apr 22, 2015
1 parent c30deee commit 8e2a967
Showing 18 changed files with 94 additions and 68 deletions.
40 changes: 26 additions & 14 deletions NOTICE
@@ -4,16 +4,6 @@ Copyright (C) 2013 Vasilis Vryniotis <bbriniotis@datumbox.com>

The following libraries are included in packaged versions of this project:

* JUnit
* COPYRIGHT: Copyright 2002 JUnit
* LICENSE: http://www.opensource.org/licenses/cpl.php (Common Public License Version 1.0)
* HOMEPAGE: http://www.junit.org/

* ClasspathSuite
* COPYRIGHT: Copyright 2006 Johannes Link
* LICENSE: http://www.apache.org/licenses/LICENSE-2.0.txt (Apache License, Version 2.0)
* HOMEPAGE: https://github.com/takari/takari-cpsuite

* Apache Commons Lang
* COPYRIGHT: Copyright 2001 The Apache Software Foundation
* LICENSE: http://www.apache.org/licenses/LICENSE-2.0.txt (Apache License, Version 2.0)
@@ -29,10 +19,10 @@ The following libraries are included in packaged versions of this project:
* LICENSE: http://www.apache.org/licenses/LICENSE-2.0.txt (Apache License, Version 2.0)
* HOMEPAGE: http://commons.apache.org/proper/commons-csv/

* Guava
* COPYRIGHT: Copyright 2007 Google Inc.
* LICENSE: http://www.apache.org/licenses/LICENSE-2.0.txt (Apache License, Version 2.0)
* HOMEPAGE: http://code.google.com/p/guava-libraries/
* SLF4J API
* COPYRIGHT: Copyright 2004 QOS.ch
* LICENSE: http://www.slf4j.org/license.html (MIT License)
* HOMEPAGE: http://www.slf4j.org/

* LIBSVM
* COPYRIGHT: Copyright 2000 Chih-Chung Chang and Chih-Jen Lin
@@ -49,8 +39,30 @@ The following libraries are included in packaged versions of this project:
* LICENSE: http://www.apache.org/licenses/LICENSE-2.0.txt (Apache License, Version 2.0)
* HOMEPAGE: http://www.mapdb.org/


Code from the following software is included in this project:

* Guava
* COPYRIGHT: Copyright 2007 Google Inc.
* LICENSE: http://www.apache.org/licenses/LICENSE-2.0.txt (Apache License, Version 2.0)
* HOMEPAGE: http://code.google.com/p/guava-libraries/


The following libraries are required for the tests of this project:

* JUnit
* COPYRIGHT: Copyright 2002 JUnit
* LICENSE: http://www.opensource.org/licenses/cpl.php (Common Public License Version 1.0)
* HOMEPAGE: http://www.junit.org/

* ClasspathSuite
* COPYRIGHT: Copyright 2006 Johannes Link
* LICENSE: http://www.apache.org/licenses/LICENSE-2.0.txt (Apache License, Version 2.0)
* HOMEPAGE: https://github.com/takari/takari-cpsuite

* Logback
* COPYRIGHT: Copyright 1999 QOS.ch
* LICENSE: http://logback.qos.ch/license.html (Eclipse Public License v1.0 / GNU Lesser General Public License version 2.1)
* HOMEPAGE: http://logback.qos.ch/


43 changes: 25 additions & 18 deletions pom.xml
@@ -69,6 +69,7 @@
</issueManagement>

<properties>
<!-- Build Plugins -->
<java-version>1.8</java-version>
<maven-compiler-plugin-version>3.3</maven-compiler-plugin-version>
<maven-javadoc-plugin-version>2.10.3</maven-javadoc-plugin-version>
@@ -79,17 +80,21 @@
<license-maven-plugin-version>2.10</license-maven-plugin-version>
<gpg-plugin-version>1.6</gpg-plugin-version>

<junit-version>4.12</junit-version>
<cpsuite-version>1.2.7</cpsuite-version>
<!-- Code Dependencies -->
<commons-lang-version>3.4</commons-lang-version>
<commons-math-version>3.5</commons-math-version>
<commons-csv-version>1.1</commons-csv-version>
<guava-version>18.0</guava-version>
<slf4j-api-version>1.7.12</slf4j-api-version>
<libsvm-version>3.18.1</libsvm-version>
<lpsolve-version>5.5.2.0</lpsolve-version>
<mapdb-version>1.0.7</mapdb-version>

<!-- Test Dependencies -->
<junit-version>4.12</junit-version>
<cpsuite-version>1.2.7</cpsuite-version>
<logback-classic-version>1.1.3</logback-classic-version>

<!-- Configuration -->
<gpg.keyname>7083A486</gpg.keyname>
<buildNumber>${maven.build.timestamp}</buildNumber>
<maven.build.timestamp.format>yyyyMMdd</maven.build.timestamp.format>
@@ -247,18 +252,6 @@
</build>

<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit-version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.takari.junit</groupId>
<artifactId>takari-cpsuite</artifactId>
<version>${cpsuite-version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
@@ -275,9 +268,9 @@
<version>${commons-csv-version}</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava-version}</version>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j-api-version}</version>
</dependency>
<dependency>
<groupId>com.facebook.thirdparty</groupId>
@@ -294,10 +287,24 @@
<artifactId>mapdb</artifactId>
<version>${mapdb-version}</version>
</dependency>

<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit-version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.takari.junit</groupId>
<artifactId>takari-cpsuite</artifactId>
<version>${cpsuite-version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>${logback-classic-version}</version>
<scope>test</scope>
</dependency>
</dependencies>

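With this split, the core artifact compiles only against the SLF4J API, while Logback appears solely on the test classpath as the binding. A minimal sketch of what core code can now rely on (class name and message are illustrative, not from the repository):

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class CoreComponent {
    // Core code depends only on the slf4j-api artifact; at test time
    // logback-classic is picked up as the binding, and downstream users
    // remain free to plug in a different SLF4J implementation.
    private static final Logger logger = LoggerFactory.getLogger(CoreComponent.class);

    public void doWork() {
        logger.info("doing work");
    }
}

If no binding is on the classpath, SLF4J 1.7.x prints a one-time warning and falls back to a no-op logger, so the library imposes no logging implementation on its users.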
6 changes: 3 additions & 3 deletions src/main/java/com/datumbox/common/dataobjects/Dataset.java
@@ -57,7 +57,7 @@ public static Dataset parseTextFiles(Map<Object, URI> textFilesMap, TextExtracto
Object theClass = entry.getKey();
URI datasetURI = entry.getValue();

logger.info("Dataset Parsing " + theClass + " class");
logger.info("Dataset Parsing {} class", theClass);

try (final BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(new File(datasetURI)), "UTF8"))) {
for (String line; (line = br.readLine()) != null;) {
@@ -79,7 +79,7 @@ public static Dataset parseCSVFile(Reader reader, String yVariable, Map<String,
logger.info("Parsing CSV file");

if (!headerDataTypes.containsKey(yVariable)) {
logger.warn("WARNING: The file is missing the response variable column " + yVariable + ".");
logger.warn("WARNING: The file is missing the response variable column {}.", yVariable);
}

TypeInference.DataType yDataType = headerDataTypes.get(yVariable);
@@ -99,7 +99,7 @@ public static Dataset parseCSVFile(Reader reader, String yVariable, Map<String,
for (CSVRecord row : parser) {

if (!row.isConsistent()) {
logger.warn("WARNING: Skipping row " + row.getRecordNumber() + " because its size does not match the header size.");
logger.warn("WARNING: Skipping row {} because its size does not match the header size.", row.getRecordNumber());
continue;
}

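The replacements above (and in the hunks that follow) all apply the same SLF4J idiom: swap string concatenation for {} placeholders, so the message is only formatted when the log level is actually enabled. A small self-contained illustration of the two styles (class and values are hypothetical):

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LoggingStyleDemo {
    private static final Logger logger = LoggerFactory.getLogger(LoggingStyleDemo.class);

    public static void main(String[] args) {
        Object theClass = "spam";
        // Old style: the concatenated String is built even if INFO is disabled.
        logger.info("Dataset Parsing " + theClass + " class");
        // New style: formatting is deferred until after the level check passes.
        logger.info("Dataset Parsing {} class", theClass);
    }
}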
@@ -215,7 +215,7 @@ private void IIS(Dataset trainingData, Map<List<Object>, Double> EpFj_observed,
DatabaseConnector dbc = knowledgeBase.getDbc();
for(int iteration=0;iteration<totalIterations;++iteration) {

logger.debug("Iteration "+iteration);
logger.debug("Iteration {}", iteration);

Map<List<Object>, Double> tmp_EpFj_model = dbc.getBigMap("tmp_EpFj_model", true);
Collection<List<Object>> infiniteLambdaWeights = new ArrayList<>();
@@ -218,7 +218,7 @@ protected void _fit(Dataset trainingData) {
DatabaseConnector dbc = knowledgeBase.getDbc();
for(int iteration=0;iteration<totalIterations;++iteration) {

logger.debug("Iteration "+iteration);
logger.debug("Iteration {}", iteration);

Map<Object, Double> tmp_newThitas = dbc.getBigMap("tmp_newThitas", true);

@@ -184,7 +184,7 @@ protected void _fit(Dataset trainingData) {
DatabaseConnector dbc = knowledgeBase.getDbc();
for(int iteration=0;iteration<totalIterations;++iteration) {

logger.debug("Iteration "+iteration);
logger.debug("Iteration {}", iteration);

Map<List<Object>, Double> tmp_newThitas = dbc.getBigMap("tmp_newThitas", true);

@@ -587,7 +587,7 @@ private void calculateClusters(Dataset trainingData) {

AssociativeArray clusterDistances = new AssociativeArray();
for(int iteration=0;iteration<maxIterations;++iteration) {
logger.debug("Iteration "+iteration);
logger.debug("Iteration {}", iteration);

//reset cluster points
for(Cluster c : clusterList.values()) {
@@ -38,7 +38,7 @@
*/
public abstract class BaseTrainable<MP extends BaseModelParameters, TP extends BaseTrainingParameters, KB extends KnowledgeBase<MP, TP>> implements Trainable<MP, TP> {

protected final Logger logger;
protected final Logger logger = LoggerFactory.getLogger(getClass());

protected KB knowledgeBase;
protected String dbName;
@@ -64,8 +64,6 @@ protected BaseTrainable(String dbName, DatabaseConfiguration dbConf) {
}

this.dbName = dbName;

logger = LoggerFactory.getLogger(this.getClass());
}

protected BaseTrainable(String dbName, DatabaseConfiguration dbConf, Class<MP> mpClass, Class<TP> tpClass) {
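Moving the LoggerFactory call from the constructor into the field declaration preserves the behaviour: getClass() returns the runtime class, so every subclass still logs under its own category. A hypothetical minimal pair showing the effect:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

abstract class Base {
    // getClass() resolves to the concrete runtime class, so the logger
    // category is the subclass name, not "Base".
    protected final Logger logger = LoggerFactory.getLogger(getClass());
}

class ConcreteModel extends Base {
    void run() {
        logger.debug("appears under the ConcreteModel category");
    }
}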
@@ -206,7 +206,7 @@ protected void _fit(Dataset trainingData) {
int t=0;
int retryCounter = 0;
while(t<totalWeakClassifiers) {
logger.debug("Training Weak learner "+t);
logger.debug("Training Weak learner {}", t);
//We sample a list of Ids based on their weights
FlatDataList sampledIDs = SRS.weightedSampling(observationWeights, n, true).toFlatDataList();

@@ -280,7 +280,7 @@ private int collapsedGibbsSampling(Dataset dataset) {
int iteration=0;
while(iteration<maxIterations && noChangeMade==false) {

logger.debug("Iteration "+iteration);
logger.debug("Iteration {}", iteration);

noChangeMade=true;
for(Integer rId : dataset) {
@@ -17,7 +17,7 @@

import com.datumbox.common.persistentstorage.interfaces.DatabaseConfiguration;
import com.datumbox.common.persistentstorage.interfaces.DatabaseConnector;
import com.google.common.collect.Ordering;
import com.datumbox.common.utilities.SelectKth;
import java.util.Iterator;
import java.util.Map;
import org.slf4j.Logger;
@@ -56,8 +56,7 @@ public static void selectHighScoreFeatures(Map<Object, Double> featureScores, In
logger.debug("selectHighScoreFeatures()");

logger.debug("Estimating the minPermittedScore");
Double minPermittedScore=Ordering.<Double>natural().greatestOf(featureScores.values().iterator(), maxFeatures).get(maxFeatures-1);
//Double minPermittedScore = SelectKth.largest(featureScores.values().iterator(), maxFeatures);
Double minPermittedScore = SelectKth.largest(featureScores.values().iterator(), maxFeatures);

//remove any entry with score less than the minimum permitted one
logger.debug("Removing features with scores less than threshold");
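Here Guava's Ordering.greatestOf is replaced by the project's own SelectKth.largest. The repository's implementation is not shown in this diff; one standard way to find the k-th largest element of an iterator in O(n log k) is a bounded min-heap, sketched below (an illustration, not necessarily how SelectKth is written — quickselect-style algorithms are another common choice):

import java.util.Iterator;
import java.util.PriorityQueue;

public final class KthLargestSketch {
    // Assumes the iterator yields at least k elements.
    public static Double largest(Iterator<Double> it, int k) {
        PriorityQueue<Double> heap = new PriorityQueue<>(k); // min-heap of the k largest so far
        while (it.hasNext()) {
            Double v = it.next();
            if (heap.size() < k) {
                heap.offer(v);
            } else if (v > heap.peek()) {
                heap.poll();   // evict the smallest of the current top-k
                heap.offer(v);
            }
        }
        return heap.peek();    // the k-th largest overall
    }
}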
@@ -36,12 +36,12 @@
*/
public abstract class ModelValidation<MP extends BaseMLmodel.ModelParameters, TP extends BaseMLmodel.TrainingParameters, VM extends BaseMLmodel.ValidationMetrics> {

protected final Logger logger;
protected final Logger logger = LoggerFactory.getLogger(getClass());

public static final String DB_INDICATOR="Kfold";

public ModelValidation() {
logger = LoggerFactory.getLogger(this.getClass());

}

public VM kFoldCrossValidation(Dataset dataset, int k, String dbName, DatabaseConfiguration dbConf, Class<? extends BaseMLmodel> aClass, TP trainingParameters) {
Expand Down Expand Up @@ -69,7 +69,7 @@ public VM kFoldCrossValidation(Dataset dataset, int k, String dbName, DatabaseCo
List<VM> validationMetricsList = new LinkedList<>();
for(int fold=0;fold<k;++fold) {

logger.info("Kfold "+fold);
logger.info("Kfold {}", fold);

//as fold window we consider the part of the ids that are used for validation
FlatDataList foldTrainingIds = new FlatDataList(new ArrayList<>(n-foldSize));
@@ -23,7 +23,6 @@
import com.datumbox.common.dataobjects.TypeInference;

import com.datumbox.framework.machinelearning.common.bases.featureselection.ScoreBasedFeatureSelection;
import java.util.Iterator;
import java.util.Map;


@@ -109,7 +109,7 @@ protected void _fit(Dataset trainingData) {
DatabaseConnector dbc = knowledgeBase.getDbc();
for(int iteration=0;iteration<totalIterations;++iteration) {

logger.debug("Iteration "+iteration);
logger.debug("Iteration {}", iteration);

Map<Object, Double> tmp_newThitas = dbc.getBigMap("tmp_newThitas", true);

@@ -306,7 +306,7 @@ protected void _fit(Dataset trainingData) {
int iteration=0;
while(iteration<maxIterations) {

logger.debug("Iteration "+iteration);
logger.debug("Iteration {}", iteration);

int changedCounter = 0;
//collapsed gibbs sampler
@@ -387,7 +387,7 @@ protected void _fit(Dataset trainingData) {
}
++iteration;

logger.debug("Reassigned Records "+ changedCounter);
logger.debug("Reassigned Records {}", changedCounter);

if(changedCounter==0) {
break;
@@ -533,7 +533,7 @@ private ValidationMetrics predictAndValidate(Dataset newData) {
double perplexity = Double.MAX_VALUE;
for(int iteration=0;iteration<maxIterations;++iteration) {

logger.debug("Iteration "+iteration);
logger.debug("Iteration {}", iteration);


//collapsed gibbs sampler
@@ -632,7 +632,7 @@ private ValidationMetrics predictAndValidate(Dataset newData) {

perplexity=Math.exp(-perplexity/totalDatasetWords);

logger.debug("Reassigned Records "+ changedCounter +" - Perplexity: "+perplexity);
logger.debug("Reassigned Records {} - Perplexity: {}", changedCounter, perplexity);

if(changedCounter==0) {
break;
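The context lines of this last hunk show how the LDA validation derives perplexity: before the final transform, the perplexity variable holds the summed word log-likelihoods, which Math.exp(-perplexity/totalDatasetWords) turns into per-word perplexity. A quick worked check under that assumption (numbers hypothetical):

public class PerplexityDemo {
    // perplexity = exp(-sumLogLikelihood / totalWords)
    static double perplexity(double sumLogLikelihood, long totalDatasetWords) {
        return Math.exp(-sumLogLikelihood / totalDatasetWords);
    }

    public static void main(String[] args) {
        // 10,000 words with an average log-likelihood of -6.2 per word:
        System.out.println(perplexity(-6.2 * 10000, 10000)); // ~492.75
    }
}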
