Skip to content

Commit

Permalink
Adding more log messages
Browse files Browse the repository at this point in the history
  • Loading branch information
datumbox committed Apr 11, 2015
1 parent 7eff421 commit ab0b3ea
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 5 deletions.
Expand Up @@ -28,6 +28,8 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;



Expand Down Expand Up @@ -127,7 +129,7 @@ protected void _fit(Dataset data) {
Map<Object, Double> tmp_featureCounts = dbc.getBigMap("tmp_featureCounts", true); //map which stores the counts of the features


//build the maps with teh feature statistics and counts
//build the maps with the feature statistics and counts
buildFeatureStatistics(data, tmp_classCounts, tmp_featureClassCounts, tmp_featureCounts);


Expand All @@ -151,6 +153,8 @@ protected void filterFeatures(Dataset newdata) {
}

private static void filterData(Dataset data, DatabaseConnector dbc, Map<Object, Double> featureScores, boolean ignoringNumericalFeatures) {
Logger logger = LoggerFactory.getLogger(CategoricalFeatureSelection.class);
logger.debug("filterData()");

Map<Object, Boolean> tmp_removedColumns = dbc.getBigMap("tmp_removedColumns", true);

Expand All @@ -168,7 +172,7 @@ private static void filterData(Dataset data, DatabaseConnector dbc, Map<Object,
}
}


logger.debug("Removing Columns");
data.removeColumns(tmp_removedColumns.keySet());

//Drop the temporary Collection
Expand All @@ -189,6 +193,8 @@ public static void removeRareFeatures(Dataset data, DatabaseConnector dbc, Integ
//feature selection. If called statically, the map should be instantiated
//just before the call to this method and dropped immediately after
//since it has no use.
Logger logger = LoggerFactory.getLogger(CategoricalFeatureSelection.class);
logger.debug("removeRareFeatures()");

if(!featureCounts.isEmpty()) {
throw new RuntimeException("The featureCounts map should be empty.");
Expand All @@ -197,6 +203,8 @@ public static void removeRareFeatures(Dataset data, DatabaseConnector dbc, Integ
Map<Object, Dataset.ColumnType> columnTypes = data.getColumns();

//find the featureCounts

logger.debug("Estimating featureCounts");
for(Integer rId : data) {
Record r = data.get(rId);
for(Map.Entry<Object, Object> entry : r.getX().entrySet()) {
Expand Down Expand Up @@ -227,6 +235,7 @@ public static void removeRareFeatures(Dataset data, DatabaseConnector dbc, Integ

//remove rare features
if(rareFeatureThreshold != null && rareFeatureThreshold>0) {
logger.debug("Removing rare features");
//remove features from the featureCounts list
Iterator<Map.Entry<Object, Double>> it = featureCounts.entrySet().iterator();
while(it.hasNext()) {
Expand All @@ -236,12 +245,13 @@ public static void removeRareFeatures(Dataset data, DatabaseConnector dbc, Integ
}
}

//then remove the features in dataset that does not appear in the list
//then remove the features in dataset that do not appear in the list
filterData(data, dbc, featureCounts, ignoringNumericalFeatures);
}
}

private void buildFeatureStatistics(Dataset data, Map<Object, Integer> classCounts, Map<List<Object>, Integer> featureClassCounts, Map<Object, Double> featureCounts) {
logger.debug("buildFeatureStatistics()");
TP trainingParameters = knowledgeBase.getTrainingParameters();
Integer rareFeatureThreshold = trainingParameters.getRareFeatureThreshold();
boolean ignoringNumericalFeatures = trainingParameters.isIgnoringNumericalFeatures();
Expand All @@ -251,7 +261,9 @@ private void buildFeatureStatistics(Dataset data, Map<Object, Integer> classCoun
//The map must be empty or else you get a RuntimeException
removeRareFeatures(data, knowledgeBase.getDbc(), rareFeatureThreshold, featureCounts, ignoringNumericalFeatures);

Map<Object, Dataset.ColumnType> columnTypes = data.getColumns();
//now find the classCounts and the featureClassCounts
logger.debug("Estimating classCounts and featureClassCounts");
for(Integer rId : data) {
Record r = data.get(rId);
Object theClass = r.getY();
Expand All @@ -268,7 +280,7 @@ private void buildFeatureStatistics(Dataset data, Map<Object, Integer> classCoun
Object feature = entry.getKey();

if(ignoringNumericalFeatures) { //if we ignore the numerical features, investigate further if we must skip the feature
if(data.getColumns().get(feature)==Dataset.ColumnType.NUMERICAL) { //is it numerical?
if(columnTypes.get(feature)==Dataset.ColumnType.NUMERICAL) { //is it numerical?
continue; //skip any further analysis
}
}
Expand Down
Expand Up @@ -20,6 +20,8 @@
import com.google.common.collect.Ordering;
import java.util.Iterator;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Abstract class which is the base of every Categorical Feature Selection algorithm.
Expand Down Expand Up @@ -50,10 +52,14 @@ protected ScoreBasedFeatureSelection(String dbName, DatabaseConfiguration dbConf


public static void selectHighScoreFeatures(Map<Object, Double> featureScores, Integer maxFeatures) {
Logger logger = LoggerFactory.getLogger(ScoreBasedFeatureSelection.class);
logger.debug("selectHighScoreFeatures()");

logger.debug("Estimating the minPermittedScore");
Double minPermittedScore=Ordering.<Double>natural().greatestOf(featureScores.values().iterator(), maxFeatures).get(maxFeatures-1);

//remove any entry with score less than the minimum permitted one
logger.debug("Removing features with scores less than threshold");
Iterator<Map.Entry<Object, Double>> it = featureScores.entrySet().iterator();
while(it.hasNext()) {
Map.Entry<Object, Double> entry = it.next();
Expand All @@ -65,6 +71,7 @@ public static void selectHighScoreFeatures(Map<Object, Double> featureScores, In
//if some extra features still exist (due to ties on the scores) remove some of those extra features
int numOfExtraFeatures = featureScores.size()-maxFeatures;
if(numOfExtraFeatures>0) {
logger.debug("Removing extra features caused by ties");
it = featureScores.entrySet().iterator();
while(it.hasNext() && numOfExtraFeatures>0) {
Map.Entry<Object, Double> entry = it.next();
Expand Down
Expand Up @@ -63,7 +63,7 @@ public ChisquareSelect(String dbName, DatabaseConfiguration dbConf) {

@Override
protected void estimateFeatureScores(Map<Object, Integer> classCounts, Map<List<Object>, Integer> featureClassCounts, Map<Object, Double> featureCounts) {

logger.debug("estimateFeatureScores()");
ModelParameters modelParameters = knowledgeBase.getModelParameters();
TrainingParameters trainingParameters = knowledgeBase.getTrainingParameters();

Expand Down
Expand Up @@ -51,6 +51,7 @@ public MutualInformation(String dbName, DatabaseConfiguration dbConf) {

@Override
protected void estimateFeatureScores(Map<Object, Integer> classCounts, Map<List<Object>, Integer> featureClassCounts, Map<Object, Double> featureCounts) {
logger.debug("estimateFeatureScores()");
ModelParameters modelParameters = knowledgeBase.getModelParameters();
TrainingParameters trainingParameters = knowledgeBase.getTrainingParameters();

Expand Down

0 comments on commit ab0b3ea

Please sign in to comment.