Skip to content
This repository has been archived by the owner on Jan 20, 2022. It is now read-only.

Commit

Permalink
Scorecards: Changes to remove "dataType" extension and move "external…
Browse files Browse the repository at this point in the history
…Class" extension from Characteristic to MiningField
  • Loading branch information
vinodkiran committed Sep 7, 2012
1 parent 490982f commit 8186951
Show file tree
Hide file tree
Showing 8 changed files with 130 additions and 52 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@
package org.drools.scorecards;

public enum ScorecardFormat {
XLS, PMML
XLS, CSV
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@

public abstract class AbstractDRLEmitter {

protected String formRuleName(String modelName, Characteristic c, Attribute scoreAttribute) {
protected String formRuleName(PMML pmmlDocument, String modelName, Characteristic c, Attribute scoreAttribute) {
StringBuilder sb = new StringBuilder();
sb.append(modelName).append("_").append(c.getName()).append("_");
String dataType = ScorecardPMMLUtils.getDataType(c);
String dataType = ScorecardPMMLUtils.getDataType(pmmlDocument, ScorecardPMMLUtils.extractFieldNameFromCharacteristic(c));
if (XLSKeywords.DATATYPE_NUMBER.equalsIgnoreCase(dataType)) {
if (scoreAttribute.getSimplePredicate() != null) {
sb.append(scoreAttribute.getSimplePredicate().getOperator()).append("_").append(scoreAttribute.getSimplePredicate().getValue());
Expand Down Expand Up @@ -74,24 +74,6 @@ protected Characteristics getCharacteristicsFromScorecard(Scorecard scorecard) {
return null;
}

/**
 * Returns the field name referenced by the predicate of the characteristic's
 * first attribute.
 *
 * Only the first attribute is inspected. A simple predicate is preferred,
 * then a simple-set predicate, then the first entry of a compound predicate
 * (when that entry is itself a simple or simple-set predicate).
 *
 * @param c the characteristic whose first attribute is examined
 * @return the referenced field name, or an empty string when none of the
 *         supported predicate kinds is present
 */
protected String extractFieldFromCharacteristic(Characteristic c) {
    final Attribute firstAttribute = c.getAttributes().get(0);

    final SimplePredicate simple = firstAttribute.getSimplePredicate();
    if (simple != null) {
        return simple.getField();
    }

    if (firstAttribute.getSimpleSetPredicate() != null) {
        return firstAttribute.getSimpleSetPredicate().getField();
    }

    if (firstAttribute.getCompoundPredicate() == null) {
        return "";
    }

    // Only the first nested predicate of a compound predicate is considered.
    final Object nested = firstAttribute.getCompoundPredicate()
            .getSimplePredicatesAndCompoundPredicatesAndSimpleSetPredicates()
            .get(0);
    if (nested instanceof SimplePredicate) {
        return ((SimplePredicate) nested).getField();
    }
    if (nested instanceof SimpleSetPredicate) {
        return ((SimpleSetPredicate) nested).getField();
    }
    return "";
}

// Judging by its name, this is the place where globals would be added to the
// generated package. The body is empty here, so this implementation adds nothing.
// NOTE(review): presumably an extension point for subclasses — confirm.
protected void addGlobals(PMML pmml, org.drools.template.model.Package aPackage) {

}
Expand Down Expand Up @@ -149,7 +131,7 @@ protected void addDeclaredTypes(PMML pmml, Package aPackage) {
Scorecard scorecard = (Scorecard) obj;
stringBuilder.append("declare ").append(scorecard.getModelName().replaceAll(" ","")).append(" extends DroolsScorecard\n");

addDeclaredTypeContents(stringBuilder, scorecard);
addDeclaredTypeContents(pmml, stringBuilder, scorecard);

stringBuilder.append("end\n");
}
Expand All @@ -167,7 +149,7 @@ protected List<Rule> createRuleList(PMML pmmlDocument) {
for (org.dmg.pmml_4_1.Characteristic c : characteristics.getCharacteristics()) {
int attributePosition = 0;
for (org.dmg.pmml_4_1.Attribute scoreAttribute : c.getAttributes()) {
String name = formRuleName(scorecard.getModelName().replaceAll(" ",""), c, scoreAttribute);
String name = formRuleName(pmmlDocument, scorecard.getModelName().replaceAll(" ",""), c, scoreAttribute);
Rule rule = new Rule(name, 99, 1);
String desc = ScorecardPMMLUtils.getExtensionValue(scoreAttribute.getExtensions(), "description");
if (desc != null) {
Expand Down Expand Up @@ -209,7 +191,7 @@ protected void createInitialRule(List<Rule> ruleList, Scorecard scorecard) {
if (obj instanceof Characteristics){
Characteristics characteristics = (Characteristics)obj;
for (Characteristic characteristic : characteristics.getCharacteristics()){
String field = extractFieldFromCharacteristic(characteristic);
String field = ScorecardPMMLUtils.extractFieldNameFromCharacteristic(characteristic);
Consequence consequence = new Consequence();
if (characteristic.getBaselineScore() == null || characteristic.getBaselineScore() == 0 ) {
consequence.setSnippet("insertLogical(new BaselineScore(\"" + objectClass+"\",\""+field + "\","+scorecard.getBaselineScore()+"));");
Expand Down Expand Up @@ -240,9 +222,10 @@ protected void populateLHS(Rule rule, PMML pmmlDocument, Scorecard scorecard, Ch
addLHSConditions(rule, pmmlDocument, scorecard, c, scoreAttribute);
}

protected void createFieldRestriction(Characteristic c, Attribute scoreAttribute, StringBuilder stringBuilder) {
protected void createFieldRestriction(PMML pmmlDocument, Characteristic c, Attribute scoreAttribute, StringBuilder stringBuilder) {
stringBuilder.append("(");
String dataType = ScorecardPMMLUtils.getExtensionValue(c.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_DATATYPE);
//String dataType = ScorecardPMMLUtils.getExtensionValue(c.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_DATATYPE);
String dataType = ScorecardPMMLUtils.getDataType(pmmlDocument, ScorecardPMMLUtils.extractFieldNameFromCharacteristic(c));
if (XLSKeywords.DATATYPE_TEXT.equalsIgnoreCase(dataType)) {
if (scoreAttribute.getSimplePredicate() != null) {
SimplePredicate predicate = scoreAttribute.getSimplePredicate();
Expand Down Expand Up @@ -336,7 +319,7 @@ protected void populateRHS(Rule rule, PMML pmmlDocument, Scorecard scorecard, Ch
String objectClass = scorecard.getModelName().replaceAll(" ", "");

String setter = "insertLogical(new PartialScore(\"";
String field = extractFieldFromCharacteristic(c);
String field = ScorecardPMMLUtils.extractFieldNameFromCharacteristic(c);

stringBuilder.append(setter).append(objectClass).append("\",\"").append(field).append("\",").append(scoreAttribute.getPartialScore());
if (scorecard.isUseReasonCodes()){
Expand Down Expand Up @@ -387,7 +370,7 @@ protected void createSummationRules(List<Rule> ruleList, Scorecard scorecard) {
addAdditionalSummationConsequence(calcTotalRule, scorecard);
}

protected abstract void addDeclaredTypeContents(StringBuilder stringBuilder, Scorecard scorecard);
protected abstract void addDeclaredTypeContents(PMML pmmlDocument, StringBuilder stringBuilder, Scorecard scorecard);
protected abstract void internalEmitDRL(PMML pmml, List<Rule> ruleList, Package aPackage);
protected abstract void addLHSConditions(Rule rule, PMML pmmlDocument, Scorecard scorecard, Characteristic c, Attribute scoreAttribute);
protected abstract void addAdditionalReasonCodeConsequence(Rule rule, Scorecard scorecard);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import org.dmg.pmml_4_1.PMML;
import org.dmg.pmml_4_1.Scorecard;
import org.drools.scorecards.parser.xls.XLSKeywords;
import org.drools.scorecards.pmml.PMMLExtensionNames;
import org.drools.scorecards.pmml.ScorecardPMMLUtils;
import org.drools.template.model.Condition;
import org.drools.template.model.Consequence;
Expand All @@ -33,18 +32,19 @@

public class DeclaredTypesDRLEmitter extends AbstractDRLEmitter{

protected void addDeclaredTypeContents(StringBuilder stringBuilder, Scorecard scorecard) {
protected void addDeclaredTypeContents(PMML pmmlDocument, StringBuilder stringBuilder, Scorecard scorecard) {
Characteristics characteristics = getCharacteristicsFromScorecard(scorecard);
for (org.dmg.pmml_4_1.Characteristic c : characteristics.getCharacteristics()) {
String dataType = ScorecardPMMLUtils.getExtensionValue(c.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_DATATYPE);
String field = ScorecardPMMLUtils.extractFieldNameFromCharacteristic(c);
String dataType = ScorecardPMMLUtils.getDataType(pmmlDocument, field);
//String dataType = ScorecardPMMLUtils.getExtensionValue(c.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_DATATYPE);
if (XLSKeywords.DATATYPE_TEXT.equalsIgnoreCase(dataType)) {
dataType = "String";
} else if (XLSKeywords.DATATYPE_NUMBER.equalsIgnoreCase(dataType)) {
dataType = "int";
} else if (XLSKeywords.DATATYPE_BOOLEAN.equalsIgnoreCase(dataType)) {
dataType = "boolean";
}
String field = extractFieldFromCharacteristic(c);
stringBuilder.append("\t").append(field).append(" : ").append(dataType).append("\n");
}
}
Expand All @@ -63,7 +63,7 @@ protected void addLHSConditions(Rule rule, PMML pmmlDocument, Scorecard scorecar
String objectClass = scorecard.getModelName().replaceAll(" ", "");
stringBuilder.append(var).append(" : ").append(objectClass);

createFieldRestriction(c, scoreAttribute, stringBuilder);
createFieldRestriction(pmmlDocument, c, scoreAttribute, stringBuilder);

condition.setSnippet(stringBuilder.toString());
rule.addCondition(condition);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import org.dmg.pmml_4_1.Attribute;
import org.dmg.pmml_4_1.Characteristic;
import org.dmg.pmml_4_1.Extension;
import org.dmg.pmml_4_1.MiningField;
import org.dmg.pmml_4_1.MiningSchema;
import org.dmg.pmml_4_1.Output;
import org.dmg.pmml_4_1.OutputField;
import org.dmg.pmml_4_1.PMML;
Expand All @@ -35,7 +37,7 @@
public class ExternalModelDRLEmitter extends AbstractDRLEmitter {

@Override
protected void addDeclaredTypeContents(StringBuilder stringBuilder, Scorecard scorecard) {
protected void addDeclaredTypeContents(PMML pmmlDocument, StringBuilder stringBuilder, Scorecard scorecard) {
//empty by design
}

Expand All @@ -46,12 +48,26 @@ protected void internalEmitDRL(PMML pmml, List<Rule> ruleList, Package aPackage)

@Override
protected void addLHSConditions(Rule rule, PMML pmmlDocument, Scorecard scorecard, Characteristic c, Attribute scoreAttribute) {
Extension extension = ScorecardPMMLUtils.getExtension(c.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_EXTERNAL_CLASS);
Extension extension = null;
for (Object obj : scorecard.getExtensionsAndCharacteristicsAndMiningSchemas()){
if ( obj instanceof MiningSchema ) {
MiningSchema miningSchema = (MiningSchema)obj;
String fieldName = ScorecardPMMLUtils.extractFieldNameFromCharacteristic(c);
for (MiningField miningField : miningSchema.getMiningFields() ){
if ( miningField.getName().equalsIgnoreCase(fieldName)) {
if (miningField.getExtensions().size() > 0 ) {
extension = miningField.getExtensions().get(0);
}
}
}
}
}
//Extension extension = ScorecardPMMLUtils.getExtension(c.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_EXTERNAL_CLASS);
if ( extension != null ) {
Condition condition = new Condition();
StringBuilder stringBuilder = new StringBuilder("$");
stringBuilder.append(c.getName()).append(" : ").append(extension.getValue());
createFieldRestriction(c, scoreAttribute, stringBuilder);
createFieldRestriction(pmmlDocument, c, scoreAttribute, stringBuilder);
condition.setSnippet(stringBuilder.toString());
rule.addCondition(condition);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,11 +241,11 @@ private void setAdditionalExpectation(int currentRowCtr, int currentColCtr, Stri
attribute.getExtensions().add(extension);
addExpectation(r, currentColCtr+4, "reasonCode", attribute,null);
}
MiningField dataField = new MiningField();
dataField.setInvalidValueTreatment(INVALIDVALUETREATMENTMETHOD.AS_MISSING);
dataField.setUsageType(FIELDUSAGETYPE.ACTIVE);
miningSchema.getMiningFields().add(dataField);
addExpectation(currentRowCtr + 1, currentColCtr, "name", dataField, null);
MiningField miningField = new MiningField();
miningField.setInvalidValueTreatment(INVALIDVALUETREATMENTMETHOD.AS_MISSING);
miningField.setUsageType(FIELDUSAGETYPE.ACTIVE);
miningSchema.getMiningFields().add(miningField);
addExpectation(currentRowCtr + 1, currentColCtr, "name", miningField, null);
}

} else if (XLSKeywords.SCORECARD_CHARACTERISTIC_BIN_INITIALSCORE.equalsIgnoreCase(stringCellValue)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,16 @@

import org.dmg.pmml_4_1.Array;
import org.dmg.pmml_4_1.Attribute;
import org.dmg.pmml_4_1.Characteristic;
import org.dmg.pmml_4_1.Characteristics;
import org.dmg.pmml_4_1.CompoundPredicate;
import org.dmg.pmml_4_1.DATATYPE;
import org.dmg.pmml_4_1.DataDictionary;
import org.dmg.pmml_4_1.DataField;
import org.dmg.pmml_4_1.Extension;
import org.dmg.pmml_4_1.Header;
import org.dmg.pmml_4_1.MiningField;
import org.dmg.pmml_4_1.MiningSchema;
import org.dmg.pmml_4_1.OPTYPE;
import org.dmg.pmml_4_1.Output;
import org.dmg.pmml_4_1.OutputField;
Expand All @@ -53,6 +56,7 @@ public PMML generateDocument(Scorecard pmmlScorecard) {

//second add additional elements to scorecard
createAndSetOutput(pmmlScorecard);
repositionExternalClassExtensions(pmmlScorecard);

Extension scorecardPackage = ScorecardPMMLUtils.getExtension(pmmlScorecard.getExtensionsAndCharacteristicsAndMiningSchemas(), PMMLExtensionNames.SCORECARD_PACKAGE);
if ( scorecardPackage != null) {
Expand Down Expand Up @@ -80,6 +84,34 @@ public PMML generateDocument(Scorecard pmmlScorecard) {
return pmml;
}

/**
 * Moves the {@code PMMLExtensionNames.CHARACTERTISTIC_EXTERNAL_CLASS} extension
 * from each characteristic onto the mining field it refers to.
 *
 * A mining field and a characteristic are paired when the mining field's name
 * equals (ignoring case) the field name extracted from the characteristic via
 * {@code ScorecardPMMLUtils.extractFieldNameFromCharacteristic}. On a match the
 * extension is removed from the characteristic and appended to the mining
 * field's extensions.
 *
 * @param pmmlScorecard the scorecard whose characteristics and mining schemas
 *                      are rewritten in place
 */
private void repositionExternalClassExtensions(Scorecard pmmlScorecard) {
    Characteristics characteristics = null;
    for (Object obj : pmmlScorecard.getExtensionsAndCharacteristicsAndMiningSchemas()) {
        if (obj instanceof Characteristics) {
            characteristics = (Characteristics) obj;
            break;
        }
    }
    if (characteristics == null) {
        // No Characteristics element: there is nothing to move, and the loop
        // below would otherwise dereference null.
        return;
    }

    for (Object obj : pmmlScorecard.getExtensionsAndCharacteristicsAndMiningSchemas()) {
        if (!(obj instanceof MiningSchema)) {
            continue;
        }
        MiningSchema schema = (MiningSchema) obj;
        for (MiningField miningField : schema.getMiningFields()) {
            String fieldName = miningField.getName();
            if (fieldName == null) {
                // An unnamed mining field cannot be matched to any characteristic.
                continue;
            }
            for (Characteristic characteristic : characteristics.getCharacteristics()) {
                String characteristicName = ScorecardPMMLUtils.extractFieldNameFromCharacteristic(characteristic);
                if (!fieldName.equalsIgnoreCase(characteristicName)) {
                    continue;
                }
                Extension extension = ScorecardPMMLUtils.getExtension(characteristic.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_EXTERNAL_CLASS);
                if (extension != null) {
                    characteristic.getExtensions().remove(extension);
                    miningField.getExtensions().add(extension);
                }
            }
        }
    }
}

private void removeAttributeFieldExtension(Scorecard pmmlScorecard) {
for (Object obj : pmmlScorecard.getExtensionsAndCharacteristicsAndMiningSchemas()) {
if (obj instanceof Characteristics) {
Expand Down Expand Up @@ -108,7 +140,8 @@ private void createAndSetDataDictionary(PMML pmml, Scorecard pmmlScorecard) {
for (org.dmg.pmml_4_1.Characteristic characteristic : characteristics.getCharacteristics()) {

DataField dataField = new DataField();
String dataType = ScorecardPMMLUtils.getExtensionValue(characteristic.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_DATATYPE);
Extension dataTypeExtension = ScorecardPMMLUtils.getExtension(characteristic.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_DATATYPE);
String dataType = dataTypeExtension.getValue();
String factType = ScorecardPMMLUtils.getExtensionValue(characteristic.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_FACTTYPE);

if ( factType != null ){
Expand Down Expand Up @@ -139,6 +172,7 @@ private void createAndSetDataDictionary(PMML pmml, Scorecard pmmlScorecard) {
}
dataField.setName(field);
dataDictionary.getDataFields().add(dataField);
characteristic.getExtensions().remove(dataTypeExtension);
ctr++;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,28 @@

import java.util.List;

import org.dmg.pmml_4_1.Attribute;
import org.dmg.pmml_4_1.Characteristic;
import org.dmg.pmml_4_1.DATATYPE;
import org.dmg.pmml_4_1.DataDictionary;
import org.dmg.pmml_4_1.DataField;
import org.dmg.pmml_4_1.Extension;
import org.dmg.pmml_4_1.PMML;
import org.dmg.pmml_4_1.Scorecard;
import org.dmg.pmml_4_1.SimplePredicate;
import org.dmg.pmml_4_1.SimpleSetPredicate;
import org.drools.scorecards.parser.xls.XLSKeywords;

public class ScorecardPMMLUtils {

/**
 * Looks up the data-type extension attached directly to a characteristic.
 *
 * @param c the characteristic whose extensions are searched
 * @return the value of the first extension whose name equals
 *         {@code PMMLExtensionNames.CHARACTERTISTIC_DATATYPE} (ignoring case),
 *         or {@code null} when no such extension exists
 */
public static String getDataType(org.dmg.pmml_4_1.Characteristic c) {
    for (Extension candidate : c.getExtensions()) {
        final boolean isDataTypeExtension =
                PMMLExtensionNames.CHARACTERTISTIC_DATATYPE.equalsIgnoreCase(candidate.getName());
        if (!isDataTypeExtension) {
            continue;
        }
        return candidate.getValue();
    }
    return null;
}
// public static String getDataType(org.dmg.pmml_4_1.Characteristic c) {
// for (Extension extension : c.getExtensions()) {
// if (PMMLExtensionNames.CHARACTERTISTIC_DATATYPE.equalsIgnoreCase(extension.getName())) {
// return extension.getValue();
// }
// }
// return null;
// }

public static String getExtensionValue(List extensions, String extensionName) {
for (Object obj : extensions) {
Expand Down Expand Up @@ -64,4 +73,39 @@ public static Scorecard createScorecard(){
return scorecard;
}

/**
 * Resolves the scorecard data-type keyword for a field by looking it up in
 * the document's data dictionary.
 *
 * Field names are compared ignoring case. The mapping is:
 * {@code DATATYPE.DOUBLE} to {@code XLSKeywords.DATATYPE_NUMBER},
 * {@code DATATYPE.STRING} to {@code XLSKeywords.DATATYPE_TEXT} and
 * {@code DATATYPE.BOOLEAN} to {@code XLSKeywords.DATATYPE_BOOLEAN}.
 * A matching data field with any other type is skipped and the search
 * continues with the remaining data fields.
 *
 * @param pmmlDocument the PMML document holding the data dictionary
 * @param fieldName    the name of the field to look up
 * @return the matching keyword, or {@code null} when the document has no data
 *         dictionary, the field is not found, or its type is not one of the
 *         three mapped types
 */
public static String getDataType(PMML pmmlDocument, String fieldName) {
    DataDictionary dataDictionary = pmmlDocument.getDataDictionary();
    if (dataDictionary == null) {
        // A document without a data dictionary has no type information at all.
        return null;
    }
    for (DataField dataField : dataDictionary.getDataFields()) {
        String name = dataField.getName();
        // Skip unnamed data fields instead of failing on them.
        if (name == null || !name.equalsIgnoreCase(fieldName)) {
            continue;
        }
        DATATYPE datatype = dataField.getDataType();
        if (datatype == DATATYPE.DOUBLE) {
            return XLSKeywords.DATATYPE_NUMBER;
        } else if (datatype == DATATYPE.STRING) {
            return XLSKeywords.DATATYPE_TEXT;
        } else if (datatype == DATATYPE.BOOLEAN) {
            return XLSKeywords.DATATYPE_BOOLEAN;
        }
    }
    return null;
}

/**
 * Returns the field name referenced by the predicate of the characteristic's
 * first attribute.
 *
 * Only the first attribute is inspected. A simple predicate is preferred,
 * then a simple-set predicate, then the first entry of a compound predicate
 * (when that entry is itself a simple or simple-set predicate).
 *
 * @param c the characteristic whose first attribute is examined
 * @return the referenced field name, or an empty string when the
 *         characteristic has no attributes, the compound predicate is empty,
 *         or none of the supported predicate kinds is present
 */
public static String extractFieldNameFromCharacteristic(Characteristic c) {
    String field = "";
    if (c.getAttributes().isEmpty()) {
        // No attribute means no predicate to read a field from; fall back to
        // the same default used when no supported predicate is found.
        return field;
    }
    Attribute scoreAttribute = c.getAttributes().get(0);
    if (scoreAttribute.getSimplePredicate() != null) {
        field = scoreAttribute.getSimplePredicate().getField();
    } else if (scoreAttribute.getSimpleSetPredicate() != null) {
        field = scoreAttribute.getSimpleSetPredicate().getField();
    } else if (scoreAttribute.getCompoundPredicate() != null
            && !scoreAttribute.getCompoundPredicate().getSimplePredicatesAndCompoundPredicatesAndSimpleSetPredicates().isEmpty()) {
        // Only the first nested predicate of a compound predicate is considered.
        Object predicate = scoreAttribute.getCompoundPredicate().getSimplePredicatesAndCompoundPredicatesAndSimpleSetPredicates().get(0);
        if (predicate instanceof SimplePredicate) {
            field = ((SimplePredicate) predicate).getField();
        } else if (predicate instanceof SimpleSetPredicate) {
            field = ((SimpleSetPredicate) predicate).getField();
        }
    }
    return field;
}

}
Loading

0 comments on commit 8186951

Please sign in to comment.