Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

removing the "dataType" extension from generated PMML and moving the externalClass extension from Characteristic to MiningField #9

Merged
merged 1 commit into from

2 participants

@vinodkiran

With this change, the generated PMML looks as shown here:

https://gist.github.com/80ce6255fd2badf6cd62

@sotty sotty merged commit d5c1da9 into droolsjbpm:master
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Sep 7, 2012
  1. @vinodkiran

    Scorecards: Changes to remove "dataType" extension and move "externalClass" extension from Characteristic to MiningField

    vinodkiran authored
This page is out of date. Refresh to see the latest.
View
2  drools-scorecards/src/main/java/org/drools/scorecards/ScorecardFormat.java
@@ -17,5 +17,5 @@
package org.drools.scorecards;
public enum ScorecardFormat {
- XLS, PMML
+ XLS, CSV
}
View
37 drools-scorecards/src/main/java/org/drools/scorecards/drl/AbstractDRLEmitter.java
@@ -37,10 +37,10 @@
public abstract class AbstractDRLEmitter {
- protected String formRuleName(String modelName, Characteristic c, Attribute scoreAttribute) {
+ protected String formRuleName(PMML pmmlDocument, String modelName, Characteristic c, Attribute scoreAttribute) {
StringBuilder sb = new StringBuilder();
sb.append(modelName).append("_").append(c.getName()).append("_");
- String dataType = ScorecardPMMLUtils.getDataType(c);
+ String dataType = ScorecardPMMLUtils.getDataType(pmmlDocument, ScorecardPMMLUtils.extractFieldNameFromCharacteristic(c));
if (XLSKeywords.DATATYPE_NUMBER.equalsIgnoreCase(dataType)) {
if (scoreAttribute.getSimplePredicate() != null) {
sb.append(scoreAttribute.getSimplePredicate().getOperator()).append("_").append(scoreAttribute.getSimplePredicate().getValue());
@@ -74,24 +74,6 @@ protected Characteristics getCharacteristicsFromScorecard(Scorecard scorecard) {
return null;
}
- protected String extractFieldFromCharacteristic(Characteristic c) {
- String field = "";
- Attribute scoreAttribute = c.getAttributes().get(0);
- if (scoreAttribute.getSimplePredicate() != null) {
- field = scoreAttribute.getSimplePredicate().getField();
- } else if (scoreAttribute.getSimpleSetPredicate() != null) {
- field = scoreAttribute.getSimpleSetPredicate().getField();
- } else if (scoreAttribute.getCompoundPredicate() != null) {
- Object predicate = scoreAttribute.getCompoundPredicate().getSimplePredicatesAndCompoundPredicatesAndSimpleSetPredicates().get(0);
- if (predicate instanceof SimplePredicate){
- field = ((SimplePredicate)predicate).getField();
- } else if (predicate instanceof SimpleSetPredicate){
- field = ((SimpleSetPredicate)predicate).getField();
- }
- }
- return field;
- }
-
protected void addGlobals(PMML pmml, org.drools.template.model.Package aPackage) {
}
@@ -149,7 +131,7 @@ protected void addDeclaredTypes(PMML pmml, Package aPackage) {
Scorecard scorecard = (Scorecard) obj;
stringBuilder.append("declare ").append(scorecard.getModelName().replaceAll(" ","")).append(" extends DroolsScorecard\n");
- addDeclaredTypeContents(stringBuilder, scorecard);
+ addDeclaredTypeContents(pmml, stringBuilder, scorecard);
stringBuilder.append("end\n");
}
@@ -167,7 +149,7 @@ protected void addDeclaredTypes(PMML pmml, Package aPackage) {
for (org.dmg.pmml_4_1.Characteristic c : characteristics.getCharacteristics()) {
int attributePosition = 0;
for (org.dmg.pmml_4_1.Attribute scoreAttribute : c.getAttributes()) {
- String name = formRuleName(scorecard.getModelName().replaceAll(" ",""), c, scoreAttribute);
+ String name = formRuleName(pmmlDocument, scorecard.getModelName().replaceAll(" ",""), c, scoreAttribute);
Rule rule = new Rule(name, 99, 1);
String desc = ScorecardPMMLUtils.getExtensionValue(scoreAttribute.getExtensions(), "description");
if (desc != null) {
@@ -209,7 +191,7 @@ protected void createInitialRule(List<Rule> ruleList, Scorecard scorecard) {
if (obj instanceof Characteristics){
Characteristics characteristics = (Characteristics)obj;
for (Characteristic characteristic : characteristics.getCharacteristics()){
- String field = extractFieldFromCharacteristic(characteristic);
+ String field = ScorecardPMMLUtils.extractFieldNameFromCharacteristic(characteristic);
Consequence consequence = new Consequence();
if (characteristic.getBaselineScore() == null || characteristic.getBaselineScore() == 0 ) {
consequence.setSnippet("insertLogical(new BaselineScore(\"" + objectClass+"\",\""+field + "\","+scorecard.getBaselineScore()+"));");
@@ -240,9 +222,10 @@ protected void populateLHS(Rule rule, PMML pmmlDocument, Scorecard scorecard, Ch
addLHSConditions(rule, pmmlDocument, scorecard, c, scoreAttribute);
}
- protected void createFieldRestriction(Characteristic c, Attribute scoreAttribute, StringBuilder stringBuilder) {
+ protected void createFieldRestriction(PMML pmmlDocument, Characteristic c, Attribute scoreAttribute, StringBuilder stringBuilder) {
stringBuilder.append("(");
- String dataType = ScorecardPMMLUtils.getExtensionValue(c.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_DATATYPE);
+ //String dataType = ScorecardPMMLUtils.getExtensionValue(c.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_DATATYPE);
+ String dataType = ScorecardPMMLUtils.getDataType(pmmlDocument, ScorecardPMMLUtils.extractFieldNameFromCharacteristic(c));
if (XLSKeywords.DATATYPE_TEXT.equalsIgnoreCase(dataType)) {
if (scoreAttribute.getSimplePredicate() != null) {
SimplePredicate predicate = scoreAttribute.getSimplePredicate();
@@ -336,7 +319,7 @@ protected void populateRHS(Rule rule, PMML pmmlDocument, Scorecard scorecard, Ch
String objectClass = scorecard.getModelName().replaceAll(" ", "");
String setter = "insertLogical(new PartialScore(\"";
- String field = extractFieldFromCharacteristic(c);
+ String field = ScorecardPMMLUtils.extractFieldNameFromCharacteristic(c);
stringBuilder.append(setter).append(objectClass).append("\",\"").append(field).append("\",").append(scoreAttribute.getPartialScore());
if (scorecard.isUseReasonCodes()){
@@ -387,7 +370,7 @@ protected void createSummationRules(List<Rule> ruleList, Scorecard scorecard) {
addAdditionalSummationConsequence(calcTotalRule, scorecard);
}
- protected abstract void addDeclaredTypeContents(StringBuilder stringBuilder, Scorecard scorecard);
+ protected abstract void addDeclaredTypeContents(PMML pmmlDocument, StringBuilder stringBuilder, Scorecard scorecard);
protected abstract void internalEmitDRL(PMML pmml, List<Rule> ruleList, Package aPackage);
protected abstract void addLHSConditions(Rule rule, PMML pmmlDocument, Scorecard scorecard, Characteristic c, Attribute scoreAttribute);
protected abstract void addAdditionalReasonCodeConsequence(Rule rule, Scorecard scorecard);
View
10 drools-scorecards/src/main/java/org/drools/scorecards/drl/DeclaredTypesDRLEmitter.java
@@ -24,7 +24,6 @@
import org.dmg.pmml_4_1.PMML;
import org.dmg.pmml_4_1.Scorecard;
import org.drools.scorecards.parser.xls.XLSKeywords;
-import org.drools.scorecards.pmml.PMMLExtensionNames;
import org.drools.scorecards.pmml.ScorecardPMMLUtils;
import org.drools.template.model.Condition;
import org.drools.template.model.Consequence;
@@ -33,10 +32,12 @@
public class DeclaredTypesDRLEmitter extends AbstractDRLEmitter{
- protected void addDeclaredTypeContents(StringBuilder stringBuilder, Scorecard scorecard) {
+ protected void addDeclaredTypeContents(PMML pmmlDocument, StringBuilder stringBuilder, Scorecard scorecard) {
Characteristics characteristics = getCharacteristicsFromScorecard(scorecard);
for (org.dmg.pmml_4_1.Characteristic c : characteristics.getCharacteristics()) {
- String dataType = ScorecardPMMLUtils.getExtensionValue(c.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_DATATYPE);
+ String field = ScorecardPMMLUtils.extractFieldNameFromCharacteristic(c);
+ String dataType = ScorecardPMMLUtils.getDataType(pmmlDocument, field);
+ //String dataType = ScorecardPMMLUtils.getExtensionValue(c.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_DATATYPE);
if (XLSKeywords.DATATYPE_TEXT.equalsIgnoreCase(dataType)) {
dataType = "String";
} else if (XLSKeywords.DATATYPE_NUMBER.equalsIgnoreCase(dataType)) {
@@ -44,7 +45,6 @@ protected void addDeclaredTypeContents(StringBuilder stringBuilder, Scorecard sc
} else if (XLSKeywords.DATATYPE_BOOLEAN.equalsIgnoreCase(dataType)) {
dataType = "boolean";
}
- String field = extractFieldFromCharacteristic(c);
stringBuilder.append("\t").append(field).append(" : ").append(dataType).append("\n");
}
}
@@ -63,7 +63,7 @@ protected void addLHSConditions(Rule rule, PMML pmmlDocument, Scorecard scorecar
String objectClass = scorecard.getModelName().replaceAll(" ", "");
stringBuilder.append(var).append(" : ").append(objectClass);
- createFieldRestriction(c, scoreAttribute, stringBuilder);
+ createFieldRestriction(pmmlDocument, c, scoreAttribute, stringBuilder);
condition.setSnippet(stringBuilder.toString());
rule.addCondition(condition);
View
22 drools-scorecards/src/main/java/org/drools/scorecards/drl/ExternalModelDRLEmitter.java
@@ -21,6 +21,8 @@
import org.dmg.pmml_4_1.Attribute;
import org.dmg.pmml_4_1.Characteristic;
import org.dmg.pmml_4_1.Extension;
+import org.dmg.pmml_4_1.MiningField;
+import org.dmg.pmml_4_1.MiningSchema;
import org.dmg.pmml_4_1.Output;
import org.dmg.pmml_4_1.OutputField;
import org.dmg.pmml_4_1.PMML;
@@ -35,7 +37,7 @@
public class ExternalModelDRLEmitter extends AbstractDRLEmitter {
@Override
- protected void addDeclaredTypeContents(StringBuilder stringBuilder, Scorecard scorecard) {
+ protected void addDeclaredTypeContents(PMML pmmlDocument, StringBuilder stringBuilder, Scorecard scorecard) {
//empty by design
}
@@ -46,12 +48,26 @@ protected void internalEmitDRL(PMML pmml, List<Rule> ruleList, Package aPackage)
@Override
protected void addLHSConditions(Rule rule, PMML pmmlDocument, Scorecard scorecard, Characteristic c, Attribute scoreAttribute) {
- Extension extension = ScorecardPMMLUtils.getExtension(c.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_EXTERNAL_CLASS);
+ Extension extension = null;
+ for (Object obj : scorecard.getExtensionsAndCharacteristicsAndMiningSchemas()){
+ if ( obj instanceof MiningSchema ) {
+ MiningSchema miningSchema = (MiningSchema)obj;
+ String fieldName = ScorecardPMMLUtils.extractFieldNameFromCharacteristic(c);
+ for (MiningField miningField : miningSchema.getMiningFields() ){
+ if ( miningField.getName().equalsIgnoreCase(fieldName)) {
+ if (miningField.getExtensions().size() > 0 ) {
+ extension = miningField.getExtensions().get(0);
+ }
+ }
+ }
+ }
+ }
+ //Extension extension = ScorecardPMMLUtils.getExtension(c.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_EXTERNAL_CLASS);
if ( extension != null ) {
Condition condition = new Condition();
StringBuilder stringBuilder = new StringBuilder("$");
stringBuilder.append(c.getName()).append(" : ").append(extension.getValue());
- createFieldRestriction(c, scoreAttribute, stringBuilder);
+ createFieldRestriction(pmmlDocument, c, scoreAttribute, stringBuilder);
condition.setSnippet(stringBuilder.toString());
rule.addCondition(condition);
}
View
10 drools-scorecards/src/main/java/org/drools/scorecards/parser/xls/XLSEventDataCollector.java
@@ -241,11 +241,11 @@ private void setAdditionalExpectation(int currentRowCtr, int currentColCtr, Stri
attribute.getExtensions().add(extension);
addExpectation(r, currentColCtr+4, "reasonCode", attribute,null);
}
- MiningField dataField = new MiningField();
- dataField.setInvalidValueTreatment(INVALIDVALUETREATMENTMETHOD.AS_MISSING);
- dataField.setUsageType(FIELDUSAGETYPE.ACTIVE);
- miningSchema.getMiningFields().add(dataField);
- addExpectation(currentRowCtr + 1, currentColCtr, "name", dataField, null);
+ MiningField miningField = new MiningField();
+ miningField.setInvalidValueTreatment(INVALIDVALUETREATMENTMETHOD.AS_MISSING);
+ miningField.setUsageType(FIELDUSAGETYPE.ACTIVE);
+ miningSchema.getMiningFields().add(miningField);
+ addExpectation(currentRowCtr + 1, currentColCtr, "name", miningField, null);
}
} else if (XLSKeywords.SCORECARD_CHARACTERISTIC_BIN_INITIALSCORE.equalsIgnoreCase(stringCellValue)) {
View
36 drools-scorecards/src/main/java/org/drools/scorecards/pmml/PMMLGenerator.java
@@ -24,6 +24,7 @@
import org.dmg.pmml_4_1.Array;
import org.dmg.pmml_4_1.Attribute;
+import org.dmg.pmml_4_1.Characteristic;
import org.dmg.pmml_4_1.Characteristics;
import org.dmg.pmml_4_1.CompoundPredicate;
import org.dmg.pmml_4_1.DATATYPE;
@@ -31,6 +32,8 @@
import org.dmg.pmml_4_1.DataField;
import org.dmg.pmml_4_1.Extension;
import org.dmg.pmml_4_1.Header;
+import org.dmg.pmml_4_1.MiningField;
+import org.dmg.pmml_4_1.MiningSchema;
import org.dmg.pmml_4_1.OPTYPE;
import org.dmg.pmml_4_1.Output;
import org.dmg.pmml_4_1.OutputField;
@@ -53,6 +56,7 @@ public PMML generateDocument(Scorecard pmmlScorecard) {
//second add additional elements to scorecard
createAndSetOutput(pmmlScorecard);
+ repositionExternalClassExtensions(pmmlScorecard);
Extension scorecardPackage = ScorecardPMMLUtils.getExtension(pmmlScorecard.getExtensionsAndCharacteristicsAndMiningSchemas(), PMMLExtensionNames.SCORECARD_PACKAGE);
if ( scorecardPackage != null) {
@@ -80,6 +84,34 @@ public PMML generateDocument(Scorecard pmmlScorecard) {
return pmml;
}
+ /**
+  * Moves the {@code PMMLExtensionNames.CHARACTERTISTIC_EXTERNAL_CLASS} extension of each
+  * characteristic onto the mining field it refers to.
+  *
+  * A characteristic is matched to a mining field when the field name extracted from the
+  * characteristic equals the mining field's name, ignoring case. The extension is removed
+  * from the characteristic and appended to the mining field's extensions.
+  *
+  * Does nothing when the scorecard contains no Characteristics element; mining fields
+  * without a name are skipped.
+  *
+  * @param pmmlScorecard the scorecard whose extensions are repositioned in place
+  */
+ private void repositionExternalClassExtensions(Scorecard pmmlScorecard) {
+     Characteristics characteristics = null;
+     for (Object obj : pmmlScorecard.getExtensionsAndCharacteristicsAndMiningSchemas()) {
+         if ( obj instanceof Characteristics ) {
+             characteristics = (Characteristics) obj;
+             break;
+         }
+     }
+     if ( characteristics == null ) {
+         // no characteristics means there is no extension to move
+         return;
+     }
+     for (Object obj : pmmlScorecard.getExtensionsAndCharacteristicsAndMiningSchemas()) {
+         if ( !(obj instanceof MiningSchema) ) {
+             continue;
+         }
+         MiningSchema schema = (MiningSchema) obj;
+         for (MiningField miningField : schema.getMiningFields()) {
+             String fieldName = miningField.getName();
+             if ( fieldName == null ) {
+                 // an unnamed mining field cannot be matched to any characteristic
+                 continue;
+             }
+             for (Characteristic characteristic : characteristics.getCharacteristics()) {
+                 String characteristicField = ScorecardPMMLUtils.extractFieldNameFromCharacteristic(characteristic);
+                 if ( !fieldName.equalsIgnoreCase(characteristicField) ) {
+                     continue;
+                 }
+                 Extension extension = ScorecardPMMLUtils.getExtension(characteristic.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_EXTERNAL_CLASS);
+                 if ( extension != null ) {
+                     characteristic.getExtensions().remove(extension);
+                     miningField.getExtensions().add(extension);
+                 }
+             }
+         }
+     }
+ }
+
private void removeAttributeFieldExtension(Scorecard pmmlScorecard) {
for (Object obj : pmmlScorecard.getExtensionsAndCharacteristicsAndMiningSchemas()) {
if (obj instanceof Characteristics) {
@@ -108,7 +140,8 @@ private void createAndSetDataDictionary(PMML pmml, Scorecard pmmlScorecard) {
for (org.dmg.pmml_4_1.Characteristic characteristic : characteristics.getCharacteristics()) {
DataField dataField = new DataField();
- String dataType = ScorecardPMMLUtils.getExtensionValue(characteristic.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_DATATYPE);
+ Extension dataTypeExtension = ScorecardPMMLUtils.getExtension(characteristic.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_DATATYPE);
+ String dataType = dataTypeExtension.getValue();
String factType = ScorecardPMMLUtils.getExtensionValue(characteristic.getExtensions(), PMMLExtensionNames.CHARACTERTISTIC_FACTTYPE);
if ( factType != null ){
@@ -139,6 +172,7 @@ private void createAndSetDataDictionary(PMML pmml, Scorecard pmmlScorecard) {
}
dataField.setName(field);
dataDictionary.getDataFields().add(dataField);
+ characteristic.getExtensions().remove(dataTypeExtension);
ctr++;
}
}
View
60 drools-scorecards/src/main/java/org/drools/scorecards/pmml/ScorecardPMMLUtils.java
@@ -18,19 +18,28 @@
import java.util.List;
+import org.dmg.pmml_4_1.Attribute;
+import org.dmg.pmml_4_1.Characteristic;
+import org.dmg.pmml_4_1.DATATYPE;
+import org.dmg.pmml_4_1.DataDictionary;
+import org.dmg.pmml_4_1.DataField;
import org.dmg.pmml_4_1.Extension;
+import org.dmg.pmml_4_1.PMML;
import org.dmg.pmml_4_1.Scorecard;
+import org.dmg.pmml_4_1.SimplePredicate;
+import org.dmg.pmml_4_1.SimpleSetPredicate;
+import org.drools.scorecards.parser.xls.XLSKeywords;
public class ScorecardPMMLUtils {
- public static String getDataType(org.dmg.pmml_4_1.Characteristic c) {
- for (Extension extension : c.getExtensions()) {
- if (PMMLExtensionNames.CHARACTERTISTIC_DATATYPE.equalsIgnoreCase(extension.getName())) {
- return extension.getValue();
- }
- }
- return null;
- }
+// public static String getDataType(org.dmg.pmml_4_1.Characteristic c) {
+// for (Extension extension : c.getExtensions()) {
+// if (PMMLExtensionNames.CHARACTERTISTIC_DATATYPE.equalsIgnoreCase(extension.getName())) {
+// return extension.getValue();
+// }
+// }
+// return null;
+// }
public static String getExtensionValue(List extensions, String extensionName) {
for (Object obj : extensions) {
@@ -64,4 +73,39 @@ public static Scorecard createScorecard(){
return scorecard;
}
+ /**
+  * Resolves the scorecard data type keyword of a field from the document's data dictionary.
+  *
+  * The field is looked up by name, ignoring case. Its PMML data type is mapped as:
+  * {@code DOUBLE} to {@code XLSKeywords.DATATYPE_NUMBER}, {@code STRING} to
+  * {@code XLSKeywords.DATATYPE_TEXT}, {@code BOOLEAN} to {@code XLSKeywords.DATATYPE_BOOLEAN}.
+  *
+  * @param pmmlDocument the PMML document whose data dictionary is searched
+  * @param fieldName the name of the data field to look up
+  * @return the matching keyword, or {@code null} when the document has no data dictionary,
+  *         no data field with that name exists, or its data type is none of the three above
+  */
+ public static String getDataType(PMML pmmlDocument, String fieldName) {
+     DataDictionary dataDictionary = pmmlDocument.getDataDictionary();
+     if (dataDictionary == null || fieldName == null) {
+         return null;
+     }
+     for (DataField dataField : dataDictionary.getDataFields()) {
+         // compare from fieldName so a data field without a name is skipped instead of throwing
+         if (fieldName.equalsIgnoreCase(dataField.getName())) {
+             DATATYPE datatype = dataField.getDataType();
+             if (datatype == DATATYPE.DOUBLE) {
+                 return XLSKeywords.DATATYPE_NUMBER;
+             } else if (datatype == DATATYPE.STRING) {
+                 return XLSKeywords.DATATYPE_TEXT;
+             } else if (datatype == DATATYPE.BOOLEAN) {
+                 return XLSKeywords.DATATYPE_BOOLEAN;
+             }
+         }
+     }
+     return null;
+ }
+
+ /**
+  * Returns the name of the field that a characteristic's predicates test.
+  *
+  * Only the first attribute of the characteristic is inspected. Its simple predicate is
+  * preferred, then its simple-set predicate, then the first predicate nested in its
+  * compound predicate (when that nested predicate is a simple or simple-set predicate).
+  *
+  * @param c the characteristic to inspect
+  * @return the field name, or an empty string when the characteristic has no attributes
+  *         or no usable predicate is found
+  */
+ public static String extractFieldNameFromCharacteristic(Characteristic c) {
+     String field = "";
+     if (c.getAttributes().isEmpty()) {
+         // no attribute to read a predicate from; keep the empty default instead of failing on get(0)
+         return field;
+     }
+     Attribute scoreAttribute = c.getAttributes().get(0);
+     if (scoreAttribute.getSimplePredicate() != null) {
+         field = scoreAttribute.getSimplePredicate().getField();
+     } else if (scoreAttribute.getSimpleSetPredicate() != null) {
+         field = scoreAttribute.getSimpleSetPredicate().getField();
+     } else if (scoreAttribute.getCompoundPredicate() != null) {
+         List<Object> nested = scoreAttribute.getCompoundPredicate().getSimplePredicatesAndCompoundPredicatesAndSimpleSetPredicates();
+         if (!nested.isEmpty()) {
+             Object predicate = nested.get(0);
+             if (predicate instanceof SimplePredicate) {
+                 field = ((SimplePredicate) predicate).getField();
+             } else if (predicate instanceof SimpleSetPredicate) {
+                 field = ((SimpleSetPredicate) predicate).getField();
+             }
+         }
+     }
+     return field;
+ }
+
}
View
5 drools-scorecards/src/test/java/org/drools/scorecards/ExternalObjectModelTest.java
@@ -37,6 +37,7 @@ public void setUp() throws Exception {
pmmlDocument = scorecardCompiler.getPMMLDocument();
assertNotNull(pmmlDocument);
drl = scorecardCompiler.getDRL(ScorecardCompiler.DrlType.EXTERNAL_OBJECT_MODEL);
+ //System.out.println(drl);
} else {
fail("failed to parse scoremodel Excel.");
}
@@ -59,7 +60,7 @@ public void testPMMLToString() throws Exception {
StringWriter stringWriter = new StringWriter();
marshaller.marshal(pmmlDocument, stringWriter);
assertTrue(stringWriter.toString().length() > 0);
- //System.out.println(stringWriter.toString());
+ System.out.println(stringWriter.toString());
}
@Test
@@ -93,7 +94,7 @@ public void testPMMLCustomOutput() throws Exception {
public void testDrlNoNull() throws Exception {
assertNotNull(drl);
assertTrue(drl.length() > 0);
- //System.out.println(drl);
+ System.out.println(drl);
}
@Test
Something went wrong with that request. Please try again.