In [34]:
%maven org.tribuo:tribuo-all:4.1.0

In [61]:
%maven com.fasterxml.jackson.core:jackson-databind:2.12.5

In [62]:
%maven com.fasterxml.jackson.core:jackson-core:2.12.5

In [63]:
%maven com.fasterxml.jackson.core:jackson-annotations:2.12.5

In [38]:
import java.nio.file.Paths;
import java.nio.file.Files;

In [39]:
import org.tribuo.*;
import org.tribuo.evaluation.TrainTestSplitter;
import org.tribuo.data.csv.CSVLoader;
import org.tribuo.classification.*;
import org.tribuo.classification.evaluation.*;
import org.tribuo.classification.sgd.linear.LogisticRegressionTrainer;

In [40]:
import com.oracle.labs.mlrg.olcut.provenance.ProvenanceUtil;
import com.oracle.labs.mlrg.olcut.config.json.*;

In [42]:
var labelFactory = new LabelFactory();
var csvLoader = new CSVLoader<>(labelFactory);

In [46]:
var irisHeaders = new String[]{"sepalLength", "sepalWidth", "petalLength", "petalWidth", "species"};
var irisesSource = csvLoader.loadDataSource(Paths.get("/Users/amin/test/bezdekIris.data"),"species", irisHeaders);
var irisSplitter = new TrainTestSplitter<>(irisesSource, 0.7, 1L);

In [47]:
var trainingDataset = new MutableDataset<>(irisSplitter.getTrain());
var testingDataset = new MutableDataset<>(irisSplitter.getTest());

In [48]:
System.out.println(String.format("Training data size = %d, number of features = %d, number of classes = %d",trainingDataset.size(),trainingDataset.getFeatureMap().size(),trainingDataset.getOutputInfo().size()));

Training data size = 105, number of features = 4, number of classes = 3


In [49]:
System.out.println(String.format("Testing data size = %d, number of features = %d, number of classes = %d",testingDataset.size(),testingDataset.getFeatureMap().size(),testingDataset.getOutputInfo().size()));

Testing data size = 45, number of features = 4, number of classes = 3


In [51]:
Trainer<Label> trainer = new LogisticRegressionTrainer();
System.out.println(trainer.toString());

LinearSGDTrainer(objective=LogMulticlass,optimiser=AdaGrad(initialLearningRate=1.0,epsilon=0.1,initialValue=0.0),epochs=5,minibatchSize=1,seed=12345)


In [52]:
Model<Label> irisModel = trainer.train(trainingDataset);

In [53]:
var evaluator = new LabelEvaluator();
var evaluation = evaluator.evaluate(irisModel,testingDataset);
System.out.println(evaluation.toString());

Class                           n          tp          fn          fp      recall        prec          f1
Iris-versicolor                16          16           0           1       1.000       0.941       0.970
Iris-virginica                 15          14           1           0       0.933       1.000       0.966
Iris-setosa                    14          14           0           0       1.000       1.000       1.000
Total                          45          44           1           1
Accuracy                                                                    0.978
Micro Average                                                               0.978       0.978       0.978
Macro Average                                                               0.978       0.980       0.978
Balanced Error Rate                                                         0.022


In [54]:
System.out.println(evaluation.getConfusionMatrix().toString());

                   Iris-versicolor   Iris-virginica      Iris-setosa
Iris-versicolor                 16                0                0
Iris-virginica                   1               14                0
Iris-setosa                      0                0               14



In [55]:
var featureMap = irisModel.getFeatureIDMap();
for (var v : featureMap) {
    System.out.println(v.toString());
    System.out.println();
}

CategoricalFeature(name=petalLength,id=0,count=105,map={1.2=1, 6.9=1, 3.6=1, 3.0=1, 1.7=4, 4.9=4, 4.4=3, 3.5=2, 5.9=2, 5.4=1, 4.0=4, 1.4=12, 4.5=4, 5.0=2, 5.5=3, 6.7=2, 3.7=1, 1.9=1, 6.0=2, 5.2=1, 5.7=2, 4.2=2, 4.7=2, 4.8=4, 1.6=4, 5.8=2, 3.8=1, 6.3=1, 3.3=1, 1.0=1, 5.6=4, 5.1=5, 4.6=3, 4.1=2, 1.5=9, 1.3=4, 3.9=3, 6.6=1, 6.1=2})

CategoricalFeature(name=petalWidth,id=1,count=105,map={2.0=3, 0.5=1, 1.2=3, 0.3=6, 1.6=2, 0.1=3, 0.4=5, 2.5=3, 2.3=4, 1.7=2, 1.1=3, 2.1=4, 0.6=1, 1.4=6, 1.0=5, 2.4=1, 1.8=12, 0.2=20, 1.9=4, 1.5=7, 1.3=8, 2.2=2})

CategoricalFeature(name=sepalLength,id=2,count=105,map={6.9=3, 6.4=3, 7.4=1, 4.9=4, 4.4=1, 5.9=3, 5.4=5, 7.2=3, 7.7=3, 5.0=8, 6.2=2, 5.5=5, 6.7=7, 6.0=3, 5.2=2, 6.5=3, 5.7=4, 4.7=2, 4.8=3, 5.8=4, 5.3=1, 6.8=3, 6.3=5, 7.3=1, 5.6=6, 5.1=7, 4.6=4, 7.6=1, 7.1=1, 6.6=2, 6.1=5})

CategoricalFeature(name=sepalWidth,id=3,count=105,map={2.0=1, 2.8=10, 3.6=4, 2.3=3, 2.5=5, 3.1=8, 3.8=4, 3.0=19, 2.6=4, 4.4=1, 3.3=4, 3.5=4, 2.4=2, 3.2=10, 2.9=5, 3.7=3, 3.4=6, 2.2

In [56]:
var provenance = irisModel.getProvenance();
System.out.println(ProvenanceUtil.formattedProvenanceString(provenance.getDatasetProvenance().getSourceProvenance()));

TrainTestSplitter(
	class-name = org.tribuo.evaluation.TrainTestSplitter
	source = CSVLoader(
			class-name = org.tribuo.data.csv.CSVLoader
			outputFactory = LabelFactory(
					class-name = org.tribuo.classification.LabelFactory
				)
			response-name = species
			separator = ,
			quote = "
			path = file:/Users/amin/test/bezdekIris.data
			file-modified-time = 2021-09-03T20:17:57.772-07:00
			resource-hash = 0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC
		)
	train-proportion = 0.7
	seed = 1
	size = 150
	is-train = true
)


In [57]:
System.out.println(ProvenanceUtil.formattedProvenanceString(provenance.getTrainerProvenance()));

LogisticRegressionTrainer(
	class-name = org.tribuo.classification.sgd.linear.LogisticRegressionTrainer
	seed = 12345
	minibatchSize = 1
	shuffle = true
	epochs = 5
	optimiser = AdaGrad(
			class-name = org.tribuo.math.optimisers.AdaGrad
			epsilon = 0.1
			initialLearningRate = 1.0
			initialValue = 0.0
			host-short-name = StochasticGradientOptimiser
		)
	loggingInterval = 1000
	objective = LogMulticlass(
			class-name = org.tribuo.classification.sgd.objectives.LogMulticlass
			host-short-name = LabelObjective
		)
	tribuo-version = 4.1.0
	train-invocation-count = 0
	is-sequence = false
	host-short-name = Trainer
)


In [58]:
System.out.println(ProvenanceUtil.convertToMap(evaluation.getProvenance()));

{tribuo-version=4.1.0, dataset-provenance={num-features=4, num-examples=45, num-outputs=3, tribuo-version=4.1.0, datasource={train-proportion=0.7, seed=1, size=150, source={resource-hash=0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC, path=file:/Users/amin/test/bezdekIris.data, file-modified-time=2021-09-03T20:17:57.772-07:00, quote=", response-name=species, outputFactory={class-name=org.tribuo.classification.LabelFactory}, separator=,, class-name=org.tribuo.data.csv.CSVLoader}, class-name=org.tribuo.evaluation.TrainTestSplitter, is-train=false}, transformations=[], is-sequence=false, is-dense=true, class-name=org.tribuo.MutableDataset}, class-name=org.tribuo.provenance.EvaluationProvenance, model-provenance={instance-values={}, tribuo-version=4.1.0, java-version=11.0.12, trainer={seed=12345, tribuo-version=4.1.0, minibatchSize=1, train-invocation-count=0, is-sequence=false, shuffle=true, epochs=5, optimiser={epsilon=0.1, initialLearningRate=1.0, initialValue=0.0, hos

In [64]:
import com.fasterxml.jackson.databind.*;

CompilationException: 

In [69]:
File tmpFile = File.createTempFile("irisModel","ser");
try (ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(tmpFile))) {
    oos.writeObject(irisModel);
}

In [68]:
String filterPattern = Files.readAllLines(Paths.get("./jep-290-allowlist.txt")).get(0);
ObjectInputFilter filter = ObjectInputFilter.Config.createFilter(filterPattern);
Model<?> loadedModel;
try (ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(new FileInputStream(tmpFile)))) {
    ois.setObjectInputFilter(filter);
    loadedModel = (Model<?>) ois.readObject();
}

EvalException: ./jep-290-allowlist.txt