diff --git a/README.md b/README.md
index 2a1818d..f66f918 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ This library is in beta, and currently not all models are supported. The library
 | [Ridge](sklearn_pmml_model/linear_model)               | ✅<sup>2</sup> | ✅         | ✅<sup>3</sup>        |
 | [Lasso](sklearn_pmml_model/linear_model)               | ✅<sup>2</sup> | ✅         | ✅<sup>3</sup>        |
 | [ElasticNet](sklearn_pmml_model/linear_model)          | ✅<sup>2</sup> | ✅         | ✅                    |
-| [Gaussian Naive Bayes](sklearn_pmml_model/naive_bayes) | ✅             |            |                      |
+| [Gaussian Naive Bayes](sklearn_pmml_model/naive_bayes) | ✅             |            | ✅<sup>3</sup>        |
 
 <sup>1</sup> Categorical feature support using slightly modified internals, based on [scikit-learn#12866](https://github.com/scikit-learn/scikit-learn/pull/12866).
 
diff --git a/models/tree-iris.pmml b/models/tree-iris.pmml
index b8ace33..f2f2fbf 100644
--- a/models/tree-iris.pmml
+++ b/models/tree-iris.pmml
@@ -1,396 +1,111 @@
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">
+<PMML xmlns="http://www.dmg.org/PMML-4_4" xmlns:data="http://jpmml.org/jpmml-model/InlineTable" version="4.4">
 	<Header>
-		<Application name="JPMML-SkLearn" version="1.5.3"/>
-		<Timestamp>2018-06-18T14:47:30Z</Timestamp>
+		<Application name="JPMML-SkLearn" version="1.6.18"/>
+		<Timestamp>2021-07-06T10:18:03Z</Timestamp>
 	</Header>
 	<MiningBuildTask>
-		<Extension>PMMLPipeline(steps=[('classifier', DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
-			max_features=None, max_leaf_nodes=None,
-			min_impurity_decrease=0.0, min_impurity_split=None,
-			min_samples_leaf=1, min_samples_split=2,
-			min_weight_fraction_leaf=0.0, presort=False, random_state=1,
-			splitter='best'))])</Extension>
+		<Extension>PMMLPipeline(steps=[('classifier', DecisionTreeClassifier(random_state=1))])</Extension>
 	</MiningBuildTask>
 	<DataDictionary>
-		<DataField name="Class" optype="categorical" dataType="string">
-			<Value value="setosa"/>
-			<Value value="versicolor"/>
-			<Value value="virginica"/>
-		</DataField>
-		<DataField name="sepal length (cm)" optype="continuous" dataType="float">
-			<Interval closure="closedClosed" leftMargin="4.3" rightMargin="7.9"/>
-		</DataField>
-		<DataField name="sepal width (cm)" optype="continuous" dataType="float">
-			<Interval closure="closedClosed" leftMargin="2.0" rightMargin="4.4"/>
+		<DataField name="Class" optype="categorical" dataType="integer">
+			<Value value="0"/>
+			<Value value="1"/>
+			<Value value="2"/>
 		</DataField>
+		<DataField name="sepal length (cm)" optype="continuous" dataType="float"/>
+		<DataField name="petal length (cm)" optype="continuous" dataType="float"/>
+		<DataField name="petal width (cm)" optype="continuous" dataType="float"/>
 	</DataDictionary>
-	<TransformationDictionary>
-		<DerivedField name="double(sepal length (cm))" optype="continuous" dataType="double">
-			<FieldRef field="sepal length (cm)"/>
-		</DerivedField>
-		<DerivedField name="double(sepal width (cm))" optype="continuous" dataType="double">
-			<FieldRef field="sepal width (cm)"/>
-		</DerivedField>
-	</TransformationDictionary>
-	<TreeModel functionName="classification" splitCharacteristic="binarySplit">
+	<TransformationDictionary/>
+	<TreeModel functionName="classification" algorithmName="sklearn.tree._classes.DecisionTreeClassifier" missingValueStrategy="nullPrediction">
 		<MiningSchema>
 			<MiningField name="Class" usageType="target"/>
+			<MiningField name="petal width (cm)"/>
+			<MiningField name="petal length (cm)"/>
 			<MiningField name="sepal length (cm)"/>
-			<MiningField name="sepal width (cm)"/>
 		</MiningSchema>
 		<Output>
-			<OutputField name="probability(setosa)" optype="continuous" dataType="double" feature="probability" value="setosa"/>
-			<OutputField name="probability(versicolor)" optype="continuous" dataType="double" feature="probability" value="versicolor"/>
-			<OutputField name="probability(virginica)" optype="continuous" dataType="double" feature="probability" value="virginica"/>
+			<OutputField name="probability(0)" optype="continuous" dataType="double" feature="probability" value="0"/>
+			<OutputField name="probability(1)" optype="continuous" dataType="double" feature="probability" value="1"/>
+			<OutputField name="probability(2)" optype="continuous" dataType="double" feature="probability" value="2"/>
 		</Output>
-		<Node id="0">
+		<LocalTransformations>
+			<DerivedField name="double(petal width (cm))" optype="continuous" dataType="double">
+				<FieldRef field="petal width (cm)"/>
+			</DerivedField>
+			<DerivedField name="double(petal length (cm))" optype="continuous" dataType="double">
+				<FieldRef field="petal length (cm)"/>
+			</DerivedField>
+			<DerivedField name="double(sepal length (cm))" optype="continuous" dataType="double">
+				<FieldRef field="sepal length (cm)"/>
+			</DerivedField>
+		</LocalTransformations>
+		<Node>
 			<True/>
-			<Node id="1">
-				<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="5.449999809265137"/>
-				<Node id="2">
-					<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="2.75"/>
-					<Node id="3" score="setosa" recordCount="1.0">
-						<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="4.699999809265137"/>
-						<ScoreDistribution value="setosa" recordCount="1.0"/>
-						<ScoreDistribution value="versicolor" recordCount="0.0"/>
-						<ScoreDistribution value="virginica" recordCount="0.0"/>
+			<Node score="0" recordCount="50">
+				<SimplePredicate field="double(petal width (cm))" operator="lessOrEqual" value="0.800000011920929"/>
+				<ScoreDistribution value="0" recordCount="50"/>
+				<ScoreDistribution value="1" recordCount="0"/>
+				<ScoreDistribution value="2" recordCount="0"/>
+			</Node>
+			<Node>
+				<SimplePredicate field="double(petal width (cm))" operator="lessOrEqual" value="1.75"/>
+				<Node>
+					<SimplePredicate field="double(petal length (cm))" operator="lessOrEqual" value="4.950000047683716"/>
+					<Node score="1" recordCount="47">
+						<SimplePredicate field="double(petal width (cm))" operator="lessOrEqual" value="1.6500000357627869"/>
+						<ScoreDistribution value="0" recordCount="0"/>
+						<ScoreDistribution value="1" recordCount="47"/>
+						<ScoreDistribution value="2" recordCount="0"/>
 					</Node>
-					<Node id="4">
-						<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="4.699999809265137"/>
-						<Node id="5" score="versicolor" recordCount="1.0">
-							<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="2.450000047683716"/>
-							<ScoreDistribution value="setosa" recordCount="0.0"/>
-							<ScoreDistribution value="versicolor" recordCount="1.0"/>
-							<ScoreDistribution value="virginica" recordCount="0.0"/>
-						</Node>
-						<Node id="6">
-							<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="2.450000047683716"/>
-							<Node id="7" score="virginica" recordCount="1.0">
-								<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="5.0"/>
-								<ScoreDistribution value="setosa" recordCount="0.0"/>
-								<ScoreDistribution value="versicolor" recordCount="0.0"/>
-								<ScoreDistribution value="virginica" recordCount="1.0"/>
-							</Node>
-							<Node id="8" score="versicolor" recordCount="1.0">
-								<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="5.0"/>
-								<ScoreDistribution value="setosa" recordCount="0.0"/>
-								<ScoreDistribution value="versicolor" recordCount="1.0"/>
-								<ScoreDistribution value="virginica" recordCount="0.0"/>
-							</Node>
-						</Node>
+					<Node score="2" recordCount="1">
+						<True/>
+						<ScoreDistribution value="0" recordCount="0"/>
+						<ScoreDistribution value="1" recordCount="0"/>
+						<ScoreDistribution value="2" recordCount="1"/>
 					</Node>
 				</Node>
-				<Node id="9">
-					<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="2.75"/>
-					<Node id="10" score="setosa" recordCount="25.0">
-						<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="5.350000381469727"/>
-						<ScoreDistribution value="setosa" recordCount="25.0"/>
-						<ScoreDistribution value="versicolor" recordCount="0.0"/>
-						<ScoreDistribution value="virginica" recordCount="0.0"/>
-					</Node>
-					<Node id="11">
-						<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="5.350000381469727"/>
-						<Node id="12" score="versicolor" recordCount="1.0">
-							<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="3.3499999046325684"/>
-							<ScoreDistribution value="setosa" recordCount="0.0"/>
-							<ScoreDistribution value="versicolor" recordCount="1.0"/>
-							<ScoreDistribution value="virginica" recordCount="0.0"/>
-						</Node>
-						<Node id="13" score="setosa" recordCount="1.0">
-							<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="3.3499999046325684"/>
-							<ScoreDistribution value="setosa" recordCount="1.0"/>
-							<ScoreDistribution value="versicolor" recordCount="0.0"/>
-							<ScoreDistribution value="virginica" recordCount="0.0"/>
-						</Node>
-					</Node>
+				<Node score="2" recordCount="3">
+					<SimplePredicate field="double(petal width (cm))" operator="lessOrEqual" value="1.550000011920929"/>
+					<ScoreDistribution value="0" recordCount="0"/>
+					<ScoreDistribution value="1" recordCount="0"/>
+					<ScoreDistribution value="2" recordCount="3"/>
+				</Node>
+				<Node score="1" recordCount="2">
+					<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="6.949999809265137"/>
+					<ScoreDistribution value="0" recordCount="0"/>
+					<ScoreDistribution value="1" recordCount="2"/>
+					<ScoreDistribution value="2" recordCount="0"/>
+				</Node>
+				<Node score="2" recordCount="1">
+					<True/>
+					<ScoreDistribution value="0" recordCount="0"/>
+					<ScoreDistribution value="1" recordCount="0"/>
+					<ScoreDistribution value="2" recordCount="1"/>
 				</Node>
 			</Node>
-			<Node id="14">
-				<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="5.449999809265137"/>
-				<Node id="15">
-					<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="6.149999618530273"/>
-					<Node id="16">
-						<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="3.5999999046325684"/>
-						<Node id="17" score="versicolor" recordCount="7.0">
-							<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="5.649999618530273"/>
-							<ScoreDistribution value="setosa" recordCount="0.0"/>
-							<ScoreDistribution value="versicolor" recordCount="7.0"/>
-							<ScoreDistribution value="virginica" recordCount="0.0"/>
-						</Node>
-						<Node id="18">
-							<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="5.649999618530273"/>
-							<Node id="19" score="virginica" recordCount="1.0">
-								<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="2.549999952316284"/>
-								<ScoreDistribution value="setosa" recordCount="0.0"/>
-								<ScoreDistribution value="versicolor" recordCount="0.0"/>
-								<ScoreDistribution value="virginica" recordCount="1.0"/>
-							</Node>
-							<Node id="20">
-								<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="2.549999952316284"/>
-								<Node id="21">
-									<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="2.75"/>
-									<Node id="22">
-										<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="6.050000190734863"/>
-										<Node id="23" score="versicolor" recordCount="2.0">
-											<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="2.6500000953674316"/>
-											<ScoreDistribution value="setosa" recordCount="0.0"/>
-											<ScoreDistribution value="versicolor" recordCount="2.0"/>
-											<ScoreDistribution value="virginica" recordCount="0.0"/>
-										</Node>
-										<Node id="24">
-											<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="2.6500000953674316"/>
-											<Node id="25" score="versicolor" recordCount="2.0">
-												<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="5.900000095367432"/>
-												<ScoreDistribution value="setosa" recordCount="0.0"/>
-												<ScoreDistribution value="versicolor" recordCount="1.0"/>
-												<ScoreDistribution value="virginica" recordCount="1.0"/>
-											</Node>
-											<Node id="26" score="versicolor" recordCount="1.0">
-												<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="5.900000095367432"/>
-												<ScoreDistribution value="setosa" recordCount="0.0"/>
-												<ScoreDistribution value="versicolor" recordCount="1.0"/>
-												<ScoreDistribution value="virginica" recordCount="0.0"/>
-											</Node>
-										</Node>
-									</Node>
-									<Node id="27" score="virginica" recordCount="1.0">
-										<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="6.050000190734863"/>
-										<ScoreDistribution value="setosa" recordCount="0.0"/>
-										<ScoreDistribution value="versicolor" recordCount="0.0"/>
-										<ScoreDistribution value="virginica" recordCount="1.0"/>
-									</Node>
-								</Node>
-								<Node id="28">
-									<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="2.75"/>
-									<Node id="29" score="versicolor" recordCount="6.0">
-										<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="2.950000047683716"/>
-										<ScoreDistribution value="setosa" recordCount="0.0"/>
-										<ScoreDistribution value="versicolor" recordCount="6.0"/>
-										<ScoreDistribution value="virginica" recordCount="0.0"/>
-									</Node>
-									<Node id="30">
-										<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="2.950000047683716"/>
-										<Node id="31">
-											<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="3.0999999046325684"/>
-											<Node id="32" score="versicolor" recordCount="1.0">
-												<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="5.800000190734863"/>
-												<ScoreDistribution value="setosa" recordCount="0.0"/>
-												<ScoreDistribution value="versicolor" recordCount="1.0"/>
-												<ScoreDistribution value="virginica" recordCount="0.0"/>
-											</Node>
-											<Node id="33">
-												<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="5.800000190734863"/>
-												<Node id="34" score="versicolor" recordCount="2.0">
-													<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="6.0"/>
-													<ScoreDistribution value="setosa" recordCount="0.0"/>
-													<ScoreDistribution value="versicolor" recordCount="1.0"/>
-													<ScoreDistribution value="virginica" recordCount="1.0"/>
-												</Node>
-												<Node id="35" score="versicolor" recordCount="1.0">
-													<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="6.0"/>
-													<ScoreDistribution value="setosa" recordCount="0.0"/>
-													<ScoreDistribution value="versicolor" recordCount="1.0"/>
-													<ScoreDistribution value="virginica" recordCount="0.0"/>
-												</Node>
-											</Node>
-										</Node>
-										<Node id="36" score="versicolor" recordCount="2.0">
-											<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="3.0999999046325684"/>
-											<ScoreDistribution value="setosa" recordCount="0.0"/>
-											<ScoreDistribution value="versicolor" recordCount="2.0"/>
-											<ScoreDistribution value="virginica" recordCount="0.0"/>
-										</Node>
-									</Node>
-								</Node>
-							</Node>
-						</Node>
-					</Node>
-					<Node id="37" score="setosa" recordCount="3.0">
-						<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="3.5999999046325684"/>
-						<ScoreDistribution value="setosa" recordCount="3.0"/>
-						<ScoreDistribution value="versicolor" recordCount="0.0"/>
-						<ScoreDistribution value="virginica" recordCount="0.0"/>
-					</Node>
+			<Node>
+				<SimplePredicate field="double(petal length (cm))" operator="lessOrEqual" value="4.8500001430511475"/>
+				<Node score="1" recordCount="1">
+					<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="5.950000047683716"/>
+					<ScoreDistribution value="0" recordCount="0"/>
+					<ScoreDistribution value="1" recordCount="1"/>
+					<ScoreDistribution value="2" recordCount="0"/>
 				</Node>
-				<Node id="38">
-					<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="6.149999618530273"/>
-					<Node id="39">
-						<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="7.050000190734863"/>
-						<Node id="40">
-							<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="6.550000190734863"/>
-							<Node id="41" score="versicolor" recordCount="1.0">
-								<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="2.3499999046325684"/>
-								<ScoreDistribution value="setosa" recordCount="0.0"/>
-								<ScoreDistribution value="versicolor" recordCount="1.0"/>
-								<ScoreDistribution value="virginica" recordCount="0.0"/>
-							</Node>
-							<Node id="42">
-								<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="2.3499999046325684"/>
-								<Node id="43" score="virginica" recordCount="3.0">
-									<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="2.75"/>
-									<ScoreDistribution value="setosa" recordCount="0.0"/>
-									<ScoreDistribution value="versicolor" recordCount="0.0"/>
-									<ScoreDistribution value="virginica" recordCount="3.0"/>
-								</Node>
-								<Node id="44">
-									<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="2.75"/>
-									<Node id="45">
-										<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="2.950000047683716"/>
-										<Node id="46">
-											<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="6.449999809265137"/>
-											<Node id="47" score="virginica" recordCount="2.0">
-												<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="2.8499999046325684"/>
-												<ScoreDistribution value="setosa" recordCount="0.0"/>
-												<ScoreDistribution value="versicolor" recordCount="0.0"/>
-												<ScoreDistribution value="virginica" recordCount="2.0"/>
-											</Node>
-											<Node id="48">
-												<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="2.8499999046325684"/>
-												<Node id="49" score="versicolor" recordCount="1.0">
-													<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="6.25"/>
-													<ScoreDistribution value="setosa" recordCount="0.0"/>
-													<ScoreDistribution value="versicolor" recordCount="1.0"/>
-													<ScoreDistribution value="virginica" recordCount="0.0"/>
-												</Node>
-												<Node id="50">
-													<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="6.25"/>
-													<Node id="51" score="virginica" recordCount="1.0">
-														<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="6.350000381469727"/>
-														<ScoreDistribution value="setosa" recordCount="0.0"/>
-														<ScoreDistribution value="versicolor" recordCount="0.0"/>
-														<ScoreDistribution value="virginica" recordCount="1.0"/>
-													</Node>
-													<Node id="52" score="versicolor" recordCount="1.0">
-														<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="6.350000381469727"/>
-														<ScoreDistribution value="setosa" recordCount="0.0"/>
-														<ScoreDistribution value="versicolor" recordCount="1.0"/>
-														<ScoreDistribution value="virginica" recordCount="0.0"/>
-													</Node>
-												</Node>
-											</Node>
-										</Node>
-										<Node id="53" score="versicolor" recordCount="1.0">
-											<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="6.449999809265137"/>
-											<ScoreDistribution value="setosa" recordCount="0.0"/>
-											<ScoreDistribution value="versicolor" recordCount="1.0"/>
-											<ScoreDistribution value="virginica" recordCount="0.0"/>
-										</Node>
-									</Node>
-									<Node id="54">
-										<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="2.950000047683716"/>
-										<Node id="55">
-											<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="6.449999809265137"/>
-											<Node id="56" score="virginica" recordCount="1.0">
-												<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="6.25"/>
-												<ScoreDistribution value="setosa" recordCount="0.0"/>
-												<ScoreDistribution value="versicolor" recordCount="0.0"/>
-												<ScoreDistribution value="virginica" recordCount="1.0"/>
-											</Node>
-											<Node id="57">
-												<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="6.25"/>
-												<Node id="58" score="virginica" recordCount="1.0">
-													<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="3.1500000953674316"/>
-													<ScoreDistribution value="setosa" recordCount="0.0"/>
-													<ScoreDistribution value="versicolor" recordCount="0.0"/>
-													<ScoreDistribution value="virginica" recordCount="1.0"/>
-												</Node>
-												<Node id="59">
-													<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="3.1500000953674316"/>
-													<Node id="60" score="versicolor" recordCount="2.0">
-														<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="6.350000381469727"/>
-														<ScoreDistribution value="setosa" recordCount="0.0"/>
-														<ScoreDistribution value="versicolor" recordCount="1.0"/>
-														<ScoreDistribution value="virginica" recordCount="1.0"/>
-													</Node>
-													<Node id="61" score="versicolor" recordCount="2.0">
-														<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="6.350000381469727"/>
-														<ScoreDistribution value="setosa" recordCount="0.0"/>
-														<ScoreDistribution value="versicolor" recordCount="1.0"/>
-														<ScoreDistribution value="virginica" recordCount="1.0"/>
-													</Node>
-												</Node>
-											</Node>
-										</Node>
-										<Node id="62" score="virginica" recordCount="2.0">
-											<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="6.449999809265137"/>
-											<ScoreDistribution value="setosa" recordCount="0.0"/>
-											<ScoreDistribution value="versicolor" recordCount="0.0"/>
-											<ScoreDistribution value="virginica" recordCount="2.0"/>
-										</Node>
-									</Node>
-								</Node>
-							</Node>
-						</Node>
-						<Node id="63">
-							<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="6.550000190734863"/>
-							<Node id="64">
-								<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="3.25"/>
-								<Node id="65" score="virginica" recordCount="1.0">
-									<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="2.6500000953674316"/>
-									<ScoreDistribution value="setosa" recordCount="0.0"/>
-									<ScoreDistribution value="versicolor" recordCount="0.0"/>
-									<ScoreDistribution value="virginica" recordCount="1.0"/>
-								</Node>
-								<Node id="66">
-									<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="2.6500000953674316"/>
-									<Node id="67" score="versicolor" recordCount="4.0">
-										<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="3.049999952316284"/>
-										<ScoreDistribution value="setosa" recordCount="0.0"/>
-										<ScoreDistribution value="versicolor" recordCount="4.0"/>
-										<ScoreDistribution value="virginica" recordCount="0.0"/>
-									</Node>
-									<Node id="68">
-										<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="3.049999952316284"/>
-										<Node id="69">
-											<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="6.949999809265137"/>
-											<Node id="70">
-												<SimplePredicate field="double(sepal width (cm))" operator="lessOrEqual" value="3.1500000953674316"/>
-												<Node id="71" score="versicolor" recordCount="3.0">
-													<SimplePredicate field="double(sepal length (cm))" operator="lessOrEqual" value="6.800000190734863"/>
-													<ScoreDistribution value="setosa" recordCount="0.0"/>
-													<ScoreDistribution value="versicolor" recordCount="2.0"/>
-													<ScoreDistribution value="virginica" recordCount="1.0"/>
-												</Node>
-												<Node id="72" score="versicolor" recordCount="2.0">
-													<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="6.800000190734863"/>
-													<ScoreDistribution value="setosa" recordCount="0.0"/>
-													<ScoreDistribution value="versicolor" recordCount="1.0"/>
-													<ScoreDistribution value="virginica" recordCount="1.0"/>
-												</Node>
-											</Node>
-											<Node id="73" score="virginica" recordCount="1.0">
-												<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="3.1500000953674316"/>
-												<ScoreDistribution value="setosa" recordCount="0.0"/>
-												<ScoreDistribution value="versicolor" recordCount="0.0"/>
-												<ScoreDistribution value="virginica" recordCount="1.0"/>
-											</Node>
-										</Node>
-										<Node id="74" score="versicolor" recordCount="1.0">
-											<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="6.949999809265137"/>
-											<ScoreDistribution value="setosa" recordCount="0.0"/>
-											<ScoreDistribution value="versicolor" recordCount="1.0"/>
-											<ScoreDistribution value="virginica" recordCount="0.0"/>
-										</Node>
-									</Node>
-								</Node>
-							</Node>
-							<Node id="75" score="virginica" recordCount="2.0">
-								<SimplePredicate field="double(sepal width (cm))" operator="greaterThan" value="3.25"/>
-								<ScoreDistribution value="setosa" recordCount="0.0"/>
-								<ScoreDistribution value="versicolor" recordCount="0.0"/>
-								<ScoreDistribution value="virginica" recordCount="2.0"/>
-							</Node>
-						</Node>
-					</Node>
-					<Node id="76" score="virginica" recordCount="8.0">
-						<SimplePredicate field="double(sepal length (cm))" operator="greaterThan" value="7.050000190734863"/>
-						<ScoreDistribution value="setosa" recordCount="0.0"/>
-						<ScoreDistribution value="versicolor" recordCount="0.0"/>
-						<ScoreDistribution value="virginica" recordCount="8.0"/>
-					</Node>
+				<Node score="2" recordCount="2">
+					<True/>
+					<ScoreDistribution value="0" recordCount="0"/>
+					<ScoreDistribution value="1" recordCount="0"/>
+					<ScoreDistribution value="2" recordCount="2"/>
 				</Node>
 			</Node>
+			<Node score="2" recordCount="43">
+				<True/>
+				<ScoreDistribution value="0" recordCount="0"/>
+				<ScoreDistribution value="1" recordCount="0"/>
+				<ScoreDistribution value="2" recordCount="43"/>
+			</Node>
 		</Node>
 	</TreeModel>
 </PMML>
diff --git a/sklearn_pmml_model/base.py b/sklearn_pmml_model/base.py
index c50b7ac..c2dc373 100644
--- a/sklearn_pmml_model/base.py
+++ b/sklearn_pmml_model/base.py
@@ -1,11 +1,13 @@
 from sklearn.base import BaseEstimator
-from sklearn.preprocessing import LabelBinarizer
+from sklearn.preprocessing import LabelBinarizer, OneHotEncoder
+from sklearn.compose import ColumnTransformer
 from xml.etree import cElementTree as eTree
 from cached_property import cached_property
 from sklearn_pmml_model.datatypes import Category
 from collections import OrderedDict
 import datetime
 import numpy as np
+import pandas as pd
 
 
 class PMMLBaseEstimator(BaseEstimator):
@@ -137,13 +139,18 @@ def fit(self, x, y):
     raise Exception('Not supported.')
 
   def _prepare_data(self, X):
-    X = np.asarray(X)
+    """Validate X against the model's DataFields: DataFrames are column-selected on a copy, other inputs only width-checked."""
+    pmml_features = [name for name, field in self.fields.items() if field is not self.target_field and field.tag == 'DataField']
 
-    for column, (index, field_type) in self.field_mapping.items():
-      if type(field_type) is Category and index is not None and type(X[0,index]) is str:
-        categories = [str(v) for v in field_type.categories]
-        categories += [c for c in np.unique(X[:,index]) if c not in categories]
-        X[:,index] = [categories.index(x) for x in X[:,index]]
+    if isinstance(X, pd.DataFrame):
+      X = X.rename(columns=str)  # stringify labels on a copy so the caller's frame keeps its own column names
+      try:
+        X = X[pmml_features]
+      except KeyError:
+        raise Exception('The features in the input data do not match features expected by the PMML model.')
+    elif np.shape(X)[1] != len(pmml_features):  # np.shape also handles plain nested lists, which have no .shape
+      raise Exception('The number of features in provided data does not match expected number of features in the PMML. '
+                      'Provide pandas.Dataframe, or provide data matching the DataFields in the PMML document.')
 
     return X
 
@@ -258,3 +265,60 @@ def findall(element, path):
   if element is None:
     return []
   return element.findall(path)
+
+
+class OneHotEncodingMixin:
+  """Mixin class to automatically one-hot encode categorical variables.
+
+  __init__ reads self.target_field, self.fields and self.field_mapping, so those must be set before it runs.
+  """
+  def __init__(self):
+    # Setup a column transformer to encode categorical variables (every field except the target is considered)
+    target = self.target_field.get('name')
+    fields = [field for name, field in self.fields.items() if name != target]
+
+    def encoder_for(field):
+      if field.get('optype') != 'categorical':
+        return 'passthrough'  # non-categorical fields are given to ColumnTransformer as 'passthrough'
+
+      encoder = OneHotEncoder()
+      encoder.categories_ = np.array([self.field_mapping[field.get('name')][1].categories])  # taken from field_mapping; fit() is never called
+      encoder.drop_idx_ = np.array([None for x in encoder.categories_])  # one None per entry of categories_
+      encoder._legacy_mode = False  # NOTE(review): private sklearn attribute, likely version-specific — confirm against the pinned sklearn
+      return encoder
+
+    transformer = ColumnTransformer(
+      transformers=[
+        # (name, encoder or 'passthrough', [column index from field_mapping]) for each DataField
+        (field.get('name'), encoder_for(field), [self.field_mapping[field.get('name')][0]])
+        for field in fields
+        if field.tag == 'DataField'
+      ]
+    )
+
+    X = np.array([[0 for field in fields if field.tag == "DataField"]])  # one dummy row, one column per DataField; only passed to the validation calls below
+    transformer._validate_transformers()
+    transformer._validate_column_callables(X)
+    transformer._validate_remainder(X)
+    transformer.transformers_ = transformer.transformers  # NOTE(review): private hooks and fitted-style attributes are set by hand instead of calling fit() — sklearn-version sensitive
+    transformer.sparse_output_ = False
+    transformer._feature_names_in = None
+
+    self.transformer = transformer
+
+  def _prepare_data(self, X):
+    X = super()._prepare_data(X)  # delegate to the next _prepare_data in the MRO first
+    return self.transformer.transform(X)  # then apply the transformer assembled in __init__
+
+
+class IntegerEncodingMixin:
+  """Mixin that replaces string-valued categorical columns by the integer position of each value in its category list."""
+  def _prepare_data(self, X):
+    data = np.asarray(super()._prepare_data(X))
+    for col, kind in self.field_mapping.values():
+      if type(kind) is not Category or col is None or type(data[0, col]) is not str:
+        continue
+      known = [str(value) for value in kind.categories]
+      # values missing from kind.categories are appended after the known ones, in the order np.unique returns them
+      known += [label for label in np.unique(data[:, col]) if label not in known]
+      data[:, col] = [known.index(cell) for cell in data[:, col]]
+    return data
\ No newline at end of file
diff --git a/sklearn_pmml_model/ensemble/forest.py b/sklearn_pmml_model/ensemble/forest.py
index 771e1fe..e29f37b 100644
--- a/sklearn_pmml_model/ensemble/forest.py
+++ b/sklearn_pmml_model/ensemble/forest.py
@@ -1,11 +1,11 @@
 import numpy as np
 import warnings
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-from sklearn_pmml_model.base import PMMLBaseClassifier, PMMLBaseRegressor
+from sklearn_pmml_model.base import PMMLBaseClassifier, PMMLBaseRegressor, IntegerEncodingMixin
 from sklearn_pmml_model.tree import get_tree
 
 
-class PMMLForestClassifier(PMMLBaseClassifier, RandomForestClassifier):
+class PMMLForestClassifier(IntegerEncodingMixin, PMMLBaseClassifier, RandomForestClassifier):
   """
   A random forest classifier.
 
@@ -92,7 +92,7 @@ def _more_tags(self):
     return RandomForestClassifier._more_tags(self)
 
 
-class PMMLForestRegressor(PMMLBaseRegressor, RandomForestRegressor):
+class PMMLForestRegressor(IntegerEncodingMixin, PMMLBaseRegressor, RandomForestRegressor):
   """
   A random forest regressor.
 
diff --git a/sklearn_pmml_model/ensemble/gb.py b/sklearn_pmml_model/ensemble/gb.py
index a1ab5ee..8cdd4cd 100644
--- a/sklearn_pmml_model/ensemble/gb.py
+++ b/sklearn_pmml_model/ensemble/gb.py
@@ -3,13 +3,13 @@
 import numpy as np
 from sklearn.tree import DecisionTreeRegressor
 from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, _gb_losses
-from sklearn_pmml_model.base import PMMLBaseClassifier, PMMLBaseRegressor
+from sklearn_pmml_model.base import PMMLBaseClassifier, PMMLBaseRegressor, IntegerEncodingMixin
 from sklearn_pmml_model.tree import get_tree
 from scipy.special import expit
 from ._gradient_boosting import predict_stages
 
 
-class PMMLGradientBoostingClassifier(PMMLBaseClassifier, GradientBoostingClassifier, ABC):
+class PMMLGradientBoostingClassifier(IntegerEncodingMixin, PMMLBaseClassifier, GradientBoostingClassifier, ABC):
   """
   Gradient Boosting for classification.
 
@@ -135,7 +135,7 @@ def _more_tags(self):
     return GradientBoostingClassifier._more_tags(self)
 
 
-class PMMLGradientBoostingRegressor(PMMLBaseRegressor, GradientBoostingRegressor, ABC):
+class PMMLGradientBoostingRegressor(IntegerEncodingMixin, PMMLBaseRegressor, GradientBoostingRegressor, ABC):
   """
   Gradient Boosting for regression.
 
diff --git a/sklearn_pmml_model/linear_model/base.py b/sklearn_pmml_model/linear_model/base.py
index 75870fd..6211a68 100644
--- a/sklearn_pmml_model/linear_model/base.py
+++ b/sklearn_pmml_model/linear_model/base.py
@@ -1,107 +1,9 @@
-from sklearn_pmml_model.base import PMMLBaseRegressor, PMMLBaseClassifier
-from sklearn.preprocessing import OneHotEncoder
-from sklearn.compose import ColumnTransformer
+from sklearn_pmml_model.base import PMMLBaseRegressor, PMMLBaseClassifier, OneHotEncodingMixin
 import numpy as np
 from itertools import chain
 
 
-class PMMLLinearModel(PMMLBaseRegressor):
-  """
-  Abstract class for linear models.
-
-  """
-  def __init__(self, pmml):
-    PMMLBaseRegressor.__init__(self, pmml)
-
-    # Setup a column transformer to deal with categorical variables
-    target = self.target_field.get('name')
-    fields = [field for name, field in self.fields.items() if name != target]
-
-    def encoder_for(field):
-      if field.get('optype') != 'categorical':
-        return 'passthrough'
-
-      encoder = OneHotEncoder()
-      encoder.categories_ = np.array([self.field_mapping[field.get('name')][1].categories])
-      encoder.drop_idx_ = np.array([None for x in encoder.categories_])
-      encoder._legacy_mode = False
-      return encoder
-
-    transformer = ColumnTransformer(
-      transformers=[
-        (field.get('name'), encoder_for(field), [self.field_mapping[field.get('name')][0]])
-        for field in fields
-        if field.tag == 'DataField'
-      ]
-    )
-
-    X = np.array([[0 for field in fields if field.tag == "DataField"]])
-    transformer._validate_transformers()
-    transformer._validate_column_callables(X)
-    transformer._validate_remainder(X)
-    transformer.transformers_ = transformer.transformers
-    transformer.sparse_output_ = False
-    transformer._feature_names_in = None
-
-    self.transformer = transformer
-
-  def _prepare_data(self, X):
-    """
-    Overrides the default data preparation operation by one-hot encoding
-    categorical variables.
-    """
-    return self.transformer.transform(X)
-
-
-class PMMLLinearClassifier(PMMLBaseClassifier):
-  """
-  Abstract class for linear models.
-
-  """
-  def __init__(self, pmml):
-    PMMLBaseClassifier.__init__(self, pmml)
-
-    # Setup a column transformer to deal with categorical variables
-    target = self.target_field.get('name')
-    fields = [field for name, field in self.fields.items() if name != target]
-
-    def encoder_for(field):
-      if field.get('optype') != 'categorical':
-        return 'passthrough'
-
-      encoder = OneHotEncoder()
-      encoder.categories_ = np.array([self.field_mapping[field.get('name')][1].categories])
-      encoder.drop_idx_ = np.array([None for x in encoder.categories_])
-      encoder._legacy_mode = False
-      return encoder
-
-    transformer = ColumnTransformer(
-      transformers=[
-        (field.get('name'), encoder_for(field), [self.field_mapping[field.get('name')][0]])
-        for field in fields
-        if field.tag == 'DataField'
-      ]
-    )
-
-    X = np.array([[0 for field in fields if field.tag == "DataField"]])
-    transformer._validate_transformers()
-    transformer._validate_column_callables(X)
-    transformer._validate_remainder(X)
-    transformer.transformers_ = transformer.transformers
-    transformer.sparse_output_ = False
-    transformer._feature_names_in = None
-
-    self.transformer = transformer
-
-  def _prepare_data(self, X):
-    """
-    Overrides the default data preparation operation by one-hot encoding
-    categorical variables.
-    """
-    return self.transformer.transform(X)
-
-
-class PMMLGeneralizedLinearRegressor(PMMLLinearModel):
+class PMMLGeneralizedLinearRegressor(OneHotEncodingMixin, PMMLBaseRegressor):
   """
   Abstract class for Generalized Linear Models (GLMs).
 
@@ -122,7 +24,8 @@ class PMMLGeneralizedLinearRegressor(PMMLLinearModel):
 
   """
   def __init__(self, pmml):
-    PMMLLinearModel.__init__(self, pmml)
+    PMMLBaseRegressor.__init__(self, pmml)
+    OneHotEncodingMixin.__init__(self)
 
     # Import coefficients and intercepts
     model = self.root.find('GeneralRegressionModel')
@@ -134,7 +37,7 @@ def __init__(self, pmml):
     self.intercept_ = _get_intercept(model)
 
 
-class PMMLGeneralizedLinearClassifier(PMMLLinearClassifier):
+class PMMLGeneralizedLinearClassifier(OneHotEncodingMixin, PMMLBaseClassifier):
   """
   Abstract class for Generalized Linear Models (GLMs).
 
@@ -155,7 +58,8 @@ class PMMLGeneralizedLinearClassifier(PMMLLinearClassifier):
 
   """
   def __init__(self, pmml):
-    PMMLLinearClassifier.__init__(self, pmml)
+    PMMLBaseClassifier.__init__(self, pmml)
+    OneHotEncodingMixin.__init__(self)
 
     # Import coefficients and intercepts
     model = self.root.find('GeneralRegressionModel')
diff --git a/sklearn_pmml_model/linear_model/implementations.py b/sklearn_pmml_model/linear_model/implementations.py
index 9109899..457ee67 100644
--- a/sklearn_pmml_model/linear_model/implementations.py
+++ b/sklearn_pmml_model/linear_model/implementations.py
@@ -1,11 +1,11 @@
 from sklearn.linear_model import LinearRegression, Ridge, RidgeClassifier, Lasso, ElasticNet, LogisticRegression
-from sklearn_pmml_model.linear_model.base import PMMLLinearModel, PMMLLinearClassifier, PMMLGeneralizedLinearRegressor,\
-  PMMLGeneralizedLinearClassifier
+from sklearn_pmml_model.base import PMMLBaseRegressor, PMMLBaseClassifier, OneHotEncodingMixin
+from sklearn_pmml_model.linear_model.base import PMMLGeneralizedLinearRegressor, PMMLGeneralizedLinearClassifier
 from itertools import chain
 import numpy as np
 
 
-class PMMLLinearRegression(PMMLLinearModel, LinearRegression):
+class PMMLLinearRegression(OneHotEncodingMixin, PMMLBaseRegressor, LinearRegression):
   """
   Ordinary least squares Linear Regression.
 
@@ -25,7 +25,8 @@ class PMMLLinearRegression(PMMLLinearModel, LinearRegression):
 
   """
   def __init__(self, pmml):
-    PMMLLinearModel.__init__(self, pmml)
+    PMMLBaseRegressor.__init__(self, pmml)
+    OneHotEncodingMixin.__init__(self)
 
     # Import coefficients and intercepts
     model = self.root.find('RegressionModel')
@@ -51,13 +52,13 @@ def __init__(self, pmml):
       self.intercept_ = self.intercept_[0]
 
   def fit(self, x, y):
-    return PMMLLinearModel.fit(self, x, y)
+    return PMMLBaseRegressor.fit(self, x, y)
 
   def _more_tags(self):
     return LinearRegression._more_tags(self)
 
 
-class PMMLLogisticRegression(PMMLLinearClassifier, LogisticRegression):
+class PMMLLogisticRegression(OneHotEncodingMixin, PMMLBaseClassifier, LogisticRegression):
   """
   Logistic Regression (aka logit, MaxEnt) classifier.
 
@@ -77,7 +78,8 @@ class PMMLLogisticRegression(PMMLLinearClassifier, LogisticRegression):
 
   """
   def __init__(self, pmml):
-    PMMLLinearClassifier.__init__(self, pmml)
+    PMMLBaseClassifier.__init__(self, pmml)
+    OneHotEncodingMixin.__init__(self)
 
     # Import coefficients and intercepts
     model = self.root.find('RegressionModel')
@@ -111,7 +113,7 @@ def __init__(self, pmml):
     self.solver = 'lbfgs'
 
   def fit(self, x, y):
-    return PMMLLinearClassifier.fit(self, x, y)
+    return PMMLBaseClassifier.fit(self, x, y)
 
   def _more_tags(self):
     return LogisticRegression._more_tags(self)
diff --git a/sklearn_pmml_model/naive_bayes/implementations.py b/sklearn_pmml_model/naive_bayes/implementations.py
index a94d7c5..5cc91d0 100644
--- a/sklearn_pmml_model/naive_bayes/implementations.py
+++ b/sklearn_pmml_model/naive_bayes/implementations.py
@@ -1,10 +1,10 @@
-from sklearn_pmml_model.base import PMMLBaseClassifier
+from sklearn_pmml_model.base import PMMLBaseClassifier, OneHotEncodingMixin
 from sklearn.naive_bayes import GaussianNB
 import numpy as np
 from itertools import chain
 
 
-class PMMLGaussianNB(PMMLBaseClassifier, GaussianNB):
+class PMMLGaussianNB(OneHotEncodingMixin, PMMLBaseClassifier, GaussianNB):
   """
   Gaussian Naive Bayes (GaussianNB)
 
@@ -26,6 +26,7 @@ class PMMLGaussianNB(PMMLBaseClassifier, GaussianNB):
   """
   def __init__(self, pmml):
     PMMLBaseClassifier.__init__(self, pmml)
+    OneHotEncodingMixin.__init__(self)
 
     model = self.root.find('NaiveBayesModel')
 
diff --git a/tests/naive_bayes/test_naive_bayes.py b/tests/naive_bayes/test_naive_bayes.py
index 7376ea4..e31e513 100644
--- a/tests/naive_bayes/test_naive_bayes.py
+++ b/tests/naive_bayes/test_naive_bayes.py
@@ -74,15 +74,16 @@ class TestGaussianNBIntegration(TestCase):
   def setUp(self):
     df = pd.read_csv(path.join(BASE_DIR, '../models/categorical-test.csv'))
     Xte = df.iloc[:, 1:]
-    Xte = pd.get_dummies(Xte, prefix_sep='')
+    Xenc = pd.get_dummies(Xte, prefix_sep='')
     yte = df.iloc[:, 0]
     self.test = (Xte, yte)
+    self.enc = (Xenc, yte)
 
     pmml = path.join(BASE_DIR, '../models/nb-cat-pima.pmml')
     self.clf = PMMLGaussianNB(pmml)
 
     self.ref = GaussianNB()
-    self.ref.fit(Xte, yte)
+    self.ref.fit(Xenc, yte)
 
   def test_predict_proba(self):
     Xte, _ = self.test
@@ -110,7 +111,7 @@ def test_sklearn2pmml(self):
     pipeline = PMMLPipeline([
       ("classifier", self.ref)
     ])
-    pipeline.fit(self.test[0], self.test[1])
+    pipeline.fit(self.enc[0], self.enc[1])
     sklearn2pmml(pipeline, "gnb-sklearn2pmml.pmml", with_repr = True)
 
     try:
@@ -118,10 +119,10 @@ def test_sklearn2pmml(self):
       model = PMMLGaussianNB(pmml='gnb-sklearn2pmml.pmml')
 
       # Verify classification
-      Xte, _ = self.test
-      assert np.array_equal(
-        self.ref.predict_proba(Xte),
-        model.predict_proba(Xte)
+      Xenc, _ = self.enc
+      assert np.allclose(
+        self.ref.predict_proba(Xenc),
+        model.predict_proba(Xenc)
       )
 
     finally:
@@ -152,7 +153,7 @@ def test_sklearn2pmml(self):
 
       # Verify classification
       Xte, _ = self.test
-      assert np.array_equal(
+      assert np.allclose(
         self.ref.predict_proba(Xte),
         model.predict_proba(Xte)
       )
diff --git a/tests/test_base.py b/tests/test_base.py
index 22f3436..92b044b 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -221,3 +221,102 @@ def test_fit_exception(self):
       clf.fit(X, y)
 
     assert str(cm.exception) == "Not supported."
+
+  def test_prepare_data_removes_unused_columns(self):
+    clf = PMMLBaseEstimator(pmml=StringIO("""
+      <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">
+        <DataDictionary>
+          <DataField name="Class" optype="categorical" dataType="string">
+            <Value value="setosa"/>
+            <Value value="versicolor"/>
+            <Value value="virginica"/>
+          </DataField>
+          <DataField name="test1" optype="continuous" dataType="double"/>
+        </DataDictionary>
+        <MiningSchema>
+          <MiningField name="Class" usageType="target"/>
+        </MiningSchema>
+      </PMML>
+      """))
+    # The frame carries an extra column, "test2", that the DataDictionary above does not declare.
+    X = pd.DataFrame(data=[[1, 2], [3, 4], [5, 6]], columns=["test1", "test2"])
+    result = clf._prepare_data(X)
+    # The caller's frame must be left untouched; only the prepared result drops "test2".
+    assert list(X.columns) == ["test1", "test2"]
+    assert list(result.columns) == ["test1"]
+
+  def test_prepare_data_reorders_columns(self):
+    clf = PMMLBaseEstimator(pmml=StringIO("""
+    <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">
+      <DataDictionary>
+        <DataField name="Class" optype="categorical" dataType="string">
+          <Value value="setosa"/>
+          <Value value="versicolor"/>
+          <Value value="virginica"/>
+        </DataField>
+        <DataField name="test2" optype="continuous" dataType="double"/>
+        <DataField name="test1" optype="continuous" dataType="double"/>
+      </DataDictionary>
+      <MiningSchema>
+        <MiningField name="Class" usageType="target"/>
+      </MiningSchema>
+    </PMML>
+    """))
+    # The frame lists its columns as test1, test2: the reverse of the DataDictionary order above.
+    X = pd.DataFrame(data=[[1, 2], [3, 4], [5, 6]], columns=["test1", "test2"])
+    result = clf._prepare_data(X)
+    # The prepared result follows the DataDictionary order; the input frame keeps its own order.
+    assert list(X.columns) == ["test1", "test2"]
+    assert list(result.columns) == ["test2", "test1"]
+
+  def test_prepare_data_exception_mismatch_columns_numpy(self):
+    clf = PMMLBaseEstimator(pmml=StringIO("""
+    <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">
+      <DataDictionary>
+        <DataField name="Class" optype="categorical" dataType="string">
+          <Value value="setosa"/>
+          <Value value="versicolor"/>
+          <Value value="virginica"/>
+        </DataField>
+        <DataField name="test1" optype="continuous" dataType="double"/>
+      </DataDictionary>
+      <MiningSchema>
+        <MiningField name="Class" usageType="target"/>
+      </MiningSchema>
+    </PMML>
+    """))
+    # Two columns of data, while the PMML declares a single non-target DataField ("test1").
+    X = pd.DataFrame(data=[[1, 2], [3, 4], [5, 6]], columns=["test1", "test2"])
+    # np.asanyarray drops the column names; the expected error is about the number of features.
+    with self.assertRaises(Exception) as cm:
+      clf._prepare_data(np.asanyarray(X))
+
+    assert str(cm.exception) == "The number of features in provided data does not match expected number of features " \
+                                "in the PMML. Provide pandas.Dataframe, or provide data matching the DataFields in " \
+                                "the PMML document."
+
+  def test_prepare_data_exception_mismatch_columns_pandas(self):
+    clf = PMMLBaseEstimator(pmml=StringIO("""
+    <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">
+      <DataDictionary>
+        <DataField name="Class" optype="categorical" dataType="string">
+          <Value value="setosa"/>
+          <Value value="versicolor"/>
+          <Value value="virginica"/>
+        </DataField>
+        <DataField name="test1" optype="continuous" dataType="double"/>
+        <DataField name="test2" optype="continuous" dataType="double"/>
+      </DataDictionary>
+      <MiningSchema>
+        <MiningField name="Class" usageType="target"/>
+      </MiningSchema>
+    </PMML>
+    """))
+    # Same column count as the PMML declares, but named "Test_1"/"Test_2" instead of "test1"/"test2".
+    X = pd.DataFrame(data=[[1, 2], [3, 4], [5, 6]], columns=["Test_1", "Test_2"])
+
+    with self.assertRaises(Exception) as cm:
+      clf._prepare_data(X)
+
+    assert str(cm.exception) == "The features in the input data do not match features expected by the PMML model."
+
diff --git a/tests/tree/test_tree.py b/tests/tree/test_tree.py
index dd744dd..d2ffde8 100644
--- a/tests/tree/test_tree.py
+++ b/tests/tree/test_tree.py
@@ -88,15 +88,12 @@ def test_more_tags(self):
 class TestIrisTreeIntegration(TestCase):
   def setUp(self):
     pair = [0, 1]
-    data = load_iris()
+    data = load_iris(as_frame=True)
 
-    X = pd.DataFrame(data.data[:, pair])
-    X.columns = np.array(data.feature_names)[pair]
-    y = pd.Series(np.array(data.target_names)[data.target])
+    X = data.data
+    y = data.target
     y.name = "Class"
-    X, Xte, y, yte = train_test_split(X, y, test_size=0.33, random_state=123)
-    self.test = (Xte, yte)
-    self.train = (X, y)
+    self.test = (X, y)
 
     pmml = path.join(BASE_DIR, '../models/tree-iris.pmml')
     self.clf = PMMLTreeClassifier(pmml=pmml)
@@ -122,7 +119,7 @@ def test_sklearn2pmml(self):
     pipeline = PMMLPipeline([
       ("classifier", self.ref)
     ])
-    pipeline.fit(self.train[0], self.train[1])
+    pipeline.fit(self.test[0], self.test[1])
     sklearn2pmml(pipeline, "tree-sklearn2pmml.pmml", with_repr = True)
 
     try:
@@ -145,9 +142,6 @@ class TestDigitsTreeIntegration(TestCase):
   def setUp(self):
     data = load_digits()
 
-    self.columns = [2, 3, 4, 5, 6, 7, 9, 10, 13, 14, 17, 18, 19, 20, 21, 25, 26,
-                    27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 41, 42, 43, 45, 46,
-                    50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63]
     X = pd.DataFrame(data.data)
     y = pd.Series(np.array(data.target_names)[data.target])
     y.name = "Class"
@@ -161,19 +155,19 @@ def test_predict(self):
     Xte, _ = self.test
     assert np.array_equal(
       self.ref.predict(Xte),
-      self.clf.predict(Xte[self.columns]).astype(np.int64)
+      self.clf.predict(Xte)
     )
 
   def test_predict_proba(self):
     Xte, _ = self.test
     assert np.array_equal(
       self.ref.predict_proba(Xte),
-      self.clf.predict_proba(Xte[self.columns])
+      self.clf.predict_proba(Xte)
     )
 
   def test_score(self):
     Xte, yte = self.test
-    assert self.ref.score(Xte, yte) == self.clf.score(Xte[self.columns], yte)
+    assert self.ref.score(Xte, yte) == self.clf.score(Xte, yte)
 
 
 class TestCategoricalTreeIntegration(TestCase):