
Commit

Fix load of models that depend on non-thread-safe dependencies
A problem was detected when loading TensorFlow models in different threads inside the same JVM. It happened after loading a TensorFlow model and then trying to import a new TensorFlow model. The cause was a dependency of TensorFlow (protobuf) being reloaded when it already existed in the JVM (loaded through the 1st thread).

The workaround for this problem was to convert the object that wraps Jep on a separate thread into a singleton. This way, the dependencies of the models are always imported in the same thread.
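For illustration, a minimal sketch of that idea with hypothetical names (JepSingleton, its executor field, and the eval helper are illustrative only, not the classes actually changed in this commit): a singleton owning a single-threaded executor funnels every Python call through one thread, so Jep and the Python imports it triggers (including protobuf) always live on the same thread.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import jep.Jep;

/**
 * Hypothetical sketch of a thread-confined Jep wrapper (illustrative only).
 * All Python code, and therefore all Python imports, runs on one executor thread.
 */
public final class JepSingleton {

    private static final JepSingleton INSTANCE = new JepSingleton();

    /** The single thread that owns the Jep interpreter for the whole JVM. */
    private final ExecutorService executor = Executors.newSingleThreadExecutor();

    /** The Jep interpreter, created lazily on the executor thread and reused. */
    private Jep jep;

    private JepSingleton() {
    }

    public static JepSingleton getInstance() {
        return INSTANCE;
    }

    /** Evaluates a Python snippet (e.g. "import tensorflow as tf") on the dedicated thread. */
    public void eval(final String pythonCode) throws Exception {
        this.executor.submit(() -> {
            if (this.jep == null) {
                this.jep = new Jep();
            }
            this.jep.eval(pythonCode);
            return null;
        }).get();
    }
}

With such a singleton in place, a second model load reuses the existing interpreter thread instead of importing protobuf again from a fresh one, which is the scenario exercised by the regression test below.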
Paulo Pereira committed Dec 7, 2018
1 parent 7f2d92f commit 01865e4
Showing 8 changed files with 528 additions and 10 deletions.
@@ -0,0 +1,49 @@
package com.feedzai.openml.python;

import com.feedzai.openml.data.schema.DatasetSchema;
import com.feedzai.openml.mocks.MockInstance;
import com.feedzai.openml.provider.exception.ModelLoadingException;
import com.feedzai.openml.util.algorithm.GenericAlgorithm;
import com.feedzai.openml.util.load.LoadSchemaUtils;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.ThreadLocalRandom;

/**
 * Tests for the TensorFlow models.
 *
 * @author Paulo Pereira (paulo.pereira@feedzai.com)
 * @since 0.1.5
 */
public class TensorFlowModelTest {

    /**
     * Expected exception in tests.
     */
    @Rule
    public final ExpectedException exception = ExpectedException.none();

    /**
     * Regression test to check that loading two TensorFlow models works in the same JVM.
     */
    @Test
    public void testReloadTensorFlowModel() throws ModelLoadingException {
        final ClassificationPythonModelLoader modelLoader = new PythonModelProvider()
                .getModelCreator(GenericAlgorithm.GENERIC_CLASSIFICATION.getAlgorithmDescriptor().getAlgorithmName())
                .get();

        final Path path = Paths.get(this.getClass().getResource("/tensorflow_valid").getPath());
        final DatasetSchema datasetSchema = LoadSchemaUtils.datasetSchemaFromJson(path);

        // 1st load passes
        final ClassificationPythonModel classificationPythonModel = modelLoader.loadModel(path, datasetSchema);
        classificationPythonModel.classify(new MockInstance(datasetSchema, ThreadLocalRandom.current()));

        // 2nd load passes
        modelLoader.loadModel(path, datasetSchema).classify(new MockInstance(datasetSchema, ThreadLocalRandom.current()));
    }
}
Empty file.
@@ -0,0 +1,8 @@

class ClassifierBase(object):

    def getClassDistribution(self, instance):
        raise NotImplementedError("This must be implemented by a concrete adapter.")

    def classify(self, instance):
        raise NotImplementedError("This must be implemented by a concrete adapter.")
@@ -0,0 +1,99 @@
import sys, math
from ClassifierApi.classifier import ClassifierBase
import numpy as np
import random
import tensorflow as tf


class Classifier(ClassifierBase):

    def __init__(self):
        ClassifierBase.__init__(self)

        # Number of classes in the output distribution
        self.num_classes = 2

        # Model
        self.threshold = 0.5
        self.state_gru1 = np.zeros([1, 20])
        self.state_gru2 = np.zeros([1, 10])

        self.n_classified = 0
        self.n_scored = 0

        self.batch_header = ['pos_entry_capability_indexed', 'transaction_response_code_indexed',
                             'pin_entry_capability_indexed', 'merchant_country_name_indexed',
                             'verification_method_indexed', 'card_type_indexed', 'merchant_id_indexed',
                             'is_cnp_indexed', 'card_address_validation_code_indexed', 'merchant_state_indexed',
                             'processing_code_indexed', 'pos_info_indexed', 'cvv_validation_code_indexed',
                             'cvv2_validation_code_indexed', 'reversal_indicator_indexed',
                             'transaction_type_indexed', 'merchant_town_indexed', 'pos_type_indexed',
                             'numerical', 'merchant_name_indexed', 'merchant_country_indexed',
                             'terminal_type_indexed', 'mcc_indexed', 'terminal_id_indexed',
                             'terminal_authentication_indexed', 'split_indicator']
        self.header = ['account_balance_normalized', 'amount_normalized', 'diff_event_timestamp_group_by_client_id_normalized',
                       'sin_hour_of_day_event_timestamp_normalized', 'cos_hour_of_day_event_timestamp_normalized',
                       'sin_day_of_week_event_timestamp_normalized', 'cos_day_of_week_event_timestamp_normalized',
                       'sin_day_of_month_event_timestamp_normalized', 'cos_day_of_month_event_timestamp_normalized',
                       'event_timestamp_minus_account_open_date_normalized', 'event_timestamp_minus_card_exp_date_normalized',
                       'merchant_name_indexed', 'terminal_id_indexed', 'merchant_id_indexed', 'merchant_town_indexed',
                       'mcc_indexed', 'merchant_country_name_indexed', 'merchant_state_indexed', 'merchant_country_indexed',
                       'card_type_indexed', 'pos_info_indexed', 'transaction_type_indexed', 'reversal_indicator_indexed',
                       'card_address_validation_code_indexed', 'terminal_type_indexed', 'processing_code_indexed',
                       'pin_entry_capability_indexed', 'pos_entry_capability_indexed', 'pos_type_indexed',
                       'verification_method_indexed', 'transaction_response_code_indexed', 'terminal_authentication_indexed',
                       'cvv_validation_code_indexed', 'cvv2_validation_code_indexed', 'fraud_label_indexed', 'is_cnp_indexed',
                       'event_timestamp', 'client_id', 'account_iban', 'encrypted_pan', 'transaction_id', 'amount',
                       'is_cnp', 'ID']

        # Get lists of fields to parse.
        label_field = 'fraud_label_indexed'
        # Categorical features: all fields that were indexed except for the label.
        categorical_features = {field for field in self.header if field.endswith('_indexed') and field != 'fraud_label_indexed'}
        other_fields_to_keep = ['event_timestamp',
                                'client_id',
                                'account_iban',
                                'encrypted_pan',
                                'transaction_id',
                                'amount',
                                'is_cnp']
        # Numerical features: all fields except categorical features, label, other fields to keep, and newly created fields.
        self.numerical_features = (set(self.header)
                                   - categorical_features
                                   - {label_field}
                                   - set(other_fields_to_keep)
                                   - {'ID'})  # , 'split_indicator'

    def normalize(self, v):
        # Convert to a float array so this also works for plain Python lists
        # (e.g. the output of random.sample), which have no dtype attribute.
        v = np.asarray(v, dtype=float)
        norm = np.linalg.norm(v, ord=1)
        if norm == 0:
            norm = np.finfo(v.dtype).eps
        return v / norm

    def classify_instance(self, instance):
        score = self.getClassDistribution_instance(instance)[0]
        classification = int(score > self.threshold)

        self.n_classified += 1

        return classification

    def getClassDistribution_instance(self, instance):
        self.n_scored += 1

        # Mock scoring: a deterministic pseudo-random distribution seeded by the instance.
        random.seed(instance[0])
        d = self.normalize(random.sample(range(1, 100), self.num_classes))
        return d

    def validate(self, instances):
        for instance in instances:
            if not hasattr(instance, "__len__"):
                raise Exception('Instance must be an array!')

    def classify(self, instances):
        self.validate(instances)

        return [self.classify_instance(instance) for instance in instances]

    def getClassDistribution(self, instances):
        return [self.getClassDistribution_instance(instance) for instance in instances]
