-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix loading of models that depend on non-thread-safe dependencies
A problem was detected when loading TensorFlow models in different threads inside the same JVM. It occurred when, after one TensorFlow model had been loaded, a second TensorFlow model was imported. The cause was a TensorFlow dependency (protobuf) that was being reloaded even though it already existed in the JVM (loaded through the first thread). The workaround was to turn the object that wraps Jep in a separate thread into a singleton, so that the dependencies of all models are imported in the same thread.
- Loading branch information
Paulo Pereira
committed
Dec 7, 2018
1 parent
7f2d92f
commit 01865e4
Showing
8 changed files
with
528 additions
and
10 deletions.
There are no files selected for viewing
49 changes: 49 additions & 0 deletions
49
openml-generic-python/src/test/java/com/feedzai/openml/python/TensorFlowModelTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
package com.feedzai.openml.python; | ||
|
||
import com.feedzai.openml.data.schema.DatasetSchema; | ||
import com.feedzai.openml.mocks.MockInstance; | ||
import com.feedzai.openml.provider.exception.ModelLoadingException; | ||
import com.feedzai.openml.util.algorithm.GenericAlgorithm; | ||
import com.feedzai.openml.util.load.LoadSchemaUtils; | ||
import org.junit.Rule; | ||
import org.junit.Test; | ||
import org.junit.rules.ExpectedException; | ||
|
||
import java.nio.file.Path; | ||
import java.nio.file.Paths; | ||
import java.util.concurrent.ThreadLocalRandom; | ||
|
||
/** | ||
* Tests for the TensorFlow models. | ||
* | ||
* @author Paulo Pereira (paulo.pereira@feedzai.com) | ||
* @since 0.1.5 | ||
*/ | ||
public class TensorFlowModelTest { | ||
|
||
/** | ||
* Expected exception in tests. | ||
*/ | ||
@Rule | ||
public final ExpectedException exception = ExpectedException.none(); | ||
|
||
/** | ||
* Regression test to check that the load of two TensorFlow models work in the same JVM. | ||
*/ | ||
@Test | ||
public void testReloadTensorFlowModel() throws ModelLoadingException { | ||
final ClassificationPythonModelLoader modelLoader = new PythonModelProvider() | ||
.getModelCreator(GenericAlgorithm.GENERIC_CLASSIFICATION.getAlgorithmDescriptor().getAlgorithmName()) | ||
.get(); | ||
|
||
final Path path = Paths.get(this.getClass().getResource("/tensorflow_valid").getPath()); | ||
final DatasetSchema datasetSchema = LoadSchemaUtils.datasetSchemaFromJson(path); | ||
|
||
// 1st load passes | ||
ClassificationPythonModel classificationPythonModel = modelLoader.loadModel(path, datasetSchema); | ||
classificationPythonModel.classify(new MockInstance(datasetSchema, ThreadLocalRandom.current())); | ||
|
||
// 2nd load passes | ||
modelLoader.loadModel(path, datasetSchema).classify(new MockInstance(datasetSchema, ThreadLocalRandom.current())); | ||
} | ||
} |
Empty file.
8 changes: 8 additions & 0 deletions
8
openml-generic-python/src/test/resources/tensorflow_valid/ClassifierApi/classifier.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
|
||
class ClassifierBase(object):
    """Base class declaring the two methods a concrete classifier adapter must provide.

    Both methods raise ``NotImplementedError`` here; subclasses override them.
    """

    # Text of the error raised by every method that has not been overridden.
    _UNIMPLEMENTED_MESSAGE = "This must be implemented by a concrete adapter."

    def getClassDistribution(self, instance):
        """Placeholder for the class-distribution computation.

        :param instance: input handed over by the caller (unused here)
        :raises NotImplementedError: always, until overridden by a subclass
        """
        raise NotImplementedError(ClassifierBase._UNIMPLEMENTED_MESSAGE)

    def classify(self, instance):
        """Placeholder for the classification computation.

        :param instance: input handed over by the caller (unused here)
        :raises NotImplementedError: always, until overridden by a subclass
        """
        raise NotImplementedError(ClassifierBase._UNIMPLEMENTED_MESSAGE)
99 changes: 99 additions & 0 deletions
99
openml-generic-python/src/test/resources/tensorflow_valid/classifier.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import sys,math | ||
from ClassifierApi.classifier import ClassifierBase | ||
import numpy as np | ||
import random | ||
import tensorflow as tf | ||
|
||
|
||
class Classifier(ClassifierBase):
    """Classifier adapter shipped as a test resource.

    The scores it produces are pseudo-random numbers seeded with the first
    element of each instance, so the same instance always gets the same score.
    Nothing in this class calls into TensorFlow.
    """

    def __init__(self):
        """Set up the counters, threshold and field lists used by this adapter."""
        ClassifierBase.__init__(self)

        # Output files
        self.num_classes = 2

        # Model
        self.threshold = 0.5
        # Zero-filled state arrays; they are not read anywhere else in this class.
        self.state_gru1 = np.zeros([1, 20])
        self.state_gru2 = np.zeros([1, 10])

        # Number of instances classified / scored so far.
        self.n_classified = 0
        self.n_scored = 0

        self.batch_header = ['pos_entry_capability_indexed', 'transaction_response_code_indexed',
                             'pin_entry_capability_indexed', 'merchant_country_name_indexed',
                             'verification_method_indexed', 'card_type_indexed', 'merchant_id_indexed',
                             'is_cnp_indexed', 'card_address_validation_code_indexed', 'merchant_state_indexed',
                             'processing_code_indexed', 'pos_info_indexed', 'cvv_validation_code_indexed',
                             'cvv2_validation_code_indexed', 'reversal_indicator_indexed',
                             'transaction_type_indexed', 'merchant_town_indexed', 'pos_type_indexed',
                             'numerical', 'merchant_name_indexed', 'merchant_country_indexed',
                             'terminal_type_indexed', 'mcc_indexed', 'terminal_id_indexed',
                             'terminal_authentication_indexed', 'split_indicator']
        self.header = ['account_balance_normalized', 'amount_normalized', 'diff_event_timestamp_group_by_client_id_normalized',
                       'sin_hour_of_day_event_timestamp_normalized', 'cos_hour_of_day_event_timestamp_normalized',
                       'sin_day_of_week_event_timestamp_normalized', 'cos_day_of_week_event_timestamp_normalized',
                       'sin_day_of_month_event_timestamp_normalized', 'cos_day_of_month_event_timestamp_normalized',
                       'event_timestamp_minus_account_open_date_normalized', 'event_timestamp_minus_card_exp_date_normalized',
                       'merchant_name_indexed', 'terminal_id_indexed', 'merchant_id_indexed', 'merchant_town_indexed',
                       'mcc_indexed', 'merchant_country_name_indexed', 'merchant_state_indexed', 'merchant_country_indexed',
                       'card_type_indexed', 'pos_info_indexed', 'transaction_type_indexed', 'reversal_indicator_indexed',
                       'card_address_validation_code_indexed', 'terminal_type_indexed', 'processing_code_indexed',
                       'pin_entry_capability_indexed', 'pos_entry_capability_indexed', 'pos_type_indexed',
                       'verification_method_indexed', 'transaction_response_code_indexed', 'terminal_authentication_indexed',
                       'cvv_validation_code_indexed', 'cvv2_validation_code_indexed', 'fraud_label_indexed', 'is_cnp_indexed',
                       'event_timestamp', 'client_id', 'account_iban', 'encrypted_pan', 'transaction_id', 'amount',
                       'is_cnp', 'ID']

        # Get lists of fields to parse.
        label_field = 'fraud_label_indexed'
        # Categorical features: all fields that were indexed except for the label.
        categorical_features = {field for field in self.header
                                if field.endswith('_indexed') and field != label_field}
        other_fields_to_keep = ['event_timestamp',
                                'client_id',
                                'account_iban',
                                'encrypted_pan',
                                'transaction_id',
                                'amount',
                                'is_cnp']
        # Numerical features: all fields except categorical features, label, other fields to keep, and newly created fields.
        self.numerical_features = (set(self.header)
                                   - categorical_features
                                   - {label_field}
                                   - set(other_fields_to_keep)
                                   - {'ID'})  # , 'split_indicator'

    def normalize(self, v):
        """Scale ``v`` by its L1 norm.

        :param v: array-like of numbers (a plain list is accepted)
        :return: numpy array holding ``v`` divided by its L1 norm; when that norm
                 is zero the machine epsilon of the array's dtype is used as the
                 divisor instead
        """
        values = np.asarray(v)
        # np.finfo only accepts inexact dtypes and a plain list has no dtype at all,
        # so integer / list input is converted to float before the zero-norm guard.
        if not np.issubdtype(values.dtype, np.inexact):
            values = values.astype(float)

        norm = np.linalg.norm(values, ord=1)
        if norm == 0:
            norm = np.finfo(values.dtype).eps
        return values / norm

    def classify_instance(self, instance):
        """Classify a single instance.

        :param instance: indexable instance; its first element seeds the score
        :return: 1 when the first entry of the class distribution is above
                 ``self.threshold``, 0 otherwise
        """
        score = self.getClassDistribution_instance(instance)[0]
        classification = int(score > self.threshold)

        self.n_classified += 1

        return classification

    def getClassDistribution_instance(self, instance):
        """Compute the class distribution of a single instance.

        :param instance: indexable instance; ``instance[0]`` is used as the seed
        :return: numpy array with ``self.num_classes`` entries, L1-normalized
        """
        self.n_scored += 1

        # A private generator yields the same sample as seeding the module-level one,
        # without resetting the process-wide random state on every call.
        rng = random.Random(instance[0])
        return self.normalize(rng.sample(range(1, 100), self.num_classes))

    def validate(self, instances):
        """Check that every instance supports ``len``.

        :param instances: iterable of instances
        :raises Exception: if an instance has no ``__len__`` attribute
        """
        for instance in instances:
            if not hasattr(instance, "__len__"):
                raise Exception('Instance must be an array!')

    def classify(self, instances):
        """Validate and classify a batch of instances.

        :param instances: iterable of indexable instances
        :return: list with one 0/1 classification per instance
        :raises Exception: if an instance has no ``__len__`` attribute
        """
        self.validate(instances)

        return [self.classify_instance(instance) for instance in instances]

    def getClassDistribution(self, instances):
        """Compute the class distribution of a batch of instances.

        :param instances: iterable of indexable instances
        :return: list with one class distribution per instance
        """
        return [self.getClassDistribution_instance(instance) for instance in instances]
Oops, something went wrong.