Skip to content

Commit

Permalink
Fix load of models that depend on non thread-safe dependencies
Browse files Browse the repository at this point in the history
A problem was detected when loading TensorFlow models in different threads inside the same JVM: after one TensorFlow model had been loaded, importing another TensorFlow model failed. This was caused by a dependency of TensorFlow (protobuf) being reloaded even though it already existed in the JVM (it had been loaded through the first thread).

The fix is to share the problematic module (TensorFlow) across the Python sub-interpreters, which works around the known issues with CPython extensions.
  • Loading branch information
Paulo Pereira committed Dec 11, 2018
1 parent 7f2d92f commit 0214701
Show file tree
Hide file tree
Showing 9 changed files with 258 additions and 4 deletions.
12 changes: 12 additions & 0 deletions openml-python-common/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,15 @@ export PATH=$ANACONDA_PATH/envs/myenv/bin:$PATH
export LD_LIBRARY_PATH=$ANACONDA_PATH/envs/myenv/lib/python3.6/site-packages/jep:$LD_LIBRARY_PATH
export LD_PRELOAD=$ANACONDA_PATH/envs/myenv/lib/libpython3.6m.so
```

7. If you need to share Python modules across sub-interpreters, create a "python-packages.xml" file that lists the modules to be shared. By default, the provider already shares the "numpy" and "tensorflow" modules. This is a workaround for the issues with CPython extensions.
- Remember that this file should be added to the classpath of your program.

```
<?xml version="1.0"?>
<python>
<package>my_package_1</package>
<package>my_package_2</package>
</python>
```
12 changes: 9 additions & 3 deletions openml-python-common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,22 @@
<groupId>com.feedzai</groupId>
<artifactId>openml-utils</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.feedzai</groupId>
<artifactId>openml-utils</artifactId>
<scope>test</scope>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<scope>provided</scope>
<groupId>org.jmockit</groupId>
<artifactId>jmockit</artifactId>
<version>${jmockit.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
*/
package com.feedzai.openml.python.jep.instance;

import com.feedzai.openml.python.xml.parser.XMLParser;
import com.google.common.util.concurrent.Uninterruptibles;
import jep.Jep;
import jep.JepConfig;
import jep.JepException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -97,8 +99,15 @@ public void stop() {
*/
@Override
public void run() {
JepConfig jepConfig = new JepConfig()
.addSharedModules("tensorflow")
.addSharedModules("numpy")
.setInteractive(false);
for (final String shareModule : new XMLParser().getSharedModules()) {
jepConfig = jepConfig.addSharedModules(shareModule);
}

try (final Jep jep = new Jep(false)) {
try (final Jep jep = new Jep(jepConfig)) {
while (this.running) {
this.evaluationQueue.take().evaluate(jep);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Copyright (c) 2018 Feedzai
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.feedzai.openml.python.xml.parser;

import com.google.common.collect.ImmutableSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.File;
import java.io.FileNotFoundException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.HashSet;
import java.util.Set;

/**
* Class responsible for parsing a XML file in order to retrieve the modules of Python to be shared across
* sub-interpreters.
*
* @author Paulo Pereira (paulo.pereira@feedzai.com)
* @since 0.1.5
*/
public class XMLParser {

    /**
     * Logger.
     */
    private static final Logger logger = LoggerFactory.getLogger(XMLParser.class);

    /**
     * Name of the classpath resource (a XML file) that lists the modules of Python to be shared across
     * sub-interpreters.
     */
    private static final String XML_FILE = "python-packages.xml";

    /**
     * Name of the XML element that holds the name of a single module to be shared.
     */
    private static final String PACKAGE_TAG = "package";

    /**
     * Parser feature that makes the parser reject any document carrying a DOCTYPE declaration. The expected
     * file format has no DOCTYPE, so rejecting it closes the door on external entity (XXE) tricks.
     */
    private static final String DISALLOW_DOCTYPE_FEATURE = "http://apache.org/xml/features/disallow-doctype-decl";

    /**
     * Constructor.
     */
    public XMLParser() {
        // empty constructor
    }

    /**
     * Gets the {@link File} reference of {@link #XML_FILE} that exists in the current classpath.
     *
     * @return The {@link File} reference of {@link #XML_FILE}.
     * @throws FileNotFoundException If {@link #XML_FILE} is not available in the classpath.
     * @throws URISyntaxException    If the location of {@link #XML_FILE} cannot be converted to a URI.
     */
    private File getXMLFile() throws FileNotFoundException, URISyntaxException {
        final URL resource = getClass().getClassLoader().getResource(XML_FILE);
        if (resource == null) {
            // The file is optional: fail with an explicit message instead of an anonymous NullPointerException.
            throw new FileNotFoundException("Resource '" + XML_FILE + "' was not found in the classpath.");
        }
        // URL#getFile() keeps percent-encoding (e.g. "%20" for a space), which yields a path that does not
        // exist on disk. Going through the URI decodes the path correctly.
        return new File(resource.toURI());
    }

    /**
     * Creates a {@link DocumentBuilderFactory} configured defensively: secure processing is enabled, entity
     * references are not expanded and, when the underlying parser supports it, DOCTYPE declarations are rejected.
     *
     * @return The configured {@link DocumentBuilderFactory}.
     * @throws ParserConfigurationException If the secure processing feature cannot be enabled.
     */
    private DocumentBuilderFactory newDocumentBuilderFactory() throws ParserConfigurationException {
        final DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        dbFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        dbFactory.setExpandEntityReferences(false);
        try {
            dbFactory.setFeature(DISALLOW_DOCTYPE_FEATURE, true);
        } catch (final ParserConfigurationException e) {
            // Not every JAXP implementation knows this feature; lacking it must not prevent parsing.
            logger.debug("The XML parser does not support the feature '{}'.", DISALLOW_DOCTYPE_FEATURE, e);
        }
        return dbFactory;
    }

    /**
     * Parses the {@link #XML_FILE} to retrieve the modules of Python to be shared across sub-interpreters.
     * Elements without any text (e.g. {@code <package/>}) are skipped and surrounding whitespace is removed
     * from the module names.
     *
     * @return A {@link Set} with the modules of Python to be shared across sub-interpreters.
     * @throws Exception If there is an error while locating or parsing {@link #XML_FILE}.
     */
    private Set<String> parseXMLFile() throws Exception {
        final Set<String> sharedModules = new HashSet<>();
        final File xmlFile = getXMLFile();
        final DocumentBuilder dBuilder = newDocumentBuilderFactory().newDocumentBuilder();
        final Document doc = dBuilder.parse(xmlFile);
        doc.getDocumentElement().normalize();

        final NodeList packages = doc.getElementsByTagName(PACKAGE_TAG);
        for (int i = 0; i < packages.getLength(); i++) {
            // getTextContent() is safe for empty elements, unlike getFirstChild().getNodeValue().
            final String rawName = packages.item(i).getTextContent();
            final String moduleName = rawName == null ? "" : rawName.trim();
            if (moduleName.isEmpty()) {
                logger.warn("Ignoring an empty <{}> element in '{}'.", PACKAGE_TAG, XML_FILE);
                continue;
            }
            sharedModules.add(moduleName);
        }
        return sharedModules;
    }

    /**
     * Retrieves a {@link Set} with the modules of Python to be shared across sub-interpreters.
     * <p>
     * This is a best-effort operation: if {@link #XML_FILE} is missing or cannot be parsed, the problem is logged
     * and an empty set is returned. The returned set is always immutable.
     *
     * @return The modules of Python to be shared across sub-interpreters (never {@code null}).
     */
    public Set<String> getSharedModules() {
        try {
            return ImmutableSet.copyOf(parseXMLFile());
        } catch (final Exception e) {
            // Deliberately broad: any failure here must fall back to "no extra shared modules".
            logger.warn("Problem while getting the XML file with the shared modules of Python.", e);
            return ImmutableSet.of();
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright (c) 2018 Feedzai
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* This package contains logic for parsing XML files to retrieve the modules of Python to be shared across
* sub-interpreters.
*
* @since 0.1.5
*/
package com.feedzai.openml.python.xml.parser;
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Copyright (c) 2018 Feedzai
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.feedzai.openml.python.xml.parser;

import mockit.Mock;
import mockit.MockUp;
import org.assertj.core.util.Files;
import org.junit.Test;

import java.io.File;
import java.util.Set;

import static org.assertj.core.api.Assertions.assertThat;

/**
* Tests the retrieving of the modules to be shared across sub-interpreters from a XML file.
*
* @author Paulo Pereira (paulo.pereira@feedzai.com)
* @since 0.1.5
*/
public class XMLParserTest {

    /**
     * Checks that the packages declared in the XML file available to the test are all returned as shared modules.
     */
    @Test
    public void validXMLFileTest() {
        final XMLParser parser = new XMLParser();
        final Set<String> modules = parser.getSharedModules();

        assertThat(modules)
                .as("Set of shared modules.")
                .hasSize(2)
                .contains("my_package_1", "my_package_2");
    }

    /**
     * Checks that no shared modules are returned when the XML file handed to the parser is an empty temporary
     * file.
     */
    @Test
    public void invalidXMLFileTest() {
        // Replaces the file lookup of the parser so that it yields an empty temporary file.
        final MockUp<XMLParser> emptyFileMockUp = new MockUp<XMLParser>() {
            @Mock
            private File getXMLFile() {
                final File emptyFile = Files.newTemporaryFile();
                emptyFile.deleteOnExit();
                return emptyFile;
            }
        };

        final Set<String> modules = emptyFileMockUp.getMockInstance().getSharedModules();

        assertThat(modules)
                .as("Set of shared modules.")
                .isEmpty();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright (c) 2018 Feedzai
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* This package contains the unit-tests for {@link com.feedzai.openml.python.xml.parser}.
*
* @since 0.1.5
*/
package com.feedzai.openml.python.xml.parser;
6 changes: 6 additions & 0 deletions openml-python-common/src/test/resources/python-packages.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0"?>
<python>
<package>my_package_1</package>
<package>my_package_2</package>
</python>

7 changes: 7 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
<jackson-databind.version>2.6.7</jackson-databind.version>
<openml-api.version>0.3.0</openml-api.version>
<jep.version>3.7.0</jep.version>
<jmockit.version>1.34</jmockit.version>
</properties>

<dependencyManagement>
Expand Down Expand Up @@ -174,6 +175,12 @@
<version>${assertj.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.jmockit</groupId>
<artifactId>jmockit</artifactId>
<version>${jmockit.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
</dependencyManagement>

Expand Down

0 comments on commit 0214701

Please sign in to comment.