Skip to content

Commit

Permalink
[SYSTEMDS-3482] Parallel Hadoop IO Startup
Browse files Browse the repository at this point in the history
I observed that the compile time increases to ~0.6 sec if we include IO
operations, while it is ~0.2 sec if we do not have IO operations. This
is due to the Hadoop IO we are using, which takes up to 70% of the compile time
in cases where we have simple scripts with only a read and a single operation.
This is a constant overhead on the first IO operation that does not affect
subsequent IO operations. To improve this, I have moved it to a parallel
operation when we construct the JobConfiguration. This improves the
compile time of SystemDS in general from ~0.6 sec when using IO to ~0.2 sec.

Closes #1757
  • Loading branch information
Baunsgaard committed Jan 4, 2023
1 parent eb3e384 commit 6a759ce
Showing 1 changed file with 21 additions and 2 deletions.
23 changes: 21 additions & 2 deletions src/main/java/org/apache/sysds/conf/ConfigurationManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,28 @@

package org.apache.sysds.conf;

import java.util.concurrent.ExecutorService;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.JobConf;
import org.apache.sysds.conf.CompilerConfig.ConfigType;
import org.apache.sysds.hops.OptimizerUtils;
import org.apache.sysds.lops.Compression.CompressConfig;
import org.apache.sysds.lops.compile.linearization.ILinearize;
import org.apache.sysds.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import org.apache.sysds.runtime.io.IOUtilFunctions;
import org.apache.sysds.runtime.util.CommonThreadPool;

/**
* Singleton for accessing the parsed and merged system configuration.
*
* NOTE: parallel execution of multiple DML scripts (in the same JVM) with different configurations
* would require changes/extensions of this class.
*/
public class ConfigurationManager
{
public class ConfigurationManager{
private static final Log LOG = LogFactory.getLog(ConfigurationManager.class.getName());

/** Global cached job conf for read-only operations */
private static JobConf _rJob = null;

Expand All @@ -56,6 +64,17 @@ public class ConfigurationManager
//ConfigManager -> OptimizerUtils -> InfrastructureAnalyzer -> ConfigManager
_dmlconf = new DMLConfig();
_cconf = new CompilerConfig();

final ExecutorService pool = CommonThreadPool.get(InfrastructureAnalyzer.getLocalParallelism());
pool.submit(() ->{
try{
IOUtilFunctions.getFileSystem(_rJob);
}
catch(Exception e){
LOG.warn(e.getMessage());
}
});
pool.shutdown();
}


Expand Down

0 comments on commit 6a759ce

Please sign in to comment.