-
Notifications
You must be signed in to change notification settings - Fork 4.5k
[BEAM-1491]Identify HADOOP_CONF_DIR(or YARN_CONF_DIR) environment variables #2819
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,12 +17,18 @@ | |
| */ | ||
| package org.apache.beam.sdk.io.hdfs; | ||
|
|
||
| import com.google.common.base.Strings; | ||
| import com.google.common.collect.Lists; | ||
| import java.io.File; | ||
| import java.util.List; | ||
| import org.apache.beam.sdk.options.Default; | ||
| import org.apache.beam.sdk.options.DefaultValueFactory; | ||
| import org.apache.beam.sdk.options.Description; | ||
| import org.apache.beam.sdk.options.PipelineOptions; | ||
| import org.apache.hadoop.conf.Configuration; | ||
| import org.apache.hadoop.fs.Path; | ||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
| /** | ||
| * {@link PipelineOptions} which encapsulate {@link Configuration Hadoop Configuration} | ||
|
|
@@ -39,11 +45,37 @@ public interface HadoopFileSystemOptions extends PipelineOptions { | |
| void setHdfsConfiguration(List<Configuration> value); | ||
|
|
||
| /** A {@link DefaultValueFactory} which locates a Hadoop {@link Configuration}. */ | ||
| class ConfigurationLocator implements DefaultValueFactory<Configuration> { | ||
| class ConfigurationLocator implements DefaultValueFactory<List<Configuration>> { | ||
| private static final Logger LOG = LoggerFactory.getLogger(ConfigurationLocator.class); | ||
| @Override | ||
| public Configuration create(PipelineOptions options) { | ||
| // TODO: Find default configuration to use | ||
| return null; | ||
| public List<Configuration> create(PipelineOptions options) { | ||
| // Find default configuration when HADOOP_CONF_DIR or YARN_CONF_DIR is set. | ||
| Configuration conf = new Configuration(false); | ||
| List<String> hadoopEnvList = Lists.newArrayList("HADOOP_CONF_DIR", "YARN_CONF_DIR"); | ||
| for (String env : hadoopEnvList) { | ||
| String hadoopConfPath = System.getenv(env); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add unit tests. Note that and See GcpOptions and GcpOptionsTest for an example of how this kind of interaction can be tested. |
||
| if (!Strings.isNullOrEmpty(hadoopConfPath) && new File(hadoopConfPath).exists()) { | ||
|
|
||
| // We just need to load both core-site.xml and hdfs-site.xml to determine the | ||
| // default fs path and the hdfs configuration | ||
| if (new File(hadoopConfPath + "/core-site.xml").exists()) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please use the two-argument constructor for File/Path here and below so we aren't assuming how path resolution works ("/" is common but not for every file system). |
||
| conf.addResource(new Path(hadoopConfPath + "/core-site.xml")); | ||
|
|
||
| if (LOG.isDebugEnabled()) { | ||
|
Member
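There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please use parameterized messages: `LOG.debug("Adding " + hadoopConfPath + "/core-site.xml to hadoop configuration");` becomes `LOG.debug("Adding {}/core-site.xml to hadoop configuration", hadoopConfPath);` |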
||
| LOG.debug("Adding " + hadoopConfPath + "/core-site.xml to hadoop configuration"); | ||
| } | ||
| } | ||
|
|
||
| if (new File(hadoopConfPath + "/hdfs-site.xml").exists()) { | ||
| conf.addResource(new Path(hadoopConfPath + "/hdfs-site.xml")); | ||
|
|
||
| if (LOG.isDebugEnabled()) { | ||
| LOG.debug("Adding " + hadoopConfPath + "/hdfs-site.xml to hadoop configuration"); | ||
| } | ||
| } | ||
| } | ||
| } | ||
| return Lists.<Configuration>newArrayList(conf); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We shouldn't be returning a configuration if we didn't load one from one of the paths. |
||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we find a configuration in both HADOOP_CONF_DIR and YARN_CONF_DIR, we should return them separately rather than letting YARN_CONF_DIR overwrite the properties found in HADOOP_CONF_DIR.
Also, ensure that we only load one configuration if both HADOOP_CONF_DIR and YARN_CONF_DIR point to the same location.