package org.apache.flink.fs.gs;

import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.CoreOptions;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.FileSystemFactory;
import org.apache.flink.runtime.util.HadoopConfigLoader;
import org.apache.flink.util.Preconditions;

import com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem;
+import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.io.IOException;
+import java.io.StringWriter;
+import java.io.Writer;
import java.net.URI;
import java.util.Collections;
+import java.util.Map;
+import java.util.Optional;

/**
 * Implementation of the Flink {@link org.apache.flink.core.fs.FileSystemFactory} interface for
@@ -53,30 +59,23 @@ public class GSFileSystemFactory implements FileSystemFactory {

    private static final String FLINK_SHADING_PREFIX = "";

-    private final HadoopConfigLoader hadoopConfigLoader;
-
    @Nullable private Configuration flinkConfig;

+    @Nullable private org.apache.hadoop.conf.Configuration hadoopConfig;
+
    /** Constructs the Google Storage file system factory. */
    public GSFileSystemFactory() {
        LOGGER.info("Creating GSFileSystemFactory");
-
-        this.hadoopConfigLoader =
-                new HadoopConfigLoader(
-                        FLINK_CONFIG_PREFIXES,
-                        MIRRORED_CONFIG_KEYS,
-                        HADOOP_CONFIG_PREFIX,
-                        Collections.emptySet(),
-                        Collections.emptySet(),
-                        FLINK_SHADING_PREFIX);
    }

    @Override
    public void configure(Configuration flinkConfig) {
        LOGGER.info("Configuring GSFileSystemFactory with Flink configuration {}", flinkConfig);

        this.flinkConfig = Preconditions.checkNotNull(flinkConfig);
-        hadoopConfigLoader.setFlinkConfig(flinkConfig);
+        this.hadoopConfig = getHadoopConfiguration(flinkConfig);
+
+        LOGGER.info("Using Hadoop configuration {}", serializeHadoopConfig(hadoopConfig));
    }

    @Override
@@ -90,20 +89,81 @@ public FileSystem create(URI fsUri) throws IOException {

        Preconditions.checkNotNull(fsUri);

-        // create and configure the Google Hadoop file system
-        org.apache.hadoop.conf.Configuration hadoopConfig =
-                hadoopConfigLoader.getOrLoadHadoopConfig();
-        LOGGER.info(
-                "Creating GoogleHadoopFileSystem for uri {} with Hadoop config {}",
-                fsUri,
-                hadoopConfig);
+        // initialize the Google Hadoop file system
        GoogleHadoopFileSystem googleHadoopFileSystem = new GoogleHadoopFileSystem();
-        googleHadoopFileSystem.initialize(fsUri, hadoopConfig);
+        try {
+            googleHadoopFileSystem.initialize(fsUri, hadoopConfig);
+        } catch (IOException ex) {
+            throw new IOException("Failed to initialize GoogleHadoopFileSystem", ex);
+        }

        // construct the file system options
        GSFileSystemOptions options = new GSFileSystemOptions(flinkConfig);

        // create the file system wrapper
        return new GSFileSystem(googleHadoopFileSystem, options);
    }
+
+    /**
+     * Loads the Hadoop configuration in two steps.
+     *
+     * <p>1) Find a Hadoop conf dir using CoreOptions.FLINK_HADOOP_CONF_DIR or the HADOOP_CONF_DIR
+     * environment variable, and load core-default.xml and core-site.xml from that location.
+     *
+     * <p>2) Load the Hadoop config from the Flink config, with the translations defined above.
+     *
+     * <p>The two are then merged, such that keys from the second step overwrite keys from the
+     * first.
+     *
+     * @return The Hadoop configuration.
+     */
+    private static org.apache.hadoop.conf.Configuration getHadoopConfiguration(
+            Configuration flinkConfig) {
+
+        // create an empty hadoop configuration
+        org.apache.hadoop.conf.Configuration hadoopConfig =
+                new org.apache.hadoop.conf.Configuration();
+
+        // look for a hadoop configuration directory and load configuration from core-default.xml
+        // and core-site.xml
+        Optional<String> hadoopConfigDir =
+                Optional.ofNullable(flinkConfig.get(CoreOptions.FLINK_HADOOP_CONF_DIR));
+        if (!hadoopConfigDir.isPresent()) {
+            hadoopConfigDir = Optional.ofNullable(System.getenv("HADOOP_CONF_DIR"));
+        }
+        hadoopConfigDir.ifPresent(
+                configDir -> {
+                    LOGGER.info("Loading system Hadoop config from {}", configDir);
+                    hadoopConfig.addResource(new Path(configDir, "core-default.xml"));
+                    hadoopConfig.addResource(new Path(configDir, "core-site.xml"));
+                    hadoopConfig.reloadConfiguration();
+                });
+
+        // now, load hadoop config from flink and copy key/value pairs into the base config
+        HadoopConfigLoader hadoopConfigLoader =
+                new HadoopConfigLoader(
+                        FLINK_CONFIG_PREFIXES,
+                        MIRRORED_CONFIG_KEYS,
+                        HADOOP_CONFIG_PREFIX,
+                        Collections.emptySet(),
+                        Collections.emptySet(),
+                        FLINK_SHADING_PREFIX);
+        hadoopConfigLoader.setFlinkConfig(flinkConfig);
+        org.apache.hadoop.conf.Configuration flinkHadoopConfig =
+                hadoopConfigLoader.getOrLoadHadoopConfig();
+        for (Map.Entry<String, String> entry : flinkHadoopConfig) {
+            hadoopConfig.set(entry.getKey(), entry.getValue());
+        }
+
+        return hadoopConfig;
+    }
+
+    private String serializeHadoopConfig(org.apache.hadoop.conf.Configuration hadoopConfig)
+            throws RuntimeException {
+        try (Writer writer = new StringWriter()) {
+            org.apache.hadoop.conf.Configuration.dumpConfiguration(hadoopConfig, writer);
+            return writer.toString();
+        } catch (IOException ex) {
+            throw new RuntimeException(ex);
+        }
+    }
}
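
For reference, here is a minimal, self-contained sketch of the merge semantics that getHadoopConfiguration relies on: resources registered with addResource are read first, and any key written afterwards with set (here, the keys mirrored from the Flink configuration by HadoopConfigLoader) overrides the value loaded from those resources. The conf directory path and the fs.gs.project.id key below are illustrative placeholders, not values taken from this change.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

/** Sketch only: shows that set() takes precedence over values loaded via addResource(). */
public class HadoopConfigMergeSketch {
    public static void main(String[] args) {
        Configuration hadoopConfig = new Configuration();

        // Step 1: load site configuration from a (hypothetical) Hadoop conf dir.
        hadoopConfig.addResource(new Path("/etc/hadoop/conf", "core-site.xml"));
        hadoopConfig.reloadConfiguration();

        // Step 2: keys copied over from the Flink configuration win on conflict.
        hadoopConfig.set("fs.gs.project.id", "my-project");

        // Prints "my-project" even if core-site.xml defined a different value.
        System.out.println(hadoopConfig.get("fs.gs.project.id"));
    }
}

This ordering mirrors the Javadoc above: the Flink-derived keys are applied last, so they take precedence over anything found under CoreOptions.FLINK_HADOOP_CONF_DIR or HADOOP_CONF_DIR.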