-
Notifications
You must be signed in to change notification settings - Fork 3.3k
/
TableOutputFormat.java
221 lines (198 loc) · 7.8 KB
/
TableOutputFormat.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapreduce;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
/**
* Convert Map/Reduce output and write it to an HBase table. The KEY is ignored
* while the output value <u>must</u> be either a {@link Put} or a
* {@link Delete} instance.
*/
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TableOutputFormat<KEY> extends OutputFormat<KEY, Mutation>
implements Configurable {
private static final Log LOG = LogFactory.getLog(TableOutputFormat.class);
/** Job parameter that specifies the output table. */
public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";
/**
* Prefix for configuration property overrides to apply in {@link #setConf(Configuration)}.
* For keys matching this prefix, the prefix is stripped, and the value is set in the
* configuration with the resulting key, ie. the entry "hbase.mapred.output.key1 = value1"
* would be set in the configuration as "key1 = value1". Use this to set properties
* which should only be applied to the {@code TableOutputFormat} configuration and not the
* input configuration.
*/
public static final String OUTPUT_CONF_PREFIX = "hbase.mapred.output.";
/**
* Optional job parameter to specify a peer cluster.
* Used specifying remote cluster when copying between hbase clusters (the
* source is picked up from <code>hbase-site.xml</code>).
* @see TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job, Class, String, String, String)
*/
public static final String QUORUM_ADDRESS = OUTPUT_CONF_PREFIX + "quorum";
/** Optional job parameter to specify peer cluster's ZK client port */
public static final String QUORUM_PORT = OUTPUT_CONF_PREFIX + "quorum.port";
/** Optional specification of the rs class name of the peer cluster */
public static final String
REGION_SERVER_CLASS = OUTPUT_CONF_PREFIX + "rs.class";
/** Optional specification of the rs impl name of the peer cluster */
public static final String
REGION_SERVER_IMPL = OUTPUT_CONF_PREFIX + "rs.impl";
/** The configuration. */
private Configuration conf = null;
/**
* Writes the reducer output to an HBase table.
*/
protected class TableRecordWriter
extends RecordWriter<KEY, Mutation> {
private Connection connection;
private BufferedMutator mutator;
/**
* @throws IOException
*
*/
public TableRecordWriter() throws IOException {
String tableName = conf.get(OUTPUT_TABLE);
this.connection = ConnectionFactory.createConnection(conf);
this.mutator = connection.getBufferedMutator(TableName.valueOf(tableName));
LOG.info("Created table instance for " + tableName);
}
/**
* Closes the writer, in this case flush table commits.
*
* @param context The context.
* @throws IOException When closing the writer fails.
* @see RecordWriter#close(TaskAttemptContext)
*/
@Override
public void close(TaskAttemptContext context)
throws IOException {
mutator.close();
connection.close();
}
/**
* Writes a key/value pair into the table.
*
* @param key The key.
* @param value The value.
* @throws IOException When writing fails.
* @see RecordWriter#write(Object, Object)
*/
@Override
public void write(KEY key, Mutation value)
throws IOException {
if (!(value instanceof Put) && !(value instanceof Delete)) {
throw new IOException("Pass a Delete or a Put");
}
mutator.mutate(value);
}
}
/**
* Creates a new record writer.
*
* Be aware that the baseline javadoc gives the impression that there is a single
* {@link RecordWriter} per job but in HBase, it is more natural if we give you a new
* RecordWriter per call of this method. You must close the returned RecordWriter when done.
* Failure to do so will drop writes.
*
* @param context The current task context.
* @return The newly created writer instance.
* @throws IOException When creating the writer fails.
* @throws InterruptedException When the jobs is cancelled.
*/
@Override
public RecordWriter<KEY, Mutation> getRecordWriter(TaskAttemptContext context)
throws IOException, InterruptedException {
return new TableRecordWriter();
}
/**
* Checks if the output target exists.
*
* @param context The current context.
* @throws IOException When the check fails.
* @throws InterruptedException When the job is aborted.
* @see OutputFormat#checkOutputSpecs(JobContext)
*/
@Override
public void checkOutputSpecs(JobContext context) throws IOException,
InterruptedException {
// TODO Check if the table exists?
}
/**
* Returns the output committer.
*
* @param context The current context.
* @return The committer.
* @throws IOException When creating the committer fails.
* @throws InterruptedException When the job is aborted.
* @see OutputFormat#getOutputCommitter(TaskAttemptContext)
*/
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
throws IOException, InterruptedException {
return new TableOutputCommitter();
}
@Override
public Configuration getConf() {
return conf;
}
@Override
public void setConf(Configuration otherConf) {
String tableName = otherConf.get(OUTPUT_TABLE);
if(tableName == null || tableName.length() <= 0) {
throw new IllegalArgumentException("Must specify table name");
}
String address = otherConf.get(QUORUM_ADDRESS);
int zkClientPort = otherConf.getInt(QUORUM_PORT, 0);
String serverClass = otherConf.get(REGION_SERVER_CLASS);
String serverImpl = otherConf.get(REGION_SERVER_IMPL);
try {
this.conf = HBaseConfiguration.createClusterConf(otherConf, address, OUTPUT_CONF_PREFIX);
if (serverClass != null) {
this.conf.set(HConstants.REGION_SERVER_IMPL, serverImpl);
}
if (zkClientPort != 0) {
this.conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zkClientPort);
}
} catch(IOException e) {
LOG.error(e);
throw new RuntimeException(e);
}
}
}