Permalink
Browse files

HIVE-1903 Can't join HBase tables if one's name is the beginning of the other (John Sichi via namit)



git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1057502 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
Namit Jain
Namit Jain committed Jan 11, 2011
1 parent 08f48c9 commit a59eb0797b41c3f69699b66e07b894790d86d2e3
View
@@ -680,6 +680,9 @@ Trunk - Unreleased
to ignore HDFS location stored in index files
(Yongqiang He via namit)
+ HIVE-1903 Can't join HBase tables if one's name is the beginning of
+ the other (John Sichi via namit)
+
TESTS
HIVE-1464. improve test query performance
@@ -1,6 +1,7 @@
DROP TABLE users;
DROP TABLE states;
DROP TABLE countries;
+DROP TABLE users_level;
-- From HIVE-1257
@@ -62,3 +63,20 @@ ON (u.state = s.key);
DROP TABLE users;
DROP TABLE states;
DROP TABLE countries;
+
+CREATE TABLE users(key int, userid int, username string, created int)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:nickname,f:created");
+
+CREATE TABLE users_level(key int, userid int, level int)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:level");
+
+-- HIVE-1903: the problem fixed here showed up even without any data,
+-- so no need to load any to test it
+SELECT year(from_unixtime(users.created)) AS year, level, count(users.userid) AS num
+ FROM users JOIN users_level ON (users.userid = users_level.userid)
+ GROUP BY year(from_unixtime(users.created)), level;
+
+DROP TABLE users;
+DROP TABLE users_level;
@@ -10,6 +10,10 @@ PREHOOK: query: DROP TABLE countries
PREHOOK: type: DROPTABLE
POSTHOOK: query: DROP TABLE countries
POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE users_level
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE users_level
+POSTHOOK: type: DROPTABLE
PREHOOK: query: -- From HIVE-1257
CREATE TABLE users(key string, state string, country string, country_id int)
@@ -88,102 +92,102 @@ ON (u.country = c.key)
PREHOOK: type: QUERY
PREHOOK: Input: default@countries
PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-44_025_3464030805185795112/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-29_136_2919119166696342265/-mr-10000
POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c
ON (u.country = c.key)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@countries
POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-44_025_3464030805185795112/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-29_136_2919119166696342265/-mr-10000
user1 USA United States USA
PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c
ON (u.country = c.country)
PREHOOK: type: QUERY
PREHOOK: Input: default@countries
PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-49_449_2533239955498825412/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-38_418_1418937364423533875/-mr-10000
POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c
ON (u.country = c.country)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@countries
POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-49_449_2533239955498825412/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-38_418_1418937364423533875/-mr-10000
user1 USA United States USA
PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c
ON (u.country_id = c.country_id)
PREHOOK: type: QUERY
PREHOOK: Input: default@countries
PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-54_460_9134325599532847572/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-47_279_1891102438076444084/-mr-10000
POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c
ON (u.country_id = c.country_id)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@countries
POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-54_460_9134325599532847572/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-47_279_1891102438076444084/-mr-10000
PREHOOK: query: SELECT u.key, u.state, s.name FROM users u JOIN states s
ON (u.state = s.key)
PREHOOK: type: QUERY
PREHOOK: Input: default@states
PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-59_427_3646437485215925564/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-54_306_2919915084551749896/-mr-10000
POSTHOOK: query: SELECT u.key, u.state, s.name FROM users u JOIN states s
ON (u.state = s.key)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@states
POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-59_427_3646437485215925564/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-54_306_2919915084551749896/-mr-10000
user1 IA Iowa
PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c
ON (u.country = c.key)
PREHOOK: type: QUERY
PREHOOK: Input: default@countries
PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-03_357_736778343063311968/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-03_810_1067780128697572780/-mr-10000
POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c
ON (u.country = c.key)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@countries
POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-03_357_736778343063311968/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-03_810_1067780128697572780/-mr-10000
user1 USA United States USA
PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c
ON (u.country = c.country)
PREHOOK: type: QUERY
PREHOOK: Input: default@countries
PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-08_313_7684989920596569472/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-14_736_8923692779050900406/-mr-10000
POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c
ON (u.country = c.country)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@countries
POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-08_313_7684989920596569472/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-14_736_8923692779050900406/-mr-10000
user1 USA United States USA
PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c
ON (u.country_id = c.country_id)
PREHOOK: type: QUERY
PREHOOK: Input: default@countries
PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-13_256_4291980393265625395/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-22_235_696090037944243521/-mr-10000
POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c
ON (u.country_id = c.country_id)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@countries
POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-13_256_4291980393265625395/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-22_235_696090037944243521/-mr-10000
PREHOOK: query: SELECT u.key, u.state, s.name FROM users u JOIN states s
ON (u.state = s.key)
PREHOOK: type: QUERY
PREHOOK: Input: default@states
PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-18_206_1231084557369200625/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-28_951_5386570432365997648/-mr-10000
POSTHOOK: query: SELECT u.key, u.state, s.name FROM users u JOIN states s
ON (u.state = s.key)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@states
POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-18_206_1231084557369200625/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-28_951_5386570432365997648/-mr-10000
user1 IA Iowa
PREHOOK: query: DROP TABLE users
PREHOOK: type: DROPTABLE
@@ -209,3 +213,55 @@ POSTHOOK: query: DROP TABLE countries
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@countries
POSTHOOK: Output: default@countries
+PREHOOK: query: CREATE TABLE users(key int, userid int, username string, created int)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:nickname,f:created")
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE users(key int, userid int, username string, created int)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:nickname,f:created")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@users
+PREHOOK: query: CREATE TABLE users_level(key int, userid int, level int)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:level")
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE users_level(key int, userid int, level int)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:level")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@users_level
+PREHOOK: query: -- HIVE-1903: the problem fixed here showed up even without any data,
+-- so no need to load any to test it
+SELECT year(from_unixtime(users.created)) AS year, level, count(users.userid) AS num
+ FROM users JOIN users_level ON (users.userid = users_level.userid)
+ GROUP BY year(from_unixtime(users.created)), level
+PREHOOK: type: QUERY
+PREHOOK: Input: default@users
+PREHOOK: Input: default@users_level
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-47_957_2665969936395506719/-mr-10000
+POSTHOOK: query: -- HIVE-1903: the problem fixed here showed up even without any data,
+-- so no need to load any to test it
+SELECT year(from_unixtime(users.created)) AS year, level, count(users.userid) AS num
+ FROM users JOIN users_level ON (users.userid = users_level.userid)
+ GROUP BY year(from_unixtime(users.created)), level
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@users
+POSTHOOK: Input: default@users_level
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-47_957_2665969936395506719/-mr-10000
+PREHOOK: query: DROP TABLE users
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@users
+PREHOOK: Output: default@users
+POSTHOOK: query: DROP TABLE users
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@users
+POSTHOOK: Output: default@users
+PREHOOK: query: DROP TABLE users_level
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@users_level
+PREHOOK: Output: default@users_level
+POSTHOOK: query: DROP TABLE users_level
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@users_level
+POSTHOOK: Output: default@users_level
@@ -217,17 +217,23 @@ public RecordReader getRecordReader(InputSplit split, JobConf job,
// clone a jobConf for setting needed columns for reading
JobConf cloneJobConf = new JobConf(job);
- pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath()
- .toString(), hsplit.getPath().toUri().getPath());
- InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
- cloneJobConf);
+ if (this.mrwork == null) {
+ init(job);
+ }
+ boolean nonNative = false;
PartitionDesc part = pathToPartitionInfo.get(hsplit.getPath().toString());
if ((part != null) && (part.getTableDesc() != null)) {
Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
+ nonNative = part.getTableDesc().isNonNative();
}
+ pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath()
+ .toString(), hsplit.getPath().toUri().getPath(), nonNative);
+
+ InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
+ cloneJobConf);
RecordReader innerReader = inputFormat.getRecordReader(inputSplit,
cloneJobConf, reporter);
@@ -356,6 +362,12 @@ protected void pushFilters(JobConf jobConf, TableScanOperator tableScan) {
protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass,
String splitPath, String splitPathWithNoSchema) {
+ pushProjectionsAndFilters(jobConf, inputFormatClass, splitPath,
+ splitPathWithNoSchema, false);
+ }
+
+ protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass,
+ String splitPath, String splitPathWithNoSchema, boolean nonNative) {
if (this.mrwork == null) {
init(job);
}
@@ -367,7 +379,22 @@ protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass
while (iterator.hasNext()) {
Entry<String, ArrayList<String>> entry = iterator.next();
String key = entry.getKey();
- if (splitPath.startsWith(key) || splitPathWithNoSchema.startsWith(key)) {
+ boolean match;
+ if (nonNative) {
+ // For non-native tables, we need to do an exact match to avoid
+ // HIVE-1903. (The table location contains no files, and the string
+ // representation of its path does not have a trailing slash.)
+ match =
+ splitPath.equals(key) || splitPathWithNoSchema.equals(key);
+ } else {
+ // But for native tables, we need to do a prefix match for
+ // subdirectories. (Unlike non-native tables, prefix mixups don't seem
+ // to be a potential problem here since we are always dealing with the
+ // path to something deeper than the table location.)
+ match =
+ splitPath.startsWith(key) || splitPathWithNoSchema.startsWith(key);
+ }
+ if (match) {
ArrayList<String> list = entry.getValue();
for (String val : list) {
aliases.add(val);

0 comments on commit a59eb07

Please sign in to comment.