Skip to content

Commit

Permalink
HIVE-5672 : Insert with custom separator not supported for non-local …
Browse files Browse the repository at this point in the history
…directory (Nemon Lou, reviewed by Xuefu Zhang, Sushanth Sowmyan)
  • Loading branch information
khorgath committed Apr 30, 2015
1 parent 7cbea47 commit f3a23dc
Show file tree
Hide file tree
Showing 6 changed files with 2,005 additions and 49 deletions.
5 changes: 2 additions & 3 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
Expand Up @@ -37,7 +37,6 @@ TOK_TAB;
TOK_PARTSPEC;
TOK_PARTVAL;
TOK_DIR;
TOK_LOCAL_DIR;
TOK_TABREF;
TOK_SUBQUERY;
TOK_INSERT_INTO;
Expand Down Expand Up @@ -2351,8 +2350,8 @@ destination
@init { pushMsg("destination specification", state); }
@after { popMsg(state); }
   :
     // LOCAL is optional: local and DFS directory targets now share a single
     // TOK_DIR tree (the separate TOK_LOCAL_DIR token was removed), carrying
     // the optional $local marker plus any row-format / file-format clauses so
     // custom separators work for non-local directories too (HIVE-5672).
     (local = KW_LOCAL)? KW_DIRECTORY StringLiteral tableRowFormat? tableFileFormat?
     -> ^(TOK_DIR StringLiteral $local? tableRowFormat? tableFileFormat?)
   | KW_TABLE tableOrPartition -> tableOrPartition
   ;

Expand Down
10 changes: 5 additions & 5 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
Expand Up @@ -54,7 +54,7 @@ public class QB {
private boolean isQuery;
private boolean isAnalyzeRewrite;
private CreateTableDesc tblDesc = null; // table descriptor of the final
private CreateTableDesc localDirectoryDesc = null ;
private CreateTableDesc directoryDesc = null ;

// used by PTFs
/*
Expand Down Expand Up @@ -285,12 +285,12 @@ public void setTableDesc(CreateTableDesc desc) {
tblDesc = desc;
}

public CreateTableDesc getLLocalDirectoryDesc() {
return localDirectoryDesc;
public CreateTableDesc getDirectoryDesc() {
return directoryDesc;
}

public void setLocalDirectoryDesc(CreateTableDesc localDirectoryDesc) {
this.localDirectoryDesc = localDirectoryDesc;
public void setDirectoryDesc(CreateTableDesc directoryDesc) {
this.directoryDesc = directoryDesc;
}

/**
Expand Down
43 changes: 23 additions & 20 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
Expand Up @@ -1747,7 +1747,6 @@ public void getMetaData(QB qb, ReadEntity parentInput) throws SemanticException
break;
}

case HiveParser.TOK_LOCAL_DIR:
case HiveParser.TOK_DIR: {
// This is a dfs file
String fname = stripQuotes(ast.getChild(0).getText());
Expand Down Expand Up @@ -1791,43 +1790,47 @@ public void getMetaData(QB qb, ReadEntity parentInput) throws SemanticException
ctx.setResDir(stagingPath);
}
}
qb.getMetaData().setDestForAlias(name, fname,
(ast.getToken().getType() == HiveParser.TOK_DIR));

CreateTableDesc localDirectoryDesc = new CreateTableDesc();
boolean localDirectoryDescIsSet = false;
boolean isDfsFile = true;
if (ast.getChildCount() >= 2 && ast.getChild(1).getText().toLowerCase().equals("local")) {
isDfsFile = false;
}
qb.getMetaData().setDestForAlias(name, fname, isDfsFile);

CreateTableDesc directoryDesc = new CreateTableDesc();
boolean directoryDescIsSet = false;
int numCh = ast.getChildCount();
for (int num = 1; num < numCh ; num++){
ASTNode child = (ASTNode) ast.getChild(num);
if (child != null) {
if (storageFormat.fillStorageFormat(child)) {
localDirectoryDesc.setOutputFormat(storageFormat.getOutputFormat());
localDirectoryDesc.setSerName(storageFormat.getSerde());
localDirectoryDescIsSet = true;
directoryDesc.setOutputFormat(storageFormat.getOutputFormat());
directoryDesc.setSerName(storageFormat.getSerde());
directoryDescIsSet = true;
continue;
}
switch (child.getToken().getType()) {
case HiveParser.TOK_TABLEROWFORMAT:
rowFormatParams.analyzeRowFormat(child);
localDirectoryDesc.setFieldDelim(rowFormatParams.fieldDelim);
localDirectoryDesc.setLineDelim(rowFormatParams.lineDelim);
localDirectoryDesc.setCollItemDelim(rowFormatParams.collItemDelim);
localDirectoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim);
localDirectoryDesc.setFieldEscape(rowFormatParams.fieldEscape);
localDirectoryDesc.setNullFormat(rowFormatParams.nullFormat);
localDirectoryDescIsSet=true;
directoryDesc.setFieldDelim(rowFormatParams.fieldDelim);
directoryDesc.setLineDelim(rowFormatParams.lineDelim);
directoryDesc.setCollItemDelim(rowFormatParams.collItemDelim);
directoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim);
directoryDesc.setFieldEscape(rowFormatParams.fieldEscape);
directoryDesc.setNullFormat(rowFormatParams.nullFormat);
directoryDescIsSet=true;
break;
case HiveParser.TOK_TABLESERIALIZER:
ASTNode serdeChild = (ASTNode) child.getChild(0);
storageFormat.setSerde(unescapeSQLString(serdeChild.getChild(0).getText()));
localDirectoryDesc.setSerName(storageFormat.getSerde());
localDirectoryDescIsSet=true;
directoryDesc.setSerName(storageFormat.getSerde());
directoryDescIsSet=true;
break;
}
}
}
if (localDirectoryDescIsSet){
qb.setLocalDirectoryDesc(localDirectoryDesc);
if (directoryDescIsSet){
qb.setDirectoryDesc(directoryDesc);
}
break;
}
Expand Down Expand Up @@ -6500,7 +6503,7 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input)
String fileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat);
} else {
table_desc = PlanUtils.getDefaultTableDesc(qb.getLLocalDirectoryDesc(), cols, colTypes);
table_desc = PlanUtils.getDefaultTableDesc(qb.getDirectoryDesc(), cols, colTypes);
}
} else {
table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
Expand Down
42 changes: 21 additions & 21 deletions ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
Expand Up @@ -107,52 +107,52 @@ public static MapredWork getMapRedWork() {
}
}

public static TableDesc getDefaultTableDesc(CreateTableDesc localDirectoryDesc,
public static TableDesc getDefaultTableDesc(CreateTableDesc directoryDesc,
String cols, String colTypes ) {
TableDesc ret = getDefaultTableDesc(Integer.toString(Utilities.ctrlaCode), cols,
colTypes, false);;
if (localDirectoryDesc == null) {
if (directoryDesc == null) {
return ret;
}

try {
Properties properties = ret.getProperties();

if (localDirectoryDesc.getFieldDelim() != null) {
if (directoryDesc.getFieldDelim() != null) {
properties.setProperty(
serdeConstants.FIELD_DELIM, localDirectoryDesc.getFieldDelim());
serdeConstants.FIELD_DELIM, directoryDesc.getFieldDelim());
properties.setProperty(
serdeConstants.SERIALIZATION_FORMAT, localDirectoryDesc.getFieldDelim());
serdeConstants.SERIALIZATION_FORMAT, directoryDesc.getFieldDelim());
}
if (localDirectoryDesc.getLineDelim() != null) {
if (directoryDesc.getLineDelim() != null) {
properties.setProperty(
serdeConstants.LINE_DELIM, localDirectoryDesc.getLineDelim());
serdeConstants.LINE_DELIM, directoryDesc.getLineDelim());
}
if (localDirectoryDesc.getCollItemDelim() != null) {
if (directoryDesc.getCollItemDelim() != null) {
properties.setProperty(
serdeConstants.COLLECTION_DELIM, localDirectoryDesc.getCollItemDelim());
serdeConstants.COLLECTION_DELIM, directoryDesc.getCollItemDelim());
}
if (localDirectoryDesc.getMapKeyDelim() != null) {
if (directoryDesc.getMapKeyDelim() != null) {
properties.setProperty(
serdeConstants.MAPKEY_DELIM, localDirectoryDesc.getMapKeyDelim());
serdeConstants.MAPKEY_DELIM, directoryDesc.getMapKeyDelim());
}
if (localDirectoryDesc.getFieldEscape() !=null) {
if (directoryDesc.getFieldEscape() !=null) {
properties.setProperty(
serdeConstants.ESCAPE_CHAR, localDirectoryDesc.getFieldEscape());
serdeConstants.ESCAPE_CHAR, directoryDesc.getFieldEscape());
}
if (localDirectoryDesc.getSerName() != null) {
if (directoryDesc.getSerName() != null) {
properties.setProperty(
serdeConstants.SERIALIZATION_LIB, localDirectoryDesc.getSerName());
serdeConstants.SERIALIZATION_LIB, directoryDesc.getSerName());
}
if (localDirectoryDesc.getOutputFormat() != null){
ret.setOutputFileFormatClass(JavaUtils.loadClass(localDirectoryDesc.getOutputFormat()));
if (directoryDesc.getOutputFormat() != null){
ret.setOutputFileFormatClass(JavaUtils.loadClass(directoryDesc.getOutputFormat()));
}
if (localDirectoryDesc.getNullFormat() != null) {
if (directoryDesc.getNullFormat() != null) {
properties.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT,
localDirectoryDesc.getNullFormat());
directoryDesc.getNullFormat());
}
if (localDirectoryDesc.getTblProps() != null) {
properties.putAll(localDirectoryDesc.getTblProps());
if (directoryDesc.getTblProps() != null) {
properties.putAll(directoryDesc.getTblProps());
}

} catch (ClassNotFoundException e) {
Expand Down
141 changes: 141 additions & 0 deletions ql/src/test/queries/clientpositive/insert_overwrite_directory.q
@@ -0,0 +1,141 @@
-- HIVE-5672 regression tests: INSERT OVERWRITE DIRECTORY on a non-LOCAL
-- (DFS) target must honor a custom row format (delimiters, SerDe, STORED AS).

-- Baseline: default row format (ctrl-A field separator).
insert overwrite directory '../../data/files/src_table_1'
select * from src ;
dfs -cat ../../data/files/src_table_1/000000_0;

-- Custom field delimiter on a DFS (non-local) directory target.
insert overwrite directory '../../data/files/src_table_2'
row format delimited
FIELDS TERMINATED BY ':'
select * from src ;

dfs -cat ../../data/files/src_table_2/000000_0;

-- Source table with array columns, loaded from a local fixture file.
create table array_table (a array<string>, b array<string>)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ',';

load data local inpath "../../data/files/array_table.txt" overwrite into table array_table;

-- Arrays written with the default separators.
insert overwrite directory '../../data/files/array_table_1'
select * from array_table;
dfs -cat ../../data/files/array_table_1/000000_0;

-- Arrays with custom field and collection-item delimiters.
insert overwrite directory '../../data/files/array_table_2'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ':'
COLLECTION ITEMS TERMINATED BY '#'
select * from array_table;

dfs -cat ../../data/files/array_table_2/000000_0;

-- Same delimiters, but with a reordered explicit projection.
insert overwrite directory '../../data/files/array_table_2_withfields'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ':'
COLLECTION ITEMS TERMINATED BY '#'
select b,a from array_table;

dfs -cat ../../data/files/array_table_2_withfields/000000_0;


-- Source table with a map column.
create table map_table (foo STRING , bar MAP<STRING, STRING>)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':'
STORED AS TEXTFILE;

load data local inpath "../../data/files/map_table.txt" overwrite into table map_table;

-- Maps written with the default separators.
insert overwrite directory '../../data/files/map_table_1'
select * from map_table;
dfs -cat ../../data/files/map_table_1/000000_0;

-- Maps with custom field, collection-item, and map-key delimiters.
insert overwrite directory '../../data/files/map_table_2'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ':'
COLLECTION ITEMS TERMINATED BY '#'
MAP KEYS TERMINATED BY '='
select * from map_table;

dfs -cat ../../data/files/map_table_2/000000_0;

-- Same delimiters with a reordered projection.
insert overwrite directory '../../data/files/map_table_2_withfields'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ':'
COLLECTION ITEMS TERMINATED BY '#'
MAP KEYS TERMINATED BY '='
select bar,foo from map_table;

dfs -cat ../../data/files/map_table_2_withfields/000000_0;

-- Explicit SerDe on a directory target (JSON-style output for arrays).
insert overwrite directory '../../data/files/array_table_3'
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.DelimitedJSONSerDe'
STORED AS TEXTFILE
select * from array_table;

dfs -cat ../../data/files/array_table_3/000000_0;


-- SerDe with SERDEPROPERTIES (serialization format, quoting, null format).
insert overwrite directory '../../data/files/array_table_4'
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format'= 'org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol',
'quote.delim'= '("|\\[|\\])', 'field.delim'=', ',
'serialization.null.format'='-' ) STORED AS TEXTFILE
select a, null, b from array_table;

dfs -cat ../../data/files/array_table_4/000000_0;

-- Explicit SerDe output for map columns.
insert overwrite directory '../../data/files/map_table_3'
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.DelimitedJSONSerDe'
STORED AS TEXTFILE
select * from map_table;

dfs -cat ../../data/files/map_table_3/000000_0;

-- SerDe with SERDEPROPERTIES for map columns, including a null column.
insert overwrite directory '../../data/files/map_table_4'
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'serialization.format'= 'org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol',
'quote.delim'= '("|\\[|\\])', 'field.delim'=', ',
'serialization.null.format'='-' ) STORED AS TEXTFILE
select foo, null, bar from map_table;

dfs -cat ../../data/files/map_table_4/000000_0;

-- STORED AS RCFILE on a directory target; verify by copying the output into
-- a temp dir, mounting it as an external RCFILE table, and re-emitting it as
-- tab-delimited text.
insert overwrite directory '../../data/files/rctable'
STORED AS RCFILE
select value,key from src;

dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/rctable/temp;
dfs -rmr ${system:test.tmp.dir}/rctable;
dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/rctable;
dfs -put ../../data/files/rctable/000000_0 ${system:test.tmp.dir}/rctable/000000_0;

create external table rctable(value string, key string)
STORED AS RCFILE
LOCATION '${system:test.tmp.dir}/rctable';

insert overwrite directory '../../data/files/rctable_out'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
select key,value from rctable;

dfs -cat ../../data/files/rctable_out/000000_0;

-- Cleanup: drop the test tables and remove every output directory.
drop table rctable;
drop table array_table;
drop table map_table;
dfs -rmr ${system:test.tmp.dir}/rctable;
dfs -rmr ../../data/files/array_table_1;
dfs -rmr ../../data/files/array_table_2;
dfs -rmr ../../data/files/array_table_3;
dfs -rmr ../../data/files/array_table_4;
dfs -rmr ../../data/files/map_table_1;
dfs -rmr ../../data/files/map_table_2;
dfs -rmr ../../data/files/map_table_3;
dfs -rmr ../../data/files/map_table_4;
dfs -rmr ../../data/files/rctable;
dfs -rmr ../../data/files/rctable_out;
dfs -rmr ../../data/files/src_table_1;
dfs -rmr ../../data/files/src_table_2;

0 comments on commit f3a23dc

Please sign in to comment.