This repository has been archived by the owner on Jul 9, 2021. It is now read-only.
/
HiveTypes.java
157 lines (141 loc) · 5.56 KB
/
HiveTypes.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.hive;

import static org.apache.avro.LogicalTypes.Decimal;

import java.sql.Types;
import java.util.Locale;

import org.apache.avro.Schema;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.sqoop.SqoopOptions;
import org.apache.sqoop.config.ConfigurationConstants;
/**
 * Defines conversion between SQL types and Hive types.
 *
 * <p>Non-instantiable utility class; all members are static.
 */
public final class HiveTypes {

  // Hive column type names emitted into generated DDL.
  private static final String HIVE_TYPE_TINYINT = "TINYINT";
  private static final String HIVE_TYPE_INT = "INT";
  private static final String HIVE_TYPE_BIGINT = "BIGINT";
  private static final String HIVE_TYPE_FLOAT = "FLOAT";
  private static final String HIVE_TYPE_DOUBLE = "DOUBLE";
  private static final String HIVE_TYPE_STRING = "STRING";
  private static final String HIVE_TYPE_BOOLEAN = "BOOLEAN";
  private static final String HIVE_TYPE_BINARY = "BINARY";
  private static final String HIVE_TYPE_DECIMAL = "DECIMAL";

  public static final Log LOG = LogFactory.getLog(HiveTypes.class.getName());

  private HiveTypes() { }

  /**
   * Given JDBC SQL types coming from another database, what is the best
   * mapping to a Hive-specific type?
   *
   * @param sqlType a type constant from {@link java.sql.Types}
   * @return the Hive type name for {@code sqlType}, or {@code null} when no
   *         mapping is defined
   */
  public static String toHiveType(int sqlType) {
    switch (sqlType) {
      case Types.INTEGER:
      case Types.SMALLINT:
        return HIVE_TYPE_INT;
      case Types.VARCHAR:
      case Types.CHAR:
      case Types.LONGVARCHAR:
      case Types.NVARCHAR:
      case Types.NCHAR:
      case Types.LONGNVARCHAR:
      // Date/time and CLOB values are imported as their string form, since
      // Hive (at this mapping's level) has no precise equivalent.
      case Types.DATE:
      case Types.TIME:
      case Types.TIMESTAMP:
      case Types.CLOB:
        return HIVE_TYPE_STRING;
      // NUMERIC/DECIMAL are widened to DOUBLE here (lossy); see
      // isHiveTypeImprovised() which reports these as imprecise mappings.
      case Types.NUMERIC:
      case Types.DECIMAL:
      case Types.FLOAT:
      case Types.DOUBLE:
      case Types.REAL:
        return HIVE_TYPE_DOUBLE;
      case Types.BIT:
      case Types.BOOLEAN:
        return HIVE_TYPE_BOOLEAN;
      case Types.TINYINT:
        return HIVE_TYPE_TINYINT;
      case Types.BIGINT:
        return HIVE_TYPE_BIGINT;
      default:
        // TODO(aaron): Support BINARY, VARBINARY, LONGVARBINARY, DISTINCT,
        // BLOB, ARRAY, STRUCT, REF, JAVA_OBJECT.
        return null;
    }
  }

  /**
   * Maps an Avro schema to the corresponding Hive type name.
   *
   * <p>For a UNION schema the first non-null branch is used (the common
   * Avro pattern of {@code ["null", T]}).
   *
   * @param schema  the Avro schema to map
   * @param options used to decide whether logical DECIMAL mapping is enabled
   * @return the Hive type name
   * @throws RuntimeException when no Hive mapping exists for the Avro type
   */
  public static String toHiveType(Schema schema, SqoopOptions options) {
    if (schema.getType() == Schema.Type.UNION) {
      for (Schema subSchema : schema.getTypes()) {
        if (subSchema.getType() != Schema.Type.NULL) {
          return toHiveType(subSchema, options);
        }
      }
    }

    Schema.Type avroType = schema.getType();
    switch (avroType) {
      case BOOLEAN:
        return HIVE_TYPE_BOOLEAN;
      case INT:
        return HIVE_TYPE_INT;
      case LONG:
        return HIVE_TYPE_BIGINT;
      case FLOAT:
        return HIVE_TYPE_FLOAT;
      case DOUBLE:
        return HIVE_TYPE_DOUBLE;
      case STRING:
      case ENUM:
        return HIVE_TYPE_STRING;
      case BYTES:
        // BYTES may carry a logical decimal annotation; resolve it there.
        return mapToDecimalOrBinary(schema, options);
      case FIXED:
        return HIVE_TYPE_BINARY;
      default:
        throw new RuntimeException(String.format("There is no Hive type mapping defined for the Avro type of: %s ", avroType.getName()));
    }
  }

  /**
   * Resolves a BYTES schema to either DECIMAL(p, s) (when parquet logical
   * decimal support is enabled and the schema carries a decimal logical
   * type) or BINARY otherwise.
   */
  private static String mapToDecimalOrBinary(Schema schema, SqoopOptions options) {
    boolean logicalTypesEnabled = options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false);
    if (logicalTypesEnabled && schema.getLogicalType() instanceof Decimal) {
      Decimal decimal = (Decimal) schema.getLogicalType();

      // Trim precision and scale to Hive's maximum values; warn because the
      // table definition will then be narrower than the stored data.
      int precision = Math.min(HiveDecimal.MAX_PRECISION, decimal.getPrecision());
      if (precision < decimal.getPrecision()) {
        LOG.warn("Warning! Precision in the Hive table definition will be smaller than the actual precision of the column on storage! Hive may not be able to read data from this column.");
      }

      int scale = Math.min(HiveDecimal.MAX_SCALE, decimal.getScale());
      if (scale < decimal.getScale()) {
        LOG.warn("Warning! Scale in the Hive table definition will be smaller than the actual scale of the column on storage! Hive may not be able to read data from this column.");
      }

      // Locale.ROOT pins %d to ASCII digits; the default locale could emit
      // non-Latin digits (e.g. Arabic-Indic), breaking the generated DDL.
      return String.format(Locale.ROOT, "%s (%d, %d)", HIVE_TYPE_DECIMAL, precision, scale);
    } else {
      return HIVE_TYPE_BINARY;
    }
  }

  /**
   * @return true if a sql type can't be translated to a precise match
   * in Hive, and we have to cast it to something more generic.
   */
  public static boolean isHiveTypeImprovised(int sqlType) {
    return sqlType == Types.DATE || sqlType == Types.TIME
        || sqlType == Types.TIMESTAMP
        || sqlType == Types.DECIMAL
        || sqlType == Types.NUMERIC;
  }
}