-
Notifications
You must be signed in to change notification settings - Fork 28
/
gen_create_tbl.py
60 lines (45 loc) · 1.57 KB
/
gen_create_tbl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/python
import json
import subprocess
import sys
def convertType(type):
    """Map an Avro primitive type name to the equivalent Hive column type.

    Only "long" needs translation (Hive calls it "bigint"); every other
    primitive name (int, string, double, float, boolean, ...) is shared
    between Avro and Hive and passes through unchanged.
    """
    # Parameter deliberately keeps its original name `type` (shadowing the
    # builtin) so existing keyword-style callers are not broken.
    return {"long": "bigint"}.get(type, type)
def gen_columns(schema):
    """Build the "(name1 type1,name2 type2,...)" column list for a CREATE TABLE.

    `schema` is a parsed Avro schema dict. A field's "type" may be either a
    plain string (e.g. "long") or a union list (e.g. ["null", "long"] for an
    optional field). For unions the first non-"null" branch is used, since
    "null" is not a valid Hive column type.

    NOTE(review): complex union branches (dicts, e.g. records/arrays) are not
    handled here — same limitation as the original code.
    """
    cols = []
    for field in schema['fields']:
        ftype = field['type']
        if isinstance(ftype, list):
            # Pick the first real branch of the union; fall back to the
            # first entry if the union is (oddly) all-"null".
            ftype = next((t for t in ftype if t != "null"), ftype[0])
        cols.append('%s %s' % (field['name'], convertType(ftype)))
    return "(" + ",".join(cols) + ")"
def read_schema_file(hdfspath):
    """Return the raw bytes of a file stored in HDFS.

    Shells out to `hdfs dfs -cat <hdfspath>` (list form, no shell) and
    captures stdout.

    Raises RuntimeError if the hdfs command exits non-zero, instead of
    silently returning empty output that would later fail inside
    json.loads with a misleading error.
    """
    proc = subprocess.Popen(['hdfs', 'dfs', '-cat', hdfspath],
                            stdout=subprocess.PIPE)
    out = proc.communicate()[0]
    if proc.returncode != 0:
        raise RuntimeError("hdfs dfs -cat failed for %s (exit %d)"
                           % (hdfspath, proc.returncode))
    return out
def usage(argNum):
    """Print a usage message and exit with status 1.

    argNum -- the (wrong) argument count actually received; echoed back so
    the caller can see what went wrong.
    """
    # str() replaces the Python-2-only backtick repr; sys.argv[0] replaces
    # the undefined name `args[0]` in the original (a NameError).
    print("Wrong number of arguments: " + str(argNum))
    print(sys.argv)
    print("usage: " + sys.argv[0]
          + " table_name hdfs_data_location hdfs_avro_schema_location"
            " [--partitions partitions] ")
    sys.exit(1)
# Hive DDL skeleton filled in by main(): table name, column list,
# optional PARTITIONED BY clause, data location, schema URL.
DDL_TEMPLATE = """CREATE EXTERNAL TABLE %s
%s
%s
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
location '%s'
TBLPROPERTIES ( 'avro.schema.url'='%s')"""


def main():
    """Parse argv, fetch the Avro schema from HDFS, and print the Hive DDL.

    Expected invocations:
      gen_create_tbl.py table_name hdfs_data_location hdfs_avro_schema_location
      gen_create_tbl.py table_name hdfs_data_location hdfs_avro_schema_location \
          --partitions <partition_spec>

    Any other argument count exits via usage(). The Python-2-only
    `print """ + '"""..."""' + """` statement is replaced with the print()
    function so the script runs on both Python 2 and 3.
    """
    argNum = len(sys.argv)
    if argNum == 4:
        partitions = " "  # no partitioning clause
    elif argNum == 6:
        # sys.argv[4] is expected to be the literal "--partitions"; it is
        # not validated (matches original behaviour). sys.argv[5] holds the
        # partition spec, e.g. "(dt string)".
        partitions = "partitioned by " + sys.argv[5]
    else:
        usage(argNum)
    table_name = sys.argv[1]
    hdfs_data_location = sys.argv[2]
    hdfs_avro_schema_location = sys.argv[3]
    schema_obj = json.loads(read_schema_file(hdfs_avro_schema_location))
    print(DDL_TEMPLATE % (table_name, gen_columns(schema_obj), partitions,
                          hdfs_data_location, hdfs_avro_schema_location))


if __name__ == "__main__":
    main()