/
data-format.ts
83 lines (73 loc) · 2.55 KB
/
data-format.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/**
 * Holds the absolute class name of the Hadoop `InputFormat` that is used to read table files.
 */
export class InputFormat {
  /**
   * `InputFormat` for plain text files. Files are split into lines, where either a linefeed or a
   * carriage-return marks the end of a line. Keys are the position in the file and values are
   * the line of text.
   *
   * @see https://hadoop.apache.org/docs/stable/api/org/apache/hadoop/mapred/TextInputFormat.html
   */
  public static readonly TEXT_INPUT_FORMAT = new InputFormat('org.apache.hadoop.mapred.TextInputFormat');

  /**
   * Absolute class name of the `InputFormat`, exactly as passed to the constructor.
   */
  public readonly className: string;

  /**
   * @param className absolute class name of the `InputFormat`; stored as-is, no validation is performed.
   */
  constructor(className: string) {
    this.className = className;
  }
}
/**
 * Holds the absolute class name of the Hadoop `OutputFormat` that is used to write table files.
 */
export class OutputFormat {
  /**
   * `OutputFormat` that writes text data with a null key, i.e. only the value is written.
   *
   * @see https://hive.apache.org/javadocs/r2.2.0/api/org/apache/hadoop/hive/ql/io/HiveIgnoreKeyTextOutputFormat.html
   */
  public static readonly HIVE_IGNORE_KEY_TEXT_OUTPUT_FORMAT = new OutputFormat('org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat');

  /**
   * Absolute class name of the `OutputFormat`, exactly as passed to the constructor.
   */
  public readonly className: string;

  /**
   * @param className absolute class name of the `OutputFormat`; stored as-is, no validation is performed.
   */
  constructor(className: string) {
    this.className = className;
  }
}
/**
 * Holds the class name of the serialization library (SerDe) that is used to serialize and
 * deserialize table records.
 *
 * @see https://cwiki.apache.org/confluence/display/Hive/SerDe
 */
export class SerializationLibrary {
  /**
   * The `org.apache.hive.hcatalog.data.JsonSerDe` library.
   *
   * @see https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-JSON
   */
  public static readonly HIVE_JSON = new SerializationLibrary('org.apache.hive.hcatalog.data.JsonSerDe');

  /**
   * The `org.openx.data.jsonserde.JsonSerDe` library.
   *
   * @see https://github.com/rcongiu/Hive-JSON-Serde
   */
  public static readonly OPENX_JSON = new SerializationLibrary('org.openx.data.jsonserde.JsonSerDe');

  /**
   * Class name of the serialization library, exactly as passed to the constructor.
   */
  public readonly className: string;

  /**
   * @param className class name of the serialization library; stored as-is, no validation is performed.
   */
  constructor(className: string) {
    this.className = className;
  }
}
/**
 * Bundles the input format, the output format and the serialization library (SerDe) that
 * together describe one data format.
 */
export interface DataFormat {
  /**
   * The `InputFormat` of this data format.
   */
  readonly inputFormat: InputFormat;

  /**
   * The `OutputFormat` of this data format.
   */
  readonly outputFormat: OutputFormat;

  /**
   * The serialization library of this data format.
   */
  readonly serializationLibrary: SerializationLibrary;
}
export namespace DataFormat {
  /**
   * Plain text files whose content is in JSON format.
   *
   * Combines the text input format and the Hive ignore-key text output format with the
   * OpenX JSON SerDe, which handles serialization and deserialization.
   *
   * @see https://docs.aws.amazon.com/athena/latest/ug/json.html
   */
  export const Json: DataFormat = {
    inputFormat: InputFormat.TEXT_INPUT_FORMAT,
    outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT_OUTPUT_FORMAT,
    serializationLibrary: SerializationLibrary.OPENX_JSON,
  };
}