Skip to content

Commit

Permalink
fix(glue): tables not including classification (#9923)
Browse files Browse the repository at this point in the history
Fixes #9902 

~I also added support for the XML data type that's available as a choice when creating Glue tables in the AWS console.~

~I've also added a commit that adds optional `csvSeparator` and `rowTag` props. I'm not very experienced with Glue, so I'm not sure how much value this provides or whether this is the best way to organize the API; I'm open to dropping those changes from this PR and revisiting them later.~

----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
Chriscbr committed Sep 2, 2020
1 parent 0ccbc5d commit 61b45f3
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 4 deletions.
57 changes: 57 additions & 0 deletions packages/@aws-cdk/aws-glue/lib/data-format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,45 @@ export class SerializationLibrary {
constructor(public readonly className: string) {}
}

/**
 * Classification string given to tables with this data format.
 *
 * Each instance wraps one raw classification string; the static members
 * below provide ready-made instances, and the public constructor allows
 * wrapping any other string.
 *
 * @see https://docs.aws.amazon.com/glue/latest/dg/add-classifier.html#classifier-built-in
 */
export class ClassificationString {
  /**
   * Classification for Avro data (`'avro'`).
   *
   * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-avro
   */
  public static readonly AVRO = new ClassificationString('avro');

  /**
   * Classification for CSV data (`'csv'`).
   *
   * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-csv
   */
  public static readonly CSV = new ClassificationString('csv');

  /**
   * Classification for JSON data (`'json'`).
   *
   * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-json
   */
  public static readonly JSON = new ClassificationString('json');

  /**
   * Classification for XML data (`'xml'`).
   *
   * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-xml
   */
  public static readonly XML = new ClassificationString('xml');

  /**
   * Classification for Parquet data (`'parquet'`).
   *
   * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-parquet
   */
  public static readonly PARQUET = new ClassificationString('parquet');

  /**
   * Classification for ORC data (`'orc'`).
   *
   * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-format.html#aws-glue-programming-etl-format-orc
   */
  public static readonly ORC = new ClassificationString('orc');

  /**
   * The raw classification string wrapped by this instance.
   */
  public readonly value: string;

  /**
   * Wraps an arbitrary classification string.
   *
   * @param value the raw classification string to expose via `value`.
   */
  constructor(value: string) {
    this.value = value;
  }
}

/**
* Properties of a DataFormat instance.
*/
Expand All @@ -155,6 +194,13 @@ export interface DataFormatProps {
* Serialization library for this data format.
*/
readonly serializationLibrary: SerializationLibrary;

/**
* Classification string given to tables with this data format.
*
* @default - No classification is specified.
*/
readonly classificationString?: ClassificationString;
}

/**
Expand All @@ -181,6 +227,7 @@ export class DataFormat {
inputFormat: InputFormat.AVRO,
outputFormat: OutputFormat.AVRO,
serializationLibrary: SerializationLibrary.AVRO,
classificationString: ClassificationString.AVRO,
});

/**
Expand All @@ -203,6 +250,7 @@ export class DataFormat {
inputFormat: InputFormat.TEXT,
outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT,
serializationLibrary: SerializationLibrary.OPEN_CSV,
classificationString: ClassificationString.CSV,
});

/**
Expand All @@ -215,6 +263,7 @@ export class DataFormat {
inputFormat: InputFormat.TEXT,
outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT,
serializationLibrary: SerializationLibrary.OPENX_JSON,
classificationString: ClassificationString.JSON,
});

/**
Expand All @@ -237,6 +286,7 @@ export class DataFormat {
inputFormat: InputFormat.ORC,
outputFormat: OutputFormat.ORC,
serializationLibrary: SerializationLibrary.ORC,
classificationString: ClassificationString.ORC,
});

/**
Expand All @@ -248,6 +298,7 @@ export class DataFormat {
inputFormat: InputFormat.PARQUET,
outputFormat: OutputFormat.PARQUET,
serializationLibrary: SerializationLibrary.PARQUET,
classificationString: ClassificationString.PARQUET,
});

/**
Expand Down Expand Up @@ -276,9 +327,15 @@ export class DataFormat {
*/
public readonly serializationLibrary: SerializationLibrary;

/**
* Classification string given to tables with this data format.
*/
public readonly classificationString?: ClassificationString;

public constructor(props: DataFormatProps) {
this.inputFormat = props.inputFormat;
this.outputFormat = props.outputFormat;
this.serializationLibrary = props.serializationLibrary;
this.classificationString = props.classificationString;
}
}
1 change: 1 addition & 0 deletions packages/@aws-cdk/aws-glue/lib/table.ts
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ export class Table extends Resource implements ITable {
partitionKeys: renderColumns(props.partitionKeys),

parameters: {
classification: props.dataFormat.classificationString?.value,
has_encrypted_data: this.encryption !== TableEncryption.UNENCRYPTED,
},
storageDescriptor: {
Expand Down
9 changes: 8 additions & 1 deletion packages/@aws-cdk/aws-glue/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,14 @@
"docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.ORC",
"docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.PARQUET",
"docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.REGEXP",
"docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.className"
"docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.className",
"docs-public-apis:@aws-cdk/aws-glue.ClassificationString.AVRO",
"docs-public-apis:@aws-cdk/aws-glue.ClassificationString.CSV",
"docs-public-apis:@aws-cdk/aws-glue.ClassificationString.JSON",
"docs-public-apis:@aws-cdk/aws-glue.ClassificationString.XML",
"docs-public-apis:@aws-cdk/aws-glue.ClassificationString.PARQUET",
"docs-public-apis:@aws-cdk/aws-glue.ClassificationString.ORC",
"docs-public-apis:@aws-cdk/aws-glue.ClassificationString.value"
]
},
"awscdkio": {
Expand Down
2 changes: 1 addition & 1 deletion packages/@aws-cdk/aws-glue/test/database.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { deepEqual, throws } from 'assert';
import { expect } from '@aws-cdk/assert';
import '@aws-cdk/assert/jest';
import { Stack } from '@aws-cdk/core';
import { deepEqual, throws } from 'assert';
import * as glue from '../lib';

test('default database does not create a bucket', () => {
Expand Down
7 changes: 6 additions & 1 deletion packages/@aws-cdk/aws-glue/test/integ.table.expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"Description": "avro_table generated by CDK",
"Name": "avro_table",
"Parameters": {
"classification": "avro",
"has_encrypted_data": false
},
"PartitionKeys": [
Expand Down Expand Up @@ -98,6 +99,7 @@
"Description": "csv_table generated by CDK",
"Name": "csv_table",
"Parameters": {
"classification": "csv",
"has_encrypted_data": false
},
"PartitionKeys": [
Expand Down Expand Up @@ -167,6 +169,7 @@
"Description": "json_table generated by CDK",
"Name": "json_table",
"Parameters": {
"classification": "json",
"has_encrypted_data": false
},
"PartitionKeys": [
Expand Down Expand Up @@ -236,6 +239,7 @@
"Description": "parquet_table generated by CDK",
"Name": "parquet_table",
"Parameters": {
"classification": "parquet",
"has_encrypted_data": false
},
"PartitionKeys": [
Expand Down Expand Up @@ -397,6 +401,7 @@
"Description": "my_encrypted_table generated by CDK",
"Name": "my_encrypted_table",
"Parameters": {
"classification": "json",
"has_encrypted_data": true
},
"PartitionKeys": [
Expand Down Expand Up @@ -827,4 +832,4 @@
}
}
}
}
}
14 changes: 13 additions & 1 deletion packages/@aws-cdk/aws-glue/test/table.test.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import { deepEqual, doesNotThrow, equal, notEqual, ok } from 'assert';
import { expect as cdkExpect, haveResource, ResourcePart } from '@aws-cdk/assert';
import '@aws-cdk/assert/jest';
import * as iam from '@aws-cdk/aws-iam';
import * as kms from '@aws-cdk/aws-kms';
import * as s3 from '@aws-cdk/aws-s3';
import * as cdk from '@aws-cdk/core';
import { deepEqual, doesNotThrow, equal, notEqual, ok } from 'assert';
import * as glue from '../lib';

test('unpartitioned JSON table', () => {
Expand Down Expand Up @@ -43,6 +43,7 @@ test('unpartitioned JSON table', () => {
Name: 'table',
Description: 'table generated by CDK',
Parameters: {
classification: 'json',
has_encrypted_data: false,
},
StorageDescriptor: {
Expand Down Expand Up @@ -114,6 +115,7 @@ test('partitioned JSON table', () => {
Name: 'table',
Description: 'table generated by CDK',
Parameters: {
classification: 'json',
has_encrypted_data: false,
},
PartitionKeys: [
Expand Down Expand Up @@ -185,6 +187,7 @@ test('compressed table', () => {
Name: 'table',
Description: 'table generated by CDK',
Parameters: {
classification: 'json',
has_encrypted_data: false,
},
StorageDescriptor: {
Expand Down Expand Up @@ -274,6 +277,7 @@ test('encrypted table: SSE-S3', () => {
Name: 'table',
Description: 'table generated by CDK',
Parameters: {
classification: 'json',
has_encrypted_data: true,
},
StorageDescriptor: {
Expand Down Expand Up @@ -417,6 +421,7 @@ test('encrypted table: SSE-KMS (implicitly created key)', () => {
Name: 'table',
Description: 'table generated by CDK',
Parameters: {
classification: 'json',
has_encrypted_data: true,
},
StorageDescriptor: {
Expand Down Expand Up @@ -550,6 +555,7 @@ test('encrypted table: SSE-KMS (explicitly created key)', () => {
Description: 'table generated by CDK',
Name: 'table',
Parameters: {
classification: 'json',
has_encrypted_data: true,
},
StorageDescriptor: {
Expand Down Expand Up @@ -628,6 +634,7 @@ test('encrypted table: SSE-KMS_MANAGED', () => {
Name: 'table',
Description: 'table generated by CDK',
Parameters: {
classification: 'json',
has_encrypted_data: true,
},
StorageDescriptor: {
Expand Down Expand Up @@ -741,6 +748,7 @@ test('encrypted table: CSE-KMS (implicitly created key)', () => {
Description: 'table generated by CDK',
Name: 'table',
Parameters: {
classification: 'json',
has_encrypted_data: true,
},
StorageDescriptor: {
Expand Down Expand Up @@ -856,6 +864,7 @@ test('encrypted table: CSE-KMS (explicitly created key)', () => {
Description: 'table generated by CDK',
Name: 'table',
Parameters: {
classification: 'json',
has_encrypted_data: true,
},
StorageDescriptor: {
Expand Down Expand Up @@ -973,6 +982,7 @@ test('encrypted table: CSE-KMS (explicitly passed bucket and key)', () => {
Description: 'table generated by CDK',
Name: 'table',
Parameters: {
classification: 'json',
has_encrypted_data: true,
},
StorageDescriptor: {
Expand Down Expand Up @@ -1040,6 +1050,7 @@ test('explicit s3 bucket and prefix', () => {
Description: 'table generated by CDK',
Name: 'table',
Parameters: {
classification: 'json',
has_encrypted_data: false,
},
StorageDescriptor: {
Expand Down Expand Up @@ -1107,6 +1118,7 @@ test('explicit s3 bucket and with empty prefix', () => {
Description: 'table generated by CDK',
Name: 'table',
Parameters: {
classification: 'json',
has_encrypted_data: false,
},
StorageDescriptor: {
Expand Down

0 comments on commit 61b45f3

Please sign in to comment.